[patch 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
Paul Menzel
pmenzel at molgen.mpg.de
Thu Apr 20 09:47:31 PDT 2023
Dear Thomas,
Am 20.04.23 um 17:57 schrieb Thomas Gleixner:
> On Thu, Apr 20 2023 at 07:51, Sean Christopherson wrote:
>> On Thu, Apr 20, 2023, Thomas Gleixner wrote:
>>> On Thu, Apr 20 2023 at 10:23, Andrew Cooper wrote:
>>>> On 20/04/2023 9:32 am, Thomas Gleixner wrote:
>>>>> On Wed, Apr 19, 2023, Andrew Cooper wrote:
>>>>>> This was changed in x2APIC, which made the x2APIC_ID immutable.
>>>
>>>>> I'm pondering to simply deny parallel mode if x2APIC is not there.
>>>>
>>>> I'm not sure if that will help much.
>>>
>>> Spoilsport.
>>
>> LOL, well let me pile on then. x2APIC IDs aren't immutable on AMD hardware. The
>> ID is read-only when the CPU is in x2APIC mode, but any changes made to the ID
>> while the CPU is in xAPIC mode survive the transition to x2APIC. From the APM:
>>
>> A value previously written by software to the 8-bit APIC_ID register (MMIO offset
>> 30h) is converted by hardware into the appropriate format and reflected into the
>> 32-bit x2APIC_ID register (MSR 802h).
>>
>> FWIW, my observations from testing on bare metal are that the xAPIC ID is effectively
>> read-only (writes are dropped) on Intel CPUs as far back as Haswell, while the above
>> behavior described in the APM holds true on at least Rome and Milan.
>>
>> My guess is that Intel's uArch specific behavior of the xAPIC ID being read-only
>> was introduced when x2APIC came along, but I didn't test farther back than Haswell.
>
> I'm not so worried about modern hardware. The horrorshow is the old muck
> as demonstrated and of course there is virt :)
>
> Something like the completely untested below should just work whatever
> APIC ID the BIOS decided to dice.
>
> That might just work on SEV too without that GHCB muck, but what do I
> know.
>
> Thanks,
>
> tglx
> ---
> --- a/arch/x86/include/asm/apicdef.h
> +++ b/arch/x86/include/asm/apicdef.h
> @@ -138,7 +138,8 @@
> #define APIC_EILVT_MASKED (1 << 16)
>
> #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
> -#define APIC_BASE_MSR 0x800
> +#define APIC_BASE_MSR 0x800
> +#define APIC_X2APIC_ID_MSR 0x802
> #define XAPIC_ENABLE (1UL << 11)
> #define X2APIC_ENABLE (1UL << 10)
>
> @@ -162,6 +163,7 @@
> #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
> #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
>
> +#ifndef __ASSEMBLY__
> /*
> * the local APIC register structure, memory mapped. Not terribly well
> * tested, but we might eventually use this one in the future - the
> @@ -435,4 +437,5 @@ enum apic_delivery_modes {
> APIC_DELIVERY_MODE_EXTINT = 7,
> };
>
> +#endif /* !__ASSEMBLY__ */
> #endif /* _ASM_X86_APICDEF_H */
> --- a/arch/x86/include/asm/smp.h
> +++ b/arch/x86/include/asm/smp.h
> @@ -195,14 +195,13 @@ extern void nmi_selftest(void);
> #endif
>
> extern unsigned int smpboot_control;
> +extern unsigned long apic_mmio_base;
>
> #endif /* !__ASSEMBLY__ */
>
> /* Control bits for startup_64 */
> -#define STARTUP_APICID_CPUID_1F 0x80000000
> -#define STARTUP_APICID_CPUID_0B 0x40000000
> -#define STARTUP_APICID_CPUID_01 0x20000000
> -#define STARTUP_APICID_SEV_ES 0x10000000
> +#define STARTUP_READ_APICID 0x80000000
> +#define STARTUP_APICID_SEV_ES 0x40000000
>
> /* Top 8 bits are reserved for control */
> #define STARTUP_PARALLEL_MASK 0xFF000000
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -101,6 +101,8 @@ static int apic_extnmi __ro_after_init =
> */
> static bool virt_ext_dest_id __ro_after_init;
>
> +unsigned long apic_mmio_base __ro_after_init;
> +
> /*
> * Map cpu index to physical APIC ID
> */
> @@ -2164,6 +2166,7 @@ void __init register_lapic_address(unsig
>
> if (!x2apic_mode) {
> set_fixmap_nocache(FIX_APIC_BASE, address);
> + apic_mmio_base = APIC_BASE;
> apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
> APIC_BASE, address);
> }
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -24,8 +24,10 @@
> #include "../entry/calling.h"
> #include <asm/export.h>
> #include <asm/nospec-branch.h>
> +#include <asm/apicdef.h>
> #include <asm/fixmap.h>
> #include <asm/smp.h>
> +
> #include <asm/sev-common.h>
>
> /*
> @@ -237,37 +239,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_
>
> #ifdef CONFIG_SMP
> /*
> - * For parallel boot, the APIC ID is retrieved from CPUID, and then
> - * used to look up the CPU number. For booting a single CPU, the
> - * CPU number is encoded in smpboot_control.
> + * For parallel boot, the APIC ID is either retrieved from the APIC or
> + * from CPUID, and then used to look up the CPU number.
> + * For booting a single CPU, the CPU number is encoded in
> + * smpboot_control.
> *
> - * Bit 31 STARTUP_APICID_CPUID_1F flag (use CPUID 0x1f)
> - * Bit 30 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b)
> - * Bit 29 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01)
> - * Bit 28 STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
> + * Bit 31 STARTUP_READ_APICID (Read APICID from APIC)
> + * Bit 30 STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
> * Bit 0-23 CPU# if STARTUP_APICID_CPUID_xx flags are not set
> */
> movl smpboot_control(%rip), %ecx
> + testl $STARTUP_READ_APICID, %ecx
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> testl $STARTUP_APICID_SEV_ES, %ecx
> jnz .Luse_sev_cpuid_0b
> #endif
> - testl $STARTUP_APICID_CPUID_1F, %ecx
> - jnz .Luse_cpuid_1f
> - testl $STARTUP_APICID_CPUID_0B, %ecx
> - jnz .Luse_cpuid_0b
> - testl $STARTUP_APICID_CPUID_01, %ecx
> - jnz .Luse_cpuid_01
> andl $(~STARTUP_PARALLEL_MASK), %ecx
> jmp .Lsetup_cpu
>
> -.Luse_cpuid_01:
> - mov $0x01, %eax
> - cpuid
> - mov %ebx, %edx
> - shr $24, %edx
> - jmp .Lsetup_AP
> -
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> .Luse_sev_cpuid_0b:
> /* Set the GHCB MSR to request CPUID 0x0B_EDX */
> @@ -292,24 +281,30 @@ SYM_INNER_LABEL(secondary_startup_64_no_
> jmp .Lsetup_AP
> #endif
>
> -.Luse_cpuid_0b:
> - mov $0x0B, %eax
> - xorl %ecx, %ecx
> - cpuid
> - jmp .Lsetup_AP
> +.Lread_apicid:
> + mov $MSR_IA32_APICBASE, %ecx
> + rdmsr
> + testl $X2APIC_ENABLE, %eax
> + jnz read_apicid_msr
> +
> + /* Read the APIC ID from the fix-mapped MMIO space. */
> + movq apic_mmio_base(%rip), %rcx
> + addq $APIC_ID, %rcx
> + movl (%rcx), %eax
> + shr $24, %eax
> + jnz .Lread_apicid
>
> -.Luse_cpuid_1f:
> - mov $0x1f, %eax
> - xorl %ecx, %ecx
> - cpuid
> +.Lread_apicid_msr:
> + mov $APIC_X2APIC_ID_MSR, %ecx
> + rdmsr
>
> .Lsetup_AP:
> - /* EDX contains the APIC ID of the current CPU */
> + /* EAX contains the APIC ID of the current CPU */
> xorq %rcx, %rcx
> leaq cpuid_to_apicid(%rip), %rbx
>
> .Lfind_cpunr:
> - cmpl (%rbx,%rcx,4), %edx
> + cmpl (%rbx,%rcx,4), %eax
> jz .Lsetup_cpu
> inc %ecx
> #ifdef CONFIG_FORCE_NR_CPUS
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1253,41 +1253,22 @@ bool __init arch_cpuhp_init_parallel_bri
> return false;
> }
>
> - /* Encrypted guests require special CPUID handling. */
> + /* Encrypted guests require special handling. */
> if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
> switch (cc_get_vendor()) {
> case CC_VENDOR_AMD:
> ctrl = STARTUP_APICID_SEV_ES;
> if (topology_extended_leaf == 0x0b)
> - goto setup;
> + break;
> fallthrough;
> default:
> pr_info("Parallel CPU startup disabled due to guest state encryption\n");
> return false;
> }
> + } else {
> + ctrl = STARTUP_READ_APICID;
> }
>
> - switch (topology_extended_leaf) {
> - case 0x0b:
> - ctrl = STARTUP_APICID_CPUID_0B;
> - break;
> - case 0x1f:
> - ctrl = STARTUP_APICID_CPUID_1F;
> - break;
> - case 0x00:
> - /* For !x2APIC mode 8 bits from leaf 0x01 are sufficient. */
> - if (!x2apic_mode) {
> - ctrl = STARTUP_APICID_CPUID_01;
> - break;
> - }
> - fallthrough;
> - default:
> - pr_info("Parallel CPU startup disabled. Unsupported topology leaf %u\n",
> - topology_extended_leaf);
> - return false;
> - }
> -
> -setup:
> pr_debug("Parallel CPU startup enabled: 0x%08x\n", ctrl);
> smpboot_control = ctrl;
> return true;
I quickly applied it on top of your branch, but the build fails with a linker error:
```
$ wget https://lore.kernel.org/lkml/87v8hq35sk.ffs@tglx/raw
$ patch -p1 < raw
$ make
[…]
LD .tmp_vmlinux.kallsyms1
ld: arch/x86/kernel/head_64.o: in function `secondary_startup_64_no_verify':
(.head.text+0xbf): undefined reference to `read_apicid_msr'
make[1]: *** [scripts/Makefile.vmlinux:35: vmlinux] Error 1
make: *** [Makefile:1249: vmlinux] Error 2
```
Kind regards,
Paul
More information about the linux-riscv mailing list