[patch 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

Paul Menzel pmenzel at molgen.mpg.de
Thu Apr 20 09:47:31 PDT 2023


Dear Thomas,


Am 20.04.23 um 17:57 schrieb Thomas Gleixner:
> On Thu, Apr 20 2023 at 07:51, Sean Christopherson wrote:
>> On Thu, Apr 20, 2023, Thomas Gleixner wrote:
>>> On Thu, Apr 20 2023 at 10:23, Andrew Cooper wrote:
>>>> On 20/04/2023 9:32 am, Thomas Gleixner wrote:
>>>>> On Wed, Apr 19, 2023, Andrew Cooper wrote:
>>>>>> This was changed in x2APIC, which made the x2APIC_ID immutable.
>>>
>>>>> I'm pondering to simply deny parallel mode if x2APIC is not there.
>>>>
>>>> I'm not sure if that will help much.
>>>
>>> Spoilsport.
>>
>> LOL, well let me pile on then.  x2APIC IDs aren't immutable on AMD hardware.  The
>> ID is read-only when the CPU is in x2APIC mode, but any changes made to the ID
>> while the CPU is in xAPIC mode survive the transition to x2APIC.  From the APM:
>>
>>    A value previously written by software to the 8-bit APIC_ID register (MMIO offset
>>    30h) is converted by hardware into the appropriate format and reflected into the
>>    32-bit x2APIC_ID register (MSR 802h).
>>
>> FWIW, my observations from testing on bare metal are that the xAPIC ID is effectively
>> read-only (writes are dropped) on Intel CPUs as far back as Haswell, while the above
>> behavior described in the APM holds true on at least Rome and Milan.
>>
>> My guess is that Intel's uArch specific behavior of the xAPIC ID being read-only
>> was introduced when x2APIC came along, but I didn't test farther back than Haswell.
> 
> I'm not so worried about modern hardware. The horrorshow is the old muck
> as demonstrated and of course there is virt :)
> 
> Something like the completely untested below should just work whatever
> APIC ID the BIOS decided to dice.
> 
> That might just work on SEV too without that GHCB muck, but what do I
> know.
> 
> Thanks,
> 
>          tglx
> ---
> --- a/arch/x86/include/asm/apicdef.h
> +++ b/arch/x86/include/asm/apicdef.h
> @@ -138,7 +138,8 @@
>   #define		APIC_EILVT_MASKED	(1 << 16)
>   
>   #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
> -#define APIC_BASE_MSR	0x800
> +#define APIC_BASE_MSR		0x800
> +#define APIC_X2APIC_ID_MSR	0x802
>   #define XAPIC_ENABLE	(1UL << 11)
>   #define X2APIC_ENABLE	(1UL << 10)
>   
> @@ -162,6 +163,7 @@
>   #define APIC_CPUID(apicid)	((apicid) & XAPIC_DEST_CPUS_MASK)
>   #define NUM_APIC_CLUSTERS	((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
>   
> +#ifndef __ASSEMBLY__
>   /*
>    * the local APIC register structure, memory mapped. Not terribly well
>    * tested, but we might eventually use this one in the future - the
> @@ -435,4 +437,5 @@ enum apic_delivery_modes {
>   	APIC_DELIVERY_MODE_EXTINT	= 7,
>   };
>   
> +#endif /* !__ASSEMBLY__ */
>   #endif /* _ASM_X86_APICDEF_H */
> --- a/arch/x86/include/asm/smp.h
> +++ b/arch/x86/include/asm/smp.h
> @@ -195,14 +195,13 @@ extern void nmi_selftest(void);
>   #endif
>   
>   extern unsigned int smpboot_control;
> +extern unsigned long apic_mmio_base;
>   
>   #endif /* !__ASSEMBLY__ */
>   
>   /* Control bits for startup_64 */
> -#define STARTUP_APICID_CPUID_1F 0x80000000
> -#define STARTUP_APICID_CPUID_0B 0x40000000
> -#define STARTUP_APICID_CPUID_01 0x20000000
> -#define STARTUP_APICID_SEV_ES	0x10000000
> +#define STARTUP_READ_APICID	0x80000000
> +#define STARTUP_APICID_SEV_ES	0x40000000
>   
>   /* Top 8 bits are reserved for control */
>   #define STARTUP_PARALLEL_MASK	0xFF000000
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -101,6 +101,8 @@ static int apic_extnmi __ro_after_init =
>    */
>   static bool virt_ext_dest_id __ro_after_init;
>   
> +unsigned long apic_mmio_base __ro_after_init;
> +
>   /*
>    * Map cpu index to physical APIC ID
>    */
> @@ -2164,6 +2166,7 @@ void __init register_lapic_address(unsig
>   
>   	if (!x2apic_mode) {
>   		set_fixmap_nocache(FIX_APIC_BASE, address);
> +		apic_mmio_base = APIC_BASE;
>   		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
>   			    APIC_BASE, address);
>   	}
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -24,8 +24,10 @@
>   #include "../entry/calling.h"
>   #include <asm/export.h>
>   #include <asm/nospec-branch.h>
> +#include <asm/apicdef.h>
>   #include <asm/fixmap.h>
>   #include <asm/smp.h>
> +
>   #include <asm/sev-common.h>
>   
>   /*
> @@ -237,37 +239,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_
>   
>   #ifdef CONFIG_SMP
>   	/*
> -	 * For parallel boot, the APIC ID is retrieved from CPUID, and then
> -	 * used to look up the CPU number.  For booting a single CPU, the
> -	 * CPU number is encoded in smpboot_control.
> +	 * For parallel boot, the APIC ID is either retrieved from the APIC or
> +	 * from CPUID, and then used to look up the CPU number.
> +	 * For booting a single CPU, the CPU number is encoded in
> +	 * smpboot_control.
>   	 *
> -	 * Bit 31	STARTUP_APICID_CPUID_1F flag (use CPUID 0x1f)
> -	 * Bit 30	STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b)
> -	 * Bit 29	STARTUP_APICID_CPUID_01 flag (use CPUID 0x01)
> -	 * Bit 28	STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
> +	 * Bit 31	STARTUP_READ_APICID flag (read APIC ID from the APIC)
> +	 * Bit 30	STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
>   	 * Bit 0-23	CPU# if STARTUP_APICID_CPUID_xx flags are not set
>   	 */
>   	movl	smpboot_control(%rip), %ecx
> +	testl	$STARTUP_READ_APICID, %ecx
>   #ifdef CONFIG_AMD_MEM_ENCRYPT
>   	testl	$STARTUP_APICID_SEV_ES, %ecx
>   	jnz	.Luse_sev_cpuid_0b
>   #endif
> -	testl	$STARTUP_APICID_CPUID_1F, %ecx
> -	jnz	.Luse_cpuid_1f
> -	testl	$STARTUP_APICID_CPUID_0B, %ecx
> -	jnz	.Luse_cpuid_0b
> -	testl	$STARTUP_APICID_CPUID_01, %ecx
> -	jnz	.Luse_cpuid_01
>   	andl	$(~STARTUP_PARALLEL_MASK), %ecx
>   	jmp	.Lsetup_cpu
>   
> -.Luse_cpuid_01:
> -	mov	$0x01, %eax
> -	cpuid
> -	mov	%ebx, %edx
> -	shr	$24, %edx
> -	jmp	.Lsetup_AP
> -
>   #ifdef CONFIG_AMD_MEM_ENCRYPT
>   .Luse_sev_cpuid_0b:
>   	/* Set the GHCB MSR to request CPUID 0x0B_EDX */
> @@ -292,24 +281,30 @@ SYM_INNER_LABEL(secondary_startup_64_no_
>   	jmp	.Lsetup_AP
>   #endif
>   
> -.Luse_cpuid_0b:
> -	mov	$0x0B, %eax
> -	xorl	%ecx, %ecx
> -	cpuid
> -	jmp	.Lsetup_AP
> +.Lread_apicid:
> +	mov	$MSR_IA32_APICBASE, %ecx
> +	rdmsr
> +	testl	$X2APIC_ENABLE, %eax
> +	jnz	read_apicid_msr
> +
> +	/* Read the APIC ID from the fix-mapped MMIO space. */
> +	movq	apic_mmio_base(%rip), %rcx
> +	addq	$APIC_ID, %rcx
> +	movl	(%rcx), %eax
> +	shr	$24, %eax
> +	jnz	.Lread_apicid
>   
> -.Luse_cpuid_1f:
> -	mov	$0x1f, %eax
> -	xorl	%ecx, %ecx
> -	cpuid
> +.Lread_apicid_msr:
> +	mov	$APIC_X2APIC_ID_MSR, %ecx
> +	rdmsr
>   
>   .Lsetup_AP:
> -	/* EDX contains the APIC ID of the current CPU */
> +	/* EAX contains the APIC ID of the current CPU */
>   	xorq	%rcx, %rcx
>   	leaq	cpuid_to_apicid(%rip), %rbx
>   
>   .Lfind_cpunr:
> -	cmpl	(%rbx,%rcx,4), %edx
> +	cmpl	(%rbx,%rcx,4), %eax
>   	jz	.Lsetup_cpu
>   	inc	%ecx
>   #ifdef CONFIG_FORCE_NR_CPUS
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1253,41 +1253,22 @@ bool __init arch_cpuhp_init_parallel_bri
>   		return false;
>   	}
>   
> -	/* Encrypted guests require special CPUID handling. */
> +	/* Encrypted guests require special handling. */
>   	if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
>   		switch (cc_get_vendor()) {
>   		case CC_VENDOR_AMD:
>   			ctrl = STARTUP_APICID_SEV_ES;
>   			if (topology_extended_leaf == 0x0b)
> -				goto setup;
> +				break;
>   			fallthrough;
>   		default:
>   			pr_info("Parallel CPU startup disabled due to guest state encryption\n");
>   			return false;
>   		}
> +	} else {
> +		ctrl = STARTUP_READ_APICID;
>   	}
>   
> -	switch (topology_extended_leaf) {
> -	case 0x0b:
> -		ctrl = STARTUP_APICID_CPUID_0B;
> -		break;
> -	case 0x1f:
> -		ctrl = STARTUP_APICID_CPUID_1F;
> -		break;
> -	case 0x00:
> -		/* For !x2APIC mode 8 bits from leaf 0x01 are sufficient. */
> -		if (!x2apic_mode) {
> -			ctrl = STARTUP_APICID_CPUID_01;
> -			break;
> -		}
> -		fallthrough;
> -	default:
> -		pr_info("Parallel CPU startup disabled. Unsupported topology leaf %u\n",
> -			topology_extended_leaf);
> -		return false;
> -	}
> -
> -setup:
>   	pr_debug("Parallel CPU startup enabled: 0x%08x\n", ctrl);
>   	smpboot_control = ctrl;
>   	return true;

I quickly applied it on top of your branch, but the build fails at link time:

```
$ wget https://lore.kernel.org/lkml/87v8hq35sk.ffs@tglx/raw
$ patch -p1 < raw
$ make
[…]
   LD      .tmp_vmlinux.kallsyms1
ld: arch/x86/kernel/head_64.o: in function `secondary_startup_64_no_verify':
(.head.text+0xbf): undefined reference to `read_apicid_msr'
make[1]: *** [scripts/Makefile.vmlinux:35: vmlinux] Error 1
make: *** [Makefile:1249: vmlinux] Error 2
```


Kind regards,

Paul



More information about the linux-riscv mailing list