[PATCH 7/7] ARM: redo TTBR setup code for LPAE
Gregory CLEMENT
gregory.clement at free-electrons.com
Wed Aug 5 08:26:25 PDT 2015
Hi Russell,
On 06/05/2015 12:30, Russell King wrote:
> Re-engineer the LPAE TTBR setup code. Rather than passing some shifted
> address in order to fit in a CPU register, pass either a full physical
> address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).
>
> This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
> cpu_set_ttbr() in the secondary CPU startup code path (which was there
> to re-set TTBR1 to the appropriate high physical address space on
> Keystone2).
It seems that since the merge of this commit, the secondary CPUs on
Armada XP fail to come online in big-endian mode (my configuration is
mvebu_v7_defconfig + CONFIG_CPU_BIG_ENDIAN=y). Once I found that this
commit introduced the regression, I also tested with LPAE, but the
result was the same: only the first CPU boots.
I don't know whether there is something wrong with this patch itself,
or whether it reveals something that was already wrong in the Armada
XP port. At one point I suspected the change to the get_arch_pgd()
function, but I didn't find anything conclusive.
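To make that concrete, here is the data flow I was trying to rule out,
as a small host-side sketch (mine, not kernel code, and the address is
made up). secondary_data.pgdir is now a u64, and head.S reads it with
"ldrd r4, [r3, #0]", i.e. r4 gets the 32-bit word at offset 0 and r5
the word at offset 4:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t pgdir = 0x0000000180004000ULL; /* made-up TTBR0 value */
	uint32_t w[2];                          /* the two words ldrd fetches */

	memcpy(w, &pgdir, sizeof(pgdir));
	/*
	 * Little-endian: w[0] = low half, w[1] = high half. With a BE8
	 * kernel the data is stored big-endian, so the two halves come
	 * out the other way around -- that is the kind of thing I was
	 * trying to rule out, so far without success.
	 */
	printf("word@0=%#x word@4=%#x\n", (unsigned)w[0], (unsigned)w[1]);
	return 0;
}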
Any guidance would be greatly appreciated.
Thanks,
Gregory
>
> Signed-off-by: Russell King <rmk+kernel at arm.linux.org.uk>
> ---
> arch/arm/include/asm/memory.h | 16 ---------------
> arch/arm/include/asm/proc-fns.h | 7 -------
> arch/arm/include/asm/smp.h | 2 +-
> arch/arm/kernel/head-nommu.S | 2 +-
> arch/arm/kernel/head.S | 42 +++++++++++++++++++++++++++++-----------
> arch/arm/kernel/smp.c | 10 ++++++----
> arch/arm/mach-keystone/platsmp.c | 13 -------------
> arch/arm/mm/proc-v7-2level.S | 6 +++---
> arch/arm/mm/proc-v7-3level.S | 14 +++++---------
> arch/arm/mm/proc-v7.S | 26 ++++++++++++-------------
> 10 files changed, 60 insertions(+), 78 deletions(-)
>
> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
> index 184def0e1652..3a72d69b3255 100644
> --- a/arch/arm/include/asm/memory.h
> +++ b/arch/arm/include/asm/memory.h
> @@ -18,8 +18,6 @@
> #include <linux/types.h>
> #include <linux/sizes.h>
>
> -#include <asm/cache.h>
> -
> #ifdef CONFIG_NEED_MACH_MEMORY_H
> #include <mach/memory.h>
> #endif
> @@ -133,20 +131,6 @@
> #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
>
> /*
> - * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed
> - * around in head.S and proc-*.S are shifted by this amount, in order to
> - * leave spare high bits for systems with physical address extension. This
> - * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
> - * gives us about 38-bits or so.
> - */
> -#ifdef CONFIG_ARM_LPAE
> -#define ARCH_PGD_SHIFT L1_CACHE_SHIFT
> -#else
> -#define ARCH_PGD_SHIFT 0
> -#endif
> -#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1)
> -
> -/*
> * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
> * memory. This is used for XIP and NoMMU kernels, and on platforms that don't
> * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
> diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
> index 5324c1112f3a..8877ad5ffe10 100644
> --- a/arch/arm/include/asm/proc-fns.h
> +++ b/arch/arm/include/asm/proc-fns.h
> @@ -125,13 +125,6 @@ extern void cpu_resume(void);
> ttbr; \
> })
>
> -#define cpu_set_ttbr(nr, val) \
> - do { \
> - u64 ttbr = val; \
> - __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \
> - : : "r" (ttbr)); \
> - } while (0)
> -
> #define cpu_get_pgd() \
> ({ \
> u64 pg = cpu_get_ttbr(0); \
> diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
> index 18f5a554134f..487aa08f31ee 100644
> --- a/arch/arm/include/asm/smp.h
> +++ b/arch/arm/include/asm/smp.h
> @@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void);
> struct secondary_data {
> union {
> unsigned long mpu_rgn_szr;
> - unsigned long pgdir;
> + u64 pgdir;
> };
> unsigned long swapper_pg_dir;
> void *stack;
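As a sanity check on the stack-pointer offsets touched below (#8 -> #12
in head-nommu.S, #4 -> #12 in head.S), here is a minimal mirror of the
new layout using fixed-width types -- my sketch, not the kernel struct:

#include <stddef.h>
#include <stdint.h>

struct secondary_data_sketch {
	union {
		uint32_t mpu_rgn_szr;   /* unsigned long on arm32 */
		uint64_t pgdir;         /* the new u64 field */
	};
	uint32_t swapper_pg_dir;        /* unsigned long on arm32 */
	uint32_t stack;                 /* void * on arm32 */
};

/*
 * pgdir at +0, swapper_pg_dir at +8, stack at +12 -- matching the
 * "#8" load of swapper_pg_dir and the new "#12" sp loads.
 */
_Static_assert(offsetof(struct secondary_data_sketch, swapper_pg_dir) == 8,
	       "swapper_pg_dir should be at offset 8");
_Static_assert(offsetof(struct secondary_data_sketch, stack) == 12,
	       "stack should be at offset 12");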
> diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
> index aebfbf79a1a3..84da14b7cd04 100644
> --- a/arch/arm/kernel/head-nommu.S
> +++ b/arch/arm/kernel/head-nommu.S
> @@ -123,7 +123,7 @@ ENTRY(secondary_startup)
> ENDPROC(secondary_startup)
>
> ENTRY(__secondary_switched)
> - ldr sp, [r7, #8] @ set up the stack pointer
> + ldr sp, [r7, #12] @ set up the stack pointer
> mov fp, #0
> b secondary_start_kernel
> ENDPROC(__secondary_switched)
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 3637973a9708..7304b4c44b52 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -131,13 +131,30 @@ ENTRY(stext)
> * The following calls CPU specific code in a position independent
> * manner. See arch/arm/mm/proc-*.S for details. r10 = base of
> * xxx_proc_info structure selected by __lookup_processor_type
> - * above. On return, the CPU will be ready for the MMU to be
> - * turned on, and r0 will hold the CPU control register value.
> + * above.
> + *
> + * The processor init function will be called with:
> + * r1 - machine type
> + * r2 - boot data (atags/dt) pointer
> + * r4 - translation table base (low word)
> + * r5 - translation table base (high word, if LPAE)
> + * r8 - translation table base 1 (pfn if LPAE)
> + * r9 - cpuid
> + * r13 - virtual address for __enable_mmu -> __turn_mmu_on
> + *
> + * On return, the CPU will be ready for the MMU to be turned on,
> + * r0 will hold the CPU control register value, r1, r2, r4, and
> + * r9 will be preserved. r5 will also be preserved if LPAE.
> */
> ldr r13, =__mmap_switched @ address to jump to after
> @ mmu has been enabled
> adr lr, BSYM(1f) @ return (PIC) address
> +#ifdef CONFIG_ARM_LPAE
> + mov r5, #0 @ high TTBR0
> + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn
> +#else
> mov r8, r4 @ set TTBR1 to swapper_pg_dir
> +#endif
> ldr r12, [r10, #PROCINFO_INITFUNC]
> add r12, r12, r10
> ret r12
> @@ -158,7 +175,7 @@ ENDPROC(stext)
> *
> * Returns:
> * r0, r3, r5-r7 corrupted
> - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
> + * r4 = physical page table address
> */
> __create_page_tables:
> pgtbl r4, r8 @ page table address
> @@ -333,7 +350,6 @@ __create_page_tables:
> #endif
> #ifdef CONFIG_ARM_LPAE
> sub r4, r4, #0x1000 @ point to the PGD table
> - mov r4, r4, lsr #ARCH_PGD_SHIFT
> #endif
> ret lr
> ENDPROC(__create_page_tables)
> @@ -381,9 +397,9 @@ ENTRY(secondary_startup)
> adr r4, __secondary_data
> ldmia r4, {r5, r7, r12} @ address to jump to after
> sub lr, r4, r5 @ mmu has been enabled
> - ldr r4, [r7, lr] @ get secondary_data.pgdir
> - add r7, r7, #4
> - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir
> + add r3, r7, lr
> + ldrd r4, [r3, #0] @ get secondary_data.pgdir
> + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir
> adr lr, BSYM(__enable_mmu) @ return address
> mov r13, r12 @ __secondary_switched address
> ldr r12, [r10, #PROCINFO_INITFUNC]
> @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm)
> * r6 = &secondary_data
> */
> ENTRY(__secondary_switched)
> - ldr sp, [r7, #4] @ get secondary_data.stack
> + ldr sp, [r7, #12] @ get secondary_data.stack
> mov fp, #0
> b secondary_start_kernel
> ENDPROC(__secondary_switched)
> @@ -416,12 +432,14 @@ __secondary_data:
> /*
> * Setup common bits before finally enabling the MMU. Essentially
> * this is just loading the page table pointer and domain access
> - * registers.
> + * registers. All these registers need to be preserved by the
> + * processor setup function (or set in the case of r0)
> *
> * r0 = cp#15 control register
> * r1 = machine ID
> * r2 = atags or dtb pointer
> - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
> + * r4 = TTBR pointer (low word)
> + * r5 = TTBR pointer (high word if LPAE)
> * r9 = processor ID
> * r13 = *virtual* address to jump to upon completion
> */
> @@ -440,7 +458,9 @@ __enable_mmu:
> #ifdef CONFIG_CPU_ICACHE_DISABLE
> bic r0, r0, #CR_I
> #endif
> -#ifndef CONFIG_ARM_LPAE
> +#ifdef CONFIG_ARM_LPAE
> + mcrr p15, 0, r4, r5, c2 @ load TTBR0
> +#else
> mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
> domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
> domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index cca5b8758185..90dfbedfbfb8 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops)
>
> static unsigned long get_arch_pgd(pgd_t *pgd)
> {
> - phys_addr_t pgdir = virt_to_idmap(pgd);
> - BUG_ON(pgdir & ARCH_PGD_MASK);
> - return pgdir >> ARCH_PGD_SHIFT;
> +#ifdef CONFIG_ARM_LPAE
> + return __phys_to_pfn(virt_to_phys(pgd));
> +#else
> + return virt_to_phys(pgd);
> +#endif
> }
>
> int __cpu_up(unsigned int cpu, struct task_struct *idle)
> @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
> #endif
>
> #ifdef CONFIG_MMU
> - secondary_data.pgdir = get_arch_pgd(idmap_pgd);
> + secondary_data.pgdir = virt_to_phys(idmap_pgd);
> secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
> #endif
> sync_cache_w(&secondary_data);
> diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
> index 5f46a7cf907b..4bbb18463bfd 100644
> --- a/arch/arm/mach-keystone/platsmp.c
> +++ b/arch/arm/mach-keystone/platsmp.c
> @@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
> return error;
> }
>
> -#ifdef CONFIG_ARM_LPAE
> -static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
> -{
> - pgd_t *pgd0 = pgd_offset_k(0);
> - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
> - local_flush_tlb_all();
> -}
> -#else
> -static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
> -{}
> -#endif
> -
> struct smp_operations keystone_smp_ops __initdata = {
> .smp_boot_secondary = keystone_smp_boot_secondary,
> - .smp_secondary_init = keystone_smp_secondary_initmem,
> };
> diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
> index 10405b8d31af..fa385140715f 100644
> --- a/arch/arm/mm/proc-v7-2level.S
> +++ b/arch/arm/mm/proc-v7-2level.S
> @@ -148,10 +148,10 @@ ENDPROC(cpu_v7_set_pte_ext)
> * Macro for setting up the TTBRx and TTBCR registers.
> * - \ttb0 and \ttb1 updated with the corresponding flags.
> */
> - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp
> + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
> mcr p15, 0, \zero, c2, c0, 2 @ TTB control register
> - ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP)
> - ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP)
> + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
> + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
> ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP)
> ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP)
> mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1
> diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
> index d3daed0ae0ad..5e5720e8bc5f 100644
> --- a/arch/arm/mm/proc-v7-3level.S
> +++ b/arch/arm/mm/proc-v7-3level.S
> @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext)
> * Macro for setting up the TTBRx and TTBCR registers.
> * - \ttbr1 updated.
> */
> - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp
> + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
> ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address
> - mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT
> - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET?
> - mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register
> + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET?
> + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register
> orr \tmp, \tmp, #TTB_EAE
> ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP)
> ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP)
> @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext)
> */
> orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ
> mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR
> - mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits
> - mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits
> + mov \tmp, \ttbr1, lsr #20
> + mov \ttbr1, \ttbr1, lsl #12
> addls \ttbr1, \ttbr1, #TTBR1_OFFSET
> mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1
> - mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits
> - mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits
> - mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0
> .endm
>
> /*
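The TTBR1 rebuild from the PFN above (lsr #20 / lsl #12) took me a
moment to parse: it is simply phys = (u64)pfn << 12 split across the
two mcrr operands (plus the conditional TTBR1_OFFSET on the low word).
A quick C sanity check of that arithmetic, with a made-up address:

#include <assert.h>
#include <stdint.h>

static uint64_t ttbr1_from_pfn(uint32_t pfn)
{
	uint32_t high = pfn >> 20;      /* \tmp in the macro */
	uint32_t low = pfn << 12;       /* \ttbr1 in the macro */

	return ((uint64_t)high << 32) | low;
}

int main(void)
{
	/* made-up swapper_pg_dir at physical 0x8_0000_4000 */
	uint32_t pfn = (uint32_t)(0x800004000ULL >> 12);

	assert(ttbr1_from_pfn(pfn) == 0x800004000ULL);
	return 0;
}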
> diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> index 3d1054f11a8a..873230912894 100644
> --- a/arch/arm/mm/proc-v7.S
> +++ b/arch/arm/mm/proc-v7.S
> @@ -343,9 +343,9 @@ __v7_setup:
> and r10, r0, #0xff000000 @ ARM?
> teq r10, #0x41000000
> bne 3f
> - and r5, r0, #0x00f00000 @ variant
> + and r3, r0, #0x00f00000 @ variant
> and r6, r0, #0x0000000f @ revision
> - orr r6, r6, r5, lsr #20-4 @ combine variant and revision
> + orr r6, r6, r3, lsr #20-4 @ combine variant and revision
> ubfx r0, r0, #4, #12 @ primary part number
>
> /* Cortex-A8 Errata */
> @@ -354,7 +354,7 @@ __v7_setup:
> bne 2f
> #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
>
> - teq r5, #0x00100000 @ only present in r1p*
> + teq r3, #0x00100000 @ only present in r1p*
> mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
> orreq r10, r10, #(1 << 6) @ set IBE to 1
> mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
> @@ -395,7 +395,7 @@ __v7_setup:
> mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
> #endif
> #ifdef CONFIG_ARM_ERRATA_743622
> - teq r5, #0x00200000 @ only present in r2p*
> + teq r3, #0x00200000 @ only present in r2p*
> mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
> orreq r10, r10, #1 << 6 @ set bit #6
> mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
> @@ -425,10 +425,10 @@ __v7_setup:
> mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
> #ifdef CONFIG_MMU
> mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs
> - v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup
> - ldr r5, =PRRR @ PRRR
> + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup
> + ldr r3, =PRRR @ PRRR
> ldr r6, =NMRR @ NMRR
> - mcr p15, 0, r5, c10, c2, 0 @ write PRRR
> + mcr p15, 0, r3, c10, c2, 0 @ write PRRR
> mcr p15, 0, r6, c10, c2, 1 @ write NMRR
> #endif
> dsb @ Complete invalidations
> @@ -437,22 +437,22 @@ __v7_setup:
> and r0, r0, #(0xf << 12) @ ThumbEE enabled field
> teq r0, #(1 << 12) @ check if ThumbEE is present
> bne 1f
> - mov r5, #0
> - mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0
> + mov r3, #0
> + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0
> mrc p14, 6, r0, c0, c0, 0 @ load TEECR
> orr r0, r0, #1 @ set the 1st bit in order to
> mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access
> 1:
> #endif
> - adr r5, v7_crval
> - ldmia r5, {r5, r6}
> + adr r3, v7_crval
> + ldmia r3, {r3, r6}
> ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables
> #ifdef CONFIG_SWP_EMULATE
> - orr r5, r5, #(1 << 10) @ set SW bit in "clear"
> + orr r3, r3, #(1 << 10) @ set SW bit in "clear"
> bic r6, r6, #(1 << 10) @ clear it in "mmuset"
> #endif
> mrc p15, 0, r0, c1, c0, 0 @ read control register
> - bic r0, r0, r5 @ clear bits them
> + bic r0, r0, r3 @ clear bits them
> orr r0, r0, r6 @ set them
> THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
> ret lr @ return to head.S:__ret
>
--
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com