[PATCHv3 4/7] arm64: Move some head.text functions to executable section

Mark Rutland mark.rutland at arm.com
Thu Aug 21 03:34:55 PDT 2014


Hi Laura,

On Thu, Aug 21, 2014 at 02:20:36AM +0100, Laura Abbott wrote:
> The code in the head.text section of the kernel exists in the
> same section as the swapper_pg_dir which means it needs the
> same page table permissions. The swapper_pg_dir needs to be
> writeable but shouldn't be executable.

I think we can drop the above. As far as I can tell, as of commit
bd00cd5f8c8c ("arm64: place initial page tables above the kernel") it's
no longer relevant.

> The head.text section
> is intended to be run at early bootup before any of the regular
> kernel mappings have been setup so there is no issue at bootup.
> The suspend/resume/hotplug code path requires some of these
> head.S functions to run however which means they need to be
> executable. We can't easily move all of the head.text to
> an executable section, so split it into two parts: that which
> is used only at early head.S bootup and that which is used
> after bootup. There is a small bit of code duplication because
> of some relocation issues related to accessing code more than
> 1MB away.

From a cursory glance it looks like the only things we need write access
to in .head.text are __boot_cpu_mode and __switch_data. Can't we instead
place those in .data and make .head.text executable?
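
For example, something along these lines (completely untested sketch; the
.align and the initial values are assumptions carried over from the
existing definition) would keep the writable flag in .data, so nothing in
.head.text itself needs a writable mapping:

	/* keep the writable boot-mode flag out of .head.text */
	.pushsection	.data
	.align	3
ENTRY(__boot_cpu_mode)
	.long	BOOT_CPU_MODE_EL2
	.long	BOOT_CPU_MODE_EL1
	.popsection

__switch_data could be moved out in the same way.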

We currently find them with adr, which should be easy to replace with
adrp + add to get around relocation issues.
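
e.g. something like the below (untested sketch; the register and symbol are
chosen purely for illustration):

	adrp	x3, __boot_cpu_mode		// address of the symbol's 4KB page (+/- 4GB range)
	add	x3, x3, :lo12:__boot_cpu_mode	// add the low 12 bits to form the full address

as opposed to a plain "adr x3, __boot_cpu_mode", which can only reach
+/- 1MB from the instruction.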

Thanks,
Mark.

> Signed-off-by: Laura Abbott <lauraa at codeaurora.org>
> ---
>  arch/arm64/kernel/head.S        | 424 +++++++++++++++++++++-------------------
>  arch/arm64/kernel/vmlinux.lds.S |   1 +
>  2 files changed, 228 insertions(+), 197 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 61bc210..dbdb378 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -238,7 +238,7 @@ ENTRY(stext)
>         mov     x0, x22
>         bl      lookup_processor_type
>         mov     x23, x0                         // x23=current cpu_table
> -       cbz     x23, __error_p                  // invalid processor (x23=0)?
> +       cbz     x23, __h_error_p                // invalid processor (x23=0)?
>         bl      __vet_fdt
>         bl      __create_page_tables            // x25=TTBR0, x26=TTBR1
>         /*
> @@ -250,12 +250,236 @@ ENTRY(stext)
>          */
>         ldr     x27, __switch_data              // address to jump to after
>                                                 // MMU has been enabled
> -       adr     lr, __enable_mmu                // return (PIC) address
> +       adr     lr, __h_enable_mmu              // return (PIC) address
>         ldr     x12, [x23, #CPU_INFO_SETUP]
>         add     x12, x12, x28                   // __virt_to_phys
>         br      x12                             // initialise processor
>  ENDPROC(stext)
> 
> +__h_error_p:
> +ENDPROC(__h_error_p)
> +
> +__h_error:
> +1:
> +       nop
> +       b       1b
> +ENDPROC(__h_error)
> +
> +__h_enable_mmu:
> +       ldr     x5, =vectors
> +       msr     vbar_el1, x5
> +       msr     ttbr0_el1, x25                  // load TTBR0
> +       msr     ttbr1_el1, x26                  // load TTBR1
> +       isb
> +       b       __h_turn_mmu_on
> +ENDPROC(__h_enable_mmu)
> +
> +       .align  4
> +__h_turn_mmu_on:
> +       msr     sctlr_el1, x0
> +       isb
> +       br      x27
> +ENDPROC(__h_turn_mmu_on)
> +
> +/*
> + * Determine validity of the x21 FDT pointer.
> + * The dtb must be 8-byte aligned and live in the first 512M of memory.
> + */
> +__vet_fdt:
> +       tst     x21, #0x7
> +       b.ne    1f
> +       cmp     x21, x24
> +       b.lt    1f
> +       mov     x0, #(1 << 29)
> +       add     x0, x0, x24
> +       cmp     x21, x0
> +       b.ge    1f
> +       ret
> +1:
> +       mov     x21, #0
> +       ret
> +ENDPROC(__vet_fdt)
> +/*
> + * Macro to create a table entry to the next page.
> + *
> + *     tbl:    page table address
> + *     virt:   virtual address
> + *     shift:  #imm page table shift
> + *     ptrs:   #imm pointers per table page
> + *
> + * Preserves:  virt
> + * Corrupts:   tmp1, tmp2
> + * Returns:    tbl -> next level table page address
> + */
> +       .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
> +       lsr     \tmp1, \virt, #\shift
> +       and     \tmp1, \tmp1, #\ptrs - 1        // table index
> +       add     \tmp2, \tbl, #PAGE_SIZE
> +       orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
> +       str     \tmp2, [\tbl, \tmp1, lsl #3]
> +       add     \tbl, \tbl, #PAGE_SIZE          // next level table page
> +       .endm
> +
> +/*
> + * Macro to populate the PGD (and possibily PUD) for the corresponding
> + * block entry in the next level (tbl) for the given virtual address.
> + *
> + * Preserves:  tbl, next, virt
> + * Corrupts:   tmp1, tmp2
> + */
> +       .macro  create_pgd_entry, tbl, virt, tmp1, tmp2
> +       create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
> +#if SWAPPER_PGTABLE_LEVELS == 3
> +       create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
> +#endif
> +       .endm
> +
> +/*
> + * Macro to populate block entries in the page table for the start..end
> + * virtual range (inclusive).
> + *
> + * Preserves:  tbl, flags
> + * Corrupts:   phys, start, end, pstate
> + */
> +       .macro  create_block_map, tbl, flags, phys, start, end
> +       lsr     \phys, \phys, #BLOCK_SHIFT
> +       lsr     \start, \start, #BLOCK_SHIFT
> +       and     \start, \start, #PTRS_PER_PTE - 1       // table index
> +       orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
> +       lsr     \end, \end, #BLOCK_SHIFT
> +       and     \end, \end, #PTRS_PER_PTE - 1           // table end index
> +9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
> +       add     \start, \start, #1                      // next entry
> +       add     \phys, \phys, #BLOCK_SIZE               // next block
> +       cmp     \start, \end
> +       b.ls    9999b
> +       .endm
> +
> +/*
> + * Setup the initial page tables. We only setup the barest amount which is
> + * required to get the kernel running. The following sections are required:
> + *   - identity mapping to enable the MMU (low address, TTBR0)
> + *   - first few MB of the kernel linear mapping to jump to once the MMU has
> + *     been enabled, including the FDT blob (TTBR1)
> + *   - pgd entry for fixed mappings (TTBR1)
> + */
> +__create_page_tables:
> +       pgtbl   x25, x26, x28                   // idmap_pg_dir and swapper_pg_dir addresses
> +       mov     x27, lr
> +
> +       /*
> +        * Invalidate the idmap and swapper page tables to avoid potential
> +        * dirty cache lines being evicted.
> +        */
> +       mov     x0, x25
> +       add     x1, x26, #SWAPPER_DIR_SIZE
> +       bl      __inval_cache_range
> +
> +       /*
> +        * Clear the idmap and swapper page tables.
> +        */
> +       mov     x0, x25
> +       add     x6, x26, #SWAPPER_DIR_SIZE
> +1:     stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       cmp     x0, x6
> +       b.lo    1b
> +
> +       ldr     x7, =MM_MMUFLAGS
> +
> +       /*
> +        * Create the identity mapping.
> +        */
> +       mov     x0, x25                         // idmap_pg_dir
> +       ldr     x3, =KERNEL_START
> +       add     x3, x3, x28                     // __pa(KERNEL_START)
> +       create_pgd_entry x0, x3, x5, x6
> +       ldr     x6, =KERNEL_END
> +       mov     x5, x3                          // __pa(KERNEL_START)
> +       add     x6, x6, x28                     // __pa(KERNEL_END)
> +       create_block_map x0, x7, x3, x5, x6
> +
> +       /*
> +        * Map the kernel image (starting with PHYS_OFFSET).
> +        */
> +       mov     x0, x26                         // swapper_pg_dir
> +       mov     x5, #PAGE_OFFSET
> +       create_pgd_entry x0, x5, x3, x6
> +       ldr     x6, =KERNEL_END
> +       mov     x3, x24                         // phys offset
> +       create_block_map x0, x7, x3, x5, x6
> +
> +       /*
> +        * Map the FDT blob (maximum 2MB; must be within 512MB of
> +        * PHYS_OFFSET).
> +        */
> +       mov     x3, x21                         // FDT phys address
> +       and     x3, x3, #~((1 << 21) - 1)       // 2MB aligned
> +       mov     x6, #PAGE_OFFSET
> +       sub     x5, x3, x24                     // subtract PHYS_OFFSET
> +       tst     x5, #~((1 << 29) - 1)           // within 512MB?
> +       csel    x21, xzr, x21, ne               // zero the FDT pointer
> +       b.ne    1f
> +       add     x5, x5, x6                      // __va(FDT blob)
> +       add     x6, x5, #1 << 21                // 2MB for the FDT blob
> +       sub     x6, x6, #1                      // inclusive range
> +       create_block_map x0, x7, x3, x5, x6
> +1:
> +       /*
> +        * Since the page tables have been populated with non-cacheable
> +        * accesses (MMU disabled), invalidate the idmap and swapper page
> +        * tables again to remove any speculatively loaded cache lines.
> +        */
> +       mov     x0, x25
> +       add     x1, x26, #SWAPPER_DIR_SIZE
> +       bl      __inval_cache_range
> +
> +       mov     lr, x27
> +       ret
> +ENDPROC(__create_page_tables)
> +       .ltorg
> +
> +       .align  3
> +       .type   __switch_data, %object
> +__switch_data:
> +       .quad   __mmap_switched
> +       .quad   __bss_start                     // x6
> +       .quad   __bss_stop                      // x7
> +       .quad   processor_id                    // x4
> +       .quad   __fdt_pointer                   // x5
> +       .quad   memstart_addr                   // x6
> +       .quad   init_thread_union + THREAD_START_SP // sp
> +
> +/*
> + * The following fragment of code is executed with the MMU on in MMU mode, and
> + * uses absolute addresses; this is not position independent.
> + */
> +__mmap_switched:
> +       adr     x3, __switch_data + 8
> +
> +       ldp     x6, x7, [x3], #16
> +1:     cmp     x6, x7
> +       b.hs    2f
> +       str     xzr, [x6], #8                   // Clear BSS
> +       b       1b
> +2:
> +       ldp     x4, x5, [x3], #16
> +       ldr     x6, [x3], #8
> +       ldr     x16, [x3]
> +       mov     sp, x16
> +       str     x22, [x4]                       // Save processor ID
> +       str     x21, [x5]                       // Save FDT pointer
> +       str     x24, [x6]                       // Save PHYS_OFFSET
> +       mov     x29, #0
> +       b       start_kernel
> +ENDPROC(__mmap_switched)
> +
> +/*
> + * end 'true' head section, begin head section that can be read only
> + */
> +       .section ".latehead.text","ax"
>  /*
>   * If we're fortunate enough to boot at EL2, ensure that the world is
>   * sane before dropping to EL1.
> @@ -497,183 +721,6 @@ ENDPROC(__calc_phys_offset)
>         .quad   PAGE_OFFSET
> 
>  /*
> - * Macro to create a table entry to the next page.
> - *
> - *     tbl:    page table address
> - *     virt:   virtual address
> - *     shift:  #imm page table shift
> - *     ptrs:   #imm pointers per table page
> - *
> - * Preserves:  virt
> - * Corrupts:   tmp1, tmp2
> - * Returns:    tbl -> next level table page address
> - */
> -       .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
> -       lsr     \tmp1, \virt, #\shift
> -       and     \tmp1, \tmp1, #\ptrs - 1        // table index
> -       add     \tmp2, \tbl, #PAGE_SIZE
> -       orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
> -       str     \tmp2, [\tbl, \tmp1, lsl #3]
> -       add     \tbl, \tbl, #PAGE_SIZE          // next level table page
> -       .endm
> -
> -/*
> - * Macro to populate the PGD (and possibily PUD) for the corresponding
> - * block entry in the next level (tbl) for the given virtual address.
> - *
> - * Preserves:  tbl, next, virt
> - * Corrupts:   tmp1, tmp2
> - */
> -       .macro  create_pgd_entry, tbl, virt, tmp1, tmp2
> -       create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
> -#if SWAPPER_PGTABLE_LEVELS == 3
> -       create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
> -#endif
> -       .endm
> -
> -/*
> - * Macro to populate block entries in the page table for the start..end
> - * virtual range (inclusive).
> - *
> - * Preserves:  tbl, flags
> - * Corrupts:   phys, start, end, pstate
> - */
> -       .macro  create_block_map, tbl, flags, phys, start, end
> -       lsr     \phys, \phys, #BLOCK_SHIFT
> -       lsr     \start, \start, #BLOCK_SHIFT
> -       and     \start, \start, #PTRS_PER_PTE - 1       // table index
> -       orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
> -       lsr     \end, \end, #BLOCK_SHIFT
> -       and     \end, \end, #PTRS_PER_PTE - 1           // table end index
> -9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
> -       add     \start, \start, #1                      // next entry
> -       add     \phys, \phys, #BLOCK_SIZE               // next block
> -       cmp     \start, \end
> -       b.ls    9999b
> -       .endm
> -
> -/*
> - * Setup the initial page tables. We only setup the barest amount which is
> - * required to get the kernel running. The following sections are required:
> - *   - identity mapping to enable the MMU (low address, TTBR0)
> - *   - first few MB of the kernel linear mapping to jump to once the MMU has
> - *     been enabled, including the FDT blob (TTBR1)
> - *   - pgd entry for fixed mappings (TTBR1)
> - */
> -__create_page_tables:
> -       pgtbl   x25, x26, x28                   // idmap_pg_dir and swapper_pg_dir addresses
> -       mov     x27, lr
> -
> -       /*
> -        * Invalidate the idmap and swapper page tables to avoid potential
> -        * dirty cache lines being evicted.
> -        */
> -       mov     x0, x25
> -       add     x1, x26, #SWAPPER_DIR_SIZE
> -       bl      __inval_cache_range
> -
> -       /*
> -        * Clear the idmap and swapper page tables.
> -        */
> -       mov     x0, x25
> -       add     x6, x26, #SWAPPER_DIR_SIZE
> -1:     stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       cmp     x0, x6
> -       b.lo    1b
> -
> -       ldr     x7, =MM_MMUFLAGS
> -
> -       /*
> -        * Create the identity mapping.
> -        */
> -       mov     x0, x25                         // idmap_pg_dir
> -       ldr     x3, =KERNEL_START
> -       add     x3, x3, x28                     // __pa(KERNEL_START)
> -       create_pgd_entry x0, x3, x5, x6
> -       ldr     x6, =KERNEL_END
> -       mov     x5, x3                          // __pa(KERNEL_START)
> -       add     x6, x6, x28                     // __pa(KERNEL_END)
> -       create_block_map x0, x7, x3, x5, x6
> -
> -       /*
> -        * Map the kernel image (starting with PHYS_OFFSET).
> -        */
> -       mov     x0, x26                         // swapper_pg_dir
> -       mov     x5, #PAGE_OFFSET
> -       create_pgd_entry x0, x5, x3, x6
> -       ldr     x6, =KERNEL_END
> -       mov     x3, x24                         // phys offset
> -       create_block_map x0, x7, x3, x5, x6
> -
> -       /*
> -        * Map the FDT blob (maximum 2MB; must be within 512MB of
> -        * PHYS_OFFSET).
> -        */
> -       mov     x3, x21                         // FDT phys address
> -       and     x3, x3, #~((1 << 21) - 1)       // 2MB aligned
> -       mov     x6, #PAGE_OFFSET
> -       sub     x5, x3, x24                     // subtract PHYS_OFFSET
> -       tst     x5, #~((1 << 29) - 1)           // within 512MB?
> -       csel    x21, xzr, x21, ne               // zero the FDT pointer
> -       b.ne    1f
> -       add     x5, x5, x6                      // __va(FDT blob)
> -       add     x6, x5, #1 << 21                // 2MB for the FDT blob
> -       sub     x6, x6, #1                      // inclusive range
> -       create_block_map x0, x7, x3, x5, x6
> -1:
> -       /*
> -        * Since the page tables have been populated with non-cacheable
> -        * accesses (MMU disabled), invalidate the idmap and swapper page
> -        * tables again to remove any speculatively loaded cache lines.
> -        */
> -       mov     x0, x25
> -       add     x1, x26, #SWAPPER_DIR_SIZE
> -       bl      __inval_cache_range
> -
> -       mov     lr, x27
> -       ret
> -ENDPROC(__create_page_tables)
> -       .ltorg
> -
> -       .align  3
> -       .type   __switch_data, %object
> -__switch_data:
> -       .quad   __mmap_switched
> -       .quad   __bss_start                     // x6
> -       .quad   __bss_stop                      // x7
> -       .quad   processor_id                    // x4
> -       .quad   __fdt_pointer                   // x5
> -       .quad   memstart_addr                   // x6
> -       .quad   init_thread_union + THREAD_START_SP // sp
> -
> -/*
> - * The following fragment of code is executed with the MMU on in MMU mode, and
> - * uses absolute addresses; this is not position independent.
> - */
> -__mmap_switched:
> -       adr     x3, __switch_data + 8
> -
> -       ldp     x6, x7, [x3], #16
> -1:     cmp     x6, x7
> -       b.hs    2f
> -       str     xzr, [x6], #8                   // Clear BSS
> -       b       1b
> -2:
> -       ldp     x4, x5, [x3], #16
> -       ldr     x6, [x3], #8
> -       ldr     x16, [x3]
> -       mov     sp, x16
> -       str     x22, [x4]                       // Save processor ID
> -       str     x21, [x5]                       // Save FDT pointer
> -       str     x24, [x6]                       // Save PHYS_OFFSET
> -       mov     x29, #0
> -       b       start_kernel
> -ENDPROC(__mmap_switched)
> -
> -/*
>   * Exception handling. Something went wrong and we can't proceed. We ought to
>   * tell the user, but since we don't have any guarantee that we're even
>   * running on the right architecture, we do virtually nothing.
> @@ -721,21 +768,4 @@ __lookup_processor_type_data:
>         .quad   cpu_table
>         .size   __lookup_processor_type_data, . - __lookup_processor_type_data
> 
> -/*
> - * Determine validity of the x21 FDT pointer.
> - * The dtb must be 8-byte aligned and live in the first 512M of memory.
> - */
> -__vet_fdt:
> -       tst     x21, #0x7
> -       b.ne    1f
> -       cmp     x21, x24
> -       b.lt    1f
> -       mov     x0, #(1 << 29)
> -       add     x0, x0, x24
> -       cmp     x21, x0
> -       b.ge    1f
> -       ret
> -1:
> -       mov     x21, #0
> -       ret
> -ENDPROC(__vet_fdt)
> +
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 97f0c04..2b674c5 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -56,6 +56,7 @@ SECTIONS
>         }
>         .text : {                       /* Real text segment            */
>                 _stext = .;             /* Text and read-only data      */
> +                       *(.latehead.text)
>                         __exception_text_start = .;
>                         *(.exception.text)
>                         __exception_text_end = .;
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation