[PATCH v5 04/10] ARM: mm: Type-annotate all cache assembly routines
Ard Biesheuvel
ardb at kernel.org
Mon Apr 15 09:35:15 PDT 2024
Hi Linus,
On Mon, 15 Apr 2024 at 15:43, Linus Walleij <linus.walleij at linaro.org> wrote:
>
> Tag all references to assembly functions with SYM_TYPED_FUNC_START()
> and SYM_FUNC_END() so they also become CFI-safe.
>
> When we add SYM_TYPED_FUNC_START() to assembly calls, a function
> prototype signature will be emitted into the object file at
> (pc-4) from the function's entry point, so that the KCFI runtime
> check performed at each indirect call site can compare it against
> the expected callee signature. Example:
>
> 8011ae38: a540670c .word 0xa540670c
>
> 8011ae3c <v7_flush_icache_all>:
> 8011ae3c: e3a00000 mov r0, #0
> 8011ae40: ee070f11 mcr 15, 0, r0, cr7, cr1, {0}
> 8011ae44: e12fff1e bx lr
>
> This means no "fallthrough" code can enter a SYM_TYPED_FUNC_START()
> call from above it: there will be a function prototype signature
> there, so those are consistently converted to a branch or ret lr
> depending on context.
>
> Tested-by: Kees Cook <keescook at chromium.org>
> Signed-off-by: Linus Walleij <linus.walleij at linaro.org>
> ---
> arch/arm/mm/cache-fa.S | 39 +++++++++++++++++++++-------------
> arch/arm/mm/cache-nop.S | 51 ++++++++++++++++++++++++++-------------------
> arch/arm/mm/cache-v4.S | 47 ++++++++++++++++++++++++-----------------
> arch/arm/mm/cache-v4wb.S | 39 ++++++++++++++++++++--------------
> arch/arm/mm/cache-v4wt.S | 47 ++++++++++++++++++++++++-----------------
> arch/arm/mm/cache-v6.S | 41 ++++++++++++++++++++----------------
> arch/arm/mm/cache-v7.S | 49 ++++++++++++++++++++++---------------------
> arch/arm/mm/cache-v7m.S | 45 ++++++++++++++++++++-------------------
> arch/arm/mm/proc-arm1020.S | 39 +++++++++++++++++++++-------------
> arch/arm/mm/proc-arm1020e.S | 40 ++++++++++++++++++++++-------------
> arch/arm/mm/proc-arm1022.S | 39 +++++++++++++++++++++-------------
> arch/arm/mm/proc-arm1026.S | 40 ++++++++++++++++++++++-------------
> arch/arm/mm/proc-arm920.S | 40 +++++++++++++++++++++--------------
> arch/arm/mm/proc-arm922.S | 40 +++++++++++++++++++++--------------
> arch/arm/mm/proc-arm925.S | 38 ++++++++++++++++++++-------------
> arch/arm/mm/proc-arm926.S | 38 ++++++++++++++++++++-------------
> arch/arm/mm/proc-arm940.S | 42 ++++++++++++++++++++++---------------
> arch/arm/mm/proc-arm946.S | 38 ++++++++++++++++++++-------------
> arch/arm/mm/proc-feroceon.S | 48 +++++++++++++++++++++++++-----------------
> arch/arm/mm/proc-mohawk.S | 38 ++++++++++++++++++++-------------
> arch/arm/mm/proc-xsc3.S | 39 +++++++++++++++++++++-------------
> arch/arm/mm/proc-xscale.S | 40 +++++++++++++++++++++--------------
> 22 files changed, 544 insertions(+), 373 deletions(-)
>
> diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
> index 71c64e92dead..c3642d5daf38 100644
> --- a/arch/arm/mm/cache-fa.S
> +++ b/arch/arm/mm/cache-fa.S
> @@ -12,6 +12,7 @@
> */
> #include <linux/linkage.h>
> #include <linux/init.h>
> +#include <linux/cfi_types.h>
> #include <asm/assembler.h>
> #include <asm/page.h>
>
> @@ -39,11 +40,11 @@
> *
> * Unconditionally clean and invalidate the entire icache.
> */
> -ENTRY(fa_flush_icache_all)
> +SYM_TYPED_FUNC_START(fa_flush_icache_all)
> mov r0, #0
> mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
> ret lr
> -ENDPROC(fa_flush_icache_all)
> +SYM_FUNC_END(fa_flush_icache_all)
>
> /*
> * flush_user_cache_all()
> @@ -51,14 +52,16 @@ ENDPROC(fa_flush_icache_all)
> * Clean and invalidate all cache entries in a particular address
> * space.
> */
> -ENTRY(fa_flush_user_cache_all)
> - /* FALLTHROUGH */
> +SYM_TYPED_FUNC_START(fa_flush_user_cache_all)
> + b fa_flush_kern_cache_all
> +SYM_FUNC_END(fa_flush_user_cache_all)
> +
Given that the prototypes are identical (and therefore compatible as
far as kCFI is concerned), we might just define an alias
SYM_FUNC_ALIAS(fa_flush_user_cache_all, fa_flush_kern_cache_all)
rather than generating different code.
> /*
> * flush_kern_cache_all()
> *
> * Clean and invalidate the entire cache.
> */
> -ENTRY(fa_flush_kern_cache_all)
> +SYM_TYPED_FUNC_START(fa_flush_kern_cache_all)
> mov ip, #0
> mov r2, #VM_EXEC
> __flush_whole_cache:
> @@ -69,6 +72,7 @@ __flush_whole_cache:
> mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
> mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
> ret lr
> +SYM_FUNC_END(fa_flush_kern_cache_all)
>
> /*
> * flush_user_cache_range(start, end, flags)
> @@ -80,7 +84,7 @@ __flush_whole_cache:
> * - end - end address (exclusive, page aligned)
> * - flags - vma_area_struct flags describing address space
> */
> -ENTRY(fa_flush_user_cache_range)
> +SYM_TYPED_FUNC_START(fa_flush_user_cache_range)
> mov ip, #0
> sub r3, r1, r0 @ calculate total size
> cmp r3, #CACHE_DLIMIT @ total size >= limit?
> @@ -97,6 +101,7 @@ ENTRY(fa_flush_user_cache_range)
> mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
> mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
> ret lr
> +SYM_FUNC_END(fa_flush_user_cache_range)
>
> /*
> * coherent_kern_range(start, end)
> @@ -108,8 +113,9 @@ ENTRY(fa_flush_user_cache_range)
> * - start - virtual start address
> * - end - virtual end address
> */
> -ENTRY(fa_coherent_kern_range)
> - /* fall through */
> +SYM_TYPED_FUNC_START(fa_coherent_kern_range)
> + b fa_coherent_user_range
> +SYM_FUNC_END(fa_coherent_kern_range)
>
Same here.
> /*
> * coherent_user_range(start, end)
> @@ -121,7 +127,7 @@ ENTRY(fa_coherent_kern_range)
> * - start - virtual start address
> * - end - virtual end address
> */
> -ENTRY(fa_coherent_user_range)
> +SYM_TYPED_FUNC_START(fa_coherent_user_range)
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
> mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
> @@ -133,6 +139,7 @@ ENTRY(fa_coherent_user_range)
> mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
> mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
> ret lr
> +SYM_FUNC_END(fa_coherent_user_range)
>
> /*
> * flush_kern_dcache_area(void *addr, size_t size)
> @@ -143,7 +150,7 @@ ENTRY(fa_coherent_user_range)
> * - addr - kernel address
> * - size - size of region
> */
> -ENTRY(fa_flush_kern_dcache_area)
> +SYM_TYPED_FUNC_START(fa_flush_kern_dcache_area)
> add r1, r0, r1
> 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
> add r0, r0, #CACHE_DLINESIZE
> @@ -153,6 +160,7 @@ ENTRY(fa_flush_kern_dcache_area)
> mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
> mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
> ret lr
> +SYM_FUNC_END(fa_flush_kern_dcache_area)
>
> /*
> * dma_inv_range(start, end)
> @@ -203,7 +211,7 @@ fa_dma_clean_range:
> * - start - virtual start address of region
> * - end - virtual end address of region
> */
> -ENTRY(fa_dma_flush_range)
> +SYM_TYPED_FUNC_START(fa_dma_flush_range)
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> @@ -212,6 +220,7 @@ ENTRY(fa_dma_flush_range)
> mov r0, #0
> mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
> ret lr
> +SYM_FUNC_END(fa_dma_flush_range)
>
> /*
> * dma_map_area(start, size, dir)
> @@ -219,13 +228,13 @@ ENTRY(fa_dma_flush_range)
> * - size - size of region
> * - dir - DMA direction
> */
> -ENTRY(fa_dma_map_area)
> +SYM_TYPED_FUNC_START(fa_dma_map_area)
> add r1, r1, r0
> cmp r2, #DMA_TO_DEVICE
> beq fa_dma_clean_range
> bcs fa_dma_inv_range
> b fa_dma_flush_range
> -ENDPROC(fa_dma_map_area)
> +SYM_FUNC_END(fa_dma_map_area)
>
> /*
> * dma_unmap_area(start, size, dir)
> @@ -233,9 +242,9 @@ ENDPROC(fa_dma_map_area)
> * - size - size of region
> * - dir - DMA direction
> */
> -ENTRY(fa_dma_unmap_area)
> +SYM_TYPED_FUNC_START(fa_dma_unmap_area)
> ret lr
> -ENDPROC(fa_dma_unmap_area)
> +SYM_FUNC_END(fa_dma_unmap_area)
>
> .globl fa_flush_kern_cache_louis
> .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all
> diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
> index 72d939ef8798..56e94091a55f 100644
> --- a/arch/arm/mm/cache-nop.S
> +++ b/arch/arm/mm/cache-nop.S
> @@ -1,47 +1,56 @@
> /* SPDX-License-Identifier: GPL-2.0-only */
> #include <linux/linkage.h>
> #include <linux/init.h>
> +#include <linux/cfi_types.h>
> #include <asm/assembler.h>
>
> #include "proc-macros.S"
>
> -ENTRY(nop_flush_icache_all)
> +SYM_TYPED_FUNC_START(nop_flush_icache_all)
> ret lr
> -ENDPROC(nop_flush_icache_all)
> +SYM_FUNC_END(nop_flush_icache_all)
>
> - .globl nop_flush_kern_cache_all
> - .equ nop_flush_kern_cache_all, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_flush_kern_cache_all)
> + ret lr
> +SYM_FUNC_END(nop_flush_kern_cache_all)
>
> .globl nop_flush_kern_cache_louis
> .equ nop_flush_kern_cache_louis, nop_flush_icache_all
>
> - .globl nop_flush_user_cache_all
> - .equ nop_flush_user_cache_all, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_flush_user_cache_all)
> + ret lr
> +SYM_FUNC_END(nop_flush_user_cache_all)
>
> - .globl nop_flush_user_cache_range
> - .equ nop_flush_user_cache_range, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_flush_user_cache_range)
> + ret lr
> +SYM_FUNC_END(nop_flush_user_cache_range)
>
> - .globl nop_coherent_kern_range
> - .equ nop_coherent_kern_range, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_coherent_kern_range)
> + ret lr
> +SYM_FUNC_END(nop_coherent_kern_range)
>
> -ENTRY(nop_coherent_user_range)
> +SYM_TYPED_FUNC_START(nop_coherent_user_range)
> mov r0, 0
> ret lr
> -ENDPROC(nop_coherent_user_range)
> -
> - .globl nop_flush_kern_dcache_area
> - .equ nop_flush_kern_dcache_area, nop_flush_icache_all
> +SYM_FUNC_END(nop_coherent_user_range)
>
> - .globl nop_dma_flush_range
> - .equ nop_dma_flush_range, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_flush_kern_dcache_area)
> + ret lr
> +SYM_FUNC_END(nop_flush_kern_dcache_area)
>
> - .globl nop_dma_map_area
> - .equ nop_dma_map_area, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_dma_flush_range)
> + ret lr
> +SYM_FUNC_END(nop_dma_flush_range)
>
> - .globl nop_dma_unmap_area
> - .equ nop_dma_unmap_area, nop_flush_icache_all
> +SYM_TYPED_FUNC_START(nop_dma_map_area)
> + ret lr
> +SYM_FUNC_END(nop_dma_map_area)
>
> __INITDATA
>
> @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
> define_cache_functions nop
> +
> +SYM_TYPED_FUNC_START(nop_dma_unmap_area)
> + ret lr
> +SYM_FUNC_END(nop_dma_unmap_area)
> diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
> index 7787057e4990..22d9c9d9e0d7 100644
> --- a/arch/arm/mm/cache-v4.S
> +++ b/arch/arm/mm/cache-v4.S
> @@ -6,6 +6,7 @@
> */
> #include <linux/linkage.h>
> #include <linux/init.h>
> +#include <linux/cfi_types.h>
> #include <asm/assembler.h>
> #include <asm/page.h>
> #include "proc-macros.S"
> @@ -15,9 +16,9 @@
> *
> * Unconditionally clean and invalidate the entire icache.
> */
> -ENTRY(v4_flush_icache_all)
> +SYM_TYPED_FUNC_START(v4_flush_icache_all)
> ret lr
> -ENDPROC(v4_flush_icache_all)
> +SYM_FUNC_END(v4_flush_icache_all)
>
> /*
> * flush_user_cache_all()
> @@ -27,21 +28,24 @@ ENDPROC(v4_flush_icache_all)
> *
> * - mm - mm_struct describing address space
> */
> -ENTRY(v4_flush_user_cache_all)
> - /* FALLTHROUGH */
> +SYM_TYPED_FUNC_START(v4_flush_user_cache_all)
> + b v4_flush_kern_cache_all
> +SYM_FUNC_END(v4_flush_user_cache_all)
> +
> /*
> * flush_kern_cache_all()
> *
> * Clean and invalidate the entire cache.
> */
> -ENTRY(v4_flush_kern_cache_all)
> +SYM_TYPED_FUNC_START(v4_flush_kern_cache_all)
> #ifdef CONFIG_CPU_CP15
> mov r0, #0
> mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
> ret lr
> #else
> - /* FALLTHROUGH */
> + ret lr
> #endif
> +SYM_FUNC_END(v4_flush_kern_cache_all)
>
Yeah, this is needlessly obfuscated, falling through twice to a 'ret lr' :-)
You could still use SYM_FUNC_ALIAS() here if the prototypes are identical.
More information about the linux-arm-kernel
mailing list