[PATCH v2] arm64: Support Clang UBSAN trap codes for better reporting

Fangrui Song maskray at google.com
Fri Feb 3 11:14:49 PST 2023


On Fri, Feb 3, 2023 at 9:39 AM Kees Cook <keescook at chromium.org> wrote:
>
> When building with CONFIG_UBSAN_TRAP=y on arm64, Clang encodes the UBSAN
> check (handler) type in the esr. Extract this and actually report these
> traps as coming from the specific UBSAN check that tripped.
>
> Before:
>
>   Internal error: BRK handler: 00000000f20003e8 [#1] PREEMPT SMP
>
> After:
>
>   Internal error: UBSAN: shift out of bounds: 00000000f2005514 [#1] PREEMPT SMP
>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: John Stultz <jstultz at google.com>
> Cc: Yongqin Liu <yongqin.liu at linaro.org>
> Cc: Sami Tolvanen <samitolvanen at google.com>
> Cc: Ard Biesheuvel <ardb at kernel.org>
> Cc: Yury Norov <yury.norov at gmail.com>
> Cc: Andrey Konovalov <andreyknvl at gmail.com>
> Cc: Marco Elver <elver at google.com>
> Cc: linux-arm-kernel at lists.infradead.org
> Cc: llvm at lists.linux.dev
> Signed-off-by: Kees Cook <keescook at chromium.org>
> ---
> v2: improve commit log, limit report strings to actual configs, document mappings
> v1: https://lore.kernel.org/lkml/20230202223653.never.473-kees@kernel.org/

Thanks. I'll add the Linux kernel use to
https://maskray.me/blog/2023-01-29-all-about-undefined-behavior-sanitizer
when this lands:)

> ---
>  arch/arm64/include/asm/brk-imm.h |  2 +
>  arch/arm64/kernel/traps.c        | 21 ++++++++++
>  include/linux/ubsan.h            |  9 +++++
>  lib/Makefile                     |  2 -
>  lib/ubsan.c                      | 67 ++++++++++++++++++++++++++++++++
>  lib/ubsan.h                      | 32 +++++++++++++++
>  6 files changed, 131 insertions(+), 2 deletions(-)
>  create mode 100644 include/linux/ubsan.h
>
> diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
> index 6e000113e508..3f0f0d03268b 100644
> --- a/arch/arm64/include/asm/brk-imm.h
> +++ b/arch/arm64/include/asm/brk-imm.h
> @@ -28,6 +28,8 @@
>  #define BUG_BRK_IMM                    0x800
>  #define KASAN_BRK_IMM                  0x900
>  #define KASAN_BRK_MASK                 0x0ff
> +#define UBSAN_BRK_IMM                  0x5500
> +#define UBSAN_BRK_MASK                 0x00ff

Q: How is 0x5500 derived?

>  #define CFI_BRK_IMM_TARGET             GENMASK(4, 0)
>  #define CFI_BRK_IMM_TYPE               GENMASK(9, 5)
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 4c0caa589e12..87f42eb1c950 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -26,6 +26,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/mm_types.h>
>  #include <linux/kasan.h>
> +#include <linux/ubsan.h>
>  #include <linux/cfi.h>
>
>  #include <asm/atomic.h>
> @@ -1074,6 +1075,19 @@ static struct break_hook kasan_break_hook = {
>  };
>  #endif
>
> +#ifdef CONFIG_UBSAN_TRAP
> +static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
> +{
> +       die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
> +       return DBG_HOOK_HANDLED;
> +}
> +
> +static struct break_hook ubsan_break_hook = {
> +       .fn     = ubsan_handler,
> +       .imm    = UBSAN_BRK_IMM,
> +       .mask   = UBSAN_BRK_MASK,
> +};
> +#endif
>
>  #define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
>
> @@ -1091,6 +1105,10 @@ int __init early_brk64(unsigned long addr, unsigned long esr,
>  #ifdef CONFIG_KASAN_SW_TAGS
>         if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
>                 return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
> +#endif
> +#ifdef CONFIG_UBSAN_TRAP
> +       if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
> +               return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
>  #endif
>         return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
>  }
> @@ -1104,6 +1122,9 @@ void __init trap_init(void)
>         register_kernel_break_hook(&fault_break_hook);
>  #ifdef CONFIG_KASAN_SW_TAGS
>         register_kernel_break_hook(&kasan_break_hook);
> +#endif
> +#ifdef CONFIG_UBSAN_TRAP
> +       register_kernel_break_hook(&ubsan_break_hook);
>  #endif

IIUC, the break hook is a list so CONFIG_KASAN_SW_TAGS
(kernel-hwaddress) can be used with CONFIG_UBSAN_TRAP.

>         debug_traps_init();
>  }
> diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
> new file mode 100644
> index 000000000000..bff7445498de
> --- /dev/null
> +++ b/include/linux/ubsan.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_UBSAN_H
> +#define _LINUX_UBSAN_H
> +
> +#ifdef CONFIG_UBSAN_TRAP
> +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type);
> +#endif
> +
> +#endif
> diff --git a/lib/Makefile b/lib/Makefile
> index 4d9461bfea42..81b988bf9448 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -340,9 +340,7 @@ quiet_cmd_build_OID_registry = GEN     $@
>  clean-files    += oid_registry_data.c
>
>  obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
> -ifneq ($(CONFIG_UBSAN_TRAP),y)
>  obj-$(CONFIG_UBSAN) += ubsan.o
> -endif
>
>  UBSAN_SANITIZE_ubsan.o := n
>  KASAN_SANITIZE_ubsan.o := n
> diff --git a/lib/ubsan.c b/lib/ubsan.c
> index 60c7099857a0..f05ae85fc268 100644
> --- a/lib/ubsan.c
> +++ b/lib/ubsan.c
> @@ -18,6 +18,71 @@
>
>  #include "ubsan.h"
>
> +#ifdef CONFIG_UBSAN_TRAP
> +/*
> + * Only include matches for UBSAN checks that are actually compiled in.
> + * The mappings of struct SanitizerKind (the -fsanitize=xxx args) to
> + * enum SanitizerHandler (the traps) in Clang is in clang/lib/CodeGen/.
> + */
> +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
> +{
> +       switch (check_type) {
> +#ifdef CONFIG_UBSAN_BOUNDS
> +       /*
> +        * SanitizerKind::ArrayBounds and SanitizerKind::LocalBounds
> +        * emit SanitizerHandler::OutOfBounds.
> +        */
> +       case ubsan_out_of_bounds:
> +               return "UBSAN: array index out of bounds";
> +#endif
> +#ifdef CONFIG_UBSAN_SHIFT
> +       /*
> +        * SanitizerKind::ShiftBase and SanitizerKind::ShiftExponent
> +        * emit SanitizerHandler::ShiftOutOfBounds.
> +        */
> +       case ubsan_shift_out_of_bounds:
> +               return "UBSAN: shift out of bounds";
> +#endif
> +#ifdef CONFIG_UBSAN_DIV_ZERO
> +       /*
> +        * SanitizerKind::IntegerDivideByZero emits
> +        * SanitizerHandler::DivremOverflow.
> +        */
> +       case ubsan_divrem_overflow:
> +               return "UBSAN: divide/remainder overflow";
> +#endif
> +#ifdef CONFIG_UBSAN_UNREACHABLE
> +       /*
> +        * SanitizerKind::Unreachable emits
> +        * SanitizerHandler::BuiltinUnreachable.
> +        */
> +       case ubsan_builtin_unreachable:
> +               return "UBSAN: unreachable code";
> +#endif
> +#if defined(CONFIG_UBSAN_BOOL) || defined(CONFIG_UBSAN_ENUM)
> +       /*
> +        * SanitizerKind::Bool and SanitizerKind::Enum emit
> +        * SanitizerHandler::LoadInvalidValue.
> +        */
> +       case ubsan_load_invalid_value:
> +               return "UBSAN: loading invalid value";
> +#endif
> +#ifdef CONFIG_UBSAN_ALIGNMENT
> +       /*
> +        * SanitizerKind::Alignment emits SanitizerHandler::TypeMismatch
> +        * or SanitizerHandler::AlignmentAssumption.
> +        */
> +       case ubsan_alignment_assumption:
> +               return "UBSAN: alignment assumption";
> +       case ubsan_type_mismatch:
> +               return "UBSAN: type mismatch";
> +#endif
> +       default:
> +               return "UBSAN: unrecognized failure code";
> +       }
> +}

I wonder whether keeping the dash-prefixed name is better since that
matches compiler-rt/lib/ubsan.
People can search for "add-overflow" and get cross references from
compiler-rt/lib/ubsan, instead of needing to knowing that "addition
overflow" is another name for "add-overflow".

> +
> +#else
>  static const char * const type_check_kinds[] = {
>         "load of",
>         "store to",
> @@ -384,3 +449,5 @@ void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
>         ubsan_epilogue();
>  }
>  EXPORT_SYMBOL(__ubsan_handle_alignment_assumption);
> +
> +#endif /* !CONFIG_UBSAN_TRAP */
> diff --git a/lib/ubsan.h b/lib/ubsan.h
> index 9a0b71c5ff9f..cc5cb94895a6 100644
> --- a/lib/ubsan.h
> +++ b/lib/ubsan.h
> @@ -2,6 +2,38 @@
>  #ifndef _LIB_UBSAN_H
>  #define _LIB_UBSAN_H
>
> +/*
> + * ABI defined by Clang's UBSAN enum SanitizerHandler:
> + * https://github.com/llvm/llvm-project/blob/release/16.x/clang/lib/CodeGen/CodeGenFunction.h#L113
> + */
> +enum ubsan_checks {
> +       ubsan_add_overflow,
> +       ubsan_builtin_unreachable,
> +       ubsan_cfi_check_fail,
> +       ubsan_divrem_overflow,
> +       ubsan_dynamic_type_cache_miss,
> +       ubsan_float_cast_overflow,
> +       ubsan_function_type_mismatch,
> +       ubsan_implicit_conversion,
> +       ubsan_invalid_builtin,
> +       ubsan_invalid_objc_cast,
> +       ubsan_load_invalid_value,
> +       ubsan_missing_return,
> +       ubsan_mul_overflow,
> +       ubsan_negate_overflow,
> +       ubsan_nullability_arg,
> +       ubsan_nullability_return,
> +       ubsan_nonnull_arg,
> +       ubsan_nonnull_return,
> +       ubsan_out_of_bounds,
> +       ubsan_pointer_overflow,
> +       ubsan_shift_out_of_bounds,
> +       ubsan_sub_overflow,
> +       ubsan_type_mismatch,
> +       ubsan_alignment_assumption,
> +       ubsan_vla_bound_not_positive,
> +};
> +
>  enum {
>         type_kind_int = 0,
>         type_kind_float = 1,
> --
> 2.34.1
>
>

Reviewed-by: Fangrui Song <maskray at google.com>



More information about the linux-arm-kernel mailing list