[RFC] [PATCH] arm64: survive after access to unimplemented register

Yury Norov ynorov at caviumnetworks.com
Thu Mar 31 05:28:59 PDT 2016


Hi Mark,

On Thu, Mar 31, 2016 at 11:05:48AM +0100, Mark Rutland wrote:
> On Thu, Mar 31, 2016 at 05:27:03AM +0300, Yury Norov wrote:
> > Not all vendors implement all the system registers ARM specifies.
> 
> The ID registers in question are precisely documented in the ARM ARM
> (see table C5-6 in ARM DDI 0487A.i). Specifically, the ID space
> ID_AA64MMFR2_EL1 now falls in to is listed as RAZ.
> 
> Any deviation from this is an erratum, and needs to be handled as such
> (e.g. listing in silicon-errata.txt).
> 
> Does the issue affect ThunderX natively?

Yes, Thunder is involved, but I cannot tell more due to NDA.
And this error is not in silicon-errata.txt.
I'll ask permission to share more details.

> 
> Or has this only been seen with QEMU (in TCG mode), where the bug has
> already been fixed?
> 
> > So access them causes undefined instruction abort and then kernel
> > panic. There are 3 ways to handle it we can figure out:
> >  - use conditional compilation and erratas;
> >  - use kernel patching;
> >  - inline fixups resolving the abort.
> > 
> > Last option is more robust as it does not require additional efforts
> > to support targers. It is looking reasonable because in many cases
> > optional registers should be RAZ if not implemented. Special cases may
> > be handled by underlying __read_cpuid() when needed.
> 
> I don't think we should do this if the only affected implementations are
> software emulators which can be patched (and have already been, in the
> case of QEMU).
> 
> In future it's very likely that early assembly code (potentially in
> hypervisor context) will need to access ID registers which are currently
> reserved/RAZ, and it will be rather painful to fix up accesses to this.
> 

So we will not fix. This one fixes el1 only, and don't pretend for more. 

> Additionally, this workaround will silently mask other bugs in this area
> (e.g. if registers like ID_AA64MMFR0_EL1 were to trap for some reason on
> an implementation), which doesn't seem good.
> 

We can mask it less silently, for example, print message to dmesg.

Initially I was thinking about erratas as well, but Arnd suggested
this approach, and now think it's better. From consumer point of view,
it's much better to have a warning line in dmesg, instead of bricked
device, after another kernel or driver update.

> Thanks,
> Mark.
> 
> > Tested on QEMU emulator version 2.3.0 (Debian 1:2.3+dfsg-5ubuntu9.2)
> > that does not implement SYS_ID_AA64MMFR2_EL1 register. (Fixed in new
> > releases),
> > 
> > Discussion: https://lkml.org/lkml/2016/3/29/931
> > 
> > Signed-off-by: Yury Norov <ynorov at caviumnetworks.com>
> > ---
> >  arch/arm64/include/asm/cputype.h  | 17 +++++++++++++++--
> >  arch/arm64/include/asm/exttable.h | 21 +++++++++++++++++++++
> >  arch/arm64/include/asm/uaccess.h  | 15 ---------------
> >  arch/arm64/kernel/entry.S         |  3 ++-
> >  arch/arm64/kernel/traps.c         |  6 ++++++
> >  arch/arm64/mm/extable.c           |  2 +-
> >  6 files changed, 45 insertions(+), 19 deletions(-)
> >  create mode 100644 arch/arm64/include/asm/exttable.h
> > 
> > diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> > index 87e1985..9660130 100644
> > --- a/arch/arm64/include/asm/cputype.h
> > +++ b/arch/arm64/include/asm/cputype.h
> > @@ -90,13 +90,26 @@
> >  #ifndef __ASSEMBLY__
> >  
> >  #include <asm/sysreg.h>
> > +#include <asm/exttable.h>
> >  
> > -#define read_cpuid(reg) ({						\
> > +#define __read_cpuid(reg, err) ({					\
> >  	u64 __val;							\
> > -	asm("mrs_s	%0, " __stringify(SYS_ ## reg) : "=r" (__val));	\
> > +	asm (								\
> > +	"1:mrs_s	%0, " reg "\n"					\
> > +	"2:\n"								\
> > +	"	.section .fixup, \"ax\"\n"				\
> > +	"	.align	2\n"						\
> > +	"3:	mov	%w0, %1\n"					\
> > +	"	b	2b\n"						\
> > +	"	.previous\n"						\
> > +	_ASM_EXTABLE(1b, 3b)						\
> > +	: "=r" (__val)							\
> > +	: "i" (err));							\
> >  	__val;								\
> >  })
> >  
> > +#define read_cpuid(reg) __read_cpuid(__stringify(SYS_ ## reg), 0)
> > +
> >  /*
> >   * The CPU ID never changes at run time, so we might as well tell the
> >   * compiler that it's constant.  Use this function to read the CPU ID
> > diff --git a/arch/arm64/include/asm/exttable.h b/arch/arm64/include/asm/exttable.h
> > new file mode 100644
> > index 0000000..c0a66c8
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/exttable.h
> > @@ -0,0 +1,21 @@
> > +#ifndef __ASM_EXTTABLE_H
> > +#define __ASM_EXTTABLE_H
> > +
> > +#include <asm/ptrace.h>
> > +
> > +#define ARCH_HAS_RELATIVE_EXTABLE
> > +
> > +#define _ASM_EXTABLE(from, to)						\
> > +	"	.pushsection	__ex_table, \"a\"\n"			\
> > +	"	.align		3\n"					\
> > +	"	.long		(" #from " - .), (" #to " - .)\n"	\
> > +	"	.popsection\n"
> > +
> > +struct exception_table_entry
> > +{
> > +	int insn, fixup;
> > +};
> > +
> > +extern int fixup_exception(struct pt_regs *regs);
> > +
> > +#endif /* __ASM_EXTTABLE_H */
> > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
> > index 0685d74..19cfdc5 100644
> > --- a/arch/arm64/include/asm/uaccess.h
> > +++ b/arch/arm64/include/asm/uaccess.h
> > @@ -48,15 +48,6 @@
> >   * on our cache or tlb entries.
> >   */
> >  
> > -struct exception_table_entry
> > -{
> > -	int insn, fixup;
> > -};
> > -
> > -#define ARCH_HAS_RELATIVE_EXTABLE
> > -
> > -extern int fixup_exception(struct pt_regs *regs);
> > -
> >  #define KERNEL_DS	(-1UL)
> >  #define get_ds()	(KERNEL_DS)
> >  
> > @@ -117,12 +108,6 @@ static inline void set_fs(mm_segment_t fs)
> >  #define access_ok(type, addr, size)	__range_ok(addr, size)
> >  #define user_addr_max			get_fs
> >  
> > -#define _ASM_EXTABLE(from, to)						\
> > -	"	.pushsection	__ex_table, \"a\"\n"			\
> > -	"	.align		3\n"					\
> > -	"	.long		(" #from " - .), (" #to " - .)\n"	\
> > -	"	.popsection\n"
> > -
> >  /*
> >   * The "__xxx" versions of the user access functions do not verify the address
> >   * space - it must have been done previously with a separate "access_ok()"
> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > index 1f7a145..6b88096 100644
> > --- a/arch/arm64/kernel/entry.S
> > +++ b/arch/arm64/kernel/entry.S
> > @@ -377,7 +377,8 @@ el1_undef:
> >  	 */
> >  	enable_dbg
> >  	mov	x0, sp
> > -	b	do_undefinstr
> > +	bl	do_undefinstr
> > +	kernel_exit 1
> >  el1_dbg:
> >  	/*
> >  	 * Debug exception handling
> > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > index cacd30a..515444a 100644
> > --- a/arch/arm64/kernel/traps.c
> > +++ b/arch/arm64/kernel/traps.c
> > @@ -386,6 +386,12 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
> >  	if (call_undef_hook(regs) == 0)
> >  		return;
> >  
> > +	/*
> > +	 * Are we prepared to handle this kernel fault?
> > +	 */
> > +	if (fixup_exception(regs))
> > +		return;
> > +
> >  	if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) {
> >  		pr_info("%s[%d]: undefined instruction: pc=%p\n",
> >  			current->comm, task_pid_nr(current), pc);
> > diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
> > index 81acd47..c140688 100644
> > --- a/arch/arm64/mm/extable.c
> > +++ b/arch/arm64/mm/extable.c
> > @@ -3,7 +3,7 @@
> >   */
> >  
> >  #include <linux/module.h>
> > -#include <linux/uaccess.h>
> > +#include <asm/exttable.h>
> >  
> >  int fixup_exception(struct pt_regs *regs)
> >  {
> > -- 
> > 2.5.0
> > 
> > 
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel at lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > 



More information about the linux-arm-kernel mailing list