[RFC PATCH v1 07/28] riscv: kernel handling on trap entry/exit for user cfi

Deepak Gupta debug at rivosinc.com
Thu Jan 25 16:25:15 PST 2024


On Thu, Jan 25, 2024 at 02:47:49PM -0500, Stefan O'Rear wrote:
>On Thu, Jan 25, 2024, at 12:30 PM, Deepak Gupta wrote:
>> On Thu, Jan 25, 2024 at 02:29:01AM -0500, Stefan O'Rear wrote:
>>>On Thu, Jan 25, 2024, at 1:21 AM, debug at rivosinc.com wrote:
>>>> From: Deepak Gupta <debug at rivosinc.com>
>>>>
>>>> Carves out space in arch specific thread struct for cfi status and shadow stack
>>>> in usermode on riscv.
>>>>
>>>> This patch does following
>>>> - defines a new structure cfi_status with status bit for cfi feature
>>>> - defines shadow stack pointer, base and size in cfi_status structure
>>>> - defines offsets to new member fields in thread in asm-offsets.c
>>>> - Saves and restore shadow stack pointer on trap entry (U --> S) and exit
>>>>   (S --> U)
>>>>
>>>> Signed-off-by: Deepak Gupta <debug at rivosinc.com>
>>>> ---
>>>>  arch/riscv/include/asm/processor.h   |  1 +
>>>>  arch/riscv/include/asm/thread_info.h |  3 +++
>>>>  arch/riscv/include/asm/usercfi.h     | 24 ++++++++++++++++++++++++
>>>>  arch/riscv/kernel/asm-offsets.c      |  5 ++++-
>>>>  arch/riscv/kernel/entry.S            | 25 +++++++++++++++++++++++++
>>>>  5 files changed, 57 insertions(+), 1 deletion(-)
>>>>  create mode 100644 arch/riscv/include/asm/usercfi.h
>>>>
>>>> diff --git a/arch/riscv/include/asm/processor.h
>>>> b/arch/riscv/include/asm/processor.h
>>>> index ee2f51787ff8..d4dc298880fc 100644
>>>> --- a/arch/riscv/include/asm/processor.h
>>>> +++ b/arch/riscv/include/asm/processor.h
>>>> @@ -14,6 +14,7 @@
>>>>
>>>>  #include <asm/ptrace.h>
>>>>  #include <asm/hwcap.h>
>>>> +#include <asm/usercfi.h>
>>>>
>>>>  #ifdef CONFIG_64BIT
>>>>  #define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
>>>> diff --git a/arch/riscv/include/asm/thread_info.h
>>>> b/arch/riscv/include/asm/thread_info.h
>>>> index 320bc899a63b..6a2acecec546 100644
>>>> --- a/arch/riscv/include/asm/thread_info.h
>>>> +++ b/arch/riscv/include/asm/thread_info.h
>>>> @@ -58,6 +58,9 @@ struct thread_info {
>>>>  	int			cpu;
>>>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>>>>  	unsigned long envcfg;
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +	struct cfi_status       user_cfi_state;
>>>> +#endif
>>>>  #ifdef CONFIG_SHADOW_CALL_STACK
>>>>  	void			*scs_base;
>>>>  	void			*scs_sp;
>>>> diff --git a/arch/riscv/include/asm/usercfi.h
>>>> b/arch/riscv/include/asm/usercfi.h
>>>> new file mode 100644
>>>> index 000000000000..080d7077d12c
>>>> --- /dev/null
>>>> +++ b/arch/riscv/include/asm/usercfi.h
>>>> @@ -0,0 +1,24 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0
>>>> + * Copyright (C) 2023 Rivos, Inc.
>>>> + * Deepak Gupta <debug at rivosinc.com>
>>>> + */
>>>> +#ifndef _ASM_RISCV_USERCFI_H
>>>> +#define _ASM_RISCV_USERCFI_H
>>>> +
>>>> +#ifndef __ASSEMBLY__
>>>> +#include <linux/types.h>
>>>> +
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +struct cfi_status {
>>>> +	unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
>>>> +	unsigned long rsvd : ((sizeof(unsigned long)*8) - 1);
>>>> +	unsigned long user_shdw_stk; /* Current user shadow stack pointer */
>>>> +	unsigned long shdw_stk_base; /* Base address of shadow stack */
>>>> +	unsigned long shdw_stk_size; /* size of shadow stack */
>>>> +};
>>>> +
>>>> +#endif /* CONFIG_RISCV_USER_CFI */
>>>> +
>>>> +#endif /* __ASSEMBLY__ */
>>>> +
>>>> +#endif /* _ASM_RISCV_USERCFI_H */
>>>> diff --git a/arch/riscv/kernel/asm-offsets.c
>>>> b/arch/riscv/kernel/asm-offsets.c
>>>> index cdd8f095c30c..5e1f412e96ba 100644
>>>> --- a/arch/riscv/kernel/asm-offsets.c
>>>> +++ b/arch/riscv/kernel/asm-offsets.c
>>>> @@ -43,8 +43,11 @@ void asm_offsets(void)
>>>>  #ifdef CONFIG_SHADOW_CALL_STACK
>>>>  	OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
>>>>  #endif
>>>> -
>>>>  	OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +	OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state);
>>>> +	OFFSET(TASK_TI_USER_SSP, task_struct,
>>>> thread_info.user_cfi_state.user_shdw_stk);
>>>> +#endif
>>>>  	OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
>>>>  	OFFSET(TASK_THREAD_F1,  task_struct, thread.fstate.f[1]);
>>>>  	OFFSET(TASK_THREAD_F2,  task_struct, thread.fstate.f[2]);
>>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>>> index 63c3855ba80d..410659e2eadb 100644
>>>> --- a/arch/riscv/kernel/entry.S
>>>> +++ b/arch/riscv/kernel/entry.S
>>>> @@ -49,6 +49,21 @@ SYM_CODE_START(handle_exception)
>>>>  	REG_S x5,  PT_T0(sp)
>>>>  	save_from_x6_to_x31
>>>>
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +	/*
>>>> +	* we need to save cfi status only when previous mode was U
>>>> +	*/
>>>> +	csrr s2, CSR_STATUS
>>>> +	andi s2, s2, SR_SPP
>>>> +	bnez s2, skip_bcfi_save
>>>> +	/* load cfi status word */
>>>> +	lw s3, TASK_TI_CFI_STATUS(tp)
>>>> +	andi s3, s3, 1
>>>> +	beqz s3, skip_bcfi_save
>>>> +	csrr s3, CSR_SSP
>>>> +	REG_S s3, TASK_TI_USER_SSP(tp) /* save user ssp in thread_info */
>>>> +skip_bcfi_save:
>>>> +#endif
>>>>  	/*
>>>>  	 * Disable user-mode memory access as it should only be set in the
>>>>  	 * actual user copy routines.
>>>> @@ -141,6 +156,16 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>>>>  	 * structures again.
>>>>  	 */
>>>>  	csrw CSR_SCRATCH, tp
>>>> +
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +	lw s3, TASK_TI_CFI_STATUS(tp)
>>>> +	andi s3, s3, 1
>>>> +	beqz s3, skip_bcfi_resume
>>>> +	REG_L s3, TASK_TI_USER_SSP(tp) /* restore user ssp from thread struct */
>>>> +	csrw CSR_SSP, s3
>>>> +skip_bcfi_resume:
>>>> +#endif
>>>> +
>>>
>>>We shouldn't need any of this in the entry/exit code, at least as long as
>>>the kernel itself is not using Zicfiss.  ssp can keep its value in the
>>>kernel and swap it on task switches.  Our entry/exit code is rather short
>>>and I'd like to keep it that way.
>>
>> I kept it here because sooner or later we will need to establish kernel
>> shadow
>> stack. Kernel shadow stack on riscv (compared to other arches) kernel
>> actually will
>> be easier to support and adopt because there is already support for
>> shadow call stack
>> (SCS, [1]). Difference between existing shadow call stack (SCS) and
>> `zicfiss` based
>> kernel shadow stack would be
>>
>> 	- In prolog instead of using `sd`, we will be inserting `sspush` to
>> save ret addr
>> 	- In epilog instead of using `ld` and compare, we will be inserting
>> `sspopchk`
>>
>> So a lot underlying work and functional testing for shadow kernel stack
>> is already carried
>> out with SCS patches. It would be easier and faster to re-use SCS
>> patches to support
>> `zicfiss` based shadow stack.
>
>Do you think that realistically, after all the patches are merged, almost all
>kernel configurations that enable kernel Zicfiss will also enable userspace
>Zicfiss and vice versa?
>
>If not - if Zicfiss exclusively in user mode is likely to be a common
>configuration - then the kernel should handle that case in task switch.
>
>If kernel Zicfiss and user Zicfiss are overwhelmingly likely to be supported
>together, then I agree it makes sense to handle it in the same place in
>entry/exit, but I think what you have is more complicated than necessary.
>I'm picturing something like this:

I expect user mode would be the first target and even if kernel shadow stacks
are enabled, it may not be as pervasive in adoption as I expect for user mode.

I do expect it to be used in settings where both are enabled (if not overwhelmingly)

Since that has to be supported, it's better to have it organized where we are saving/
restoring in a way which serves both needs rather than have two ways to save/restore
depending on how user shadow stacks and kernel shadow stacks are configured.

>
>--- a/arch/riscv/kernel/entry.S
>+++ b/arch/riscv/kernel/entry.S
>@@ -32,6 +32,13 @@ SYM_CODE_START(handle_exception)
>        csrr tp, CSR_SCRATCH
>        REG_S sp, TASK_TI_KERNEL_SP(tp)
>
>+#ifdef CONFIG_SHADOW_CALL_STACK
>+       ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
>+                    "csrr sp, ssp\n\t"
>+                   "REG_S sp, TASK_TI_SCS_SP(tp)\n\t"
>+                    "REG_L sp, TASK_TI_KERNEL_SP(tp)")
>+#endif
>+
> #ifdef CONFIG_VMAP_STACK
>        addi sp, sp, -(PT_SIZE_ON_STACK)
>        srli sp, sp, THREAD_SHIFT
>@@ -80,8 +87,13 @@ SYM_CODE_START(handle_exception)
>        /* Load the global pointer */
>        load_global_pointer
>
>-       /* Load the kernel shadow call stack pointer if coming from userspace */
>-       scs_load_current_if_task_changed s5
>+       /* Load the kernel shadow call stack pointer (harmless if from kernel) */
>+#ifdef CONFIG_SHADOW_CALL_STACK
>+       ALTERNATIVE("scs_load_current\n\tnop\n\tnop",
>+                    "REG_L s0, TASK_TI_SCS_SP(tp)\n\t"
>+                    "csrrw s0, ssp, s0\n\t"
>+                    "REG_S s0, PT_SSP(sp)")
>+#endif
>
>        move a0, sp /* pt_regs */
>        la ra, ret_from_exception
>@@ -130,7 +142,12 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>        REG_S s0, TASK_TI_KERNEL_SP(tp)
>
>        /* Save the kernel shadow call stack pointer */
>-       scs_save_current
>+#ifdef CONFIG_SHADOW_CALL_STACK
>+       ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
>+                    "REG_L s0, PT_SSP(sp)\n\t"
>+                    "csrrw s0, ssp, s0\n\t"
>+                    "REG_S s0, TASK_TI_SCS_SP(tp)")
>+#endif

I've not used alternatives earlier. But yes along with kernel shadow stack
this is much more appealing. Let me munch on it a bit. 

>
>        /*
>         * Save TP into the scratch register , so we can find the kernel data
>
>
>I moved the shadow stack pointer into pt_regs because it's nearly a GPR and has a
>meaningfully different value on every trap; this allows us to talk about the ssp
>at the time of a trap in kernel mode.

I had been under the impression that changing `pt_regs` is something that we don't
do usually. If it doesn't require a high bar, I'll do that. Infact, one my earlier
implementation had ssp in pt_regs.

>
>Saving both the sp and ssp in Lrestore_kernel_tpsp avoids adding conditional logic
>to Lsave_context.  I believe the current code also has a bug: if the U-mode tp is,
>by chance or intentional exploit, equal to the thread_info address, kernel code
>will be executed with whatever value U-mode left in gp.
>
>I also notice that there is no check for overflow of the shadow stack.  This may be
>intentional, since as long as the shadow stack is at least half the size of the
>main kernel stack the latter will always overflow first, barring deeper corruption
>of the control structures or assembly code issues.  I expect that the result in that
>case would be an infinite loop of shadow stack overflows in handle_bad_stack and
>do_trap_software_check with occasional visits to handle_kernel_stack_overflow.
>
>I believe that "Save unwound kernel stack pointer in thread_info" and "Save the
>kernel shadow call stack pointer" are both no-ops in all cases other than ret_from_fork,
>since the ABI requires the C trap handler to return with the same sp and ssp it
>was entered with.  Optimizing that would be a separate issue.
>
>-s
>
>>
>> I don't have favorites here, if overwhelving opinion of community here
>> is to take this
>> logic into task switching and re-work this logic back into entry.S
>> whenever shadow stack for
>> kernel patches are posted, I can do that as well.
>>
>> [1] -
>> https://lore.kernel.org/all/20230828195833.756747-8-samitolvanen@google.com/
>>
>>>
>>>-s
>>>
>>>>  1:
>>>>  	REG_L a0, PT_STATUS(sp)
>>>>  	/*
>>>> --
>>>> 2.43.0
>>>>
>>>>
>>>> _______________________________________________
>>>> linux-riscv mailing list
>>>> linux-riscv at lists.infradead.org
>>>> http://lists.infradead.org/mailman/listinfo/linux-riscv
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv



More information about the linux-riscv mailing list