[PATCH] riscv: mm: Synchronize memory attributes for all mm in free_initmem() on RV32 platform.
Alexandre Ghiti
alex at ghiti.fr
Mon Oct 2 06:59:19 PDT 2023
Hi CL Wang,
For reference, I found a similar patch here
https://lore.kernel.org/all/20220902101312.220350-1-vladimir.isaev@syntacore.com/
which was never merged.
On 12/09/2023 11:33, CL Wang wrote:
> 1. Symptom:
> [ 44.486537] Unable to handle kernel paging request at virtual address c0800000
> [ 44.509980] Oops [#1]
> [ 44.516975] Modules linked in:
> [ 44.526260] CPU: 0 PID: 1 Comm: swapper Not tainted 6.1.27-05153-g45f6a9286550-dirty #19
> [ 44.550422] Hardware name: andestech,a45 (DT)
> [ 44.563473] epc : __memset+0x58/0xf4
> [ 44.574353] ra : free_reserved_area+0xb0/0x1a4
> [ 44.588144] epc : c05d4ca0 ra : c011f32c sp : c2c61f00
> [ 44.603536] gp : c28a57c8 tp : c2c98000 t0 : c0800000
> [ 44.618916] t1 : 07901b48 t2 : 0000000f s0 : c2c61f50
> [ 44.634308] s1 : 00000001 a0 : c0800000 a1 : cccccccc
> [ 44.649696] a2 : 00001000 a3 : c0801000 a4 : 00000000
> [ 44.665085] a5 : 02000000 a6 : c0800fff a7 : 00000c08
> [ 44.680467] s2 : 000000cc s3 : ffffffff s4 : 00000000
> [ 44.695846] s5 : c28a66cc s6 : c1eba000 s7 : c2125820
> [ 44.711225] s8 : c0800000 s9 : c212583c s10: c28a6648
> [ 44.726623] s11: fe03c7c0 t3 : acf917bf t4 : e0000000
> [ 44.742009] t5 : c2ca0011 t6 : c2ca0016
> [ 44.753789] status: 00000120 badaddr: c0800000 cause: 0000000f
> [ 44.771234] [<c05d4ca0>] __memset+0x58/0xf4
> [ 44.783895] [<c0003e54>] free_initmem+0x80/0x88
> [ 44.797599] [<c05dcd5c>] kernel_init+0x3c/0x124
> [ 44.811391] [<c0003428>] ret_from_exception+0x0/0x16
>
> 2. To reproduce the problem:
> a. Use the RV32 toolchain to build the system.
> b. Build in the SPI module and mtdpart module in the kernel
> Example: Enable the following configuration
> - CONFIG_SPI
> - CONFIG_MTD and CONFIG_MTD_SPI_NOR
> c. Enable the "Make kernel text and rodata read-only" option by using the
> following kernel config.
> - CONFIG_STRICT_KERNEL_RWX
>
> 3. Root cause:
> This problem occurs when the virtual address of the kernel paging request
> is mapped to a megapage on the RV32 platform.
> During system startup, free_initmem() calls set_kernel_memory() to
> change the memory attributes of the init section from RO to RW. It
> then calls free_initmem_default() to set the memory to
> POISON_FREE_INITMEM. If the system runs modprobe at boot time, it
> will trigger a fork/exec to create a new mm for the new process. If
> the modprobe was called before free_initmem(), it will cause a kernel
> oops because the memory attributes of the current mm were not changed
> by set_kernel_memory(). This is because the set_kernel_memory() changes
> the memory attributes of init_mm, but the pgd(satp) currently in use
> is another process's mm and it's memory attribute doesn't change.
> Thus, it causes a kernel oops because the memory region has an
> un-writable attribute.
>
> 4. The solution.
> A similar problem occurred on ARM platforms and was fixed in
> 08925c2f12 (ARM: 8464/1: Update all mm structures with section
> adjustments). This patch uses a similar approach to fix the
> problem on RV32 by synchronizing the memory attributes
> of the init section for all mm
>
> Signed-off-by: CL Wang <cl634 at andestech.com>
> ---
> arch/riscv/include/asm/set_memory.h | 12 +++++++++
> arch/riscv/kernel/setup.c | 40 +++++++++++++++++++++++++----
> arch/riscv/mm/pageattr.c | 30 ++++++++++++++--------
> 3 files changed, 66 insertions(+), 16 deletions(-)
>
> diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> index a2c14d4b3993..041551bf568e 100644
> --- a/arch/riscv/include/asm/set_memory.h
> +++ b/arch/riscv/include/asm/set_memory.h
> @@ -16,6 +16,10 @@ int set_memory_rw(unsigned long addr, int numpages);
> int set_memory_x(unsigned long addr, int numpages);
> int set_memory_nx(unsigned long addr, int numpages);
> int set_memory_rw_nx(unsigned long addr, int numpages);
> +
> +#if defined(CONFIG_32BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
> +int set_memory_rw_nx_by_mm(unsigned long addr, int numpages, struct mm_struct *mm);
> +#endif
> static __always_inline int set_kernel_memory(char *startp, char *endp,
> int (*set_memory)(unsigned long start,
> int num_pages))
> @@ -32,6 +36,14 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
> +
> +#if defined(CONFIG_32BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
> +static inline int set_memory_rw_nx_by_mm(unsigned long addr,
> + int numpages, struct mm_struct *mm)
> +{
> + return 0;
> +}
> +#endif
> static inline int set_kernel_memory(char *startp, char *endp,
> int (*set_memory)(unsigned long start,
> int num_pages))
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 5424d7631502..73c221b3c399 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -319,13 +319,43 @@ static int __init topology_init(void)
> }
> subsys_initcall(topology_init);
>
> -void free_initmem(void)
> +#if defined(CONFIG_32BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
> +static void set_kernel_mm_early(char *startp, char *endp,
> + int (*set_memory)(unsigned long start,
> + int num_pages, struct mm_struct *mm))
> {
> - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
> - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
> - if (IS_ENABLED(CONFIG_64BIT))
> - set_kernel_memory(__init_begin, __init_end, set_memory_nx);
> + struct task_struct *t, *s;
> + unsigned long start = (unsigned long)startp;
> + unsigned long end = (unsigned long)endp;
> + int num_pages = PAGE_ALIGN(end - start) >> PAGE_SHIFT;
> +
> + set_memory(start, num_pages, current->active_mm);
> + if (current->active_mm != &init_mm)
> + set_memory(start, num_pages, &init_mm);
> +
> + for_each_process(t) {
> + if (t->flags & PF_KTHREAD)
> + continue;
> + for_each_thread(t, s) {
> + if (s->mm)
> + set_memory(start, num_pages, s->mm);
> + }
> }
There is one big difference between your patch here and commit
08925c2f12 (ARM: 8464/1: Update all mm structures with section
adjustments): the parsing of the tasks is done within a stop_machine()
call which stops all other cpus (IIUC). At least the tasklist_lock
should be held but it seems to be wrong according to the last answer
from Palmer in the patch I mention ^.
To me something needs to be done to prevent the creation of new
processes, and if it's only used once in the kernel lifetime and it
works, I would use stop_machine() like arm does.
Thanks,
Alex
> +}
> +#endif
> +
> +void free_initmem(void)
> +{
> +#ifdef CONFIG_STRICT_KERNEL_RWX
> +#ifdef CONFIG_32BIT
> + set_kernel_mm_early(lm_alias(__init_begin), lm_alias(__init_end),
> + set_memory_rw_nx_by_mm);
> +#else
> + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
> +#endif
> + if (IS_ENABLED(CONFIG_64BIT))
> + set_kernel_memory(__init_begin, __init_end, set_memory_nx);
> +#endif
>
> free_initmem_default(POISON_FREE_INITMEM);
> }
> diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> index ea3d61de065b..16ed5cc8f683 100644
> --- a/arch/riscv/mm/pageattr.c
> +++ b/arch/riscv/mm/pageattr.c
> @@ -105,7 +105,7 @@ static const struct mm_walk_ops pageattr_ops = {
> };
>
> static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> - pgprot_t clear_mask)
> + pgprot_t clear_mask, struct mm_struct *mm)
> {
> int ret;
> unsigned long start = addr;
> @@ -118,42 +118,50 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> if (!numpages)
> return 0;
>
> - mmap_write_lock(&init_mm);
> - ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
> + mmap_write_lock(mm);
> + ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
> &masks);
> - mmap_write_unlock(&init_mm);
> + mmap_write_unlock(mm);
>
> flush_tlb_kernel_range(start, end);
>
> return ret;
> }
>
> +#if defined(CONFIG_32BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
> +int set_memory_rw_nx_by_mm(unsigned long addr, int numpages, struct mm_struct *mm)
> +{
> + return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
> + __pgprot(_PAGE_EXEC), mm);
> +}
> +#endif
> +
> int set_memory_rw_nx(unsigned long addr, int numpages)
> {
> return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
> - __pgprot(_PAGE_EXEC));
> + __pgprot(_PAGE_EXEC), &init_mm);
> }
>
> int set_memory_ro(unsigned long addr, int numpages)
> {
> return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> - __pgprot(_PAGE_WRITE));
> + __pgprot(_PAGE_WRITE), &init_mm);
> }
>
> int set_memory_rw(unsigned long addr, int numpages)
> {
> return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
> - __pgprot(0));
> + __pgprot(0), &init_mm);
> }
>
> int set_memory_x(unsigned long addr, int numpages)
> {
> - return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0));
> + return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0), &init_mm);
> }
>
> int set_memory_nx(unsigned long addr, int numpages)
> {
> - return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC));
> + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC), &init_mm);
> }
>
> int set_direct_map_invalid_noflush(struct page *page)
> @@ -198,10 +206,10 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
>
> if (enable)
> __set_memory((unsigned long)page_address(page), numpages,
> - __pgprot(_PAGE_PRESENT), __pgprot(0));
> + __pgprot(_PAGE_PRESENT), __pgprot(0), &init_mm);
> else
> __set_memory((unsigned long)page_address(page), numpages,
> - __pgprot(0), __pgprot(_PAGE_PRESENT));
> + __pgprot(0), __pgprot(_PAGE_PRESENT), &init_mm);
> }
> #endif
>
More information about the linux-riscv
mailing list