[PATCH -next v8 4/4] arm64: add cow to machine check safe

Catalin Marinas catalin.marinas at arm.com
Tue Apr 11 09:45:18 PDT 2023


On Mon, Dec 19, 2022 at 12:00:08PM +0000, Tong Tiangen wrote:
> At present, Recover from poison consumption from copy-on-write has been
> supported[1], arm64 should also support this mechanism.
> 
> Add new helper copy_mc_page() which provide a page copy implementation with
> machine check safe. At present, only used in cow. In the future, we can
> expand more scenes. As long as the consequences of page copy failure are
> not fatal(eg: only affect user process), we can use this helper.
> 
> The copy_mc_page() in copy_page_mc.S is largely borrows from copy_page()
> in copy_page.S and the main difference is copy_mc_page() add extable entry
> to every load/store insn to support machine check safe. largely to keep the
> patch simple. If needed those optimizations can be folded in.
> 
> Add new extable type EX_TYPE_COPY_MC_PAGE which used in copy_mc_page().
> 
> [1]https://lore.kernel.org/lkml/20221031201029.102123-1-tony.luck@intel.com/
> 
> Signed-off-by: Tong Tiangen <tongtiangen at huawei.com>

This series needs rebasing onto a newer kernel. Some random comments
below.

> diff --git a/arch/arm64/lib/copy_mc_page.S b/arch/arm64/lib/copy_mc_page.S
> new file mode 100644
> index 000000000000..03d657a182f6
> --- /dev/null
> +++ b/arch/arm64/lib/copy_mc_page.S
> @@ -0,0 +1,82 @@
[...]
> +SYM_FUNC_START(__pi_copy_mc_page)
> +alternative_if ARM64_HAS_NO_HW_PREFETCH
> +	// Prefetch three cache lines ahead.
> +	prfm	pldl1strm, [x1, #128]
> +	prfm	pldl1strm, [x1, #256]
> +	prfm	pldl1strm, [x1, #384]
> +alternative_else_nop_endif
> +
> +CPY_MC(9998f, ldp	x2, x3, [x1])
> +CPY_MC(9998f, ldp	x4, x5, [x1, #16])
> +CPY_MC(9998f, ldp	x6, x7, [x1, #32])
> +CPY_MC(9998f, ldp	x8, x9, [x1, #48])
> +CPY_MC(9998f, ldp	x10, x11, [x1, #64])
> +CPY_MC(9998f, ldp	x12, x13, [x1, #80])
> +CPY_MC(9998f, ldp	x14, x15, [x1, #96])
> +CPY_MC(9998f, ldp	x16, x17, [x1, #112])
[...]
[...]
> +9998:	ret

What I don't understand, is there any error returned here or the bytes
not copied? I can see its return value is never used in this series.

Also, do we need to distinguish between fault on the source or the
destination?

> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index 5018ac03b6bf..bf4dd861c41c 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -80,6 +80,25 @@ SYM_FUNC_START(mte_copy_page_tags)
>  	ret
>  SYM_FUNC_END(mte_copy_page_tags)
>  
> +/*
> + * Copy the tags from the source page to the destination one wiht machine check safe
> + *   x0 - address of the destination page
> + *   x1 - address of the source page
> + */
> +SYM_FUNC_START(mte_copy_mc_page_tags)
> +	mov	x2, x0
> +	mov	x3, x1
> +	multitag_transfer_size x5, x6
> +1:
> +CPY_MC(2f, ldgm	x4, [x3])
> +	stgm	x4, [x2]
> +	add	x2, x2, x5
> +	add	x3, x3, x5
> +	tst	x2, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +2:	ret
> +SYM_FUNC_END(mte_copy_mc_page_tags)

While the data copy above handles errors on both source and destination,
here you skip the destination. Any reason?

> diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
> index 8dd5a8fe64b4..005ee2a3cb4e 100644
> --- a/arch/arm64/mm/copypage.c
> +++ b/arch/arm64/mm/copypage.c
[...]
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +void copy_mc_highpage(struct page *to, struct page *from)
> +{
> +	void *kto = page_address(to);
> +	void *kfrom = page_address(from);
> +
> +	copy_mc_page(kto, kfrom);
> +	do_mte(to, from, kto, kfrom, true);
> +}
> +EXPORT_SYMBOL(copy_mc_highpage);
> +
> +int copy_mc_user_highpage(struct page *to, struct page *from,
> +			unsigned long vaddr, struct vm_area_struct *vma)
> +{
> +	copy_mc_highpage(to, from);
> +	flush_dcache_page(to);
> +	return 0;
> +}

This one always returns 0. Does it actually catch any memory failures?

> +EXPORT_SYMBOL_GPL(copy_mc_user_highpage);
> +#endif
> diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
> index 28ec35e3d210..0fdab18f2f07 100644
> --- a/arch/arm64/mm/extable.c
> +++ b/arch/arm64/mm/extable.c
> @@ -16,6 +16,13 @@ get_ex_fixup(const struct exception_table_entry *ex)
>  	return ((unsigned long)&ex->fixup + ex->fixup);
>  }
>  
> +static bool ex_handler_fixup(const struct exception_table_entry *ex,
> +			     struct pt_regs *regs)
> +{
> +	regs->pc = get_ex_fixup(ex);
> +	return true;
> +}

Should we prepare some error here like -EFAULT? That's done in
ex_handler_uaccess_err_zero().

-- 
Catalin



More information about the linux-arm-kernel mailing list