[v3 PATCH] arm64: mm: force write fault for atomic RMW instructions
Yang Shi
yang at os.amperecomputing.com
Wed Jun 5 10:12:01 PDT 2024
On 6/5/24 9:54 AM, Catalin Marinas wrote:
> On Tue, Jun 04, 2024 at 10:15:16AM -0700, Yang Shi wrote:
>> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
>> index 8c0a36f72d6f..4e0aa6738579 100644
>> --- a/arch/arm64/include/asm/insn.h
>> +++ b/arch/arm64/include/asm/insn.h
>> @@ -325,6 +325,7 @@ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
>> * "-" means "don't care"
>> */
>> __AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000)
>> +__AARCH64_INSN_FUNCS(class_atomic, 0x3b200c00, 0x38200000)
>>
>> __AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
>> __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
>> @@ -345,6 +346,7 @@ __AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
>> __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
>> __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
>> __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
>> +__AARCH64_INSN_FUNCS(casp, 0xBFA07C00, 0x08207C00)
>> __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
>> __AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800)
>> __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
>> @@ -549,6 +551,21 @@ static __always_inline bool aarch64_insn_uses_literal(u32 insn)
>> aarch64_insn_is_prfm_lit(insn);
>> }
>>
>> +static __always_inline bool aarch64_insn_is_class_cas(u32 insn)
>> +{
>> + return aarch64_insn_is_cas(insn) ||
>> + aarch64_insn_is_casp(insn);
>> +}
>> +
>> +/* Exclude unallocated atomic instructions and LD64B/LDAPR */
>> +static __always_inline bool aarch64_atomic_insn_has_wr_perm(u32 insn)
>> +{
>> + return (((insn & 0x3f207c00) == 0x38200000) |
>> + ((insn & 0x3f208c00) == 0x38200000) |
>> + ((insn & 0x7fe06c00) == 0x78202000) |
>> + ((insn & 0xbf204c00) == 0x38200000));
> Please use the logical || instead of the bitwise operator. You can also
> remove the outer brackets.
OK
>
> That said, the above is pretty opaque if we want to update it in the
> future. I have no idea how it was generated or whether it's correct. At
> least maybe add a comment on how you got to these masks and values.
It was generated by a script using Python sympy module, which could help
figure out the most simplified condition.
>
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 8251e2fea9c7..1ed1b061ee8f 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -519,6 +519,35 @@ static bool is_write_abort(unsigned long esr)
>> return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>> }
>>
>> +static bool is_el0_atomic_instr(struct pt_regs *regs)
>> +{
>> + u32 insn;
>> + __le32 insn_le;
>> + unsigned long pc = instruction_pointer(regs);
>> +
>> + if (!user_mode(regs) || compat_user_mode(regs))
>> + return false;
>> +
>> + pagefault_disable();
>> + if (get_user(insn_le, (__le32 __user *)pc)) {
>> + pagefault_enable();
>> + return false;
>> + }
>> + pagefault_enable();
>> +
>> + insn = le32_to_cpu(insn_le);
>> +
>> + if (aarch64_insn_is_class_atomic(insn)) {
>> + if (aarch64_atomic_insn_has_wr_perm(insn))
>> + return true;
>> + }
> Nitpick:
>
> if (aarch64_insn_is_class_atomic(insn) &&
> aarch64_atomic_insn_has_wr_perm(insn))
> return true;
>
> (less indentation)
Sure
>
>> @@ -557,6 +587,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>> /* It was write fault */
>> vm_flags = VM_WRITE;
>> mm_flags |= FAULT_FLAG_WRITE;
>> + } else if (is_el0_atomic_instr(regs)) {
>> + /* Force write fault */
>> + vm_flags = VM_WRITE;
>> + mm_flags |= FAULT_FLAG_WRITE;
>> + force_write = true;
>> } else {
>> /* It was read fault */
>> vm_flags = VM_READ;
>> @@ -586,6 +621,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>> if (!vma)
>> goto lock_mmap;
>>
>> + /* vma flags don't allow write, undo force write */
>> + if (force_write && !(vma->vm_flags & VM_WRITE)) {
>> + vm_flags |= VM_READ;
>> + if (!alternative_has_cap_unlikely(ARM64_HAS_EPAN))
>> + vm_flags |= VM_EXEC;
>> + mm_flags &= ~FAULT_FLAG_WRITE;
>> + }
> Ah, this revert to the non-write flags doesn't look great as we
> basically duplicate the 'else' block in the original check. So, it
> probably look better as per your earlier patch to just do the
> instruction read just before the !(vma->vm_flags & flags) check,
> something like:
>
> if ((vma->vm_flags & VM_WRITE) && is_el0_atomic_instr(regs)) {
> vm_flags = VM_WRITE;
> mm_flags |= FAULT_FLAG_WRITE;
> }
>
> This way we also only read the instruction if the vma is writeable. I
> think it's fine to do this under the vma lock since we have
> pagefault_disable() for the insn read.
Yes, I agree.
>
More information about the linux-arm-kernel
mailing list