[RFC PATCH v1 3/4] mm/memory: Use ptep_get_lockless_norecency() for orig_pte

Ryan Roberts ryan.roberts at arm.com
Thu Feb 15 04:17:55 PST 2024


Convert handle_pte_fault() to use ptep_get_lockless_norecency() instead
of ptep_get_lockless() when saving orig_pte.

There are a number of places that follow this model:

    orig_pte = ptep_get_lockless(ptep)
    ...
    <lock>
    if (!pte_same(orig_pte, ptep_get(ptep)))
            // RACE!
    ...
    <unlock>

All of those comparisons must therefore be converted to use
pte_same_norecency(), so that the access and dirty bits are excluded
from the comparison.

Additionally, there are a couple of places that genuinely rely on the
access and dirty bits of orig_pte. With some careful refactoring, these
can instead use the value returned by ptep_get() once we are holding the
lock, which achieves equivalent logic.

Signed-off-by: Ryan Roberts <ryan.roberts at arm.com>
---
 mm/memory.c | 55 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 8e65fa1884f1..075245314ec3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3014,7 +3014,7 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
 	if (sizeof(pte_t) > sizeof(unsigned long)) {
 		spin_lock(vmf->ptl);
-		same = pte_same(ptep_get(vmf->pte), vmf->orig_pte);
+		same = pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte);
 		spin_unlock(vmf->ptl);
 	}
 #endif
@@ -3062,11 +3062,14 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 	 * take a double page fault, so mark it accessed here.
 	 */
 	vmf->pte = NULL;
-	if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
+	if (!arch_has_hw_pte_young()) {
 		pte_t entry;

 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
-		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (likely(vmf->pte))
+			entry = ptep_get(vmf->pte);
+		if (unlikely(!vmf->pte ||
+			     !pte_same_norecency(entry, vmf->orig_pte))) {
 			/*
 			 * Other thread has already handled the fault
 			 * and update local tlb only
@@ -3077,9 +3080,11 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 			goto pte_unlock;
 		}

-		entry = pte_mkyoung(vmf->orig_pte);
-		if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
-			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
+		if (!pte_young(entry)) {
+			entry = pte_mkyoung(entry);
+			if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
+				update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
+		}
 	}

 	/*
@@ -3094,7 +3099,8 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,

 		/* Re-validate under PTL if the page is still mapped */
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
-		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (unlikely(!vmf->pte ||
+			     !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte))) {
 			/* The PTE changed under us, update local tlb */
 			if (vmf->pte)
 				update_mmu_tlb(vma, addr, vmf->pte);
@@ -3369,14 +3375,17 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	 * Re-check the pte - we dropped the lock
 	 */
 	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
-	if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+	if (likely(vmf->pte))
+		entry = ptep_get(vmf->pte);
+	if (likely(vmf->pte && pte_same_norecency(entry, vmf->orig_pte))) {
 		if (old_folio) {
 			if (!folio_test_anon(old_folio)) {
 				dec_mm_counter(mm, mm_counter_file(old_folio));
 				inc_mm_counter(mm, MM_ANONPAGES);
 			}
 		} else {
-			ksm_might_unmap_zero_page(mm, vmf->orig_pte);
+			/* Needs dirty bit so can't use vmf->orig_pte. */
+			ksm_might_unmap_zero_page(mm, entry);
 			inc_mm_counter(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
@@ -3494,7 +3503,7 @@ static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio
 	 * We might have raced with another page fault while we released the
 	 * pte_offset_map_lock.
 	 */
-	if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) {
+	if (!pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)) {
 		update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return VM_FAULT_NOPAGE;
@@ -3883,7 +3892,8 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)

 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 				&vmf->ptl);
-	if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+	if (likely(vmf->pte &&
+		   pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)))
 		restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte);

 	if (vmf->pte)
@@ -3928,7 +3938,7 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
 	 * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED.
 	 * So is_pte_marker() check is not enough to safely drop the pte.
 	 */
-	if (pte_same(vmf->orig_pte, ptep_get(vmf->pte)))
+	if (pte_same_norecency(vmf->orig_pte, ptep_get(vmf->pte)))
 		pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
@@ -4028,8 +4038,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
 			if (unlikely(!vmf->pte ||
-				     !pte_same(ptep_get(vmf->pte),
-							vmf->orig_pte)))
+				     !pte_same_norecency(ptep_get(vmf->pte),
+							 vmf->orig_pte)))
 				goto unlock;

 			/*
@@ -4117,7 +4127,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
 			if (likely(vmf->pte &&
-				   pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+				   pte_same_norecency(ptep_get(vmf->pte),
+						      vmf->orig_pte)))
 				ret = VM_FAULT_OOM;
 			goto unlock;
 		}
@@ -4187,7 +4198,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
-	if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+	if (unlikely(!vmf->pte ||
+		     !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)))
 		goto out_nomap;

 	if (unlikely(!folio_test_uptodate(folio))) {
@@ -4747,7 +4759,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 static bool vmf_pte_changed(struct vm_fault *vmf)
 {
 	if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)
-		return !pte_same(ptep_get(vmf->pte), vmf->orig_pte);
+		return !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte);

 	return !pte_none(ptep_get(vmf->pte));
 }
@@ -5125,7 +5137,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * the pfn may be screwed if the read is non atomic.
 	 */
 	spin_lock(vmf->ptl);
-	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+	if (unlikely(!pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte))) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		goto out;
 	}
@@ -5197,7 +5209,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 					       vmf->address, &vmf->ptl);
 		if (unlikely(!vmf->pte))
 			goto out;
-		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (unlikely(!pte_same_norecency(ptep_get(vmf->pte),
+							  vmf->orig_pte))) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			goto out;
 		}
@@ -5343,7 +5356,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 						 vmf->address, &vmf->ptl);
 		if (unlikely(!vmf->pte))
 			return 0;
-		vmf->orig_pte = ptep_get_lockless(vmf->pte);
+		vmf->orig_pte = ptep_get_lockless_norecency(vmf->pte);
 		vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID;

 		if (pte_none(vmf->orig_pte)) {
@@ -5363,7 +5376,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)

 	spin_lock(vmf->ptl);
 	entry = vmf->orig_pte;
-	if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) {
+	if (unlikely(!pte_same_norecency(ptep_get(vmf->pte), entry))) {
 		update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
 		goto unlock;
 	}
--
2.25.1




More information about the linux-arm-kernel mailing list