[PATCH v4 00/21] mm: expand mmap_prepare functionality and usage

Andrew Morton akpm at linux-foundation.org
Fri Mar 20 19:42:58 PDT 2026


On Fri, 20 Mar 2026 22:39:26 +0000 "Lorenzo Stoakes (Oracle)" <ljs at kernel.org> wrote:

> This series expands the mmap_prepare functionality, which is intended to
> replace the deprecated f_op->mmap hook which has been the source of bugs
> and security issues for some time.

Thanks, I updated mm-unstable to this version.  Here's how that altered
mm.git:


 Documentation/filesystems/mmap_prepare.rst |    4 
 include/linux/mm.h                         |    3 
 mm/internal.h                              |   27 ++++-
 mm/util.c                                  |   87 +++++++++----------
 mm/vma.c                                   |   41 +-------
 tools/testing/vma/include/dup.h            |   44 +--------
 tools/testing/vma/include/stubs.h          |    3 
 7 files changed, 80 insertions(+), 129 deletions(-)

--- a/Documentation/filesystems/mmap_prepare.rst~b
+++ a/Documentation/filesystems/mmap_prepare.rst
@@ -123,8 +123,8 @@ When implementing mmap_prepare(), refere
 as a ``VMA_xxx_BIT`` macro, e.g. ``VMA_READ_BIT``, ``VMA_WRITE_BIT`` etc.,
 and use one of (where ``desc`` is a pointer to struct vm_area_desc):
 
-* ``vma_desc_test_flags(desc, ...)`` - Specify a comma-separated list of flags
-  you wish to test for (whether _any_ are set), e.g. - ``vma_desc_test_flags(
+* ``vma_desc_test_any(desc, ...)`` - Specify a comma-separated list of flags
+  you wish to test for (whether _any_ are set), e.g. - ``vma_desc_test_any(
   desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT)`` - returns ``true`` if either are set,
   otherwise ``false``.
 * ``vma_desc_set_flags(desc, ...)`` - Update the VMA descriptor flags to set
--- a/include/linux/mm.h~b
+++ a/include/linux/mm.h
@@ -4394,8 +4394,7 @@ static inline void mmap_action_map_kerne
 
 int mmap_action_prepare(struct vm_area_desc *desc);
 int mmap_action_complete(struct vm_area_struct *vma,
-			 struct mmap_action *action,
-			 bool rmap_lock_held);
+			 struct mmap_action *action);
 
 /* Look up the first VMA which exactly match the interval vm_start ... vm_end */
 static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
--- a/mm/internal.h~b
+++ a/mm/internal.h
@@ -202,14 +202,6 @@ static inline void vma_close(struct vm_a
 /* unmap_vmas is in mm/memory.c */
 void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
 
-static inline void unmap_vma_locked(struct vm_area_struct *vma)
-{
-	const size_t len = vma_pages(vma) << PAGE_SHIFT;
-
-	mmap_assert_write_locked(vma->vm_mm);
-	do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
-}
-
 #ifdef CONFIG_MMU
 
 static inline void get_anon_vma(struct anon_vma *anon_vma)
@@ -1826,6 +1818,25 @@ static inline int io_remap_pfn_range_pre
 	return 0;
 }
 
+/*
+ * When we succeed an mmap action or just before we unmap a VMA on error, we
+ * need to ensure any rmap lock held is released. On unmap it's required to
+ * avoid a deadlock.
+ */
+static inline void maybe_rmap_unlock_action(struct vm_area_struct *vma,
+		struct mmap_action *action)
+{
+	struct file *file;
+
+	if (!action->hide_from_rmap_until_complete)
+		return;
+
+	VM_WARN_ON_ONCE(vma_is_anonymous(vma));
+	file = vma->vm_file;
+	i_mmap_unlock_write(file->f_mapping);
+	action->hide_from_rmap_until_complete = false;
+}
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline int clear_flush_young_ptes_notify(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep, unsigned int nr)
--- a/mm/util.c~b
+++ a/mm/util.c
@@ -1198,25 +1198,6 @@ void compat_set_desc_from_vma(struct vm_
 }
 EXPORT_SYMBOL(compat_set_desc_from_vma);
 
-static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
-{
-	const struct vm_operations_struct *vm_ops = vma->vm_ops;
-	void *vm_private_data = vma->vm_private_data;
-	int err;
-
-	if (!vm_ops || !vm_ops->mapped)
-		return 0;
-
-	err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file,
-			     &vm_private_data);
-	if (err)
-		unmap_vma_locked(vma);
-	else if (vm_private_data != vma->vm_private_data)
-		vma->vm_private_data = vm_private_data;
-
-	return err;
-}
-
 /**
  * __compat_vma_mmap() - Similar to compat_vma_mmap(), only it allows
  * flexibility as to how the mmap_prepare callback is invoked, which is useful
@@ -1251,13 +1232,7 @@ int __compat_vma_mmap(struct vm_area_des
 	/* Update the VMA from the descriptor. */
 	compat_set_vma_from_desc(vma, desc);
 	/* Complete any specified mmap actions. */
-	err = mmap_action_complete(vma, &desc->action,
-				   /*rmap_lock_held=*/false);
-	if (err)
-		return err;
-
-	/* Invoke vm_ops->mapped callback. */
-	return __compat_vma_mapped(desc->file, vma);
+	return mmap_action_complete(vma, &desc->action);
 }
 EXPORT_SYMBOL(__compat_vma_mmap);
 
@@ -1290,12 +1265,17 @@ EXPORT_SYMBOL(__compat_vma_mmap);
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct vm_area_desc desc;
+	struct mmap_action *action;
 	int err;
 
 	compat_set_desc_from_vma(&desc, file, vma);
 	err = vfs_mmap_prepare(file, &desc);
 	if (err)
 		return err;
+	action = &desc.action;
+
+	/* Being invoked from .mmap means we don't have to enforce this. */
+	action->hide_from_rmap_until_complete = false;
 
 	return __compat_vma_mmap(&desc, vma);
 }
@@ -1399,25 +1379,47 @@ again:
 	}
 }
 
+static int call_vma_mapped(struct vm_area_struct *vma)
+{
+	const struct vm_operations_struct *vm_ops = vma->vm_ops;
+	void *vm_private_data = vma->vm_private_data;
+	int err;
+
+	if (!vm_ops || !vm_ops->mapped)
+		return 0;
+
+	err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
+			     vma->vm_file, &vm_private_data);
+	if (err)
+		return err;
+
+	if (vm_private_data != vma->vm_private_data)
+		vma->vm_private_data = vm_private_data;
+	return 0;
+}
+
 static int mmap_action_finish(struct vm_area_struct *vma,
-			      struct mmap_action *action, int err,
-			      bool rmap_lock_held)
+			      struct mmap_action *action, int err)
 {
-	if (rmap_lock_held)
-		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	size_t len;
 
-	if (!err) {
-		if (action->success_hook)
-			return action->success_hook(vma);
+	if (!err)
+		err = call_vma_mapped(vma);
+	if (!err && action->success_hook)
+		err = action->success_hook(vma);
+
+	/* do_munmap() might take rmap lock, so release if held. */
+	maybe_rmap_unlock_action(vma, action);
+	if (!err)
 		return 0;
-	}
 
 	/*
 	 * If an error occurs, unmap the VMA altogether and return an error. We
 	 * only clear the newly allocated VMA, since this function is only
 	 * invoked if we do NOT merge, so we only clean up the VMA we created.
 	 */
-	unmap_vma_locked(vma);
+	len = vma_pages(vma) << PAGE_SHIFT;
+	do_munmap(current->mm, vma->vm_start, len, NULL);
 	if (action->error_hook) {
 		/* We may want to filter the error. */
 		err = action->error_hook(err);
@@ -1459,16 +1461,13 @@ EXPORT_SYMBOL(mmap_action_prepare);
  * mmap_action_complete - Execute VMA descriptor action.
  * @vma: The VMA to perform the action upon.
  * @action: The action to perform.
- * @rmap_lock_held: Is the file rmap lock held?
  *
  * Similar to mmap_action_prepare().
  *
  * Return: 0 on success, or error, at which point the VMA will be unmapped.
  */
 int mmap_action_complete(struct vm_area_struct *vma,
-			 struct mmap_action *action,
-			 bool rmap_lock_held)
-
+			 struct mmap_action *action)
 {
 	int err = 0;
 
@@ -1489,8 +1488,7 @@ int mmap_action_complete(struct vm_area_
 		break;
 	}
 
-	return mmap_action_finish(vma, action, err,
-				  rmap_lock_held);
+	return mmap_action_finish(vma, action, err);
 }
 EXPORT_SYMBOL(mmap_action_complete);
 #else
@@ -1512,8 +1510,7 @@ int mmap_action_prepare(struct vm_area_d
 EXPORT_SYMBOL(mmap_action_prepare);
 
 int mmap_action_complete(struct vm_area_struct *vma,
-			 struct mmap_action *action,
-			 bool rmap_lock_held)
+			 struct mmap_action *action)
 {
 	int err = 0;
 
@@ -1523,14 +1520,14 @@ int mmap_action_complete(struct vm_area_
 	case MMAP_REMAP_PFN:
 	case MMAP_IO_REMAP_PFN:
 	case MMAP_SIMPLE_IO_REMAP:
-	casr MMAP_MAP_KERNEL_PAGES:
+	case MMAP_MAP_KERNEL_PAGES:
 		WARN_ON_ONCE(1); /* nommu cannot handle this. */
 
 		err = -EINVAL;
 		break;
 	}
 
-	return mmap_action_finish(vma, action, err, rmap_lock_held);
+	return mmap_action_finish(vma, action, err);
 }
 EXPORT_SYMBOL(mmap_action_complete);
 #endif
--- a/mm/vma.c~b
+++ a/mm/vma.c
@@ -38,8 +38,6 @@ struct mmap_state {
 
 	/* Determine if we can check KSM flags early in mmap() logic. */
 	bool check_ksm_early :1;
-	/* If we map new, hold the file rmap lock on mapping. */
-	bool hold_file_rmap_lock :1;
 	/* If .mmap_prepare changed the file, we don't need to pin. */
 	bool file_doesnt_need_get :1;
 };
@@ -2530,10 +2528,12 @@ static int __mmap_new_file_vma(struct mm
  *
  * @map:  Mapping state.
  * @vmap: Output pointer for the new VMA.
+ * @action: Any mmap_prepare action that is still to complete.
  *
  * Returns: Zero on success, or an error.
  */
-static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
+static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap,
+	struct mmap_action *action)
 {
 	struct vma_iterator *vmi = map->vmi;
 	int error = 0;
@@ -2582,7 +2582,7 @@ static int __mmap_new_vma(struct mmap_st
 	vma_start_write(vma);
 	vma_iter_store_new(vmi, vma);
 	map->mm->map_count++;
-	vma_link_file(vma, map->hold_file_rmap_lock);
+	vma_link_file(vma, action->hide_from_rmap_until_complete);
 
 	/*
 	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
@@ -2649,8 +2649,6 @@ static int call_action_prepare(struct mm
 	if (err)
 		return err;
 
-	if (desc->action.hide_from_rmap_until_complete)
-		map->hold_file_rmap_lock = true;
 	return 0;
 }
 
@@ -2731,30 +2729,6 @@ static bool can_set_ksm_flags_early(stru
 	return false;
 }
 
-static int call_mapped_hook(struct mmap_state *map,
-			    struct vm_area_struct *vma)
-{
-	const struct vm_operations_struct *vm_ops = vma->vm_ops;
-	void *vm_private_data = vma->vm_private_data;
-	int err;
-
-	if (!vm_ops || !vm_ops->mapped)
-		return 0;
-	err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
-			     vma->vm_file, &vm_private_data);
-	if (err) {
-		if (map->hold_file_rmap_lock)
-			i_mmap_unlock_write(vma->vm_file->f_mapping);
-
-		unmap_vma_locked(vma);
-		return err;
-	}
-	/* Update private data if changed. */
-	if (vm_private_data != vma->vm_private_data)
-		vma->vm_private_data = vm_private_data;
-	return 0;
-}
-
 static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vma_flags_t vma_flags,
 		unsigned long pgoff, struct list_head *uf)
@@ -2794,7 +2768,7 @@ static unsigned long __mmap_region(struc
 
 	/* ...but if we can't, allocate a new VMA. */
 	if (!vma) {
-		error = __mmap_new_vma(&map, &vma);
+		error = __mmap_new_vma(&map, &vma, &desc.action);
 		if (error)
 			goto unacct_error;
 		allocated_new = true;
@@ -2806,10 +2780,7 @@ static unsigned long __mmap_region(struc
 	__mmap_complete(&map, vma);
 
 	if (have_mmap_prepare && allocated_new) {
-		error = mmap_action_complete(vma, &desc.action,
-					     map.hold_file_rmap_lock);
-		if (!error)
-			error = call_mapped_hook(&map, vma);
+		error = mmap_action_complete(vma, &desc.action);
 		if (error)
 			return error;
 	}
--- a/tools/testing/vma/include/dup.h~b
+++ a/tools/testing/vma/include/dup.h
@@ -1313,27 +1313,9 @@ static inline unsigned long vma_pages(co
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
-static inline void unmap_vma_locked(struct vm_area_struct *vma)
-{
-	const size_t len = vma_pages(vma) << PAGE_SHIFT;
-
-	mmap_assert_write_locked(vma->vm_mm);
-	do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
-}
-
-static inline int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
+static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
 {
-	const struct vm_operations_struct *vm_ops = vma->vm_ops;
-	int err;
-
-	if (!vm_ops->mapped)
-		return 0;
-
-	err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file,
-			     &vma->vm_private_data);
-	if (err)
-		unmap_vma_locked(vma);
-	return err;
+	return file->f_op->mmap_prepare(desc);
 }
 
 static inline int __compat_vma_mmap(struct vm_area_desc *desc,
@@ -1348,35 +1330,27 @@ static inline int __compat_vma_mmap(stru
 	/* Update the VMA from the descriptor. */
 	compat_set_vma_from_desc(vma, desc);
 	/* Complete any specified mmap actions. */
-	err = mmap_action_complete(vma, &desc->action,
-				   /*rmap_lock_held=*/false);
-	if (err)
-		return err;
-
-	/* Invoke vm_ops->mapped callback. */
-	return __compat_vma_mapped(desc->file, vma);
-}
-
-static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
-{
-	return file->f_op->mmap_prepare(desc);
+	return mmap_action_complete(vma, &desc->action);
 }
 
-static inline int compat_vma_mmap(struct file *file,
-		struct vm_area_struct *vma)
+static inline int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct vm_area_desc desc;
+	struct mmap_action *action;
 	int err;
 
 	compat_set_desc_from_vma(&desc, file, vma);
 	err = vfs_mmap_prepare(file, &desc);
 	if (err)
 		return err;
+	action = &desc.action;
+
+	/* Being invoked from .mmap means we don't have to enforce this. */
+	action->hide_from_rmap_until_complete = false;
 
 	return __compat_vma_mmap(&desc, vma);
 }
 
-
 static inline void vma_iter_init(struct vma_iterator *vmi,
 		struct mm_struct *mm, unsigned long addr)
 {
--- a/tools/testing/vma/include/stubs.h~b
+++ a/tools/testing/vma/include/stubs.h
@@ -87,8 +87,7 @@ static inline int mmap_action_prepare(st
 }
 
 static inline int mmap_action_complete(struct vm_area_struct *vma,
-				       struct mmap_action *action,
-				       bool rmap_lock_held)
+				       struct mmap_action *action)
 {
 	return 0;
 }
_




More information about the linux-afs mailing list