[PATCH v11 00/69] Introducing the Maple Tree

Andrew Morton akpm at linux-foundation.org
Sat Jul 16 21:20:50 PDT 2022


On Sun, 17 Jul 2022 02:46:32 +0000 Liam Howlett <liam.howlett at oracle.com> wrote:

> This is the v10 + fixes and a few clean ups.

I'm seeing quite a lot of differences between this and what we had in
mm-unstable.  A surprising amount.

Please check it all?


--- include/linux/maple_tree.h	2022-07-16 21:05:04.697041964 -0700
+++ ../25-pre-maple/include/linux/maple_tree.h	2022-07-16 21:05:34.509477799 -0700
@@ -225,9 +225,9 @@
  * @flags: The maple tree flags
  *
  */
-#define MTREE_INIT(name, __flags) {					\
-	.ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock),		\
-	.ma_flags = __flags,						\
+#define MTREE_INIT(name, flags) {					\
+	.ma_lock = __SPIN_LOCK_UNLOCKED(name.ma_lock),			\
+	.ma_flags = flags,						\
 	.ma_root = NULL,						\
 }
 
@@ -238,13 +238,13 @@
  * @lock: The external lock
  */
 #ifdef CONFIG_LOCKDEP
-#define MTREE_INIT_EXT(name, __flags, __lock) {				\
-	.ma_external_lock = &(__lock).dep_map,				\
-	.ma_flags = (__flags),						\
+#define MTREE_INIT_EXT(name, flags, lock) {				\
+	.ma_external_lock = &(lock).dep_map,				\
+	.ma_flags = flags,						\
 	.ma_root = NULL,						\
 }
 #else
-#define MTREE_INIT_EXT(name, __flags, __lock)	MTREE_INIT(name, __flags)
+#define MTREE_INIT_EXT(name, flags, lock)	MTREE_INIT(name, flags)
 #endif
 
 #define DEFINE_MTREE(name)						\
@@ -520,8 +520,8 @@
  * Note: may return the zero entry.
  *
  */
-#define mas_for_each(__mas, __entry, __max) \
-	while (((__entry) = mas_find((__mas), (__max))) != NULL)
+#define mas_for_each(mas, entry, max) \
+	while (((entry) = mas_find((mas), (max))) != NULL)
 
 
 /**
@@ -654,9 +654,9 @@
  *
  * Note: Will not return the zero entry.
  */
-#define mt_for_each(__tree, __entry, __index, __max) \
-	for (__entry = mt_find(__tree, &(__index), __max); \
-		__entry; __entry = mt_find_after(__tree, &(__index), __max))
+#define mt_for_each(tree, entry, index, max) \
+	for (entry = mt_find(tree, &(index), max); \
+		entry; entry = mt_find_after(tree, &(index), max))
 
 
 #ifdef CONFIG_DEBUG_MAPLE_TREE
@@ -665,12 +665,12 @@
 
 void mt_dump(const struct maple_tree *mt);
 void mt_validate(struct maple_tree *mt);
-#define MT_BUG_ON(__tree, __x) do {					\
+#define MT_BUG_ON(tree, x) do {						\
 	atomic_inc(&maple_tree_tests_run);				\
-	if (__x) {							\
+	if (x) {							\
 		pr_info("BUG at %s:%d (%u)\n",				\
-		__func__, __LINE__, __x);				\
-		mt_dump(__tree);					\
+		__func__, __LINE__, x);					\
+		mt_dump(tree);						\
 		pr_info("Pass: %u Run:%u\n",				\
 			atomic_read(&maple_tree_tests_passed),		\
 			atomic_read(&maple_tree_tests_run));		\
@@ -680,7 +680,7 @@
 	}								\
 } while (0)
 #else
-#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
+#define MT_BUG_ON(tree, x) BUG_ON(x)
 #endif /* CONFIG_DEBUG_MAPLE_TREE */
 
 #endif /*_LINUX_MAPLE_TREE_H */
--- include/linux/mm.h	2022-07-16 21:05:07.625084770 -0700
+++ ../25-pre-maple/include/linux/mm.h	2022-07-16 21:05:37.847526599 -0700
@@ -683,12 +683,11 @@
 	return vmi->mas.index;
 }
 
-#define for_each_vma(__vmi, __vma)					\
-	while (((__vma) = vma_next(&(__vmi))) != NULL)
+#define for_each_vma(vmi, vma)		while ((vma = vma_next(&(vmi))) != NULL)
 
 /* The MM code likes to work with exclusive end addresses */
-#define for_each_vma_range(__vmi, __vma, __end)				\
-	while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
+#define for_each_vma_range(vmi, vma, end)				\
+	while ((vma = vma_find(&(vmi), (end) - 1)) != NULL)
 
 #ifdef CONFIG_SHMEM
 /*
--- include/linux/mm_types.h	2022-07-16 21:05:07.625084770 -0700
+++ ../25-pre-maple/include/linux/mm_types.h	2022-07-16 21:05:37.848526614 -0700
@@ -681,11 +681,11 @@
 	struct ma_state mas;
 };
 
-#define VMA_ITERATOR(name, __mm, __addr)				\
+#define VMA_ITERATOR(name, mm, addr) 					\
 	struct vma_iterator name = {					\
 		.mas = {						\
-			.tree = &(__mm)->mm_mt,				\
-			.index = __addr,				\
+			.tree = &mm->mm_mt,				\
+			.index = addr,					\
 			.node = MAS_START,				\
 		},							\
 	}




--- mm/memory.c	2022-07-16 21:05:07.627084799 -0700
+++ ../25-pre-maple/mm/memory.c	2022-07-16 21:05:37.980528543 -0700
@@ -1699,6 +1699,7 @@
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlb: address of the caller's struct mmu_gather
+ * @mt: The maple tree pointer for the VMAs
  * @vma: the starting vma
  * @start_addr: virtual address at which to start unmapping
  * @end_addr: virtual address at which to end unmapping
--- mm/mmap.c	2022-07-16 21:05:07.716086100 -0700
+++ ../25-pre-maple/mm/mmap.c	2022-07-16 21:05:38.104530356 -0700
@@ -341,27 +341,28 @@
 	MA_STATE(mas, mt, 0, 0);
 
 	mt_validate(&mm->mm_mt);
+
 	mas_for_each(&mas, vma_mt, ULONG_MAX) {
 		if ((vma_mt->vm_start != mas.index) ||
 		    (vma_mt->vm_end - 1 != mas.last)) {
 			pr_emerg("issue in %s\n", current->comm);
 			dump_stack();
 			dump_vma(vma_mt);
-			pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
+			pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
 				 mas.index, mas.last);
-			pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
+			pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
 				 vma_mt->vm_start, vma_mt->vm_end);
 
 			mt_dump(mas.tree);
 			if (vma_mt->vm_end != mas.last + 1) {
-				pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n",
+				pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n",
 						mm, vma_mt->vm_start, vma_mt->vm_end,
 						mas.index, mas.last);
 				mt_dump(mas.tree);
 			}
 			VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
 			if (vma_mt->vm_start != mas.index) {
-				pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n",
+				pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n",
 						mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
 				mt_dump(mas.tree);
 			}
@@ -448,7 +449,7 @@
 		unsigned long vm_start = max(addr, vma->vm_start);
 		unsigned long vm_end = min(end, vma->vm_end);
 
-		nr_pages += PHYS_PFN(vm_end - vm_start);
+		nr_pages += (vm_end - vm_start) / PAGE_SIZE;
 	}
 
 	return nr_pages;
@@ -710,11 +711,11 @@
 				 * remove_next == 1 is case 1 or 7.
 				 */
 				remove_next = 1 + (end > next->vm_end);
-				if (remove_next == 2)
-					next_next = find_vma(mm, next->vm_end);
-
+				next_next = find_vma(mm, next->vm_end);
 				VM_WARN_ON(remove_next == 2 &&
 					   end != next_next->vm_end);
+				/* trim end to next, for case 6 first pass */
+				end = next->vm_end;
 			}
 
 			exporter = next;
@@ -725,7 +726,7 @@
 			 * next, if the vma overlaps with it.
 			 */
 			if (remove_next == 2 && !next->anon_vma)
-				exporter = next_next;
+				exporter = find_vma(mm, next->vm_end);
 
 		} else if (end > next->vm_start) {
 			/*
@@ -762,6 +763,7 @@
 				return error;
 		}
 	}
+again:
 	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
 
 	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
@@ -852,8 +854,6 @@
 
 	if (remove_next && file) {
 		__remove_shared_vm_struct(next, file, mapping);
-		if (remove_next == 2)
-			__remove_shared_vm_struct(next_next, file, mapping);
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -881,7 +881,6 @@
 	}
 
 	if (remove_next) {
-again:
 		if (file) {
 			uprobe_munmap(next, next->vm_start, next->vm_end);
 			fput(file);
@@ -890,22 +889,45 @@
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
-		if (remove_next != 2)
-			BUG_ON(vma->vm_end < next->vm_end);
+		BUG_ON(vma->vm_end < next->vm_end);
 		vm_area_free(next);
 
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
-		 * we must remove next_next too.
+		 * we must remove another next too. It would clutter
+		 * up the code too much to do both in one go.
 		 */
+		if (remove_next != 3) {
+			/*
+			 * If "next" was removed and vma->vm_end was
+			 * expanded (up) over it, in turn
+			 * "next->prev->vm_end" changed and the
+			 * "vma->next" gap must be updated.
+			 */
+			next = next_next;
+		} else {
+			/*
+			 * For the scope of the comment "next" and
+			 * "vma" considered pre-swap(): if "vma" was
+			 * removed, next->vm_start was expanded (down)
+			 * over it and the "next" gap must be updated.
+			 * Because of the swap() the post-swap() "vma"
+			 * actually points to pre-swap() "next"
+			 * (post-swap() "next" as opposed is now a
+			 * dangling pointer).
+			 */
+			next = vma;
+		}
 		if (remove_next == 2) {
+			mas_reset(&mas);
 			remove_next = 1;
-			next = next_next;
+			end = next->vm_end;
 			goto again;
 		}
 	}
-	if (insert && file)
+	if (insert && file) {
 		uprobe_mmap(insert);
+	}
 
 	mas_destroy(&mas);
 	validate_mm(mm);
@@ -1613,8 +1635,8 @@
 	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-/**
- * unmapped_area() - Find an area between the low_limit and the high_limit with
+/*
+ * unmapped_area() Find an area between the low_limit and the high_limit with
  * the correct alignment and offset, all from @info. Note: current->mm is used
  * for the search.
  *
@@ -1640,15 +1662,12 @@
 
 	gap = mas.index;
 	gap += (info->align_offset - gap) & info->align_mask;
-	VM_BUG_ON(gap + info->length > info->high_limit);
-	VM_BUG_ON(gap + info->length > mas.last);
 	return gap;
 }
 
-/**
- * unmapped_area_topdown() - Find an area between the low_limit and the
+/* unmapped_area_topdown() Find an area between the low_limit and the
  * high_limit with * the correct alignment and offset at the highest available
- * address, all from @info. Note: current->mm is used for the search.
+ * address, all from * @info. Note: current->mm is used for the search.
  *
  * @info: The unmapped area information including the range (low_limit -
  * hight_limit), the alignment offset and mask.
@@ -1671,8 +1690,6 @@
 
 	gap = mas.last + 1 - info->length;
 	gap -= (gap - info->align_offset) & info->align_mask;
-	VM_BUG_ON(gap < info->low_limit);
-	VM_BUG_ON(gap < mas.index);
 	return gap;
 }
 
@@ -1884,12 +1901,12 @@
 EXPORT_SYMBOL(find_vma_intersection);
 
 /**
- * find_vma() - Find the VMA for a given address, or the next VMA.
+ * find_vma() - Find the VMA for a given address, or the next vma.
  * @mm: The mm_struct to check
  * @addr: The address
  *
- * Returns: The VMA associated with addr, or the next VMA.
- * May return %NULL in the case of no VMA at addr or above.
+ * Returns: The VMA associated with addr, or the next vma.
+ * May return %NULL in the case of no vma at addr or above.
  */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
@@ -2642,7 +2659,7 @@
 	    (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
 				       pgoff, vma->vm_userfaultfd_ctx, NULL) :
 		   can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
-				       NULL_VM_UFFD_CTX, NULL))) {
+				       NULL_VM_UFFD_CTX , NULL))) {
 		merge_start = prev->vm_start;
 		vma = prev;
 		vm_pgoff = prev->vm_pgoff;
@@ -3036,7 +3053,6 @@
 		unsigned long addr, unsigned long len, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-
 	validate_mm_mt(mm);
 
 	/*
@@ -3092,7 +3108,7 @@
 	vma->vm_flags = flags;
 	vma->vm_page_prot = vm_get_page_prot(flags);
 	mas_set_range(mas, vma->vm_start, addr + len - 1);
-	if (mas_store_gfp(mas, vma, GFP_KERNEL))
+	if ( mas_store_gfp(mas, vma, GFP_KERNEL))
 		goto mas_store_fail;
 
 	mm->map_count++;
@@ -3155,7 +3171,7 @@
 	vma = mas_prev(&mas, 0);
 	if (!vma || vma->vm_end != addr || vma_policy(vma) ||
 	    !can_vma_merge_after(vma, flags, NULL, NULL,
-				 addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
+				 addr >> PAGE_SHIFT,NULL_VM_UFFD_CTX, NULL))
 		vma = NULL;
 
 	ret = do_brk_flags(&mas, vma, addr, len, flags);




More information about the maple-tree mailing list