[PATCH] makedumpfile: Looking up page.compound_order/compound_dtor to exclude hugepages.

Atsushi Kumagai ats-kumagai at wm.jp.nec.com
Tue Jan 26 23:39:08 PST 2016


Hello,

This is for hugepage filtering on linux 4.4 and later.
Without this patch, hugepages are not excluded correctly on those kernels.
Worse, unrelated pages can be excluded by mistake.
This patch requires the kernel side fix which I've posted:

  https://lkml.org/lkml/2016/1/27/92

Any comments are helpful.

Thanks,
Atsushi Kumagai

-----------------------------------------------------------------------------
Required for kernel 4.4

Due to some changes in struct page, hugepages wouldn't be removed on
linux 4.4. To detect hugepages, makedumpfile reads page.lru.prev to get
"order" (number of hugepages) and page.lru.next to get "dtor" (destructor
for hugepages), but the offsets of these two members were changed in
linux 4.4.

     kernel    |            order            |            dtor
    version    |      member    |   offset   |    member     |   offset
 --------------+----------------+------------+---------------+-----------
       - v3.19 |   lru.prev     |    as is   |   lru.next    |   as is
  v4.0 - v4.3  | compound_order | ==lru.prev | compound_dtor | ==lru.next
  v4.4 -       | compound_order | !=lru.prev | compound_dtor | !=lru.next

As shown above, OFFSET(page.compound_order) and OFFSET(page.compound_dtor)
are required in VMCOREINFO on linux 4.4 and later. However, at least linux 4.4
doesn't export them, so on such kernels we have to give up hugepage filtering
unless a vmlinux is provided.

Further, the content of page.compound_dtor was changed in linux 4.4 from the
direct address of the dtor to its ID.

Signed-off-by: Atsushi Kumagai <ats-kumagai at wm.jp.nec.com>
---
 makedumpfile.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 makedumpfile.h |  5 ++--
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index b802446..cd6c4de 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -240,6 +240,15 @@ is_in_same_page(unsigned long vaddr1, unsigned long vaddr2)
 	return FALSE;
 }
 
+static inline int
+isHugetlb(unsigned long dtor)	/* dtor: destructor address, or ID on 4.4+ */
+{
+        return ((NUMBER(HUGETLB_PAGE_DTOR) != NOT_FOUND_NUMBER)
+		&& (NUMBER(HUGETLB_PAGE_DTOR) == dtor))
+                || ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL)
+                    && (SYMBOL(free_huge_page) == dtor));
+}
+
 static inline unsigned long
 calculate_len_buf_out(long page_size)
 {
@@ -1614,6 +1623,8 @@ get_structure_info(void)
 	OFFSET_INIT(page.mapping, "page", "mapping");
 	OFFSET_INIT(page._mapcount, "page", "_mapcount");
 	OFFSET_INIT(page.private, "page", "private");
+	OFFSET_INIT(page.compound_dtor, "page", "compound_dtor");
+	OFFSET_INIT(page.compound_order, "page", "compound_order");
 
 	/*
 	 * Some vmlinux(s) don't have debugging information about
@@ -1720,6 +1731,8 @@ get_structure_info(void)
 			NUMBER(PG_head_mask) = 1UL << NUMBER(PG_head);
 	}
 
+	ENUM_NUMBER_INIT(HUGETLB_PAGE_DTOR, "HUGETLB_PAGE_DTOR");
+
 	ENUM_TYPE_SIZE_INIT(pageflags, "pageflags");
 
 	TYPEDEF_SIZE_INIT(nodemask_t, "nodemask_t");
@@ -2164,6 +2177,8 @@ write_vmcoreinfo_data(void)
 	WRITE_MEMBER_OFFSET("page.lru", page.lru);
 	WRITE_MEMBER_OFFSET("page._mapcount", page._mapcount);
 	WRITE_MEMBER_OFFSET("page.private", page.private);
+	WRITE_MEMBER_OFFSET("page.compound_dtor", page.compound_dtor);
+	WRITE_MEMBER_OFFSET("page.compound_order", page.compound_order);
 	WRITE_MEMBER_OFFSET("mem_section.section_mem_map",
 	    mem_section.section_mem_map);
 	WRITE_MEMBER_OFFSET("pglist_data.node_zones", pglist_data.node_zones);
@@ -2233,6 +2248,8 @@ write_vmcoreinfo_data(void)
 	WRITE_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE);
 	WRITE_NUMBER("KERNEL_IMAGE_SIZE", KERNEL_IMAGE_SIZE);
 
+	WRITE_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR);
+
 	/*
 	 * write the source file of 1st kernel
 	 */
@@ -2499,6 +2516,8 @@ read_vmcoreinfo(void)
 	READ_MEMBER_OFFSET("page.lru", page.lru);
 	READ_MEMBER_OFFSET("page._mapcount", page._mapcount);
 	READ_MEMBER_OFFSET("page.private", page.private);
+	READ_MEMBER_OFFSET("page.compound_dtor", page.compound_dtor);
+	READ_MEMBER_OFFSET("page.compound_order", page.compound_order);
 	READ_MEMBER_OFFSET("mem_section.section_mem_map",
 	    mem_section.section_mem_map);
 	READ_MEMBER_OFFSET("pglist_data.node_zones", pglist_data.node_zones);
@@ -2568,6 +2587,8 @@ read_vmcoreinfo(void)
 	READ_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE);
 	READ_NUMBER("KERNEL_IMAGE_SIZE", KERNEL_IMAGE_SIZE);
 
+	READ_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR);
+
 	return TRUE;
 }
 
@@ -5487,6 +5508,7 @@ __exclude_unnecessary_pages(unsigned long mem_map,
 	unsigned char page_cache[SIZE(page) * PGMM_CACHED];
 	unsigned char *pcache;
 	unsigned int _count, _mapcount = 0, compound_order = 0;
+	unsigned int order_offset, dtor_offset;
 	unsigned long flags, mapping, private = 0;
 	unsigned long compound_dtor;
 
@@ -5555,26 +5577,52 @@ __exclude_unnecessary_pages(unsigned long mem_map,
 		_count  = UINT(pcache + OFFSET(page._count));
 		mapping = ULONG(pcache + OFFSET(page.mapping));
 
-		if ((index_pg < PGMM_CACHED - 1) &&
-		    isCompoundHead(flags)) {
-			compound_order = ULONG(pcache + SIZE(page) + OFFSET(page.lru)
-					       + OFFSET(list_head.prev));
-			compound_dtor = ULONG(pcache + SIZE(page) + OFFSET(page.lru)
-					     + OFFSET(list_head.next));
+		if (OFFSET(page.compound_order) != NOT_FOUND_SYMBOL) {
+			order_offset = OFFSET(page.compound_order);
+		} else {
+			if (info->kernel_version < KERNEL_VERSION(4, 4, 0))
+				order_offset = OFFSET(page.lru) + OFFSET(list_head.prev);
+			else
+				order_offset = 0;
+		}
+
+		if (OFFSET(page.compound_dtor) != NOT_FOUND_SYMBOL) {
+			dtor_offset = OFFSET(page.compound_dtor);
+		} else {
+			if (info->kernel_version < KERNEL_VERSION(4, 4, 0))
+				dtor_offset = OFFSET(page.lru) + OFFSET(list_head.next);
+			else
+				dtor_offset = 0;
+		}
+
+		compound_order = 0;
+		compound_dtor = 0;
+		/*
+		 * The last pfn of the mem_map cache must not be compound page
+		 * since all compound pages are aligned to its page order and
+		 * PGMM_CACHED is a power of 2.
+		 */
+		if ((index_pg < PGMM_CACHED - 1) && isCompoundHead(flags)) {
+			if (order_offset)
+				compound_order = USHORT(pcache + SIZE(page) + order_offset);
+
+			if (dtor_offset) {
+				/*
+				 * compound_dtor has been changed from the address of the
+				 * destructor to its ID since linux-4.4.
+				 */
+				if (info->kernel_version >= KERNEL_VERSION(4, 4, 0)) {
+					compound_dtor = USHORT(pcache + SIZE(page) + dtor_offset);
+				} else {
+					compound_dtor = ULONG(pcache + SIZE(page) + dtor_offset);
+				}
+			}
 
 			if ((compound_order >= sizeof(unsigned long) * 8)
 			    || ((pfn & ((1UL << compound_order) - 1)) != 0)) {
 				/* Invalid order */
 				compound_order = 0;
 			}
-		} else {
-			/*
-			 * The last pfn of the mem_map cache must not be compound page
-			 * since all compound pages are aligned to its page order and
-			 * PGMM_CACHED is a power of 2.
-			 */
-			compound_order = 0;
-			compound_dtor = 0;
 		}
 
 		if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE)
diff --git a/makedumpfile.h b/makedumpfile.h
index e626be8..2912ee6 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -153,8 +153,6 @@ test_bit(int nr, unsigned long addr)
 #define isLRU(flags)		test_bit(NUMBER(PG_lru), flags)
 #define isPrivate(flags)	test_bit(NUMBER(PG_private), flags)
 #define isCompoundHead(flags)   (!!((flags) & NUMBER(PG_head_mask)))
-#define isHugetlb(dtor)         ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL) \
-				 && (SYMBOL(free_huge_page) == dtor))
 #define isSwapCache(flags)	test_bit(NUMBER(PG_swapcache), flags)
 #define isHWPOISON(flags)	(test_bit(NUMBER(PG_hwpoison), flags) \
 				&& (NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER))
@@ -1481,6 +1479,8 @@ struct offset_table {
 		long	lru;
 		long	_mapcount;
 		long	private;
+		long	compound_dtor;
+		long	compound_order;
 	} page;
 	struct mem_section {
 		long	section_mem_map;
@@ -1676,6 +1676,7 @@ struct number_table {
 	long	KERNEL_IMAGE_SIZE;
 	long	SECTION_SIZE_BITS;
 	long	MAX_PHYSMEM_BITS;
+	long    HUGETLB_PAGE_DTOR;
 };
 
 struct srcfile_table {
-- 
1.9.0



More information about the kexec mailing list