[PATCH 2/2] exclude page structures of non-dumped pages
Cliff Wickman
cpw at sgi.com
Tue Dec 9 14:07:54 PST 2014
From: Cliff Wickman <cpw at sgi.com>
This patch adds a -e option to makedumpfile.
The -e option excludes kernel pages that contain nothing but kernel page structures
for pages that are not being included in the dump.
A page structure (56 bytes) exists for every 4096-byte page.
These page structures occupy about 3.67 million pages, or about 14GB, per terabyte of system memory!
Without -e a 2-terabyte system can be dumped (compressed) to a file of about 3.7G.
With -e that is reduced to about 590M. And the time and space savings multiply for
each additional terabyte of memory in the system.
The only disadvantage is that various options of the crash 'kmem' command (that
walk lists of page structures) will not work.
---
diskdump_mod.h | 1
makedumpfile.c | 671 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
makedumpfile.h | 59 ++++-
print_info.c | 7
4 files changed, 735 insertions(+), 3 deletions(-)
Index: makedumpfile-1.5.7/diskdump_mod.h
===================================================================
--- makedumpfile-1.5.7.orig/diskdump_mod.h
+++ makedumpfile-1.5.7/diskdump_mod.h
@@ -95,6 +95,7 @@ struct kdump_sub_header {
#define DUMP_DH_COMPRESSED_LZO 0x2 /* paged is compressed with lzo */
#define DUMP_DH_COMPRESSED_SNAPPY 0x4
/* paged is compressed with snappy */
+#define DUMP_DH_EXCLUDED_VMEMMAP 0x8 /* unused vmemmap pages are excluded */
/* descriptor of each page for vmcore */
typedef struct page_desc {
Index: makedumpfile-1.5.7/print_info.c
===================================================================
--- makedumpfile-1.5.7.orig/print_info.c
+++ makedumpfile-1.5.7/print_info.c
@@ -58,7 +58,7 @@ print_usage(void)
MSG("\n");
MSG("Usage:\n");
MSG(" Creating DUMPFILE:\n");
- MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] [-j] [-x VMLINUX|-i VMCOREINFO] VMCORE\n");
+ MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] [-j] [-e] [-x VMLINUX|-i VMCOREINFO] VMCORE\n");
MSG(" DUMPFILE\n");
MSG("\n");
MSG(" Creating DUMPFILE with filtered kernel data specified through filter config\n");
@@ -111,6 +111,11 @@ print_usage(void)
MSG(" [-j]:\n");
MSG(" Use raw (O_DIRECT) i/o on dump and bitmap files to avoid expanding kernel pagecache.\n");
MSG("\n");
+ MSG("\n");
+ MSG(" [-e]:\n");
+ MSG(" Exclude page structures (vmemmap) for unused pages.\n");
+ MSG("\n");
+ MSG("\n");
MSG(" [-d DL]:\n");
MSG(" Specify the type of unnecessary page for analysis.\n");
MSG(" Pages of the specified type are not copied to DUMPFILE. The page type\n");
Index: makedumpfile-1.5.7/makedumpfile.h
===================================================================
--- makedumpfile-1.5.7.orig/makedumpfile.h
+++ makedumpfile-1.5.7/makedumpfile.h
@@ -44,6 +44,9 @@
#include "diskdump_mod.h"
#include "sadump_mod.h"
+#define VMEMMAPSTART 0xffffea0000000000UL
+#define BITS_PER_WORD 64
+
/*
* Result of command
*/
@@ -484,6 +487,7 @@ do { \
#define VMALLOC_END (info->vmalloc_end)
#define VMEMMAP_START (info->vmemmap_start)
#define VMEMMAP_END (info->vmemmap_end)
+#define PMASK (0x7ffffffffffff000UL)
#ifdef __arm__
#define KVBASE_MASK (0xffff)
@@ -568,15 +572,20 @@ do { \
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
#define PTRS_PER_PGD (512)
+#define PGD_SHIFT (39)
+#define PUD_SHIFT (30)
#define PMD_SHIFT (21)
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+#define PTRS_PER_PUD (512)
#define PTRS_PER_PMD (512)
#define PTRS_PER_PTE (512)
#define PTE_SHIFT (12)
#define pml4_index(address) (((address) >> PML4_SHIFT) & (PTRS_PER_PML4 - 1))
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+#define pgd4_index(address) (((address) >> PGD_SHIFT) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pte_index(address) (((address) >> PTE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -742,7 +751,6 @@ do { \
/*
* 4 Levels paging
*/
-#define PUD_SHIFT (PMD_SHIFT + PTRS_PER_PTD_SHIFT)
#define PGDIR_SHIFT_4L (PUD_SHIFT + PTRS_PER_PTD_SHIFT)
#define MASK_PUD ((1UL << REGION_SHIFT) - 1) & (~((1UL << PUD_SHIFT) - 1))
@@ -1546,6 +1554,52 @@ struct srcfile_table {
char pud_t[LEN_SRCFILE];
};
+/*
+ * Descriptor for one group of vmemmap pages: where the page structures
+ * reside, and which pfn's those page structures represent.
+ *
+ * The pages that hold the page structures are 2MB pages, so their pfn's
+ * are all multiples of 0x200.
+ * A page structure is 7 64-bit words long (0x38), so page structures
+ * overlap the 2MB boundaries.  Each page structure represents a 4k page.
+ * A 4k page is defined here to be represented on a 2MB page if its page
+ * structure 'ends' on that page (even if it began on the page before).
+ */
+struct vmap_pfns {
+	/* list linkage */
+	struct vmap_pfns *next, *prev;
+	/*
+	 * Literal first and last pfns of the 2MB pages on which the page
+	 * structures reside (inclusive; not start and end+1).
+	 */
+	unsigned long vmap_pfn_start;
+	unsigned long vmap_pfn_end;
+	/*
+	 * Literal first and last pfns represented on those pages
+	 * (inclusive; not start and end+1).
+	 * The first page struct is at least partly on the first page; the
+	 * last page struct is entirely on the last page.
+	 */
+	unsigned long rep_pfn_start;
+	unsigned long rep_pfn_end;
+};
+
+/*
+ * Bookkeeping for saving a list of pfns to a buffer, and then to a file
+ * if necessary.
+ */
+struct save_control {
+	int sc_fd;		/* descriptor of the pfn file */
+	char *sc_filename;	/* name of the pfn file */
+	char *sc_buf_malloced;	/* buffer as returned by malloc */
+	char *sc_buf;		/* aligned buffer within sc_buf_malloced */
+	long sc_buflen;		/* length of buffer never changes */
+	long sc_bufposition;	/* offset of next slot for write, or next to be read */
+	long sc_filelen;	/* length of valid data written */
+	long sc_fileposition;	/* offset in file of next entry to be read */
+};
+/* layout of a single record in the save_control buffer and file */
+struct sc_entry {
+	unsigned long startpfn;	/* first pfn of the range */
+	unsigned long numpfns;	/* number of pfns in the range */
+};
+
extern struct symbol_table symbol_table;
extern struct size_table size_table;
extern struct offset_table offset_table;
@@ -1700,6 +1754,9 @@ int get_xen_info_ia64(void);
#define get_xen_info_arch(X) FALSE
#endif /* s390x */
+#define PAGESHFT 12 /* assuming a 4k page */
+#define PSE 128 /* bit 7 */
+
struct cycle {
mdf_pfn_t start_pfn;
mdf_pfn_t end_pfn;
Index: makedumpfile-1.5.7/makedumpfile.c
===================================================================
--- makedumpfile-1.5.7.orig/makedumpfile.c
+++ makedumpfile-1.5.7/makedumpfile.c
@@ -31,9 +31,12 @@ struct offset_table offset_table;
struct array_table array_table;
struct number_table number_table;
struct srcfile_table srcfile_table;
+struct save_control sc;
struct vm_table vt = { 0 };
struct DumpInfo *info = NULL;
+struct vmap_pfns *gvmem_pfns;
+int nr_gvmem_pfns;
char filename_stdout[] = FILENAME_STDOUT;
@@ -84,6 +87,7 @@ long blocksize;
int retcd = FAILED; /* return code */
// jflag is rawio on the dumpfile and bitmap file
int jflag = 0;
+int eflag = 0;
#define INITIALIZE_LONG_TABLE(table, value) \
do { \
@@ -5131,6 +5135,340 @@ copy_bitmap(void)
return TRUE;
}
+/*
+ * Given a range of unused pfn's, check whether we can drop the vmemmap pages
+ * that represent them.
+ * (pfn ranges are literally start and end, not start and end+1)
+ * The lookup uses gvmem_pfns, the array of vmemmap pfns and the pfns they
+ * represent; the first entry that contains the whole range is used.
+ *
+ * The start of the range is rounded up to a vmemmap page boundary and the
+ * end is rounded down, so only pages lying wholly inside the range are
+ * reported.
+ *
+ * On success *vmappfn is the first vmemmap pfn to drop and *nmapnpfns the
+ * number of pfns to drop.  *vmappfn may be written even when 0 is returned.
+ *
+ * Return 1 for delete, 0 for not to delete.
+ */
+int
+find_vmemmap_pages(unsigned long startpfn, unsigned long endpfn, unsigned long *vmappfn,
+			unsigned long *nmapnpfns)
+{
+	int i;
+	long npfns_offset, vmemmap_offset;
+	long start_vmemmap_pfn, end_vmemmap_pfn, npages;
+	struct vmap_pfns *vmapp;
+	int pagesize = info->page_size;
+
+	for (i = 0; i < nr_gvmem_pfns; i++) {
+		vmapp = gvmem_pfns + i;
+		if ((startpfn < vmapp->rep_pfn_start) ||
+		    (endpfn > vmapp->rep_pfn_end))
+			continue;
+
+		/* byte offset of the first page struct; round up to a page boundary */
+		npfns_offset = startpfn - vmapp->rep_pfn_start;
+		vmemmap_offset = npfns_offset * size_table.page;
+		if (vmemmap_offset % pagesize)
+			vmemmap_offset += (pagesize - (vmemmap_offset % pagesize));
+		start_vmemmap_pfn = vmapp->vmap_pfn_start + (vmemmap_offset / pagesize);
+		*vmappfn = start_vmemmap_pfn;
+
+		/* byte offset of the last page struct; round down to page boundary */
+		npfns_offset = endpfn - vmapp->rep_pfn_start;
+		vmemmap_offset = npfns_offset * size_table.page;
+		vmemmap_offset -= (vmemmap_offset % pagesize);
+		end_vmemmap_pfn = vmapp->vmap_pfn_start + (vmemmap_offset / pagesize);
+
+		/*
+		 * A short range can round to an end that lies before the
+		 * start.  Treat that like "nothing to drop" instead of
+		 * handing back a negative count converted to unsigned.
+		 */
+		npages = end_vmemmap_pfn - start_vmemmap_pfn;
+		if (npages <= 0)
+			return 0;
+		*nmapnpfns = npages;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Finalize the structure for saving pfn's to be deleted.
+ * Releases the buffer and the file name allocated by init_save_control()
+ * and closes the pfn file.  The fields are reset so that nothing is left
+ * pointing at freed memory or a closed descriptor.
+ */
+void
+finalize_save_control()
+{
+	free(sc.sc_buf_malloced);
+	sc.sc_buf_malloced = NULL;
+	sc.sc_buf = NULL;
+
+	/* the name was malloc'ed by init_save_control() */
+	free(sc.sc_filename);
+	sc.sc_filename = NULL;
+
+	close(sc.sc_fd);
+	sc.sc_fd = -1;
+	return;
+}
+
+/*
+ * Switch the save structure from writing to reading: flush what is still
+ * in the buffer, rewind the file, and arrange for the first get_deletes()
+ * call to read a block.  Does nothing when the buffer position is 0.
+ */
+void
+reset_save_control()
+{
+	int written;
+
+	if (sc.sc_bufposition != 0) {
+		/* direct i/o, so have to write the whole buffer */
+		written = write(sc.sc_fd, sc.sc_buf, sc.sc_buflen);
+		if (written != sc.sc_buflen) {
+			fprintf(stderr, "reset: Can't write a page to %s\n",
+				sc.sc_filename);
+			exit(1);
+		}
+		/* only the filled part of the buffer counts as valid data */
+		sc.sc_filelen += sc.sc_bufposition;
+
+		if (lseek(sc.sc_fd, 0, SEEK_SET) < 0) {
+			fprintf(stderr, "Can't seek the pfn file %s).", sc.sc_filename);
+			exit(1);
+		}
+		sc.sc_fileposition = 0;
+		/* a "full" buffer position triggers the 1st read */
+		sc.sc_bufposition = sc.sc_buflen;
+	}
+}
+
+/*
+ * Initialize the structure for saving pfn's to be deleted.
+ *
+ * The pfn file is named "<prefix>/makedumpfilepfns", where <prefix> is the
+ * part of info->name_dumpfile that precedes its first "/var" (for the crash
+ * kernel environment the dump name looks like /mnt//var/....).  One trailing
+ * '/' of the prefix is dropped.  The file is opened with O_DIRECT and
+ * unlinked right after it has been opened.
+ *
+ * Any failure is fatal: a message is printed and the process exits.
+ */
+void
+init_save_control()
+{
+	static const char pfnfile[] = "/makedumpfilepfns";
+	const char *dumpname = info->name_dumpfile;
+	const char *var;
+	char *filename;
+	size_t len;
+	int flags;
+
+	/*
+	 * Look for the "/var" component itself.  Anchoring on the first 'v'
+	 * would reject names with an earlier 'v' and could look at the byte
+	 * before the start of the name.
+	 */
+	var = strstr(dumpname, "/var");
+	if (!var) {
+		printf("no /var found in name_dumpfile %s\n", info->name_dumpfile);
+		exit(1);
+	}
+	len = var - dumpname;
+	if (len > 0 && dumpname[len - 1] == '/')
+		len -= 1;
+
+	/* size the name from the prefix; sizeof(pfnfile) includes the NUL */
+	filename = malloc(len + sizeof(pfnfile));
+	if (!filename) {
+		fprintf(stderr, "Can't allocate the pfn file name.\n");
+		exit(1);
+	}
+	memcpy(filename, dumpname, len);
+	memcpy(filename + len, pfnfile, sizeof(pfnfile));
+	sc.sc_filename = filename;
+
+	flags = O_RDWR|O_CREAT|O_TRUNC|O_DIRECT;
+	if ((sc.sc_fd = open(sc.sc_filename, flags, S_IRUSR|S_IWUSR)) < 0) {
+		fprintf(stderr, "Can't open the pfn file %s.\n",
+			sc.sc_filename);
+		exit(1);
+	}
+	unlink(sc.sc_filename);
+
+	sc.sc_buf_malloced = malloc(blocksize + DIRECT_ALIGN);
+	if (!sc.sc_buf_malloced) {
+		fprintf(stderr, "Can't allocate a page for pfn buf.\n");
+		exit(1);
+	}
+	/* round up to the next DIRECT_ALIGN boundary inside the allocation */
+	sc.sc_buf = sc.sc_buf_malloced -
+		((unsigned long)sc.sc_buf_malloced % DIRECT_ALIGN) + DIRECT_ALIGN;
+	sc.sc_buflen = blocksize;
+	sc.sc_bufposition = 0;
+	sc.sc_fileposition = 0;
+	sc.sc_filelen = 0;
+}
+
+/*
+ * Record one (starting pfn, number of pfns) pair for later deletion from
+ * the bitmap.  The pair is appended to the buffer; a full buffer is first
+ * written out to the pfn file.  A failed write is fatal.
+ */
+void
+save_deletes(unsigned long startpfn, unsigned long numpfns)
+{
+	struct sc_entry *entry;
+	int written;
+
+	/* no room left: flush the buffer and start filling it again */
+	if (sc.sc_bufposition == sc.sc_buflen) {
+		written = write(sc.sc_fd, sc.sc_buf, sc.sc_buflen);
+		if (written != sc.sc_buflen) {
+			fprintf(stderr, "save: Can't write a page to %s\n",
+				sc.sc_filename);
+			exit(1);
+		}
+		sc.sc_bufposition = 0;
+		sc.sc_filelen += sc.sc_buflen;
+	}
+
+	entry = (struct sc_entry *)(sc.sc_buf + sc.sc_bufposition);
+	sc.sc_bufposition += sizeof(*entry);
+	entry->numpfns = numpfns;
+	entry->startpfn = startpfn;
+}
+
+/*
+ * Fetch the next (starting pfn, number of pfns) pair saved for deletion
+ * from the bitmap.  A new block is read from the pfn file whenever the
+ * buffer position has reached the end of the buffer; a failed or empty
+ * read is fatal.
+ * Return 0 for success, 1 for 'no more'
+ */
+int
+get_deletes(unsigned long *startpfn, unsigned long *numpfns)
+{
+	struct sc_entry *entry;
+	int nread;
+
+	/* every valid entry has already been handed out */
+	if (sc.sc_fileposition >= sc.sc_filelen)
+		return 1;
+
+	if (sc.sc_bufposition == sc.sc_buflen) {
+		nread = read(sc.sc_fd, sc.sc_buf, sc.sc_buflen);
+		if (nread <= 0) {
+			fprintf(stderr, "Can't read a page from %s.\n", sc.sc_filename);
+			exit(1);
+		}
+		sc.sc_bufposition = 0;
+	}
+
+	entry = (struct sc_entry *)(sc.sc_buf + sc.sc_bufposition);
+	sc.sc_bufposition += sizeof(*entry);
+	sc.sc_fileposition += sizeof(*entry);
+	*startpfn = entry->startpfn;
+	*numpfns = entry->numpfns;
+	return 0;
+}
+
+/*
+ * Read the block of 'bitmap' that covers 'pfn' into the bitmap's buffer
+ * and record its block number.
+ * Return TRUE on success, FALSE (after reporting the error) otherwise.
+ */
+static int
+load_vmemmap_scan_block(struct dump_bitmap *bitmap, unsigned long long pfn)
+{
+	off_t new_offset;
+
+	new_offset = bitmap->offset + BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
+	if (lseek(bitmap->fd, new_offset, SEEK_SET) < 0 ) {
+		ERRMSG("Can't seek the bitmap(%s). %s\n",
+			bitmap->file_name, strerror(errno));
+		return FALSE;
+	}
+	if (read(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP) != BUFSIZE_BITMAP) {
+		ERRMSG("Can't read the bitmap(%s). %s\n",
+			bitmap->file_name, strerror(errno));
+		return FALSE;
+	}
+	bitmap->no_block = pfn / PFN_BUFBITMAP;
+	return TRUE;
+}
+
+/*
+ * Handle one run of wanted words [startword, endword) found in the bitmap
+ * block that starts at pfn 'block_pfn': if the run maps to whole vmemmap
+ * pages, save them for deletion and add them to *deleted_pages.
+ */
+static void
+save_unused_vmemmap_run(unsigned long long block_pfn, int startword, int endword,
+			long *deleted_pages)
+{
+	unsigned long startpfn, endpfn;
+	unsigned long vmapstartpfn, vmapnumpfns;
+	int numwords = endword - startword;
+
+	/* 64 bits represents 64 page structs, which are not even one page
+	   of them (takes at least 73) */
+	if (numwords <= 1)
+		return;
+
+	startpfn = block_pfn + (startword * BITS_PER_WORD);
+	/* pfn ranges are literally start and end, not start and end + 1 */
+	endpfn = startpfn + (numwords * BITS_PER_WORD) - 1;
+	if (find_vmemmap_pages(startpfn, endpfn, &vmapstartpfn, &vmapnumpfns)) {
+		save_deletes(vmapstartpfn, vmapnumpfns);
+		*deleted_pages += vmapnumpfns;
+	}
+}
+
+/*
+ * Find the big holes in bitmap2; they represent ranges for which
+ * we do not need page structures.
+ * Bitmap1 is a map of dumpable (i.e existing) pages.
+ * They must only be pages that exist, so they will be 0 bits
+ * in the 2nd bitmap but 1 bits in the 1st bitmap.
+ * For speed, only worry about whole word full of bits.
+ *
+ * Runs are looked for within one bitmap block at a time, so a run never
+ * extends across a block boundary.  If a bitmap block cannot be read the
+ * error is reported and the scan stops.
+ */
+void
+find_unused_vmemmap_pages(void)
+{
+	struct dump_bitmap *bitmap1 = info->bitmap1;
+	struct dump_bitmap *bitmap2 = info->bitmap2;
+	unsigned long long pfn;
+	unsigned long *lp1, *lp2;
+	int i, sz, startword;
+	long deleted_pages = 0;
+
+	/* sz is words in the block */
+	sz = BUFSIZE_BITMAP / sizeof(unsigned long);
+
+	/* read each block of both bitmaps */
+	for (pfn = 0; pfn < info->max_mapnr; pfn += PFN_BUFBITMAP) { /* size in bits */
+		if (!load_vmemmap_scan_block(bitmap1, pfn))
+			return;
+		if (!load_vmemmap_scan_block(bitmap2, pfn))
+			return;
+
+		/* process this one page of both bitmaps at a time */
+		lp1 = (unsigned long *)bitmap1->buf;
+		lp2 = (unsigned long *)bitmap2->buf;
+		startword = -1;
+		for (i = 0; i < sz; i++, lp1++, lp2++) {
+			/* deal in full 64-page chunks only */
+			if ((*lp1 == 0xffffffffffffffffUL) && (*lp2 == 0)) {
+				/* we are in a series we want */
+				if (startword == -1)
+					startword = i;
+			} else if (startword >= 0) {
+				/* we hit a used page, or a hole in real
+				   memory (or part of one): the run ends */
+				save_unused_vmemmap_run(pfn, startword, i,
+							&deleted_pages);
+				startword = -1;
+			}
+		}
+		/* a run that reaches the end of the block ends there */
+		if (startword >= 0)
+			save_unused_vmemmap_run(pfn, startword, i, &deleted_pages);
+	}
+	PROGRESS_MSG("\nExcluded %ld unused vmemmap pages\n", deleted_pages);
+
+	return;
+}
+
+/*
+ * Walk the saved list of pfn ranges and clear every pfn of every range
+ * in bitmap2.
+ */
+void
+delete_unused_vmemmap_pages(void)
+{
+	unsigned long first, count, n;
+
+	for (;;) {
+		if (get_deletes(&first, &count))
+			break;
+		// a null cycle is passed: this is never to be used in cyclic mode!
+		for (n = 0; n < count; n++)
+			clear_bit_on_2nd_bitmap_for_kernel(first + n, (struct cycle *)0);
+	}
+	return;
+}
+
int
create_2nd_bitmap(void)
{
@@ -5202,6 +5540,15 @@ create_2nd_bitmap(void)
if (!sync_2nd_bitmap())
return FALSE;
+ /* -e means exclude vmemmap page structures for unused pages */
+ if (eflag) {
+ init_save_control();
+ find_unused_vmemmap_pages();
+ reset_save_control();
+ delete_unused_vmemmap_pages();
+ finalize_save_control();
+ }
+
return TRUE;
}
@@ -5719,6 +6066,11 @@ write_kdump_header(struct cache_data *cd
memcpy(&dh->timestamp, &info->timestamp, sizeof(dh->timestamp));
memcpy(&dh->utsname, &info->system_utsname, sizeof(dh->utsname));
blocksize = dh->block_size;
+
+ if (eflag) {
+ dh->status |= DUMP_DH_EXCLUDED_VMEMMAP;
+ }
+
if (info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
dh->status |= DUMP_DH_COMPRESSED_ZLIB;
#ifdef USELZO
@@ -8436,6 +8788,315 @@ writeout_multiple_dumpfiles(void)
return ret;
}
+/*
+ * Append a descriptor for one group of contiguous vmemmap pages to the
+ * circular, doubly linked list headed by *head.
+ * Return the new node, or NULL if it could not be allocated.
+ */
+static struct vmap_pfns *
+append_vmap_group(struct vmap_pfns **head, unsigned long vmap_pfn_start,
+		  unsigned long vmap_pfn_end, unsigned long rep_pfn_start,
+		  unsigned long rep_pfn_end)
+{
+	struct vmap_pfns *node, *tail;
+
+	node = malloc(sizeof(*node));
+	if (!node)
+		return NULL;
+
+	/* pfns of the 2MB pages of page structs */
+	node->vmap_pfn_start = vmap_pfn_start;
+	node->vmap_pfn_end = vmap_pfn_end;
+	/* these (start/end) are literal pfns, not start and end+1 */
+	node->rep_pfn_start = rep_pfn_start;
+	node->rep_pfn_end = rep_pfn_end;
+
+	if (!*head) {
+		*head = node;
+		node->next = node;
+		node->prev = node;
+	} else {
+		tail = (*head)->prev;
+		(*head)->prev = node;
+		tail->next = node;
+		node->next = *head;
+		node->prev = tail;
+	}
+	return node;
+}
+
+/*
+ * Scan the kernel page table for the pfn's of the page structs
+ * Place them in array gvmem_pfns[nr_gvmem_pfns]
+ *
+ * The walk starts at the pgd slot of info->vmemmap_start and covers the
+ * page structs for pfns below get_max_paddr() >> 12.  Consecutive valid
+ * pmd entries that are physically contiguous form one group; a null pmd
+ * (hole) or a physical discontinuity ends the group.
+ *
+ * On any failure an error is reported and the function returns without
+ * setting nr_gvmem_pfns, so no vmemmap pages will be found for exclusion.
+ * A vmemmap pmd that is not a huge page terminates the program.
+ *
+ * NOTE(review): pgd and pud entries are not checked for being empty
+ * before the page they point at is read - confirm that this is intended.
+ */
+void
+find_vmemmap()
+{
+	int i, verbose = 0;
+	int pgd_slot;
+	int start_range = 1;
+	int num_pmds=0, num_pmds_valid=0;
+	int break_in_valids;
+	int do_break, done = 0;
+	int last_valid=0, last_invalid=0;
+	int pagestructsize, structsperhpage, hugepagesize;
+	long page_structs_per_pud;
+	long num_puds, groups = 0;
+	long pgdindex, pudindex, pmdindex;
+	long vaddr_base;
+	long rep_pfn_start = 0, rep_pfn_end = 0;
+	unsigned long init_level4_pgt;
+	unsigned long max_paddr, high_pfn;
+	unsigned long pgd_addr, pud_addr, pmd_addr;
+	unsigned long *pgdp, *pudp, *pmdp;
+	unsigned long pud_page[PTRS_PER_PUD];
+	unsigned long pmd_page[PTRS_PER_PMD];
+	unsigned long vmap_offset_start = 0, vmap_offset_end = 0;
+	unsigned long pmd, tpfn;
+	unsigned long pvaddr = 0;
+	unsigned long data_addr = 0, last_data_addr = 0, start_data_addr = 0;
+	/*
+	 * data_addr is the paddr of the page holding the page structs.
+	 * We keep lists of contiguous pages and the pfn's that their
+	 * page structs represent.
+	 * start_data_addr and last_data_addr mark start/end of those
+	 * contiguous areas.
+	 * An area descriptor is vmap start/end pfn and rep start/end
+	 * of the pfn's represented by the vmap start/end.
+	 */
+	struct vmap_pfns *vmaphead = NULL, *cur, *next;
+
+	init_level4_pgt = SYMBOL(init_level4_pgt);
+	if (init_level4_pgt == NOT_FOUND_SYMBOL) {
+		fprintf(stderr, "init_level4_pgt not found\n");
+		return;
+	}
+	pagestructsize = size_table.page;
+	hugepagesize = PTRS_PER_PMD * info->page_size;
+	vaddr_base = info->vmemmap_start;
+	max_paddr = get_max_paddr();
+	/*
+	 * the page structures are mapped at VMEMMAP_START (info->vmemmap_start)
+	 * for max_paddr >> 12 page structures
+	 */
+	high_pfn = max_paddr >> 12;
+	pgd_slot = pgd4_index(vaddr_base);
+	pgd_addr = vaddr_to_paddr(init_level4_pgt); /* address of pgd */
+	pgd_addr += pgd_slot * sizeof(unsigned long);
+	page_structs_per_pud = (PTRS_PER_PUD * PTRS_PER_PMD * info->page_size) /
+				pagestructsize;
+	num_puds = (high_pfn + page_structs_per_pud - 1) / page_structs_per_pud;
+	pvaddr = VMEMMAP_START;
+	structsperhpage = hugepagesize / pagestructsize;
+
+	/* outer loop is for pud entries in the pgd */
+	for (pgdindex = 0, pgdp = (unsigned long *)pgd_addr; pgdindex < num_puds;
+	     pgdindex++, pgdp++) {
+		/* read the pgd one word at a time, into pud_addr */
+		if (!readmem(PADDR, (unsigned long long)pgdp, (void *)&pud_addr,
+			     sizeof(unsigned long))) {
+			ERRMSG("Can't get pgd entry for slot %d.\n", pgd_slot);
+			goto free_list;
+		}
+		/* mask the pgd entry for the address of the pud page */
+		pud_addr &= PMASK;
+		/* read the entire pud page */
+		if (!readmem(PADDR, (unsigned long long)pud_addr, (void *)pud_page,
+			     PTRS_PER_PUD * sizeof(unsigned long))) {
+			ERRMSG("Can't get pud entry for pgd slot %ld.\n", pgdindex);
+			goto free_list;
+		}
+		/* step thru each pmd address in the pud page */
+		/* pudp points to an entry in the pud page */
+		for (pudp = (unsigned long *)pud_page, pudindex = 0;
+		     pudindex < PTRS_PER_PUD; pudindex++, pudp++) {
+			pmd_addr = *pudp & PMASK;
+			/* read the entire pmd page */
+			if (!readmem(PADDR, pmd_addr, (void *)pmd_page,
+				     PTRS_PER_PMD * sizeof(unsigned long))) {
+				ERRMSG("Can't get pud entry for slot %ld.\n", pudindex);
+				goto free_list;
+			}
+			/* pmdp points to an entry in the pmd */
+			for (pmdp = (unsigned long *)pmd_page, pmdindex = 0;
+			     pmdindex < PTRS_PER_PMD; pmdindex++, pmdp++) {
+				/* linear page position in this page table: */
+				pmd = *pmdp;
+				num_pmds++;
+				tpfn = (pvaddr - VMEMMAP_START) /
+					pagestructsize;
+				if (tpfn >= high_pfn) {
+					done = 1;
+					break;
+				}
+				/*
+				 * vmap_offset_start:
+				 * Starting logical position in the
+				 * vmemmap array for the group stays
+				 * constant until a hole in the table
+				 * or a break in contiguousness.
+				 */
+
+				/*
+				 * Ending logical position in the
+				 * vmemmap array:
+				 */
+				vmap_offset_end += hugepagesize;
+				do_break = 0;
+				break_in_valids = 0;
+				/*
+				 * We want breaks either when:
+				 * - we hit a hole (invalid)
+				 * - we hit a discontiguous page in a string
+				 *   of valids
+				 */
+				if (pmd) {
+					data_addr = (pmd & PMASK);
+					if (start_range) {
+						/* first-time kludge */
+						start_data_addr = data_addr;
+						last_data_addr = start_data_addr
+							 - hugepagesize;
+						start_range = 0;
+					}
+					if (last_invalid) {
+						/* end of a hole */
+						start_data_addr = data_addr;
+						last_data_addr = start_data_addr
+							 - hugepagesize;
+						/* trigger update of offset */
+						do_break = 1;
+					}
+					last_valid = 1;
+					last_invalid = 0;
+					/*
+					 * we have hit a gap in physical
+					 * contiguousness in the table.
+					 */
+					/* ?? consecutive holes will have
+					   same data_addr */
+					if (data_addr !=
+					    last_data_addr + hugepagesize) {
+						do_break = 1;
+						break_in_valids = 1;
+					}
+					if (verbose)
+						printf("valid: pud %ld pmd %ld pfn %#lx"
+							" pvaddr %#lx pfns %#lx-%lx"
+							" start %#lx end %#lx\n",
+							pudindex, pmdindex,
+							data_addr >> 12,
+							pvaddr, tpfn,
+							tpfn + structsperhpage - 1,
+							vmap_offset_start,
+							vmap_offset_end);
+					num_pmds_valid++;
+					if (!(pmd & PSE)) {
+						printf("vmemmap pmd not huge, abort\n");
+						exit(1);
+					}
+				} else {
+					if (last_valid) {
+						/* this is a hole after some valids */
+						do_break = 1;
+						break_in_valids = 1;
+					}
+					last_valid = 0;
+					last_invalid = 1;
+					/*
+					 * There are holes in this sparsely
+					 * populated table; they are 2MB gaps
+					 * represented by null pmd entries.
+					 */
+					if (verbose)
+						printf("invalid: pud %ld pmd %ld %#lx"
+							" pfns %#lx-%lx start %#lx end"
+							" %#lx\n", pudindex, pmdindex,
+							pvaddr, tpfn,
+							tpfn + structsperhpage - 1,
+							vmap_offset_start,
+							vmap_offset_end);
+				}
+				if (do_break) {
+					/* The end of a hole is not summarized.
+					 * It must be the start of a hole or
+					 * hitting a discontiguous series.
+					 */
+					if (break_in_valids) {
+						/*
+						 * calculate the pfns
+						 * represented by the current
+						 * offset in the vmemmap.
+						 */
+						/* page struct even partly on this page */
+						rep_pfn_start = vmap_offset_start /
+							pagestructsize;
+						/* ending page struct entirely on
+						   this page */
+						rep_pfn_end = ((vmap_offset_end -
+							hugepagesize) / pagestructsize);
+						if (verbose)
+							printf("vmap pfns %#lx-%lx "
+								"represent pfns %#lx-%lx\n\n",
+								start_data_addr >> PAGESHFT,
+								last_data_addr >> PAGESHFT,
+								rep_pfn_start, rep_pfn_end);
+						if (!append_vmap_group(&vmaphead,
+								start_data_addr >> PTE_SHIFT,
+								last_data_addr >> PTE_SHIFT,
+								rep_pfn_start, rep_pfn_end)) {
+							ERRMSG("Can't allocate memory for a vmemmap group.\n");
+							goto free_list;
+						}
+						groups++;
+					}
+
+					/* update logical position at every break */
+					vmap_offset_start =
+						vmap_offset_end - hugepagesize;
+					start_data_addr = data_addr;
+				}
+
+				last_data_addr = data_addr;
+				pvaddr += hugepagesize;
+				/*
+				 * pvaddr is current virtual address
+				 *   eg 0xffffea0004200000 if
+				 *    vmap_offset_start is 4200000
+				 */
+			}
+			/*
+			 * Past the last page struct: stop walking the pud page
+			 * as well, rather than reading further pmd pages.
+			 */
+			if (done)
+				break;
+		}
+		tpfn = (pvaddr - VMEMMAP_START) / pagestructsize;
+		if (done || tpfn >= high_pfn) {
+			done = 1;
+			break;
+		}
+	}
+
+	/* summarize the group that was still open when the walk ended */
+	rep_pfn_start = vmap_offset_start / pagestructsize;
+	rep_pfn_end = (vmap_offset_end - hugepagesize) / pagestructsize;
+	if (verbose)
+		printf("vmap pfns %#lx-%lx represent pfns %#lx-%lx\n\n",
+			start_data_addr >> PAGESHFT, last_data_addr >> PAGESHFT,
+			rep_pfn_start, rep_pfn_end);
+	if (!append_vmap_group(&vmaphead, start_data_addr >> PTE_SHIFT,
+			       last_data_addr >> PTE_SHIFT,
+			       rep_pfn_start, rep_pfn_end)) {
+		ERRMSG("Can't allocate memory for a vmemmap group.\n");
+		goto free_list;
+	}
+	groups++;
+	if (verbose)
+		printf("num_pmds: %d num_pmds_valid %d\n", num_pmds, num_pmds_valid);
+
+	/* transfer the linked list to an array */
+	gvmem_pfns = malloc(sizeof(struct vmap_pfns) * groups);
+	if (!gvmem_pfns) {
+		ERRMSG("Can't allocate memory for the vmemmap pfn array.\n");
+		goto free_list;
+	}
+	cur = vmaphead;
+	for (i = 0; i < groups; i++) {
+		/* fetch the link before the node is freed */
+		next = cur->next;
+		gvmem_pfns[i].next = NULL;
+		gvmem_pfns[i].prev = NULL;
+		gvmem_pfns[i].vmap_pfn_start = cur->vmap_pfn_start;
+		gvmem_pfns[i].vmap_pfn_end = cur->vmap_pfn_end;
+		gvmem_pfns[i].rep_pfn_start = cur->rep_pfn_start;
+		gvmem_pfns[i].rep_pfn_end = cur->rep_pfn_end;
+		free(cur);
+		cur = next;
+	}
+	nr_gvmem_pfns = i;
+	return;
+
+free_list:
+	/* 'groups' is the number of nodes on the circular list */
+	cur = vmaphead;
+	while (groups-- > 0) {
+		next = cur->next;
+		free(cur);
+		cur = next;
+	}
+}
+
int
create_dumpfile(void)
{
@@ -8454,6 +9115,10 @@ create_dumpfile(void)
if (!initial())
return FALSE;
+ /* create an array of translations from pfn to vmemmap pages */
+ if (eflag)
+ find_vmemmap();
+
print_vtop();
if (jflag)
@@ -9634,7 +10299,7 @@ main(int argc, char *argv[])
info->block_order = DEFAULT_ORDER;
message_level = DEFAULT_MSG_LEVEL;
- while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:jlpRvXx:", longopts,
+ while ((opt = getopt_long(argc, argv, "b:cDd:eEFfg:hi:jlpRvXx:", longopts,
NULL)) != -1) {
switch (opt) {
case OPT_BLOCK_ORDER:
@@ -9682,6 +10347,10 @@ main(int argc, char *argv[])
jflag = 1;
info->flag_cyclic = FALSE; // saving memory to avoid cyclic
break;
+ case 'e':
+ eflag = 1;
+ /* exclude unused vmemmap pages */
+ break;
case OPT_DISKSET:
if (!sadump_add_diskset_info(optarg))
goto out;
More information about the kexec
mailing list