[PATCH 3/3] makedumpfile: Rewrite readpage_elf

Petr Tesarik ptesarik at suse.cz
Tue Feb 9 23:50:09 PST 2016


The current code in readpage_elf (and readpage_elf_parallel) is extremely
hard to follow. Additionally, it still does not cover all possible cases.
For example, attempts to read outside of any ELF segment will end up with
phys_start being 0, frac_head a negative number, interpreted as a large
positive number by memset() and write past buffer end.

Instead of trying to handle even more "corner cases", I rewrote the
algorithm from scratch. The basic idea is simple: set a goal to fill the
page buffer with data, then work towards that goal by:

  - filling holes with zeroes (see Note below),
  - p_filesz portions with file data and
  - remaining p_memsz portions again with zeroes.

Repeat this method for each LOAD until the goal is achieved, or an error
occurs. In most cases, the loop runs only once.

Note: A "hole" is data at a physical address that is not covered by any
ELF LOAD program header. In other words, the ELF file does not specify
any data for such a hole (not even zeroes). So, why does makedumpfile
fill them with zeroes? It's because makedumpfile works with page
granularity (the compressed format does not even have a way to store
a partial page), so if only part of a page is stored, a complete page
must be provided to make this partial data accessible.

Credits to Ivan Delalande <colona at arista.com> who first found the
problem and wrote the original fix.

Signed-off-by: Petr Tesarik <ptesarik at suse.com>

---
 elf_info.c     |   28 +++++++
 elf_info.h     |    1 
 makedumpfile.c |  208 ++++++++++++++++++++++-----------------------------------
 3 files changed, 110 insertions(+), 127 deletions(-)

--- a/elf_info.c
+++ b/elf_info.c
@@ -691,6 +691,34 @@ get_max_paddr(void)
 	return max_paddr;
 }
 
+/*
+ * Find the LOAD segment which is closest to the requested
+ * physical address within a given distance.
+ *  If there is no such segment, return a negative number.
+ */
+int
+closest_pt_load(unsigned long long paddr, unsigned long distance)
+{
+	int i, bestidx;
+	struct pt_load_segment *pls;
+	unsigned long bestdist;
+
+	bestdist = distance;
+	bestidx = -1;
+	for (i = 0; i < num_pt_loads; ++i) {
+		pls = &pt_loads[i];
+		if (paddr >= pls->phys_end)
+			continue;
+		if (paddr >= pls->phys_start)
+			return i;	/* Exact match */
+		if (bestdist > pls->phys_start - paddr) {
+			bestdist = pls->phys_start - paddr;
+			bestidx = i;
+		}
+	}
+	return bestidx;
+}
+
 int
 get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr)
 {
--- a/elf_info.h
+++ b/elf_info.h
@@ -39,6 +39,7 @@ off_t offset_to_pt_load_end(off_t offset
 unsigned long long vaddr_to_paddr_general(unsigned long long vaddr);
 off_t vaddr_to_offset_slow(int fd, char *filename, unsigned long long vaddr);
 unsigned long long get_max_paddr(void);
+int closest_pt_load(unsigned long long paddr, unsigned long distance);
 
 int page_is_fractional(off_t page_offset);
 
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -648,73 +648,51 @@ read_from_vmcore_parallel(int fd_memory,
 static int
 readpage_elf(unsigned long long paddr, void *bufptr)
 {
-	off_t offset1, offset2;
-	size_t size1, size2;
-	unsigned long long phys_start, phys_end, frac_head = 0;
-
-	offset1 = paddr_to_offset(paddr);
-	offset2 = paddr_to_offset(paddr + info->page_size);
-	phys_start = paddr;
-	phys_end = paddr + info->page_size;
-
-	/*
-	 * Check the case phys_start isn't aligned by page size like below:
-	 *
-	 *                           phys_start
-	 *                           = 0x40ffda7000
-	 *         |<-- frac_head -->|------------- PT_LOAD -------------
-	 *     ----+-----------------------+---------------------+----
-	 *         |         pfn:N         |       pfn:N+1       | ...
-	 *     ----+-----------------------+---------------------+----
-	 *         |
-	 *     pfn_to_paddr(pfn:N)               # page size = 16k
-	 *     = 0x40ffda4000
-	 */
-	if (!offset1) {
-		phys_start = page_head_to_phys_start(paddr);
-		offset1 = paddr_to_offset(phys_start);
-		frac_head = phys_start - paddr;
-		memset(bufptr, 0, frac_head);
-	}
-
-	/*
-	 * Check the case phys_end isn't aligned by page size like the
-	 * phys_start's case.
-	 */
-	if (!offset2) {
-		phys_end = page_head_to_phys_end(paddr);
-		offset2 = paddr_to_offset(phys_end);
-		memset(bufptr + (phys_end - paddr), 0, info->page_size - (phys_end - paddr));
-	}
+	int idx;
+	off_t offset, size;
+	void *p, *endp;
+	unsigned long long phys_start, phys_end;
+
+	p = bufptr;
+	endp = p + info->page_size;
+	while (p < endp) {
+		idx = closest_pt_load(paddr + (p - bufptr), endp - p);
+		if (idx < 0)
+			break;
+
+		get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size);
+		if (phys_start > paddr + (p - bufptr)) {
+			memset(p, 0, phys_start - paddr);
+			p += phys_start - paddr;
+		}
 
-	/*
-	 * Check the separated page on different PT_LOAD segments.
-	 */
-	if (offset1 + (phys_end - phys_start) == offset2) {
-		size1 = phys_end - phys_start;
-	} else {
-		for (size1 = 1; size1 < info->page_size - frac_head; size1++) {
-			offset2 = paddr_to_offset(phys_start + size1);
-			if (offset1 + size1 != offset2)
-				break;
+		offset += paddr - phys_start;
+		if (size > paddr - phys_start) {
+			size -= paddr - phys_start;
+			if (size > endp - p)
+				size = endp - p;
+			if (!read_from_vmcore(offset, p, size)) {
+				ERRMSG("Can't read the dump memory(%s).\n",
+				       info->name_memory);
+				return FALSE;
+			}
+			p += size;
+		}
+		if (p < endp) {
+			size = phys_end - paddr;
+			if (size > endp - p)
+				size = endp - p;
+			memset(p, 0, size);
+			p += size;
 		}
 	}
 
-	if(!read_from_vmcore(offset1, bufptr + frac_head, size1)) {
-		ERRMSG("Can't read the dump memory(%s).\n",
-		       info->name_memory);
+	if (p == bufptr) {
+		ERRMSG("Attempt to read non-existent page at 0x%llx.\n",
+		       paddr);
 		return FALSE;
-	}
-
-	if (size1 + frac_head != info->page_size) {
-		size2 = phys_end - (phys_start + size1);
-
-		if(!read_from_vmcore(offset2, bufptr + frac_head + size1, size2)) {
-			ERRMSG("Can't read the dump memory(%s).\n",
-			       info->name_memory);
-			return FALSE;
-		}
-	}
+	} else if (p < bufptr)
+		memset(p, 0, endp - p);
 
 	return TRUE;
 }
@@ -722,76 +700,52 @@ readpage_elf(unsigned long long paddr, v
 static int
 readpage_elf_parallel(int fd_memory, unsigned long long paddr, void *bufptr)
 {
-	off_t offset1, offset2;
-	size_t size1, size2;
-	unsigned long long phys_start, phys_end, frac_head = 0;
-
-	offset1 = paddr_to_offset(paddr);
-	offset2 = paddr_to_offset(paddr + info->page_size);
-	phys_start = paddr;
-	phys_end = paddr + info->page_size;
-
-	/*
-	 * Check the case phys_start isn't aligned by page size like below:
-	 *
-	 *                           phys_start
-	 *                           = 0x40ffda7000
-	 *         |<-- frac_head -->|------------- PT_LOAD -------------
-	 *     ----+-----------------------+---------------------+----
-	 *         |         pfn:N         |       pfn:N+1       | ...
-	 *     ----+-----------------------+---------------------+----
-	 *         |
-	 *     pfn_to_paddr(pfn:N)               # page size = 16k
-	 *     = 0x40ffda4000
-	 */
-	if (!offset1) {
-		phys_start = page_head_to_phys_start(paddr);
-		offset1 = paddr_to_offset(phys_start);
-		frac_head = phys_start - paddr;
-		memset(bufptr, 0, frac_head);
-	}
-
-	/*
-	 * Check the case phys_end isn't aligned by page size like the
-	 * phys_start's case.
-	 */
-	if (!offset2) {
-		phys_end = page_head_to_phys_end(paddr);
-		offset2 = paddr_to_offset(phys_end);
-		memset(bufptr + (phys_end - paddr), 0, info->page_size
-							- (phys_end - paddr));
-	}
+	int idx;
+	off_t offset, size;
+	void *p, *endp;
+	unsigned long long phys_start, phys_end;
+
+	p = bufptr;
+	endp = p + info->page_size;
+	while (p < endp) {
+		idx = closest_pt_load(paddr + (p - bufptr), endp - p);
+		if (idx < 0)
+			break;
+
+		get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size);
+		if (phys_start > paddr + (p - bufptr)) {
+			memset(p, 0, phys_start - paddr);
+			p += phys_start - paddr;
+		}
 
-	/*
-	 * Check the separated page on different PT_LOAD segments.
-	 */
-	if (offset1 + (phys_end - phys_start) == offset2) {
-		size1 = phys_end - phys_start;
-	} else {
-		for (size1 = 1; size1 < info->page_size - frac_head; size1++) {
-			offset2 = paddr_to_offset(phys_start + size1);
-			if (offset1 + size1 != offset2)
-				break;
+		offset += paddr - phys_start;
+		if (size > paddr - phys_start) {
+			size -= paddr - phys_start;
+			if (size > endp - p)
+				size = endp - p;
+			if (!read_from_vmcore_parallel(fd_memory, offset, bufptr,
+						       size)) {
+				ERRMSG("Can't read the dump memory(%s).\n",
+				       info->name_memory);
+				return FALSE;
+			}
+			p += size;
+		}
+		if (p < endp) {
+			size = phys_end - paddr;
+			if (size > endp - p)
+				size = endp - p;
+			memset(p, 0, size);
+			p += size;
 		}
 	}
 
-	if(!read_from_vmcore_parallel(fd_memory, offset1, bufptr + frac_head,
-								size1)) {
-		ERRMSG("Can't read the dump memory(%s).\n",
-		       info->name_memory);
+	if (p == bufptr) {
+		ERRMSG("Attempt to read non-existent page at 0x%llx.\n",
+		       paddr);
 		return FALSE;
-	}
-
-	if (size1 + frac_head != info->page_size) {
-		size2 = phys_end - (phys_start + size1);
-
-		if(!read_from_vmcore_parallel(fd_memory, offset2,
-					bufptr + frac_head + size1, size2)) {
-			ERRMSG("Can't read the dump memory(%s).\n",
-			       info->name_memory);
-			return FALSE;
-		}
-	}
+	} else if (p < bufptr)
+		memset(p, 0, endp - p);
 
 	return TRUE;
 }



More information about the kexec mailing list