[RFC PATCH 1/3] makedumpfile/arm64: Add support for ARMv8.2-LVA (52-bit kernel VA support)

Alexander Kamensky alexander.kamensky42 at gmail.com
Sun Nov 22 23:37:56 EST 2020


From: Bhupesh Sharma <bhsharma at redhat.com>

With ARMv8.2-LVA architecture extension availability, arm64 hardware
which supports this extension can support up to 52-bit virtual
addresses. It is especially useful for having a 52-bit user-space virtual
address space while the kernel can still retain 48-bit/52-bit virtual
addressing.

Since at the moment support for this extension is enabled in the
kernel via a CONFIG flag (CONFIG_ARM64_VA_BITS_52), there is
no clear mechanism in user-space to determine this CONFIG
flag value and use it to determine the kernel-space VA address range
values.

'makedumpfile' can instead use 'TCR_EL1.T1SZ' value from vmcoreinfo
which indicates the size offset of the memory region addressed by
TTBR1_EL1 (and hence can be used for determining the
vabits_actual value).

Using the vmcoreinfo variable exported by kernel commit
 bbdbc11804ff ("arm64/crash_core: Export  TCR_EL1.T1SZ in vmcoreinfo"),
the user-space can use the following computation for determining whether
 an address lies in the linear map range (for newer kernels >= 5.4):

  #define __is_lm_address(addr)	(!(((u64)addr) & BIT(vabits_actual - 1)))

Note that for the --mem-usage case though we need to calculate
vabits_actual value before the vmcoreinfo read functionality is ready,
so we can instead read the architecture register ID_AA64MMFR2_EL1
directly to see if the underlying hardware supports 52-bit addressing
and accordingly set vabits_actual as:

   read_id_aa64mmfr2_el1();
   if (hardware supports 52-bit addressing)
	vabits_actual = 52;
   else
	vabits_actual = va_bits value calculated via _stext symbol;

Also make sure that the page_offset, is_linear_addr(addr) and __pa()
calculations work both for older (< 5.4) and newer kernels (>= 5.4).

I have tested several combinations with both kernel categories
[e.g. with different VA (39, 42, 48 and 52-bit) and PA combinations
(48 and 52-bit)] on at least 3 different boards.

Unfortunately, this means that we need to call 'populate_kernel_version()'
earlier, in 'get_page_offset_arm64()', as 'info->kernel_version' remains
uninitialized before its first use otherwise.

This patch is in accordance with ARMv8 Architecture Reference Manual

Cc: Kazuhito Hagio <k-hagio at ab.jp.nec.com>
Cc: John Donnelly <john.p.donnelly at oracle.com>
Cc: kexec at lists.infradead.org
Signed-off-by: Bhupesh Sharma <bhsharma at redhat.com>
---
 arch/arm64.c   | 233 ++++++++++++++++++++++++++++++++++++++++++-------
 common.h       |  10 +++
 makedumpfile.c |   4 +-
 makedumpfile.h |   6 +-
 4 files changed, 218 insertions(+), 35 deletions(-)

diff --git a/arch/arm64.c b/arch/arm64.c
index 3d7b416..7fd7039 100644
--- a/arch/arm64.c
+++ b/arch/arm64.c
@@ -19,10 +19,23 @@
 
 #ifdef __aarch64__
 
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
 #include "../elf_info.h"
 #include "../makedumpfile.h"
 #include "../print_info.h"
 
+/* ID_AA64MMFR2_EL1 related helpers: */
+#define ID_AA64MMFR2_LVA_SHIFT	16
+#define ID_AA64MMFR2_LVA_MASK	(0xf << ID_AA64MMFR2_LVA_SHIFT)
+
+/* CPU feature ID registers */
+#define get_cpu_ftr(id) ({							\
+		unsigned long __val;						\
+		asm volatile("mrs %0, " __stringify(id) : "=r" (__val));	\
+		__val;								\
+})
+
 typedef struct {
 	unsigned long pgd;
 } pgd_t;
@@ -47,6 +60,7 @@ typedef struct {
 static int lpa_52_bit_support_available;
 static int pgtable_level;
 static int va_bits;
+static int vabits_actual;
 static unsigned long kimage_voffset;
 
 #define SZ_4K			4096
@@ -58,7 +72,6 @@ static unsigned long kimage_voffset;
 #define PAGE_OFFSET_42		((0xffffffffffffffffUL) << 42)
 #define PAGE_OFFSET_47		((0xffffffffffffffffUL) << 47)
 #define PAGE_OFFSET_48		((0xffffffffffffffffUL) << 48)
-#define PAGE_OFFSET_52		((0xffffffffffffffffUL) << 52)
 
 #define pgd_val(x)		((x).pgd)
 #define pud_val(x)		(pgd_val((x).pgd))
@@ -218,13 +231,25 @@ pmd_page_paddr(pmd_t pmd)
 #define pte_index(vaddr)		(((vaddr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
 #define pte_offset(dir, vaddr)		(pmd_page_paddr((*dir)) + pte_index(vaddr) * sizeof(pte_t))
 
+/*
+ * The linear kernel range starts at the bottom of the virtual address
+ * space. Testing the top bit for the start of the region is a
+ * sufficient check and avoids having to worry about the tag.
+ */
+#define is_linear_addr(addr)	((info->kernel_version < KERNEL_VERSION(5, 4, 0)) ?	\
+	(!!((unsigned long)(addr) & (1UL << (vabits_actual - 1)))) : \
+	(!((unsigned long)(addr) & (1UL << (vabits_actual - 1)))))
+
 static unsigned long long
 __pa(unsigned long vaddr)
 {
 	if (kimage_voffset == NOT_FOUND_NUMBER ||
-			(vaddr >= PAGE_OFFSET))
-		return (vaddr - PAGE_OFFSET + info->phys_base);
-	else
+			is_linear_addr(vaddr)) {
+		if (info->kernel_version < KERNEL_VERSION(5, 4, 0))
+			return ((vaddr & ~PAGE_OFFSET) + info->phys_base);
+		else
+			return (vaddr + info->phys_base - PAGE_OFFSET);
+	} else
 		return (vaddr - kimage_voffset);
 }
 
@@ -253,6 +278,7 @@ static int calculate_plat_config(void)
 			(PAGESIZE() == SZ_64K && va_bits == 42)) {
 		pgtable_level = 2;
 	} else if ((PAGESIZE() == SZ_64K && va_bits == 48) ||
+			(PAGESIZE() == SZ_64K && va_bits == 52) ||
 			(PAGESIZE() == SZ_4K && va_bits == 39) ||
 			(PAGESIZE() == SZ_16K && va_bits == 47)) {
 		pgtable_level = 3;
@@ -287,8 +313,14 @@ get_phys_base_arm64(void)
 		return TRUE;
 	}
 
+	/* Ignore the 1st PT_LOAD */
 	if (get_num_pt_loads() && PAGE_OFFSET) {
-		for (i = 0;
+		/* Note that the following loop starts with i = 1.
+		 * This is required to make sure that the following logic
+		 * works both for old and newer kernels (with flipped
+		 * VA space, i.e. >= 5.4.0)
+		 */
+		for (i = 1;
 		    get_pt_load(i, &phys_start, NULL, &virt_start, NULL);
 		    i++) {
 			if (virt_start != NOT_KV_ADDR
@@ -345,6 +377,139 @@ get_stext_symbol(void)
 	return(found ? kallsym : FALSE);
 }
 
+static int
+get_va_bits_from_stext_arm64(void)
+{
+	ulong _stext;
+
+	_stext = get_stext_symbol();
+	if (!_stext) {
+		ERRMSG("Can't get the symbol of _stext.\n");
+		return FALSE;
+	}
+
+	/* Derive va_bits as per arch/arm64/Kconfig, testing the largest
+	 * VA size first. Note that this is only a best-effort approximation
+	 * at the moment, as there can be inconsistencies in this
+	 * calculation (e.g. for the 52-bit kernel VA case, the 48th bit
+	 * is set in the _stext symbol).
+	 *
+	 * So, we need to rely on vabits_actual (derived from the
+	 * vmcoreinfo or read via a system register) for an accurate value
+	 * of the virtual addressing supported by the underlying kernel.
+	 */
+	if ((_stext & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
+		va_bits = 48;
+	} else if ((_stext & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
+		va_bits = 47;
+	} else if ((_stext & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
+		va_bits = 42;
+	} else if ((_stext & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
+		va_bits = 39;
+	} else if ((_stext & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
+		va_bits = 36;
+	} else {
+		ERRMSG("Cannot find a proper _stext for calculating VA_BITS\n");
+		return FALSE;
+	}
+
+	DEBUG_MSG("va_bits       : %d (approximation via _stext)\n", va_bits);
+
+	return TRUE;
+}
+
+/* Read and return the raw value of ID_AA64MMFR2_EL1.
+ * Note that this architecture register can be read
+ * only when we give an .arch hint to gcc/binutils,
+ * so we use the gcc construct '__attribute__ ((target ("arch=armv8.2-a")))'
+ * here, which is an .arch directive (see AArch64-Target-selection-directives
+ * documentation from ARM for details). This is required only for
+ * this function to make sure it compiles well with gcc/binutils.
+ */
+__attribute__ ((target ("arch=armv8.2-a")))
+static unsigned long
+read_id_aa64mmfr2_el1(void)
+{
+	return get_cpu_ftr(ID_AA64MMFR2_EL1);
+}
+
+static int
+get_vabits_actual_from_id_aa64mmfr2_el1(void)
+{
+	int l_vabits_actual;
+	unsigned long val;
+
+	/* Return 52 if the ID_AA64MMFR2_EL1 CPU-ID register indicates
+	 * ARMv8.2/LVA support and the page size is 64KB, 48 otherwise,
+	 * or ERROR if the CPU-ID registers cannot be read.
+	 * VARange, bits [19:16]:
+	 *   Indicates support for a larger virtual address.
+	 *   Defined values are:
+	 *     0b0000 VMSAv8-64 supports 48-bit VAs.
+	 *     0b0001 VMSAv8-64 supports 52-bit VAs when using the 64KB
+	 *            page size. The other translation granules support
+	 *            48-bit VAs.
+	 *
+	 * See ARMv8 ARM for more details.
+	 */
+	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+		ERRMSG("arm64 CPUID registers unavailable.\n");
+		return ERROR;
+	}
+
+	val = read_id_aa64mmfr2_el1();
+	val = (val & ID_AA64MMFR2_LVA_MASK) >> ID_AA64MMFR2_LVA_SHIFT;
+
+	if ((val == 0x1) && (PAGESIZE() == SZ_64K))
+		l_vabits_actual = 52;
+	else
+		l_vabits_actual = 48;
+
+	return l_vabits_actual;
+}
+
+static void
+get_page_offset_arm64(void)
+{
+	/* Compute info->page_offset from 'vabits_actual'.
+	 * If 'vabits_actual' is not initialized yet, our best bet
+	 * is to read the ID_AA64MMFR2_EL1 CPU-ID register.
+	 */
+	if (!vabits_actual) {
+		vabits_actual = get_vabits_actual_from_id_aa64mmfr2_el1();
+		if ((vabits_actual == ERROR) || (vabits_actual != 52)) {
+			/* If we cannot read ID_AA64MMFR2_EL1 arch
+			 * register or if this register does not indicate
+			 * support for a larger virtual address, our last
+			 * option is to use the VA_BITS to calculate the
+			 * PAGE_OFFSET value, i.e. vabits_actual = VA_BITS.
+			 */
+			vabits_actual = va_bits;
+			DEBUG_MSG("vabits_actual : %d (approximation via va_bits)\n",
+					vabits_actual);
+		} else
+			DEBUG_MSG("vabits_actual : %d (via id_aa64mmfr2_el1)\n",
+					vabits_actual);
+	}
+
+	if (!populate_kernel_version()) {
+		ERRMSG("Cannot get information about current kernel\n");
+		return;
+	}
+
+	/* See arch/arm64/include/asm/memory.h for more details of
+	 * the PAGE_OFFSET calculation (it differs from v5.4 onwards).
+	 */
+	if (info->kernel_version < KERNEL_VERSION(5, 4, 0))
+		info->page_offset = ((0xffffffffffffffffUL) -
+				((1UL) << (vabits_actual - 1)) + 1);
+	else
+		info->page_offset = (-(1UL << vabits_actual));
+
+	DEBUG_MSG("page_offset   : %lx (via vabits_actual)\n",
+			info->page_offset);
+}
+
 int
 get_machdep_info_arm64(void)
 {
@@ -359,8 +524,33 @@ get_machdep_info_arm64(void)
 	/* Check if va_bits is still not initialized. If still 0, call
 	 * get_versiondep_info() to initialize the same.
 	 */
+	if (NUMBER(VA_BITS) != NOT_FOUND_NUMBER) {
+		va_bits = NUMBER(VA_BITS);
+		DEBUG_MSG("va_bits       : %d (vmcoreinfo)\n",
+				va_bits);
+	}
+
+	/* Check if va_bits is still not initialized. If still 0, call
+	 * get_va_bits_from_stext_arm64() to initialize the same from
+	 * the _stext symbol.
+	 */
 	if (!va_bits)
-		get_versiondep_info_arm64();
+		if (get_va_bits_from_stext_arm64() == FALSE)
+			return FALSE;
+
+	/* See TCR_EL1, Translation Control Register (EL1) register
+	 * description in the ARMv8 Architecture Reference Manual.
+	 * Basically, we can use the TCR_EL1.T1SZ
+	 * value to determine the virtual addressing range supported
+	 * in the kernel-space (i.e. vabits_actual).
+	 */
+	if (NUMBER(TCR_EL1_T1SZ) != NOT_FOUND_NUMBER) {
+		vabits_actual = 64 - NUMBER(TCR_EL1_T1SZ);
+		DEBUG_MSG("vabits_actual : %d (vmcoreinfo)\n",
+				vabits_actual);
+	}
+
+	get_page_offset_arm64();
 
 	if (!calculate_plat_config()) {
 		ERRMSG("Can't determine platform config values\n");
@@ -398,34 +588,11 @@ get_xen_info_arm64(void)
 int
 get_versiondep_info_arm64(void)
 {
-	ulong _stext;
-
-	_stext = get_stext_symbol();
-	if (!_stext) {
-		ERRMSG("Can't get the symbol of _stext.\n");
-		return FALSE;
-	}
-
-	/* Derive va_bits as per arch/arm64/Kconfig */
-	if ((_stext & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
-		va_bits = 36;
-	} else if ((_stext & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
-		va_bits = 39;
-	} else if ((_stext & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
-		va_bits = 42;
-	} else if ((_stext & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
-		va_bits = 47;
-	} else if ((_stext & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
-		va_bits = 48;
-	} else {
-		ERRMSG("Cannot find a proper _stext for calculating VA_BITS\n");
-		return FALSE;
-	}
-
-	info->page_offset = (0xffffffffffffffffUL) << (va_bits - 1);
+	if (!va_bits)
+		if (get_va_bits_from_stext_arm64() == FALSE)
+			return FALSE;
 
-	DEBUG_MSG("va_bits      : %d\n", va_bits);
-	DEBUG_MSG("page_offset  : %lx\n", info->page_offset);
+	get_page_offset_arm64();
 
 	return TRUE;
 }
diff --git a/common.h b/common.h
index 6e2f657..1901df1 100644
--- a/common.h
+++ b/common.h
@@ -50,5 +50,15 @@
 #define NOT_PADDR	(ULONGLONG_MAX)
 #define BADADDR  	((ulong)(-1))
 
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ *
+ * Copied from linux source: 'include/linux/stringify.h'
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
 #endif  /* COMMON_H */
 
diff --git a/makedumpfile.c b/makedumpfile.c
index cdde040..a955912 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1133,7 +1133,7 @@ fallback_to_current_page_size(void)
 	return TRUE;
 }
 
-static int populate_kernel_version(void)
+int populate_kernel_version(void)
 {
 	struct utsname utsname;
 
@@ -2323,6 +2323,7 @@ write_vmcoreinfo_data(void)
 	WRITE_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR);
 #ifdef __aarch64__
 	WRITE_NUMBER("VA_BITS", VA_BITS);
+	WRITE_NUMBER_UNSIGNED("TCR_EL1_T1SZ", TCR_EL1_T1SZ);
 	WRITE_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET);
 	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
 #endif
@@ -2729,6 +2730,7 @@ read_vmcoreinfo(void)
 	READ_NUMBER("KERNEL_IMAGE_SIZE", KERNEL_IMAGE_SIZE);
 #ifdef __aarch64__
 	READ_NUMBER("VA_BITS", VA_BITS);
+	READ_NUMBER_UNSIGNED("TCR_EL1_T1SZ", TCR_EL1_T1SZ);
 	READ_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET);
 	READ_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
 #endif
diff --git a/makedumpfile.h b/makedumpfile.h
index 698c054..b95c7b6 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -974,7 +974,9 @@ unsigned long long vaddr_to_paddr_arm64(unsigned long vaddr);
 int get_versiondep_info_arm64(void);
 int get_xen_basic_info_arm64(void);
 int get_xen_info_arm64(void);
-#define paddr_to_vaddr_arm64(X) (((X) - info->phys_base) | PAGE_OFFSET)
+#define paddr_to_vaddr_arm64(X) ((info->kernel_version < KERNEL_VERSION(5, 4, 0)) ?	\
+				 ((X) - (info->phys_base - PAGE_OFFSET)) :		\
+				 (((X) - info->phys_base) | PAGE_OFFSET))
 
 #define find_vmemmap()		stub_false()
 #define vaddr_to_paddr(X)	vaddr_to_paddr_arm64(X)
@@ -1938,6 +1940,7 @@ struct number_table {
 	long	KERNEL_IMAGE_SIZE;
 #ifdef __aarch64__
 	long 	VA_BITS;
+	unsigned long	TCR_EL1_T1SZ;
 	unsigned long	PHYS_OFFSET;
 	unsigned long	kimage_voffset;
 #endif
@@ -2389,5 +2392,6 @@ ulong htol(char *s, int flags);
 int hexadecimal(char *s, int count);
 int decimal(char *s, int count);
 int file_exists(char *file);
+int populate_kernel_version(void);
 
 #endif /* MAKEDUMPFILE_H */
-- 
2.26.2




More information about the kexec mailing list