[kvm-unit-tests PATCH 18/33] arm: realm: Add test for FPU/SIMD context save/restore

Suzuki K Poulose suzuki.poulose at arm.com
Fri Apr 12 03:33:53 PDT 2024


From: Subhasish Ghosh <subhasish.ghosh at arm.com>

Test that the FPU/SIMD registers are saved and restored correctly when
context switching CPUs.

In order to test fpu/simd functionality, we need to make sure that
kvm-unit-tests doesn't generate code that uses the fpu registers, as that
might interfere with the test results. Thus make sure we compile the tests
with -mgeneral-regs-only.

Signed-off-by: Subhasish Ghosh <subhasish.ghosh at arm.com>
[ Added SVE register tests ]
Signed-off-by: Joey Gouly <joey.gouly at arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose at arm.com>
---
 arm/Makefile.arm64        |   9 +
 arm/cstart64.S            |   1 +
 arm/fpu.c                 | 424 ++++++++++++++++++++++++++++++++++++++
 arm/unittests.cfg         |   8 +
 lib/arm64/asm/processor.h |  26 +++
 lib/arm64/asm/sysreg.h    |   7 +
 6 files changed, 475 insertions(+)
 create mode 100644 arm/fpu.c

diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 90d95e79..5a9943c8 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -10,9 +10,17 @@ arch_LDFLAGS = -pie -n
 arch_LDFLAGS += -z notext
 CFLAGS += -mstrict-align
 
+sve_flag := $(call cc-option, -march=armv8.5-a+sve, "")
+ifneq ($(strip $(sve_flag)),)
+# Don't pass the option to the compiler, we don't
+# want the compiler to generate SVE instructions.
+CFLAGS += -DCC_HAS_SVE
+endif
+
 mno_outline_atomics := $(call cc-option, -mno-outline-atomics, "")
 CFLAGS += $(mno_outline_atomics)
 CFLAGS += -DCONFIG_RELOC
+CFLAGS += -mgeneral-regs-only
 
 define arch_elf_check =
 	$(if $(shell ! $(READELF) -rW $(1) >&/dev/null && echo "nok"),
@@ -49,6 +57,7 @@ tests = $(TEST_DIR)/timer.$(exe)
 tests += $(TEST_DIR)/micro-bench.$(exe)
 tests += $(TEST_DIR)/cache.$(exe)
 tests += $(TEST_DIR)/debug.$(exe)
+tests += $(TEST_DIR)/fpu.$(exe)
 tests += $(TEST_DIR)/realm-rsi.$(exe)
 
 include $(SRCDIR)/$(TEST_DIR)/Makefile.common
diff --git a/arm/cstart64.S b/arm/cstart64.S
index c081365f..53acf796 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -12,6 +12,7 @@
 #include <asm/ptrace.h>
 #include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
+#include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/sysreg.h>
 #include <asm/smc-rsi.h>
diff --git a/arm/fpu.c b/arm/fpu.c
new file mode 100644
index 00000000..06e5a845
--- /dev/null
+++ b/arm/fpu.c
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Limited.
+ * All rights reserved.
+ */
+
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <stdlib.h>
+
+#include <asm/rsi.h>
+
+#define CPU0_ID			0
+#define CPU1_ID			(CPU0_ID + 1)
+#define CPUS_MAX		(CPU1_ID + 1)
+#define FPU_QREG_MAX	32
+#define FPU_RESULT_PASS	(-1U)
+
+/*
+ * Read the RNDR system register into *random (8 bytes). Returns true when
+ * the read succeeded and false when RNDR reported a failure.
+ */
+static inline bool arch_collect_entropy(uint64_t *random)
+{
+	unsigned long ret;
+
+	asm volatile(
+	"	mrs  %[ptr], " xstr(RNDR) "\n"
+	"	cset %[ret], ne\n" /* RNDR sets NZCV to 0b0100 on failure, so ne == success */
+	:
+	  [ret] "=r" (ret),
+	  [ptr] "=r" (*random)
+	:
+	: "cc"
+	);
+
+	return ret == 1;
+}
+
+/*
+ * Store q0-q31, in register order, to the buffer at val. The buffer must hold
+ * 32 * 16 bytes; the caller in this file passes a 128-bit aligned array.
+ *
+ * The post-indexed stores advance the base register, so the pointer is a
+ * read-write ("+r") operand: a write-only operand would let the compiler hand
+ * the asm an uninitialised address. The stores do not modify the Q registers,
+ * so only "memory" is clobbered.
+ */
+#define fpu_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile("stp q0, q1, [%0], #32\n\t"	\
+		     "stp q2, q3, [%0], #32\n\t"	\
+		     "stp q4, q5, [%0], #32\n\t"	\
+		     "stp q6, q7, [%0], #32\n\t"	\
+		     "stp q8, q9, [%0], #32\n\t"	\
+		     "stp q10, q11, [%0], #32\n\t"	\
+		     "stp q12, q13, [%0], #32\n\t"	\
+		     "stp q14, q15, [%0], #32\n\t"	\
+		     "stp q16, q17, [%0], #32\n\t"	\
+		     "stp q18, q19, [%0], #32\n\t"	\
+		     "stp q20, q21, [%0], #32\n\t"	\
+		     "stp q22, q23, [%0], #32\n\t"	\
+		     "stp q24, q25, [%0], #32\n\t"	\
+		     "stp q26, q27, [%0], #32\n\t"	\
+		     "stp q28, q29, [%0], #32\n\t"	\
+		     "stp q30, q31, [%0], #32\n\t"	\
+		     : "+r" (__val)			\
+		     :					\
+		     : "memory");			\
+})
+
+/*
+ * Load q0-q31, in register order, from the 32 * 16 byte buffer at val. The
+ * post-indexed loads advance the base register, so it is a "+r" operand.
+ */
+#define fpu_reg_write(val)				\
+do {							\
+	uint64_t *__val = (val);			\
+	asm volatile("ldp q0, q1, [%0], #32\n\t"	\
+		     "ldp q2, q3, [%0], #32\n\t"	\
+		     "ldp q4, q5, [%0], #32\n\t"	\
+		     "ldp q6, q7, [%0], #32\n\t"	\
+		     "ldp q8, q9, [%0], #32\n\t"	\
+		     "ldp q10, q11, [%0], #32\n\t"	\
+		     "ldp q12, q13, [%0], #32\n\t"	\
+		     "ldp q14, q15, [%0], #32\n\t"	\
+		     "ldp q16, q17, [%0], #32\n\t"	\
+		     "ldp q18, q19, [%0], #32\n\t"	\
+		     "ldp q20, q21, [%0], #32\n\t"	\
+		     "ldp q22, q23, [%0], #32\n\t"	\
+		     "ldp q24, q25, [%0], #32\n\t"	\
+		     "ldp q26, q27, [%0], #32\n\t"	\
+		     "ldp q28, q29, [%0], #32\n\t"	\
+		     "ldp q30, q31, [%0], #32\n\t"	\
+		     : "+r" (__val)			\
+		     :					\
+		     : "q0", "q1", "q2", "q3", "q4", "q5",	\
+			"q6", "q7", "q8", "q9", "q10", "q11",	\
+			"q12", "q13", "q14", "q15", "q16", "q17", \
+			"q18", "q19", "q20", "q21", "q22", "q23", \
+			"q24", "q25", "q26", "q27", "q28", "q29", \
+			"q30", "q31", "memory");	\
+} while (0)
+
+#ifdef CC_HAS_SVE
+/*
+ * Store z0-z31, in register order, to the buffer at val, one vector length
+ * apart. The callers' buffers hold 16 bytes per register, i.e. this relies on
+ * the 128-bit vector length selected in sveregs_context_switch().
+ *
+ * The base address is only read by the stores, so it is an input operand: as
+ * an output-only ("=r") operand the compiler need not load it before the asm.
+ * The stores leave the Z registers unchanged, so only "memory" is clobbered.
+ */
+#define sve_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n"		\
+		     "str z0, [%0, #0, MUL VL]\n"	\
+		     "str z1, [%0, #1, MUL VL]\n"	\
+		     "str z2, [%0, #2, MUL VL]\n"	\
+		     "str z3, [%0, #3, MUL VL]\n"	\
+		     "str z4, [%0, #4, MUL VL]\n"	\
+		     "str z5, [%0, #5, MUL VL]\n"	\
+		     "str z6, [%0, #6, MUL VL]\n"	\
+		     "str z7, [%0, #7, MUL VL]\n"	\
+		     "str z8, [%0, #8, MUL VL]\n"	\
+		     "str z9, [%0, #9, MUL VL]\n"	\
+		     "str z10, [%0, #10, MUL VL]\n"	\
+		     "str z11, [%0, #11, MUL VL]\n"	\
+		     "str z12, [%0, #12, MUL VL]\n"	\
+		     "str z13, [%0, #13, MUL VL]\n"	\
+		     "str z14, [%0, #14, MUL VL]\n"	\
+		     "str z15, [%0, #15, MUL VL]\n"	\
+		     "str z16, [%0, #16, MUL VL]\n"	\
+		     "str z17, [%0, #17, MUL VL]\n"	\
+		     "str z18, [%0, #18, MUL VL]\n"	\
+		     "str z19, [%0, #19, MUL VL]\n"	\
+		     "str z20, [%0, #20, MUL VL]\n"	\
+		     "str z21, [%0, #21, MUL VL]\n"	\
+		     "str z22, [%0, #22, MUL VL]\n"	\
+		     "str z23, [%0, #23, MUL VL]\n"	\
+		     "str z24, [%0, #24, MUL VL]\n"	\
+		     "str z25, [%0, #25, MUL VL]\n"	\
+		     "str z26, [%0, #26, MUL VL]\n"	\
+		     "str z27, [%0, #27, MUL VL]\n"	\
+		     "str z28, [%0, #28, MUL VL]\n"	\
+		     "str z29, [%0, #29, MUL VL]\n"	\
+		     "str z30, [%0, #30, MUL VL]\n"	\
+		     "str z31, [%0, #31, MUL VL]\n"	\
+		     :					\
+		     : "r" (__val)			\
+		     : "memory");			\
+})
+
+/*
+ * Load z0-z31, in register order, from the buffer at val, one vector length
+ * apart. The Z registers are overwritten, hence the full clobber list.
+ */
+#define sve_reg_write(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n"		\
+		     "ldr z0, [%0, #0, MUL VL]\n"	\
+		     "ldr z1, [%0, #1, MUL VL]\n"	\
+		     "ldr z2, [%0, #2, MUL VL]\n"	\
+		     "ldr z3, [%0, #3, MUL VL]\n"	\
+		     "ldr z4, [%0, #4, MUL VL]\n"	\
+		     "ldr z5, [%0, #5, MUL VL]\n"	\
+		     "ldr z6, [%0, #6, MUL VL]\n"	\
+		     "ldr z7, [%0, #7, MUL VL]\n"	\
+		     "ldr z8, [%0, #8, MUL VL]\n"	\
+		     "ldr z9, [%0, #9, MUL VL]\n"	\
+		     "ldr z10, [%0, #10, MUL VL]\n"	\
+		     "ldr z11, [%0, #11, MUL VL]\n"	\
+		     "ldr z12, [%0, #12, MUL VL]\n"	\
+		     "ldr z13, [%0, #13, MUL VL]\n"	\
+		     "ldr z14, [%0, #14, MUL VL]\n"	\
+		     "ldr z15, [%0, #15, MUL VL]\n"	\
+		     "ldr z16, [%0, #16, MUL VL]\n"	\
+		     "ldr z17, [%0, #17, MUL VL]\n"	\
+		     "ldr z18, [%0, #18, MUL VL]\n"	\
+		     "ldr z19, [%0, #19, MUL VL]\n"	\
+		     "ldr z20, [%0, #20, MUL VL]\n"	\
+		     "ldr z21, [%0, #21, MUL VL]\n"	\
+		     "ldr z22, [%0, #22, MUL VL]\n"	\
+		     "ldr z23, [%0, #23, MUL VL]\n"	\
+		     "ldr z24, [%0, #24, MUL VL]\n"	\
+		     "ldr z25, [%0, #25, MUL VL]\n"	\
+		     "ldr z26, [%0, #26, MUL VL]\n"	\
+		     "ldr z27, [%0, #27, MUL VL]\n"	\
+		     "ldr z28, [%0, #28, MUL VL]\n"	\
+		     "ldr z29, [%0, #29, MUL VL]\n"	\
+		     "ldr z30, [%0, #30, MUL VL]\n"	\
+		     "ldr z31, [%0, #31, MUL VL]\n"	\
+		     :					\
+		     : "r" (__val)			\
+		     : "z0", "z1", "z2", "z3", "z4", "z5",	\
+			"z6", "z7", "z8", "z9", "z10", "z11",	\
+			"z12", "z13", "z14", "z15", "z16", "z17", \
+			"z18", "z19", "z20", "z21", "z22", "z23", \
+			"z24", "z25", "z26", "z27", "z28", "z29", \
+			"z30", "z31", "memory");	\
+})
+#else
+#define sve_reg_read(val)	report_abort("SVE: not supported")
+#define sve_reg_write(val)	report_abort("SVE: not supported")
+#endif
+/* Abort the test run unless at least nr CPUs are available. */
+static void nr_cpu_check(int nr)
+{
+	if (nr > nr_cpus)
+		report_abort("At least %d cpus required", nr);
+}
+
+/**
+ * @brief check if the FPU/SIMD/SVE register contents are the same as
+ * the input data provided.
+ * @param indata expected values, two 64-bit words per register
+ * @param sve non-zero to read the Z registers, zero to read the Q registers
+ * @return bitmask with bit N set when register N matched indata
+ */
+static uint32_t __fpuregs_testall(uint64_t *indata, int sve)
+{
+	/* 128b aligned array to read data into */
+	uint64_t outdata[FPU_QREG_MAX * 2]
+			 __attribute__((aligned(sizeof(__uint128_t)))) = { 0 };
+	unsigned int regcnt;
+	uint32_t result	= 0;
+
+	if (indata == NULL)
+		report_abort("invalid data pointer received");
+
+	/* Read data from FPU/SVE registers */
+	if (sve)
+		sve_reg_read(outdata);
+	else
+		fpu_reg_read(outdata);
+
+	/* Check if the data is the same */
+	for (regcnt = 0; regcnt < (FPU_QREG_MAX * 2); regcnt += 2) {
+		if ((outdata[regcnt] != indata[regcnt]) ||
+			(outdata[regcnt + 1] != indata[regcnt + 1])) {
+			report_info(
+			"%s save/restore failed for reg: %c%u expected: %lx_%lx received: %lx_%lx\n",
+			sve ? "SVE" : "FPU/SIMD",
+			sve ? 'z' : 'q',
+			regcnt / 2,
+			indata[regcnt + 1], indata[regcnt],
+			outdata[regcnt + 1], outdata[regcnt]);
+		} else {
+			/* mark the register as passed; 1U keeps bit 31 well defined */
+			result |= (1U << (regcnt / 2));
+		}
+	}
+
+	return result;
+}
+
+/**
+ * @brief writes randomly sampled data into the FPU/SIMD (or, if sve is set,
+ * SVE) registers; *indata receives the allocated data, the caller frees it.
+ */
+static void __fpuregs_writeall_random(uint64_t **indata, int sve)
+{
+	/* allocate 128b aligned memory: one 128b slot per register */
+	*indata = memalign(sizeof(__uint128_t), sizeof(__uint128_t) * FPU_QREG_MAX);
+
+	if (system_supports_rndr()) {
+		/* Populate memory with random data */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			while (!arch_collect_entropy(&(*indata)[i])) {}
+	} else {
+		/* Populate memory with data from the counter register */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			(*indata)[i] = get_cntvct();
+	}
+	/* Write data into FPU/SVE registers */
+	if (sve)
+		sve_reg_write(*indata);
+	else
+		fpu_reg_write(*indata);
+}
+
+/* on_cpu() callback: load fresh random data into the Q registers. */
+static void fpuregs_writeall_run(void *data)
+{
+	/* data points at the uint64_t * that receives the data buffer */
+	__fpuregs_writeall_random(data, 0);
+}
+
+/* on_cpu() callback: load fresh random data into the Z registers. */
+static void sveregs_writeall_run(void *data)
+{
+	/* data points at the uint64_t * that receives the data buffer */
+	__fpuregs_writeall_random(data, 1);
+}
+
+/* on_cpu() callback: compare the Q registers with the data behind @data. */
+static void fpuregs_testall_run(void *data)
+{
+	uint32_t mask = __fpuregs_testall(data, 0);
+
+	/* all 32 bits set means every register matched */
+	report(mask == FPU_RESULT_PASS,
+	       "FPU/SIMD register save/restore mask: 0x%x", mask);
+}
+
+/* on_cpu() callback: compare the Z registers with the data behind @data. */
+static void sveregs_testall_run(void *data)
+{
+	uint32_t mask = __fpuregs_testall(data, 1);
+
+	/* all 32 bits set means every register matched */
+	report(mask == FPU_RESULT_PASS,
+	       "SVE register save/restore mask: 0x%x", mask);
+}
+
+/**
+ * @brief Two-CPU check that CPU1's FPU/SIMD (or SVE) registers survive
+ * activity on the calling CPU.
+ * @details CPU1 loads random data into its registers, the calling CPU
+ * then loads fresh random data into its own, and finally CPU1
+ * compares its registers against what it originally wrote.
+ */
+static void fpuregs_context_switch_cpu1(int sve)
+{
+	void (*writer)(void *) = sve ? sveregs_writeall_run
+				     : fpuregs_writeall_run;
+	void (*checker)(void *) = sve ? sveregs_testall_run
+				      : fpuregs_testall_run;
+	uint64_t *cpu1_data = NULL;
+	uint64_t *own_data = NULL;
+
+	/* CPU1 fills its registers; the buffer comes back in cpu1_data */
+	on_cpu(CPU1_ID, writer, &cpu1_data);
+
+	/* Overwrite the same registers on the calling CPU */
+	__fpuregs_writeall_random(&own_data, sve);
+
+	/* CPU1 verifies its registers and reports the result */
+	on_cpu(CPU1_ID, checker, cpu1_data);
+
+	free(cpu1_data);
+	free(own_data);
+}
+
+/**
+ * @brief Two-CPU check that the calling CPU's FPU/SIMD (or SVE) registers
+ * survive activity on CPU1.
+ * @details The calling CPU loads random data into its registers, CPU1 then
+ * loads fresh random data into its own, and finally the calling CPU compares
+ * its registers against what it originally wrote.
+ */
+static void fpuregs_context_switch_cpu0(int sve)
+{
+	void (*writer)(void *) = sve ? sveregs_writeall_run
+				     : fpuregs_writeall_run;
+	uint64_t *own_data = NULL;
+	uint64_t *cpu1_data = NULL;
+	uint32_t mask;
+
+	/* Fill the registers of the calling CPU */
+	__fpuregs_writeall_random(&own_data, sve);
+
+	/* Overwrite the same registers on CPU1 */
+	on_cpu(CPU1_ID, writer, &cpu1_data);
+
+	/* The calling CPU must still see its own data */
+	mask = __fpuregs_testall(own_data, sve);
+	report(mask == FPU_RESULT_PASS, "%s register save/restore mask: 0x%x",
+	       sve ? "SVE" : "FPU/SIMD", mask);
+
+	free(cpu1_data);
+	free(own_data);
+}
+
+/**
+ * Runs both FPU/SIMD save/restore checks (sve == 0): first the one verified
+ * on the calling CPU, then the one verified on CPU1.
+ */
+static void fpuregs_context_switch(void)
+{
+	fpuregs_context_switch_cpu0(0);
+	fpuregs_context_switch_cpu1(0);
+}
+
+/**
+ * Checks if during realm context switch, SVE registers
+ * are saved/restored.
+ */
+static void sveregs_context_switch(void)
+{
+	unsigned long zcr = read_sysreg(ZCR_EL1);
+
+	/* Clear ZCR_EL1.LEN on the calling CPU: selects 128-bit SVE vectors */
+	write_sysreg(zcr & ~ZCR_EL1_LEN, ZCR_EL1);
+	/* NOTE(review): CPU1's ZCR_EL1 is not written here - confirm its VL is 128b */
+	fpuregs_context_switch_cpu0(1);
+	fpuregs_context_switch_cpu1(1);
+}
+
+static bool should_run_sve_tests(void)
+{
+	/* needs compiler support (CC_HAS_SVE) and an SVE-capable CPU */
+#ifdef CC_HAS_SVE
+	return system_supports_sve();
+#endif
+	return false;
+}
+
+int main(int argc, char **argv)
+{
+	report_prefix_pushf("fpu");
+	/* the checks below run code on CPU1, so CPUS_MAX (2) CPUs are required */
+	nr_cpu_check(CPUS_MAX);
+	fpuregs_context_switch();
+	/* the SVE checks are optional: they need compiler and CPU support */
+	if (should_run_sve_tests())
+		sveregs_context_switch();
+
+	return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index b5be6668..e35e8506 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -303,3 +303,11 @@ groups = nodefault realms
 extra_params = -append 'hvc'
 accel = kvm
 arch = arm64
+
+# FPU/SIMD test
+[fpu-context]
+file = fpu.flat
+smp = 2
+groups = nodefault realms
+accel = kvm
+arch = arm64
diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
index 320ebaef..cc993c6a 100644
--- a/lib/arm64/asm/processor.h
+++ b/lib/arm64/asm/processor.h
@@ -122,6 +122,8 @@ static inline unsigned long get_id_aa64pfr0_el1(void)
 #define ID_AA64PFR0_EL1_EL3	(0xf << 12)
 #define ID_AA64PFR0_EL1_EL3_NI	(0x0 << 12)
 
+#define ID_AA64PFR0_EL1_SVE_SHIFT	32
+
 static inline bool system_supports_granule(size_t granule)
 {
 	u32 shift;
@@ -145,5 +147,29 @@ static inline bool system_supports_granule(size_t granule)
 	return ((mmfr0 >> shift) & 0xf) == val;
 }
 
+static inline bool system_supports_sve(void)
+{
+	return !!((get_id_aa64pfr0_el1() >> ID_AA64PFR0_EL1_SVE_SHIFT) & 0xf);
+}
+
+/* Returns the result of "rdvl #8": the vector length in bytes times 8 (bits). */
+static inline int sve_vl(void)
+{
+	int vl;
+
+	asm volatile(".arch_extension sve\n"
+		     "rdvl %0, #8"
+		     : "=r" (vl));
+
+	return vl;
+}
+
+static inline bool system_supports_rndr(void)
+{
+	/* supported when the 4-bit RNDR field of ID_AA64ISAR0_EL1 is non-zero */
+	return ((read_sysreg(ID_AA64ISAR0_EL1) >>
+		 ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf) != 0;
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM64_PROCESSOR_H_ */
diff --git a/lib/arm64/asm/sysreg.h b/lib/arm64/asm/sysreg.h
index 6cae8b84..f214a4f0 100644
--- a/lib/arm64/asm/sysreg.h
+++ b/lib/arm64/asm/sysreg.h
@@ -73,6 +73,8 @@ asm(
 );
 #endif /* __ASSEMBLY__ */
 
+#define ID_AA64ISAR0_EL1_RNDR_SHIFT	60
+
 #define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
 #define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
 #define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
@@ -102,4 +104,9 @@ asm(
 			 SCTLR_EL1_TSCXT | SCTLR_EL1_EIS | SCTLR_EL1_SPAN | \
 			 SCTLR_EL1_NTLSMD | SCTLR_EL1_LSMAOE)
 
+#define ZCR_EL1		S3_0_C1_C2_0
+#define ZCR_EL1_LEN	GENMASK(3, 0)
+
+#define RNDR		S3_3_C2_C4_0
+
 #endif /* _ASMARM64_SYSREG_H_ */
-- 
2.34.1




More information about the linux-arm-kernel mailing list