[RFC PATCH v2 11/41] arm64/sve: Expand task_struct for Scalable Vector Extension state
Dave Martin
Dave.Martin at arm.com
Wed Mar 22 07:50:41 PDT 2017
This patch expands task_struct to accommodate the Scalable Vector
Extension state.
The extra space is not used for anything yet.
Signed-off-by: Dave Martin <Dave.Martin at arm.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/fpsimd.h | 11 ++++++
arch/arm64/kernel/fpsimd.c | 75 ++++++++++++++++++++++++++++++++++++++++-
arch/arm64/kernel/process.c | 2 +-
arch/arm64/kernel/setup.c | 3 ++
5 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 289dcb9..820fad1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 92f45ee..757d304 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -51,6 +51,15 @@ struct fpsimd_partial_state {
__uint128_t vregs[32];
};
+/*
+ * Scalable Vector Extension state structure template.
+ * The layout is vector length dependent, with vector length = vl * 16 bytes.
+ */
+#define fpsimd_sve_state(vl) { \
+ __uint128_t zregs[32][vl]; \
+ u16 pregs[16][vl]; \
+ u16 ffr[vl]; \
+}
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -83,6 +92,8 @@ extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr);
+extern unsigned int sve_get_vl(void);
+extern void __init fpsimd_init_task_struct_size(void);
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 06da8ea..bc7a2d5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -27,6 +27,7 @@
#include <asm/fpsimd.h>
#include <asm/cputype.h>
+#include <asm/hwcap.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -89,6 +90,29 @@
*/
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+#ifdef CONFIG_ARM64_SVE
+
+static void *__sve_state(struct task_struct *task)
+{
+ return (char *)task + ALIGN(sizeof(*task), 16);
+}
+
+static void *sve_pffr(struct task_struct *task)
+{
+ unsigned int vl = sve_get_vl();
+
+ BUG_ON(vl % 16);
+ return (char *)__sve_state(task) + 34 * vl;
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Dummy declarations for usage protected with IS_ENABLED(CONFIG_ARM64_SVE): */
+extern void *__sve_state(struct task_struct *task);
+extern void *sve_pffr(struct task_struct *task);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
/*
* Trapped FP/ASIMD access.
*/
@@ -125,6 +149,27 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
send_sig_info(SIGFPE, &info, current);
}
+static void task_fpsimd_load(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ sve_load_state(sve_pffr(task),
+ &task->thread.fpsimd_state.fpsr);
+ else
+ fpsimd_load_state(&task->thread.fpsimd_state);
+}
+
+static void task_fpsimd_save(struct task_struct *task)
+{
+ /* FIXME: remove task argument? */
+ BUG_ON(task != current);
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ sve_save_state(sve_pffr(task),
+ &task->thread.fpsimd_state.fpsr);
+ else
+ fpsimd_save_state(&task->thread.fpsimd_state);
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
if (!system_supports_fpsimd())
@@ -161,8 +206,21 @@ void fpsimd_flush_thread(void)
{
if (!system_supports_fpsimd())
return;
- memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+
fpsimd_flush_task_state(current);
+
+ memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE)) {
+ BUG_ON((char *)__sve_state(current) < (char *)current);
+ BUG_ON(arch_task_struct_size <
+ ((char *)__sve_state(current) - (char *)current));
+
+ memset(__sve_state(current), 0,
+ arch_task_struct_size -
+ ((char *)__sve_state(current) - (char *)current));
+ }
+
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
@@ -329,6 +387,21 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
+void __init fpsimd_init_task_struct_size(void)
+{
+ arch_task_struct_size = sizeof(struct task_struct);
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ ((read_cpuid(ID_AA64PFR0_EL1) >> ID_AA64PFR0_SVE_SHIFT)
+ & 0xf) == 1) {
+ arch_task_struct_size = sizeof(struct task_struct) +
+ 35 * sve_get_vl();
+
+ pr_info("SVE: enabled with maximum %u bits per vector\n",
+ sve_get_vl() * 8);
+ }
+}
+
/*
* FP/SIMD support code initialisation.
*/
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 043d373..717dd0f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -246,7 +246,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
- *dst = *src;
+ memcpy(dst, src, arch_task_struct_size);
return 0;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 42274bd..1412a35 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -236,6 +236,9 @@ void __init setup_arch(char **cmdline_p)
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
sprintf(init_utsname()->machine, UTS_MACHINE);
+
+ fpsimd_init_task_struct_size();
+
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
--
2.1.4
More information about the linux-arm-kernel
mailing list