[RFC PATCH v2 21/31] kvx: Add system call support

Yann Sionneau ysionneau at kalray.eu
Fri Jan 20 06:09:52 PST 2023


Add system call support and related uaccess.h for kvx.

Co-developed-by: Clement Leger <clement at clement-leger.fr>
Signed-off-by: Clement Leger <clement at clement-leger.fr>
Co-developed-by: Guillaume Thouvenin <gthouvenin at kalray.eu>
Signed-off-by: Guillaume Thouvenin <gthouvenin at kalray.eu>
Co-developed-by: Julian Vetter <jvetter at kalray.eu>
Signed-off-by: Julian Vetter <jvetter at kalray.eu>
Co-developed-by: Julien Villette <jvillette at kalray.eu>
Signed-off-by: Julien Villette <jvillette at kalray.eu>
Co-developed-by: Marius Gligor <mgligor at kalray.eu>
Signed-off-by: Marius Gligor <mgligor at kalray.eu>
Co-developed-by: Yann Sionneau <ysionneau at kalray.eu>
Signed-off-by: Yann Sionneau <ysionneau at kalray.eu>
---

Notes:
    V1 -> V2:
     - typo fixes (clober* -> clobber*)
     - use generic __access_ok

 arch/kvx/include/asm/syscall.h       |   73 ++
 arch/kvx/include/asm/syscalls.h      |   21 +
 arch/kvx/include/asm/uaccess.h       |  317 +++++
 arch/kvx/include/asm/unistd.h        |   11 +
 arch/kvx/include/uapi/asm/cachectl.h |   25 +
 arch/kvx/include/uapi/asm/unistd.h   |   16 +
 arch/kvx/kernel/entry.S              | 1759 ++++++++++++++++++++++++++
 arch/kvx/kernel/sys_kvx.c            |   58 +
 arch/kvx/kernel/syscall_table.c      |   19 +
 9 files changed, 2299 insertions(+)
 create mode 100644 arch/kvx/include/asm/syscall.h
 create mode 100644 arch/kvx/include/asm/syscalls.h
 create mode 100644 arch/kvx/include/asm/uaccess.h
 create mode 100644 arch/kvx/include/asm/unistd.h
 create mode 100644 arch/kvx/include/uapi/asm/cachectl.h
 create mode 100644 arch/kvx/include/uapi/asm/unistd.h
 create mode 100644 arch/kvx/kernel/entry.S
 create mode 100644 arch/kvx/kernel/sys_kvx.c
 create mode 100644 arch/kvx/kernel/syscall_table.c

diff --git a/arch/kvx/include/asm/syscall.h b/arch/kvx/include/asm/syscall.h
new file mode 100644
index 000000000000..a3f6cef73e4a
--- /dev/null
+++ b/arch/kvx/include/asm/syscall.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_SYSCALL_H
+#define _ASM_KVX_SYSCALL_H
+
+#include <linux/err.h>
+#include <linux/audit.h>
+
+#include <asm/ptrace.h>
+
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+
+void scall_machine_exit(unsigned char value);
+
+/**
+ * syscall_get_nr - find which system call a task is executing
+ * @task:	task of interest, must be blocked
+ * @regs:	task_pt_regs() of @task
+ *
+ * If @task is executing a system call or is at system call
+ * tracing about to attempt one, returns the system call number.
+ * If @task is not executing a system call, i.e. it's blocked
+ * inside the kernel for a fault or signal, returns -1.
+ *
+ * Note this returns int even on 64-bit machines.  Only 32 bits of
+ * system call number can be meaningful.  If the actual arch value
+ * is 64 bits, this truncates to 32 bits so 0xffffffff means -1.
+ *
+ * It's only valid to call this when @task is known to be blocked.
+ */
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+{
+	if (!in_syscall(regs))
+		return -1;
+
+	return es_sysno(regs);
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	/* 0 if syscall succeeded, otherwise -Errorcode */
+	return IS_ERR_VALUE(regs->r0) ? regs->r0 : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->r0;
+}
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+	return AUDIT_ARCH_KVX;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	args[0] = regs->orig_r0;
+	args++;
+	memcpy(args, &regs->r1, 5 * sizeof(args[0]));
+}
+
+int __init setup_syscall_sigreturn_page(void *sigpage_addr);
+
+#endif
diff --git a/arch/kvx/include/asm/syscalls.h b/arch/kvx/include/asm/syscalls.h
new file mode 100644
index 000000000000..beec95ebb97a
--- /dev/null
+++ b/arch/kvx/include/asm/syscalls.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_SYSCALLS_H
+#define _ASM_KVX_SYSCALLS_H
+
+#include <asm-generic/syscalls.h>
+
+/* We redefine clone in assembly for special slowpath */
+asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp,
+			int __user *parent_tid, int __user *child_tid, int tls);
+
+#define sys_clone __sys_clone
+
+long sys_cachectl(unsigned long addr, unsigned long len, unsigned long cache,
+		  unsigned long flags);
+
+#endif
diff --git a/arch/kvx/include/asm/uaccess.h b/arch/kvx/include/asm/uaccess.h
new file mode 100644
index 000000000000..24f91d75c1dd
--- /dev/null
+++ b/arch/kvx/include/asm/uaccess.h
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * derived from arch/riscv/include/asm/uaccess.h
+ *
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ *            Guillaume Thouvenin
+ */
+
+#ifndef _ASM_KVX_UACCESS_H
+#define _ASM_KVX_UACCESS_H
+
+#include <linux/sched.h>
+#include <linux/types.h>
+
+/**
+ * access_ok: - Checks if a user space pointer is valid
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+#define access_ok(addr, size) ({					\
+	__chk_user_ptr(addr);						\
+	likely(__access_ok((addr), (size)));				\
+})
+
+#include <asm-generic/access_ok.h>
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or TLB entries.
+ */
+
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/**
+ * Assembly defined function (usercopy.S)
+ */
+extern unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+
+extern unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+
+extern unsigned long
+asm_clear_user(void __user *to, unsigned long n);
+
+#define __clear_user asm_clear_user
+
+static inline __must_check unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+	might_fault();
+	if (!access_ok(to, n))
+		return n;
+
+	return asm_clear_user(to, n);
+}
+
+extern __must_check long strnlen_user(const char __user *str, long n);
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+
+#define __enable_user_access()
+#define __disable_user_access()
+
+/**
+ * get_user: - Get a simple variable from user space.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x, ptr)						\
+({									\
+	long __e = -EFAULT;						\
+	const __typeof__(*(ptr)) __user *__p = (ptr);			\
+	might_fault();							\
+	if (likely(access_ok(__p, sizeof(*__p)))) {			\
+		__e = __get_user(x, __p);				\
+	} else {							\
+		x = 0;							\
+	}								\
+	__e;								\
+})
+
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x, ptr)						\
+({									\
+	unsigned long __err = 0;					\
+	__chk_user_ptr(ptr);						\
+									\
+	__enable_user_access();						\
+	__get_user_nocheck(x, ptr, __err);				\
+	__disable_user_access();					\
+									\
+	__err;								\
+})
+
+#define __get_user_nocheck(x, ptr, err)					\
+do {									\
+	unsigned long __gu_addr = (unsigned long)(ptr);			\
+	unsigned long __gu_val;						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_asm("lbz", __gu_val, __gu_addr, err);	\
+		break;							\
+	case 2:								\
+		__get_user_asm("lhz", __gu_val, __gu_addr, err);	\
+		break;							\
+	case 4:								\
+		__get_user_asm("lwz", __gu_val, __gu_addr, err);	\
+		break;							\
+	case 8:								\
+		__get_user_asm("ld", __gu_val, __gu_addr, err);		\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	(x) = (__typeof__(*(ptr)))__gu_val;				\
+} while (0)
+
+#define __get_user_asm(op, x, addr, err)				\
+({									\
+	__asm__ __volatile__(						\
+			"1:     "op" %1 = 0[%2]\n"			\
+			"       ;;\n"					\
+			"2:\n"						\
+			".section .fixup,\"ax\"\n"			\
+			"3:     make %0 = 2b\n"				\
+			"	make %1 = 0\n"				\
+			"       ;;\n"					\
+			"       make %0 = %3\n"				\
+			"       igoto %0\n"				\
+			"       ;;\n"					\
+			".previous\n"					\
+			".section __ex_table,\"a\"\n"			\
+			"       .align 8\n"				\
+			"       .dword 1b,3b\n"				\
+			".previous"					\
+			: "=r"(err), "=r"(x)				\
+			: "r"(addr), "i"(-EFAULT), "0"(err));		\
+})
+
+/**
+ * put_user: - Write a simple value into user space.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define put_user(x, ptr)						\
+({									\
+	long __e = -EFAULT;						\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	might_fault();							\
+	if (likely(access_ok(__p, sizeof(*__p)))) {			\
+		__e = __put_user(x, __p);				\
+	}								\
+	__e;								\
+})
+
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define __put_user(x, ptr)						\
+({									\
+	unsigned long __err = 0;					\
+	__chk_user_ptr(ptr);						\
+									\
+	__enable_user_access();						\
+	__put_user_nocheck(x, ptr, __err);				\
+	__disable_user_access();					\
+									\
+	__err;								\
+})
+
+#define __put_user_nocheck(x, ptr, err)					\
+do {									\
+	unsigned long __pu_addr = (unsigned long)(ptr);			\
+	__typeof__(*(ptr)) __pu_val = (x);				\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__put_user_asm("sb", __pu_val, __pu_addr, err);		\
+		break;							\
+	case 2:								\
+		__put_user_asm("sh", __pu_val, __pu_addr, err);		\
+		break;							\
+	case 4:								\
+		__put_user_asm("sw", __pu_val, __pu_addr, err);		\
+		break;							\
+	case 8:								\
+		__put_user_asm("sd", __pu_val, __pu_addr, err);		\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+} while (0)
+
+#define __put_user_asm(op, x, addr, err)				\
+({									\
+	__asm__ __volatile__(						\
+			"1:     "op" 0[%2], %1\n"			\
+			"       ;;\n"					\
+			"2:\n"						\
+			".section .fixup,\"ax\"\n"			\
+			"3:     make %0 = 2b\n"				\
+			"       ;;\n"					\
+			"       make %0 = %3\n"				\
+			"       igoto %0\n"				\
+			"       ;;\n"					\
+			".previous\n"					\
+			".section __ex_table,\"a\"\n"			\
+			"       .align 8\n"				\
+			"       .dword 1b,3b\n"				\
+			".previous"					\
+			: "=r"(err)					\
+			: "r"(x), "r"(addr), "i"(-EFAULT), "0"(err));	\
+})
+
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	long __kr_err;							\
+									\
+	__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err);	\
+	if (unlikely(__kr_err))						\
+		goto err_label;						\
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	long __kr_err;							\
+									\
+	__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err);	\
+	if (unlikely(__kr_err))						\
+		goto err_label;						\
+} while (0)
+
+
+#endif	/* _ASM_KVX_UACCESS_H */
diff --git a/arch/kvx/include/asm/unistd.h b/arch/kvx/include/asm/unistd.h
new file mode 100644
index 000000000000..6cd093dbf2d8
--- /dev/null
+++ b/arch/kvx/include/asm/unistd.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#define __ARCH_WANT_SYS_CLONE
+
+#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/kvx/include/uapi/asm/cachectl.h b/arch/kvx/include/uapi/asm/cachectl.h
new file mode 100644
index 000000000000..be0a1aa23cf6
--- /dev/null
+++ b/arch/kvx/include/uapi/asm/cachectl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _UAPI_ASM_KVX_CACHECTL_H
+#define _UAPI_ASM_KVX_CACHECTL_H
+
+/*
+ * Cache type for cachectl system call
+ */
+#define CACHECTL_CACHE_DCACHE		(1 << 0)
+
+/*
+ * Flags for cachectl system call
+ */
+#define CACHECTL_FLAG_OP_INVAL		(1 << 0)
+#define CACHECTL_FLAG_OP_WB		(1 << 1)
+#define CACHECTL_FLAG_OP_MASK		(CACHECTL_FLAG_OP_INVAL | \
+					 CACHECTL_FLAG_OP_WB)
+
+#define CACHECTL_FLAG_ADDR_PHYS		(1 << 2)
+
+#endif /* _UAPI_ASM_KVX_CACHECTL_H */
diff --git a/arch/kvx/include/uapi/asm/unistd.h b/arch/kvx/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..5f86d81dbb76
--- /dev/null
+++ b/arch/kvx/include/uapi/asm/unistd.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_CLONE3
+
+#include <asm-generic/unistd.h>
+
+/* Additional KVX specific syscalls */
+#define __NR_cachectl (__NR_arch_specific_syscall)
+__SYSCALL(__NR_cachectl, sys_cachectl)
diff --git a/arch/kvx/kernel/entry.S b/arch/kvx/kernel/entry.S
new file mode 100644
index 000000000000..e73dc2221c06
--- /dev/null
+++ b/arch/kvx/kernel/entry.S
@@ -0,0 +1,1759 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ *            Guillaume Thouvenin
+ *            Marius Gligor
+ *            Yann Sionneau
+ *            Julien Villette
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/pgtable-bits.h>
+#include <asm/sys_arch.h>
+#include <asm/sfr_defs.h>
+#include <asm/tlb_defs.h>
+#include <asm/mem_map.h>
+#include <asm/traps.h>
+#include <asm/unistd.h>
+
+#define MMU_SET_MASK		((1 << KVX_SFR_MMC_SS_WIDTH) - 1)
+
+/* Mask to replicate data from 32bits LSB to MSBs */
+#define REPLICATE_32_MASK 0x0804020108040201
+
+#define PS_HWLOOP_ENABLE	(KVX_SFR_PS_HLE_MASK << 32)
+#define PS_HWLOOP_DISABLE	(KVX_SFR_PS_HLE_MASK)
+#define PS_HWLOOP_EN_ET_CLEAR_DAUS_DIS	\
+	(PS_HWLOOP_ENABLE | KVX_SFR_PS_ET_MASK | SFR_SET_VAL_WFXL(PS, DAUS, 0))
+#define PS_ET_CLEAR KVX_SFR_PS_ET_MASK
+#define PS_HLE_EN_IT_EN_ET_CLEAR_DAUS_DIS	\
+		(SFR_SET_VAL_WFXL(PS, HLE, 1) | \
+		SFR_SET_VAL_WFXL(PS, IE, 1) | \
+		SFR_SET_VAL_WFXL(PS, ET, 0) | \
+		SFR_SET_VAL_WFXL(PS, DAUS, 0))
+
+#define PS_IT_DIS		KVX_SFR_PS_IE_MASK
+#define PS_IL_CLEAR		KVX_SFR_PS_IL_WFXL_CLEAR
+
+#define MMC_CLEAR_TLB_CLEAR_WAY \
+	(SFR_CLEAR_WFXL(MMC, SB) | SFR_CLEAR_WFXL(MMC, SW))
+
+#define MMC_SEL_TLB_CLEAR_WAY(__tlb) \
+	(SFR_SET_WFXL(MMC, SB, __tlb) | MMC_CLEAR_TLB_CLEAR_WAY)
+#define MMC_SEL_JTLB_CLEAR_WAY	MMC_SEL_TLB_CLEAR_WAY(MMC_SB_JTLB)
+#define MMC_SEL_LTLB_CLEAR_WAY	MMC_SEL_TLB_CLEAR_WAY(MMC_SB_LTLB)
+
+#define MME_WFXL(_enable)	SFR_SET_VAL_WFXL(PS, MME, _enable)
+
+/* Temporary scract system register for trap handling */
+#define TMP_SCRATCH_SR	$sr_pl3
+
+.altmacro
+
+#define TEL_DEFAULT_VALUE  (TLB_ES_A_MODIFIED << KVX_SFR_TEL_ES_SHIFT)
+
+#define TASK_THREAD_SAVE_AREA_QUAD(__q) \
+				(TASK_THREAD_SAVE_AREA + (__q) * QUAD_REG_SIZE)
+
+#ifdef CONFIG_DEBUG_EXCEPTION_STACK
+.section .rodata
+stack_error_panic_str_label:
+	.string "Stack has been messed up !"
+#endif
+
+#ifdef CONFIG_KVX_DEBUG_TLB_WRITE
+.section .rodata
+mmc_error_panic_str_label:
+	.string "Failed to write entry to the JTLB (in assembly refill)"
+#endif
+
+#ifdef CONFIG_KVX_DEBUG_ASN
+.section .rodata
+asn_error_panic_str_label:
+	.string "ASN mismatch !"
+#endif
+
+/**
+ * call_trace_hardirqs: hardirqs tracing call
+ * state: State of hardirqs to be reported (on/off)
+ */
+.macro call_trace_hardirqs state
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call trace_hardirqs_\state
+	;;
+#endif
+.endm
+
+/**
+ * disable_interrupts: Disable interrupts
+ * tmp_reg: Temporary register to use for interrupt disabling
+ */
+.macro disable_interrupt tmp_reg
+	make \tmp_reg = KVX_SFR_PS_IE_MASK
+	;;
+	wfxl $ps, \tmp_reg
+	;;
+.endm
+
+/**
+ * call_do_work_pending: Call do_work_pending and set stack argument ($r0)
+ * NOTE: Since do_work_pending requires thread_flags in $r1, they must
+ * be provided in $r1 before calling this macro.
+ * Moreover, do_work_pending expects interrupts to be disabled.
+ */
+.macro call_do_work_pending
+	copyd $r0 = $sp
+	call do_work_pending
+	;;
+.endm
+
+/**
+ * save_quad_regs: Save quad registers in temporary thread area
+ * After call, the quad is saved in task_struct.thread.save_area.
+ * sr_swap_reg is only used as a temporary value and will be
+ * restored after returning from this macro
+ *
+ * quad: 	Quad to saved
+ * sr_swap_reg:	Register used for sr swap and quad saving.
+ */
+.macro save_quad_regs quad sr_swap_reg
+	rswap \sr_swap_reg = $sr
+	;;
+	/* Save registers in save_area */
+	so __PA(TASK_THREAD_SAVE_AREA_QUAD(0))[\sr_swap_reg] = \quad
+	/* Restore register */
+	rswap \sr_swap_reg = $sr
+	;;
+.endm
+
+/**
+ * Switch task when entering kernel if needed.
+ * sp_reg: register which will be used to store original stack pointer when
+ * entering the kernel
+ * task_reg: is a register containing the current task pointer
+ * save_regs_label: label to jump to save register immediately. This label is
+ * used when we are already in kernel execution context.
+ * NOTE: when returning from this macro, we consider that the assembly following
+ * it will be executed only when coming from user space
+ */
+.macro switch_stack sp_reg task_reg save_regs_label
+	get \sp_reg = $sps
+	;;
+	/* Check if $sps.pl bit is 0 (ie in kernel mode)
+	 * if so, then we dont have to switch stack */
+	cb.even \sp_reg? \save_regs_label
+	/* Copy original $sp in a scratch reg */
+	copyd \sp_reg = $sp
+	;;
+	/* restore sp from kernel stack pointer */
+	ld $sp = __PA(TASK_THREAD_KERNEL_SP)[\task_reg]
+	;;
+.endm
+
+/**
+ * Disable MMU using a specified register
+ * scratch_reg: Scratch register to be used for $ps modification (clobbered)
+ * ps_val: Additionnal $ps modifications after returning disabling MMU
+ */
+.macro disable_mmu scratch_reg ps_val=0
+	/* Disable MME in $sps */
+	make \scratch_reg = MME_WFXL(0)
+	;;
+	wfxl $sps = \scratch_reg
+	;;
+	/* Enable DAUS in $ps */
+	make \scratch_reg = SFR_SET_VAL_WFXL(PS, DAUS, 1) | \ps_val
+	;;
+	wfxl $ps = \scratch_reg
+	/* We are now accessing data with MMU disabled */
+	;;
+.endm
+
+/**
+ * Disable MMU when entering exception path
+ * This macro does not require any register since used register will be
+ * restored after use. This can safely be used directly after entering
+ * exceptions.
+ */
+.macro exception_entry_disable_mmu
+	set TMP_SCRATCH_SR = $r0
+	disable_mmu $r0
+	;;
+	get $r0 = TMP_SCRATCH_SR
+	;;
+.endm
+
+/* Save callee registers on stack */
+.macro save_callee
+	sq PT_R18R19[$sp] = $r18r19
+	;;
+	so PT_Q20[$sp] = $r20r21r22r23
+	;;
+	so PT_Q24[$sp] = $r24r25r26r27
+	;;
+	so PT_Q28[$sp] = $r28r29r30r31
+	;;
+.endm
+
+/**
+ * Save registers for entry in kernel space.
+ * sp_reg should point to the original stack pointer when entering kernel space
+ * task_reg is a register containing the current task pointer
+ * both task_reg and sp_reg must be in saved_quad since they have been
+ * clobberred and will be restored when restoring saved_quad.
+ * pt_regs_sp is a single register which will be filled by pt_regs addr.
+ * When "returning" from this macro, hardware loop are enabled and exception
+ * is cleared, allowing to call kernel function without any risk.
+ */
+.macro save_regs_for_exception sp_reg task_reg saved_quad pt_regs_sp
+.if (\saved_quad==$r4r5r6r7 || \saved_quad==$r60r61r62r63)
+.error "saved_quad must not be $r4r5r6r7 or $r60r61r62r63 !"
+.endif
+	/* make some room on stack to save registers */
+	addd $sp = $sp, -(PT_SIZE_ON_STACK)
+	so __PA(PT_Q4-PT_SIZE_ON_STACK)[$sp] = $r4r5r6r7
+	/* Compute physical address of stack to avoid using 64bits immediate */
+	addd $r6 = $sp, VA_TO_PA_OFFSET - (PT_SIZE_ON_STACK)
+	;;
+	so PT_Q60[$r6] = $r60r61r62r63
+	/* Now that $r60r61r62r63 is saved, we can use it for saving
+	 * original stack stored in scratch_reg. Note that we can not
+	 * use the r12r13r14r15 quad to do that because it would
+	 * modify the current $r12/sp ! This is if course not what we
+	 * want and hence we use the freshly saved quad $r60r61r62r63.
+	 *
+	 * Note that we must use scratch_reg before reloading the saved
+	 * quad since the scratch reg is contained in it, so reloading
+	 * it before copying it would overwrite it.
+	 */
+	copyd $r60 = \sp_reg
+	;;
+	/* Reload the saved quad registers to save correct values
+	 * Since we use the scratch reg before that */
+	lo \saved_quad = __PA(TASK_THREAD_SAVE_AREA_QUAD(0))[\task_reg]
+	;;
+	so PT_Q8[$r6] = $r8r9r10r11
+	;;
+	so PT_Q0[$r6] = $r0r1r2r3
+	copyd $r61 = $r13
+	get $r5 = $le
+	;;
+	sq PT_R16R17[$r6] = $r16r17
+	make $r10 = 0x0
+	copyd $r62 = $r14
+	;;
+	so PT_Q32[$r6] = $r32r33r34r35
+	/* Since we are going to enable hardware loop, we must be careful
+	 * and reset le (loop exit) to avoid any exploit and return to
+	 * user with kernel mode */
+	set $le = $r10
+	copyd $r63 = $r15
+	;;
+	so PT_Q36[$r6] = $r36r37r38r39
+	get $r0 = $cs
+	;;
+	so PT_Q40[$r6] = $r40r41r42r43
+	get $r1 = $spc
+	;;
+	so PT_Q44[$r6] = $r44r45r46r47
+	get $r2 = $sps
+	;;
+	so PT_Q48[$r6] = $r48r49r50r51
+	get $r3 = $es
+	;;
+	so PT_Q52[$r6] = $r52r53r54r55
+	get $r7 = $ra
+	;;
+	so PT_Q56[$r6] = $r56r57r58r59
+	get $r4 = $lc
+	;;
+	so PT_Q12[$r6] = $r60r61r62r63
+	copyd $r63 = $r6
+	get $r6 = $ls
+	;;
+	so PT_CS_SPC_SPS_ES[$r63] = $r0r1r2r3
+	;;
+	so PT_LC_LE_LS_RA[$r63] = $r4r5r6r7
+	/* Clear frame pointer */
+	make $fp = 0
+	;;
+	/* Copy regs stack pointer for macro caller */
+	copyd \pt_regs_sp = $sp
+#ifdef CONFIG_DEBUG_EXCEPTION_STACK
+	addd $sp = $sp, -STACK_REG_SIZE
+	;;
+	sd VA_TO_PA_OFFSET[$sp] = $sp
+	;;
+#endif
+	/* Reenable hwloop, MMU and clear exception taken */
+	make $r8 = PS_HWLOOP_EN_ET_CLEAR_DAUS_DIS
+	;;
+	wfxl $ps, $r8
+	;;
+.endm
+
+/***********************************************************************
+*                Exception vectors trampolines
+***********************************************************************/
+#define exception_trampoline(__type) \
+.section .exception.## __type, "ax", @progbits ;\
+ENTRY(kvx_##__type ##_handler_trampoline): ;\
+	goto kvx_## __type ##_handler ;\
+	;; ;\
+ENDPROC(kvx_ ## __type ## _handler_trampoline) ;\
+.section .early_exception.## __type, "ax", @progbits ;\
+ENTRY(kvx_## __type ##_early_handler): ;\
+1:	nop ;\
+	;; ;\
+	goto 1b ;\
+	;; ;\
+ENDPROC(kvx_ ## __type ## _early_handler)
+
+exception_trampoline(debug)
+exception_trampoline(trap)
+exception_trampoline(interrupt)
+exception_trampoline(syscall)
+
+#define EXCEPTION_ENTRY(__name) \
+.section .exception.text, "ax", @progbits ;\
+ENTRY(__name)
+
+
+/***********************************************************************
+*                  Common exception return path
+***********************************************************************/
+/**
+ * Restore registers after exception
+ * When entering this macro, $sp must be located right before regs
+ * storage.
+ */
+EXCEPTION_ENTRY(return_from_exception)
+#ifdef CONFIG_DEBUG_EXCEPTION_STACK
+	ld $r1 = 0[$sp]
+	;;
+	sbfd $r1 = $r1, $sp
+	;;
+	cb.deqz $r1, _check_ok
+	;;
+	make $r2 = panic
+	make $r0 = stack_error_panic_str_label
+	;;
+	icall $r2
+	;;
+_check_ok:
+	addd $sp = $sp, STACK_REG_SIZE
+	;;
+#endif
+	get $r11 = $sr
+	/* Load sps value from saved registers */
+	ld $r6 = PT_SPS[$sp]
+	;;
+	/* Disable interrupt to check task flags atomically */
+	disable_interrupt $r60
+	;;
+	/* Check PL bit of sps, if set, then it means we are returning
+	 * to a lower privilege level (ie to user), if so, we need to
+	 * check work pending. If coming from kernel, directly go to
+	 * register restoration */
+	cb.even $r6? _restore_regs
+	ld $r1 = TASK_TI_FLAGS[$r11]
+	;;
+	/* Do we have work pending ? */
+	andd $r5 = $r1, _TIF_WORK_MASK
+	;;
+	/**
+	 * If we do not have work pending (ie $r5 == 0) then we can
+	 * directly jump to _restore_regs without calling do_work_pending
+	 */
+	cb.deqz $r5? _restore_regs
+	;;
+	/*
+	 * Work pending can potentially call a signal handler and then return
+	 * via rt_sigreturn. Return path will be different (restore all regs)
+	 * and hence all registers are needed to be saved.
+	 */
+	save_callee
+	;;
+	call_do_work_pending
+	;;
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* reload sps value from saved registers */
+	ld $r6 = PT_SPS[$sp]
+	;;
+#endif
+_restore_regs:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* Check if IRQs are going to be reenable in next context */
+	andd $r6 = $r6, KVX_SFR_SPS_IE_MASK
+	;;
+	cb.deqz $r6? 1f
+	;;
+	call trace_hardirqs_on
+	;;
+1:
+#endif
+	disable_mmu $r0, PS_HWLOOP_DISABLE
+	;;
+	lo $r0r1r2r3 = __PA(PT_CS_SPC_SPS_ES)[$sp]
+	/* Compute physical address of stack to avoid using 64bits immediate */
+	addd $r11 = $sp, VA_TO_PA_OFFSET
+	;;
+	lo $r4r5r6r7 = PT_LC_LE_LS_RA[$r11]
+	;;
+	lo $r60r61r62r63 = PT_Q60[$r11]
+	;;
+	lo $r56r57r58r59 = PT_Q56[$r11]
+	;;
+	lo $r52r53r54r55 = PT_Q52[$r11]
+	get $r14 = $sps
+	;;
+	lo $r48r49r50r51 = PT_Q48[$r11]
+	/* Generate a mask of ones at each bit where the current $sps
+	 * differs from the $sps to be restored
+	 */
+	xord $r14 = $r2, $r14
+	/* prepare wfxl clear mask on LSBs */
+	notd $r15 = $r2
+	/* prepare wfxl set mask on MSBs */
+	slld $r13 = $r2, 32
+	;;
+	lo $r44r45r46r47 = PT_Q44[$r11]
+	/* Replicate mask of ones on the 32 MSBs */
+	sbmm8 $r14 = $r14, REPLICATE_32_MASK
+	/* Combine the set and clear mask for wfxl */
+	insf  $r13 = $r15, 31, 0
+	;;
+	lo $r40r41r42r43 = PT_Q40[$r11]
+	set $lc = $r4
+	/* Mask to drop identical bits in order to avoid useless
+	 * privilege traps
+	 */
+	andd $r13 = $r13, $r14
+	;;
+	lq $r16r17 = PT_R16R17[$r11]
+	set $le = $r5
+	;;
+	lo $r32r33r34r35 = PT_Q32[$r11]
+	set $ls = $r6
+	;;
+	lo $r36r37r38r39 = PT_Q36[$r11]
+	copyd $r14 = $r11
+	set $ra = $r7
+	;;
+	lo $r8r9r10r11 = PT_Q8[$r14]
+	set $cs = $r0
+	/* MME was disabled by disable_mmu, reenable it before leaving */
+	ord $r13 = $r13, SFR_SET_VAL_WFXL(SPS, MME, 1)
+	;;
+	lo $r4r5r6r7 = PT_Q4[$r14]
+	set $spc = $r1
+	;;
+	lo $r0r1r2r3 = PT_Q0[$r14]
+	/* Store $sps wfxl value in scratch system register */
+	set TMP_SCRATCH_SR = $r13
+	;;
+	lo $r12r13r14r15 = PT_Q12[$r14]
+	rswap $r0 = TMP_SCRATCH_SR
+	;;
+	wfxl $sps = $r0
+	;;
+	/* Finally, restore $r0 value */
+	rswap $r0 = TMP_SCRATCH_SR
+	;;
+	rfe
+	;;
+ENDPROC(return_from_exception)
+
+/***********************************************************************
+*                      Debug handling
+***********************************************************************/
+EXCEPTION_ENTRY(kvx_debug_handler):
+	exception_entry_disable_mmu
+	;;
+	save_quad_regs $r0r1r2r3 $r4
+	;;
+	get $r2 = $sr
+	;;
+	switch_stack $r1 $r2 debug_save_regs
+	;;
+debug_save_regs:
+	save_regs_for_exception $r1 $r2 $r0r1r2r3 $r1
+	;;
+	get $r0 = $ea
+	/* tail-call for return_from_exception */
+	make $r3 = return_from_exception
+	;;
+	set $ra = $r3
+	;;
+	goto debug_handler
+	;;
+ENDPROC(kvx_debug_handler)
+
+/***********************************************************************
+*                      Traps handling
+***********************************************************************/
+/* These labels will be used for instruction patching */
+.global kvx_perf_tlb_refill, kvx_std_tlb_refill
+EXCEPTION_ENTRY(kvx_trap_handler):
+	/* Enable DAUS for physical data access */
+	exception_entry_disable_mmu
+	;;
+	/* Save r3 in a temporary system register to check if the trap is a
+	* nomapping or not */
+	set TMP_SCRATCH_SR = $r3
+	;;
+	get $r3 = $es
+	;;
+	/* Hardware trap cause  */
+	extfz $r3 = $r3, KVX_SFR_END(ES_HTC), KVX_SFR_START(ES_HTC)
+	;;
+	/* Is this a nomapping trap ? */
+	compd.eq $r3 = $r3, KVX_TRAP_NOMAPPING
+	;;
+	/* if nomapping trap, try fast_refill */
+	cb.even $r3? trap_slow_path
+	;;
+	/*
+	 * Fast TLB refill routine
+	 *
+	 * On kvx, we do not have hardware page walking, hence, TLB refill is
+	 * done using the core on no-mapping traps.
+	 * This routine must be as fast as possible to avoid wasting CPU time.
+	 * For that purpose, it is called directly from trap_handle after saving
+	 * only 8 registers ($r0 -> $r7) in a dedicated buffer.
+	 * To avoid taking nomapping while accessing page tables inside this
+	 * refill handler we switch to physical accesses using DAUS.
+	 * Once the switch is done, we save up to 8 registers to compute
+	 * the refill data.
+	 * This allows to avoid computing a complete task switching in order
+	 * to greatly reduce the refill time.
+	 *
+	 * We refill the JTLB which contains 128 sets with 4 way each.
+	 * Currently, the way selection is done using a round robin algorithm.
+	 *
+	 * The refill is using the basic flow:
+	 * 1 -	Enable physical access using DAUS.
+	 * 2 -	Save necessary registers
+	 * 3 -	Walk the page table to find the TLB entry to add (virtual to
+	 *	physical)
+	 * 4 -	Compute the TLB entry to be written (convert PTE to TLB entry)
+	 * 5 - 	Compute the target set (0 -> 127) for the new TLB entry
+	 *	This is done by extracting the 6 lsb of page number
+	 * 6 -	Get the current way to be used which is selected using a
+		simple round robin
+	 * 7 -	Mark PTE entry as _PAGE_ACCESSED (and optionally PAGE_DIRTY)
+	 * 8 -	Commit the new tlb entry
+	 * 9 -  Restore the virtual memory by disabling DAUS.
+	 *
+	 */
+	/* Get current task */
+	rswap $r63 = $sr
+	;;
+#ifdef CONFIG_KVX_MMU_STATS
+	get $r3 = $pm0
+	;;
+	sd __PA(TASK_THREAD_ENTRY_TS)[$r63] = $r3
+	;;
+#endif
+	/* Restore $r3 from temporary system scratch register */
+	get $r3 = TMP_SCRATCH_SR
+	/* Save registers to save $tel, $teh and $mmc */
+	so __PA(TASK_THREAD_SAVE_AREA_QUAD(2))[$r63] = $r8r9r10r11
+	;;
+	get $r8 = $tel
+	/* Save register for refill handler */
+	so __PA(TASK_THREAD_SAVE_AREA_QUAD(1))[$r63] = $r4r5r6r7
+	;;
+	/* Get exception address */
+	get $r0 = $ea
+	/* Save more registers to be comfy */
+	so __PA(TASK_THREAD_SAVE_AREA_QUAD(0))[$r63] = $r0r1r2r3
+	;;
+	get $r9 = $teh
+	;;
+	get $r10 = $mmc
+	;;
+	/* Restore $r63 value */
+	rswap $r63 = $sr
+	/* Check kernel address range */
+	addd $r4 = $r0, -KERNEL_DIRECT_MEMORY_MAP_BASE
+	/* Load task active mm for pgd loading in macro_tlb_refill */
+	ld $r1 = __PA(TASK_ACTIVE_MM)[$r63]
+	;;
+kvx_perf_tlb_refill:
+	/* Check if the address is in the kernel direct memory mapping */
+	compd.ltu $r3 = $r4, KERNEL_DIRECT_MEMORY_MAP_SIZE
+	/* Clear low bits of virtual address to align on page size */
+	andd $r5 = $r0, ~(REFILL_PERF_PAGE_SIZE - 1)
+	/* Create corresponding physical address */
+	addd $r2 = $r4, PHYS_OFFSET
+	;;
+	/* If address is not a kernel one, take the standard path */
+	cb.deqz $r3? kvx_std_tlb_refill
+	/* Prepare $teh value with virtual address and kernel value */
+	ord $r7 = $r5, REFILL_PERF_TEH_VAL
+	;;
+	/* Get $pm0 value as a pseudo random value for LTLB way to use */
+	get $r4 = $pm0
+	/* Clear low bits of physical address to align on page size */
+	andd $r2 = $r2, ~(REFILL_PERF_PAGE_SIZE - 1)
+	/* Prepare value for $mmc wfxl to select LTLB and correct way */
+	make $r5 = MMC_SEL_LTLB_CLEAR_WAY
+	;;
+	/* Keep low bits of timer value */
+	andw $r4 = $r4, (REFILL_PERF_ENTRIES - 1)
+	/* Get current task pointer for register restoration */
+	get $r11 = $sr
+	;;
+	/* Add LTLB base way number for kernel refill way */
+	addw $r4 = $r4, LTLB_KERNEL_RESERVED
+	/* Prepare $tel value with physical address and kernel value */
+	ord $r6 = $r2, REFILL_PERF_TEL_VAL
+	set $teh = $r7
+	;;
+	/* insert way in $mmc wfxl value */
+	insf $r5 = $r4, KVX_SFR_END(MMC_SW) + 32, KVX_SFR_START(MMC_SW) + 32
+	set $tel = $r6
+	;;
+	wfxl $mmc = $r5
+	;;
+	goto do_tlb_write
+	;;
+kvx_std_tlb_refill:
+	/* extract PGD offset */
+	extfz $r3 = $r0, (ASM_PGDIR_SHIFT + ASM_PGDIR_BITS - 1), ASM_PGDIR_SHIFT
+	/* is mm NULL ? if so, use init_mm */
+	cmoved.deqz $r1? $r1 = init_mm
+	;;
+	get $r7 = $pcr
+	/* Load pgd base address into $r1 */
+	ld $r1 = __PA(MM_PGD)[$r1]
+	;;
+	/* Add offset for physical address */
+	addd $r1 = $r1, VA_TO_PA_OFFSET
+	/* Extract processor ID to compute cpu_offset*/
+	extfz $r7 = $r7, KVX_SFR_END(PCR_PID), KVX_SFR_START(PCR_PID)
+	;;
+	/* Load PGD entry offset */
+	ld.xs $r1 = $r3[$r1]
+	/* Load per_cpu_offset */
+#if defined(CONFIG_SMP)
+	make $r5 = __PA(__per_cpu_offset)
+#endif
+	;;
+	/* extract PMD offset*/
+	extfz $r3 = $r0, (ASM_PMD_SHIFT + ASM_PMD_BITS - 1), ASM_PMD_SHIFT
+	/* If pgd entry is null -> out */
+	cb.deqz $r1? refill_err_out
+#if defined(CONFIG_SMP)
+	/* Load cpu offset */
+	ld.xs $r7 = $r7[$r5]
+#else
+	/* Force cpu offset to 0 */
+	make $r7 = 0
+#endif
+	;;
+	/* Load PMD entry offset and keep pointer to the entry for huge page */
+	addx8d $r2 = $r3, $r1
+	ld.xs $r1 = $r3[$r1]
+	;;
+	/* Check if it is a huge page (2Mb or 512Mb in PMD table)*/
+	andd $r6 = $r1, _PAGE_HUGE
+	/* If pmd entry is null -> out */
+	cb.deqz $r1? refill_err_out
+	/* extract PTE offset */
+	extfz $r3 = $r0, (PAGE_SHIFT + 8), PAGE_SHIFT
+	;;
+	/*
+	 * If the page is a huge one we already have set the PTE and the
+	 * pointer to the PTE.
+	 */
+	cb.dnez $r6? is_huge_page
+	;;
+	/* Load PTE entry */
+	ld.xs $r1 = $r3[$r1]
+	addx8d $r2 = $r3, $r1
+	;;
+	/* Check if it is a huge page (64Kb in PTE table) */
+	andd $r6 = $r1, _PAGE_HUGE
+	;;
+	/* Check if PTE entry is for a huge page */
+	cb.dnez $r6? is_huge_page
+	;;
+	/* 4K: Extract set value */
+	extfz $r0 = $r1, (PAGE_SHIFT + KVX_SFR_MMC_SS_WIDTH - 1), PAGE_SHIFT
+	/* 4K: Extract virt page from ea */
+	andd $r4 = $r0, PAGE_MASK
+	;;
+/*
+ * This path expects the following:
+ * - $r0 = set index
+ * - $r1 = pte entry
+ * - $r2 = pte entry address
+ * - $r4 = virtual page address
+ */
+pte_prepared:
+	/* Compute per_cpu_offset + current way of set address */
+	addd $r5 = $r0, $r7
+	/* Get exception cause for access type handling (page dirtying) */
+	get $r7 = $es
+	/* Clear way and select JTLB */
+	make $r6 = MMC_SEL_JTLB_CLEAR_WAY
+	;;
+	/* Load current way to use for current set */
+	lbz $r0 = __PA(jtlb_current_set_way)[$r5]
+	/* Check if the access was a "write" access */
+	andd $r7 = $r7, (KVX_TRAP_RWX_WRITE << KVX_SFR_ES_RWX_SHIFT)
+	;;
+	/* If bit PRESENT of pte entry is 0, then entry is not present */
+	cb.even $r1? refill_err_out
+	/*
+	 * Set the JTLB way in $mmc value, add 32 bits to be in the set part.
+	 * Since we are refilling JTLB, we must make sure we insert only
+	 * relevant bits (ie 2 bits for ways) to avoid using nonexistent ways.
+	 */
+	insf $r6 = $r0, KVX_SFR_START(MMC_SW) + 32 + (MMU_JTLB_WAYS_SHIFT - 1),\
+						KVX_SFR_START(MMC_SW) + 32
+	/* Extract page global bit */
+	extfz $r3 = $r1, _PAGE_GLOBAL_SHIFT, _PAGE_GLOBAL_SHIFT
+	/* Increment way value, note that we do not care about overflow since
+	 * we only use the two lower byte */
+	addd $r0 = $r0, 1
+	;;
+	/* Prepare MMC */
+	wfxl $mmc, $r6
+	;;
+	/* Insert global bit (if any) to its position into $teh value */
+	insf $r4 = $r3, KVX_SFR_TEH_G_SHIFT, KVX_SFR_TEH_G_SHIFT
+	/* If "write" access ($r7 != 0), then set it as dirty */
+	cmoved.dnez $r7? $r7 = _PAGE_DIRTY
+	/* Clear bits not related to FN in the pte entry for TEL writing */
+	andd $r6 = $r1, KVX_PFN_MASK
+	/* Store new way */
+	sb __PA(jtlb_current_set_way)[$r5] = $r0
+	;;
+	/* Extract access perms from pte entry (discard PAGE_READ bit +1) */
+	extfz $r3 = $r1, KVX_ACCESS_PERM_STOP_BIT, KVX_ACCESS_PERM_START_BIT + 1
+	/* Move FN bits to their place */
+	srld $r6 = $r6, KVX_PFN_SHIFT - PAGE_SHIFT
+	/* Extract the page size + cache policy */
+	andd $r0 = $r1, (KVX_PAGE_SZ_MASK | KVX_PAGE_CP_MASK)
+	/* Prepare SBMM value */
+	make $r5 = KVX_SBMM_BYTE_SEL
+	;;
+	/* Add page size + cache policy to $tel value */
+	ord $r6 = $r6, $r0
+	/* Get $mmc to get current ASN */
+	get $r0 = $mmc
+	/* Add _PAGE_ACCESSED bit to PTE entry for writeback */
+	ord $r7 = $r7, _PAGE_ACCESSED
+	;;
+	/* OR PTE value with accessed/dirty flags */
+	ord $r1 = $r1, $r7
+	/* Generate the byte selection for sbmm */
+	slld $r5 = $r5, $r3
+	/* Compute the mask to extract set and mask exception address */
+	make $r7 = KVX_PAGE_PA_MATRIX
+	;;
+	ord $r0 = $r6, TEL_DEFAULT_VALUE
+	/* Add ASN from mmc into future $teh value */
+	insf $r4 = $r0, KVX_SFR_END(MMC_ASN), KVX_SFR_START(MMC_ASN)
+	/* Get the page permission value */
+	sbmm8 $r6 = $r7, $r5
+	/* Check PAGE_READ bit in PTE entry */
+	andd $r3 = $r1, _PAGE_READ
+	;;
+	/* If PAGE_READ bit is not set, set policy as NA_NA */
+	cmoved.deqz $r3? $r6 = TLB_PA_NA_NA
+	;;
+	/* Shift PA to correct position */
+	slld $r6 = $r6, KVX_SFR_TEL_PA_SHIFT
+	set $teh = $r4
+	;;
+	/* Store updated pte entry */
+	sd 0[$r2] = $r1
+	/* Prepare tel */
+	ord $r6 = $r0, $r6
+	/* Get current task pointer for register restoration */
+	get $r11 = $sr
+	;;
+	set $tel = $r6
+	;;
+do_tlb_write:
+	tlbwrite
+	;;
+#ifdef CONFIG_KVX_DEBUG_TLB_WRITE
+	goto mmc_error_check
+	;;
+mmc_error_check_ok:
+#endif
+#ifdef CONFIG_KVX_DEBUG_ASN
+	goto asn_check
+	;;
+asn_check_ok:
+#endif
+	set $tel = $r8
+	/* Restore registers */
+	lo $r4r5r6r7 = __PA(TASK_THREAD_SAVE_AREA_QUAD(1))[$r11]
+	;;
+	set $teh = $r9
+	lo $r0r1r2r3 = __PA(TASK_THREAD_SAVE_AREA_QUAD(0))[$r11]
+	;;
+	set $mmc = $r10
+	;;
+	lo $r8r9r10r11 = __PA(TASK_THREAD_SAVE_AREA_QUAD(2))[$r11]
+	;;
+#ifdef CONFIG_KVX_MMU_STATS
+	/*
+	 * Fence to simulate a direct data dependency after returning from trap
+	 * nomapping handling. This is the worst case that can happen and the
+	 * processor will be stalled waiting for previous loads to complete.
+	 */
+	fence
+	;;
+	get $r4 = $pm0
+	;;
+	get $r0 = $sr
+	;;
+	/* Get cycles measured on trap entry */
+	ld $r1 = __PA(TASK_THREAD_ENTRY_TS)[$r0]
+	;;
+	/* Compute refill time */
+	sbfd $r0 = $r1, $r4
+	;;
+#ifdef CONFIG_SMP
+	get $r1 = $pcr
+	;;
+	/* Extract processor ID to compute cpu_offset */
+	extfz $r1 = $r1, KVX_SFR_END(PCR_PID), KVX_SFR_START(PCR_PID)
+	make $r2 = __PA(__per_cpu_offset)
+	;;
+	/* Load cpu offset */
+	ld.xs $r1 = $r1[$r2]
+	;;
+	addd $r1 = $r1, __PA(mmu_stats)
+	;;
+#else
+	make $r1 = __PA(mmu_stats)
+	;;
+#endif
+	/* Load time between refill + last refill cycle */
+	lq $r2r3 = MMU_STATS_CYCLES_BETWEEN_REFILL_OFF[$r1]
+	;;
+	/* Set last update time to current if 0 */
+	cmoved.deqz $r3? $r3 = $r4
+	/* remove current refill time to current cycle */
+	sbfd $r4 = $r0, $r4
+	;;
+	/* Compute time between last refill and current refill */
+	sbfd $r5 = $r3, $r4
+	/* Update last cycle time */
+	copyd $r3 = $r4
+	;;
+	/* Increment total time between refill */
+	addd $r2 = $r2, $r5
+	;;
+	sq MMU_STATS_CYCLES_BETWEEN_REFILL_OFF[$r1] = $r2r3
+	/* Get exception address */
+	get $r4 = $ea
+	;;
+	/* $r2 holds refill type (user/kernel/kernel_direct) */
+	make $r2 = MMU_STATS_REFILL_KERNEL_OFF
+	/* Check if address is a kernel direct mapping one */
+	compd.ltu $r3 = $r4, (KERNEL_DIRECT_MEMORY_MAP_BASE + \
+			      KERNEL_DIRECT_MEMORY_MAP_SIZE)
+	;;
+	cmoved.dnez $r3? $r2 = MMU_STATS_REFILL_KERNEL_DIRECT_OFF
+	/* Check if address is a user (ie below kernel) */
+	compd.ltu $r3 = $r4, KERNEL_DIRECT_MEMORY_MAP_BASE
+	;;
+	cmoved.dnez $r3? $r2 = MMU_STATS_REFILL_USER_OFF
+	;;
+	/* Compute refill struct addr into one reg */
+	addd $r1 = $r2, $r1
+	/* Load refill_struct values */
+	lo $r4r5r6r7 = $r2[$r1]
+	;;
+	/* Increment count */
+	addd $r4 = $r4, 1
+	/* Increment total cycles count */
+	addd $r5 = $r5, $r0
+	;;
+	/* Set min to ~0 if 0 */
+	cmoved.deqz $r6? $r6 = ~0
+	;;
+	/* Compare min and max */
+	compd.ltu $r2 = $r0, $r6
+	compd.gtu $r3 = $r0, $r7
+	;;
+	/* Update min and max*/
+	cmoved.dnez $r2? $r6 = $r0
+	cmoved.dnez $r3? $r7 = $r0
+	;;
+	/* store back all values */
+	so 0[$r1] = $r4r5r6r7
+	;;
+	get $r0 = $sr
+	;;
+	/* Restore clobberred registers */
+	lo $r4r5r6r7 = __PA(TASK_THREAD_SAVE_AREA_QUAD(1))[$r0]
+	;;
+	lo $r0r1r2r3 = __PA(TASK_THREAD_SAVE_AREA_QUAD(0))[$r0]
+	;;
+#endif
+	/* Save $r4 for reenabling mmu and data cache in sps */
+	set TMP_SCRATCH_SR = $r4
+	/* Enable MME in $sps */
+	make $r4 = MME_WFXL(1)
+	;;
+	/* Reenable $mme in $sps */
+	wfxl $sps = $r4
+	;;
+	get $r4 = TMP_SCRATCH_SR
+	;;
+	rfe
+	;;
+
+is_huge_page:
+	/*
+	 * When entering this path:
+	 * - $r0 = $ea
+	 * - $r1 = pte entry
+	 * - $r7 = cpu offset for tlb_current_set_way
+	 *
+	 * From now on, we have the pte value in $r1 so we can extract the page
+	 * size. This value is stored as it is expected by the MMU (ie between
+	 * 0 and 3).
+	 * Note that page size value is located at the same place as in $tel
+	 * and this is checked at build time so we can use TEL_PS defines.
+	 * In this codepath, we will extract the set and mask exception address
+	 * and align virt and phys address with what the hardware expect.
+	 * Indeed, MMU expect lsb of the virtual and physycal address to be 0
+	 * according to page size.
+	 * This means that for 4K pages, the 12 lsb must be 0, for 64K
+	 * pages, the 16 lsb must be 0 and so on.
+	 */
+	extfz $r5 = $r1, KVX_SFR_END(TEL_PS), KVX_SFR_START(TEL_PS)
+	/* Compute the mask to extract set and mask exception address */
+	make $r4 = KVX_PS_SHIFT_MATRIX
+	make $r6 = KVX_SBMM_BYTE_SEL
+	;;
+	/* Generate the byte selection for sbmm */
+	slld $r6 = $r6, $r5
+	;;
+	/* Get the shift value */
+	sbmm8 $r5 = $r4, $r6
+	make $r4 = 0xFFFFFFFFFFFFFFFF
+	;;
+	/* extract TLB set from ea (6 lsb of virtual page) */
+	srld $r5 = $r0, $r5
+	/* Generate ea masking according to page shift */
+	slld $r4 = $r4, $r5
+	;;
+	/* Mask to get the set value */
+	andd $r0 = $r5, MMU_SET_MASK
+	/* Extract virt page from ea */
+	andd $r4 = $r0, $r4
+	;;
+	/* Returned to fast path */
+	goto pte_prepared
+	;;
+
+#ifdef CONFIG_KVX_DEBUG_TLB_WRITE
+mmc_error_check:
+	get $r1 = $mmc
+	;;
+	andd $r1 = $r1, KVX_SFR_MMC_E_MASK
+	;;
+	cb.deqz $r1? mmc_error_check_ok
+	;;
+	make $r0 = mmc_error_panic_str_label
+	goto asm_panic
+	;;
+#endif
+#ifdef CONFIG_KVX_DEBUG_ASN
+/*
+ * When entering this path $r11 = $sr.
+ * WARNING: Do not clobber it here if you don't want to mess up with registers
+ * restoration above.
+ */
+asn_check:
+	get $r1 = $ea
+	;;
+	/* Check if kernel address, if so, there is no ASN */
+	compd.geu $r2 = $r1, PAGE_OFFSET
+	;;
+	cb.dnez $r2? asn_check_ok
+	;;
+	get $r2 = $pcr
+	/* Load active mm addr */
+	ld $r3 = __PA(TASK_ACTIVE_MM)[$r11]
+	;;
+	get $r5 = $mmc
+	/* Extract processor ID to compute cpu_offset*/
+	extfz $r2 = $r2, KVX_SFR_END(PCR_PID), KVX_SFR_START(PCR_PID)
+	addd $r3 = $r3, MM_CTXT_ASN
+	;;
+	extfz $r4 = $r5, KVX_SFR_END(MMC_ASN), KVX_SFR_START(MMC_ASN)
+	addd $r3 = $r3, VA_TO_PA_OFFSET
+	;;
+	/* Load current asn from active_mm */
+	ld.xs $r3 = $r2[$r3]
+	;;
+	/* Error if ASN is not set */
+	cb.deqz $r3? asn_check_err
+	/* Mask $r3 asn cycle part */
+	andd $r5 = $r3, ((1 << KVX_SFR_MMC_ASN_WIDTH) - 1)
+	;;
+	/* Compare asn in $mmc and asn in current task mm */
+	compd.eq $r3 = $r5, $r4
+	;;
+	cb.dnez $r3? asn_check_ok
+	;;
+asn_check_err:
+	/* We are fried, die peacefully */
+	make $r0 = asn_error_panic_str_label
+	goto asm_panic
+	;;
+#endif
+
+#if defined(CONFIG_KVX_DEBUG_ASN) || defined(CONFIG_KVX_DEBUG_TLB_WRITE)
+
+/**
+ * This routine calls panic from assembly after setting appropriate things
+ * $r0 = panic string
+ */
+asm_panic:
+	/*
+	 * Reenable hardware loop and traps (for nomapping) since some functions
+	 * might need it. Moreover, disable DAUS to reenable MMU accesses.
+	 */
+	make $r32 = PS_HWLOOP_EN_ET_CLEAR_DAUS_DIS
+	make $r33 = 0
+	get $r34 = $sr
+	;;
+	/* Clear hw loop exit to disable current loop */
+	set $le = $r33
+	;;
+	wfxl $ps = $r32
+	;;
+	/* Restore kernel stack */
+	ld $r12 = TASK_THREAD_KERNEL_SP[$r34]
+	;;
+	call panic
+	;;
+#endif
+
+/* Error path for TLB refill */
+refill_err_out:
+	get $r2 = $sr
+	;;
+	/* Restore clobbered registers */
+	lo $r8r9r10r11 = __PA(TASK_THREAD_SAVE_AREA_QUAD(2))[$r2]
+	;;
+	lo $r4r5r6r7 = __PA(TASK_THREAD_SAVE_AREA_QUAD(1))[$r2]
+	goto trap_switch_stack
+	;;
+
+/* This path is entered only when the trap is NOT a NOMAPPING */
+trap_slow_path:
+	/* Restore $r3 from temporary scratch system register */
+	get $r3 = TMP_SCRATCH_SR
+	;;
+	save_quad_regs $r0r1r2r3 $r4
+	;;
+	get $r2 = $sr
+	;;
+trap_switch_stack:
+	switch_stack $r1 $r2 trap_save_regs
+	;;
+trap_save_regs:
+	save_regs_for_exception $r1 $r2 $r0r1r2r3 $r2
+	;;
+	get $r1 = $ea
+	/* tail-call for return_from_exception */
+	make $r3 = return_from_exception
+	;;
+	set $ra = $r3
+	;;
+	/* Handler call */
+	get $r0 = $es
+	;;
+	goto trap_handler
+	;;
+ENDPROC(kvx_trap_handler)
+
+/***********************************************************************
+*                      Interrupts handling
+***********************************************************************/
+EXCEPTION_ENTRY(kvx_interrupt_handler):
+	exception_entry_disable_mmu
+	;;
+	save_quad_regs $r0r1r2r3 $r4
+	;;
+	get $r0 = $sr
+	;;
+	switch_stack $r1 $r0 it_save_regs
+	;;
+#ifdef CONFIG_SECURE_DAME_HANDLING
+	/**
+	 * In order to securely Handle Data Asynchronous Memory Error,
+	 * we need to have a correct entry point. This means we do not
+	 * want to handle a user induced DAME interrupt when entering
+	 * kernel.
+	 * In order to do that, we need to do a barrier, which will
+	 * reflect the DAME status in $ilr (if any).
+	 */
+	barrier
+	;;
+#endif
+it_save_regs:
+	save_regs_for_exception $r1 $r0 $r0r1r2r3 $r1
+	;;
+	get $r0 = $ilr
+	;;
+	get $r2 = $ile
+	;;
+	/**
+	 * When an interrupt happens, the processor automatically clears the
+	 * corresponding bit in $ilr. However, as we are using $ilr to get the
+	 * list of irqs we want to handle, we need to add the automatically
+	 * cleared interrupt bit. This is done by getting the interrupt number
+	 * from $es.
+	 */
+	get $r3 = $es
+	make $r4 = 1
+	;;
+	/* Extract interrupt number from $es */
+	extfz $r3 = $r3, KVX_SFR_END(ES_ITN), KVX_SFR_START(ES_ITN)
+	/**
+	 * Mask requests with enabled line since ILR will also contain disabled
+	 * interrupt lines (ie not enabled in $ile) and we want to respect the
+	 * current state of interrupt lines.
+	 */
+	andd $r0 = $r0, $r2
+	;;
+	/* Clear $ilr with bits we are going to handle */
+	wfxl $ilr = $r0
+	slld $r4 = $r4, $r3
+	;;
+	/* OR the irq mask with the current pending irq */
+	ord $r0 = $r0, $r4
+	call do_IRQ
+	;;
+	/* From now on, lower the interrupt level (IL) to allow IT nesting.
+	 * If returning to user, we will call schedule which will reenable
+	 * interrupts by itself when ready.
+	 * If returning to kernel and with CONFIG_PREEMPT, we will call
+	 * preempt_schedule_irq which will do the same.
+	 */
+	make $r0 = PS_IL_CLEAR
+	;;
+	wfxl $ps = $r0
+	;;
+	goto return_from_exception
+	;;
+ENDPROC(kvx_interrupt_handler)
+
+/***********************************************************************
+*                      Syscall handling
+***********************************************************************/
+EXCEPTION_ENTRY(kvx_syscall_handler):
+	/**
+	 * Syscalls are seen as standard func call from ABI POV
+	 * We may use all caller saved register whithout causing havoc
+	 * in the userspace. We need to save callee registers because they
+	 * will be restored when returning from fork.
+	 * Note that r0 -> r11 MUST not be used since they are
+	 * containing syscall parameters !
+	 * During this function, $r38 is the syscall handler address. Hence,
+	 * this register must not be clobberred during function calls (tracing
+	 * for instance.
+	 */
+	disable_mmu $r43
+	;;
+	get $r43 = $es
+	copyd $r52 = $sp
+	copyd $r53 = $tp
+	;;
+	get $r36 = $sr
+	copyd $r54 = $fp
+	copyd $r55 = $r15
+	;;
+	/* Extract syscall number */
+	extfz $r32 = $r43, KVX_SFR_END(ES_SN), KVX_SFR_START(ES_SN)
+	/* Get regs to save on stack */
+	get $r63 = $ra
+	addd $r36 = $r36, VA_TO_PA_OFFSET
+	;;
+	ld $r39 = TASK_TI_FLAGS[$r36]
+	get $r41 = $spc
+	make $r42 = __PA(sys_call_table)
+	;;
+	/* Check for out-of-bound syscall number */
+	sbfd $r50 = $r32, __NR_syscalls
+	/* Compute syscall func addr (ie sys_call_table[$r32])*/
+	ld.xs $r38 = $r32[$r42]
+	get $r42 = $sps
+	;;
+	/* True if trace syscall enable */
+	andd $r37 = $r39, _TIF_SYSCALL_WORK
+	/* Restore kernel stack pointer */
+	ld $sp = TASK_THREAD_KERNEL_SP[$r36]
+	/* If the syscall number is invalid, set invalid handler */
+	cmoved.dlez $r50? $r38 = sys_ni_syscall
+	;;
+	/* Prepare space on stack */
+	addd $sp = $sp, -PT_SIZE_ON_STACK
+	get $r40 = $cs
+	/* Save regs r0 -> r3 in pt_regs for restart & trace if needed */
+	so __PA(PT_Q0 - PT_SIZE_ON_STACK)[$sp] = $r0r1r2r3
+	;;
+	/* Store user stack pointer, frame pointer, thread pointer and r15 */
+	so __PA(PT_Q12)[$sp] = $r52r53r54r55
+	addd $r36 = $sp, VA_TO_PA_OFFSET
+	get $r60 = $lc
+	;;
+#ifdef CONFIG_SECURE_DAME_HANDLING
+	get $r44 = $ilr
+	make $r45 = SFR_CLEAR_WFXL(ILR, IT16);
+	;;
+	/* Extract $ilr.dame bit */
+	extfz $r44 = $r44, KVX_SFR_END(ILR_IT16), KVX_SFR_START(ILR_IT16)
+	/* Save $ilr value */
+	sd PT_ILR[$r36] = $r44
+	/* Clear $ilr.dame */
+	wfxl $ilr = $r45
+	;;
+#endif
+	so PT_CS_SPC_SPS_ES[$r36] = $r40r41r42r43
+	get $r61 = $le
+	make $r43 = 0x0
+	;;
+	/* Reenable hardware loops, IT, exceptions and disable DAUS */
+	make $r44 = PS_HLE_EN_IT_EN_ET_CLEAR_DAUS_DIS
+	get $r62 = $ls
+	/* Save regs r4 -> r7 in pt_regs for restart & trace if needed */
+	so PT_Q4[$r36] = $r4r5r6r7
+	;;
+	/* Clear $le on entry */
+	set $le = $r43
+	/* Save hw loop stuff */
+	so PT_LC_LE_LS_RA[$r36] = $r60r61r62r63
+	/* Clear frame pointer for kernel */
+	make $fp = 0
+	;;
+	/* Enable hwloop and interrupts and MMU
+	 * Note that we have to reenable interrupts after saving context
+	 * to avoid losing registers content */
+	wfxl $ps, $r44
+	;;
+	/* Do we have to trace the syscall ? */
+	cb.dnez $r37? trace_syscall_enter
+	/* Stroe original r0 value */
+	sd PT_ORIG_R0[$sp] = $r0
+	;;
+do_syscall:
+	/* Call the syscall handler */
+	icall $r38
+	;;
+	/*
+	 * rt_sigreturn syscall will not take this exit path (see
+	 * sys_rt_sigreturn for more information).
+	 */
+	/* Reload task flags since the syscall might have generated a signal*/
+	get $r36 = $sr
+	;;
+#ifdef CONFIG_SECURE_DAME_HANDLING
+	/* Barrier to force DAME interrupt to be generated if any pending */
+	barrier
+	;;
+#endif
+	/* Disable interrupt to check task flags atomically */
+	disable_interrupt $r60
+	;;
+	ld $r38 = TASK_TI_FLAGS[$r36]
+	;;
+	andd $r37 = $r38, _TIF_SYSCALL_WORK
+	;;
+	/* Save r0 which was returned from do_scall previously and will be
+	 * clobberred by do_work_pending (and potentially do_syscall_trace_exit
+	 * if tracing is enabled)
+	 * If do_signal is called and that syscall is restarted,
+	 * it will be modified by handle_restart to restore original
+	 * r0 value
+	 */
+	sd PT_Q0[$sp] = $r0
+	/* used to store if trace system */
+	cb.dnez $r37? trace_syscall_exit
+	;;
+check_work_pending:
+	/* Do we have work pending ? */
+	andd $r1 = $r38, _TIF_WORK_MASK
+	;;
+	/* If no work pending, directly continue to ret_to_user */
+	cb.dnez $r1? call_work_pending
+	;;
+ret_to_user:
+	disable_mmu $r60, PS_HWLOOP_DISABLE
+	;;
+	/* Compute $sp virtual address to avoid using 64 bits offset */
+	addd $r15 = $sp, VA_TO_PA_OFFSET
+	;;
+	/* Restore registers */
+	lo $r60r61r62r63 = PT_LC_LE_LS_RA[$r15]
+	get $r33 = $sps
+	;;
+	lo $r40r41r42r43 = PT_CS_SPC_SPS_ES[$r15];
+	set $ra = $r63
+	;;
+	/* Restore syscall arguments since they might be needed for
+	 * syscall restart
+	 */
+	lo $r0r1r2r3 = PT_Q0[$r15]
+	set $cs = $r40
+	/* Generate a mask of ones at each bit where the current $sps
+	 * differs from the $sps to be restored
+	 */
+	xord $r33 = $r42, $r33
+	/* prepare wfxl clear mask on LSBs */
+	notd $r34 = $r42
+	/* prepare wfxl set mask on MSBs */
+	slld $r35 = $r42, 32
+	;;
+	set $lc = $r60
+	/* Replicate mask of ones on the 32 MSBs */
+	sbmm8 $r33 = $r33, REPLICATE_32_MASK
+	/* Combine the set and clear mask for wfxl */
+	insf  $r35 = $r34, 31, 0
+	;;
+	lo $r4r5r6r7 = PT_Q4[$r15]
+	set $le = $r61
+	/* Mask to drop identical bits in order to avoid useless
+	 * privilege traps
+	 */
+	andd $r35 = $r35, $r33
+	;;
+	/* Restore $ilr */
+	ld $r44 = PT_ILR[$r15]
+	set $ls = $r62
+	/* Reenable MMU in $sps */
+	ord $r35 = $r35, SFR_SET_VAL_WFXL(SPS, MME, 1)
+	;;
+	/* Restore user pointer */
+	lo $r12r13r14r15 = PT_Q12[$r15]
+	/* Prepare $r44 as a set mask for $ilr wfxl */
+	slld $r44 = $r44,  32
+	;;
+	/**
+	 * wfxl on $ilr to avoid privilege traps when restoring with set
+	 * Note that we do that after disabling interrupt since we explicitly
+	 * want to serve DAME itnerrupt to the user (ie not in kernel mode).
+	 */
+	wfxl $ilr = $r44
+	;;
+	wfxl $sps = $r35
+	;;
+	set $spc = $r41
+	;;
+	/* TODO: we might have to clear some registers to avoid leaking
+	 * information to user space ! callee saved regs have been
+	 * restored by called function but caller saved regs might
+	 * have been used without being cleared */
+	rfe
+	;;
+
+/* Slow paths handling */
+call_work_pending:
+	/*
+	 * Work pending can potentially call a signal handler and then return
+	 * via rt_sigreturn. Return path will be different (restore all regs)
+	 * and hence all registers are needed to be saved.
+	 */
+	save_callee
+	;;
+	call_do_work_pending
+	;;
+	/* Since we are returning to user, interrupts will be reenabled */
+	call_trace_hardirqs on
+	;;
+	goto ret_to_user
+	;;
+
+trace_syscall_enter:
+	/* Save $r38 (syscall handler) which was computed above */
+	sd PT_R38[$sp] = $r38
+	;;
+	/* do_syscall_trace_enter expect pt_regs and syscall number
+	 * as argument */
+	copyd $r0 = $sp
+	copyd $r1 = $r32
+	;;
+	call do_syscall_trace_enter
+	;;
+	/* Restore r38 (syscall handler) which might has been clobbered by
+	 * do_syscall_trace_enter */
+	ld $r38 = PT_R38[$sp]
+	;;
+	/* if the trace system requested to abort syscall, set $r38 to
+	 * non implemented syscall */
+	cmoved.dnez $r0? $r38 = sys_ni_syscall
+	;;
+	/* Restore registers since the do_syscall_trace_enter call might
+	 * have clobbered them and we need them for the actual syscall
+	 * call */
+	lo $r0r1r2r3 = PT_Q0[$sp]
+	;;
+	lo $r4r5r6r7 = PT_Q4[$sp]
+	;;
+	goto do_syscall
+	;;
+trace_syscall_exit:
+	copyd $r0 = $sp
+	call do_syscall_trace_exit
+	;;
+	/* ptrace_notify might re-enable interrupts, disable them to be sure
+	 * it will not mess up context restoration path */
+	disable_interrupt $r36
+	;;
+	get $r36 = $sr
+	;;
+	ld $r38 = TASK_TI_FLAGS[$r36]
+	goto check_work_pending
+	;;
+ENDPROC(kvx_syscall_handler)
+
+.text
+/**
+ * sys_rt_sigreturn: Special handler for sigreturn
+ * rt_sigreturn is called after invoking a user signal handler (see
+ * user_scall_rt_sigreturn). Since this signal handler can be invoked after
+ * interrupts have been handled, this means we must restore absolutely all user
+ * registers. Normally, this has been done by setup_sigcontext which saved all
+ * user registers on the user stack.
+ * In restore sigcontext, they have been restored onto entry stack (stack to be
+ * restored). However, the standard syscall path do not restore it completely
+ * since only callee-saved registers are restored for fork and al.
+ * Here, we will restore all registers which might have been clobberred.
+ */
+ENTRY(sys_rt_sigreturn)
+	/*
+	 * If syscall tracing is enable (stored in $r37 during syscall
+	 * fastpath), tail-call to trace_exit. If not, tail-call to
+	 * ret_from_kernel.
+	 */
+	cmoved.dnez $r37? $r38 = scall_trace_exit
+	cmoved.deqz $r37? $r38 = ret_from_kernel
+	;;
+	set $ra = $r38
+	;;
+	goto _sys_rt_sigreturn
+	;;
+scall_trace_exit:
+	copyd $r0 = $sp
+	call do_syscall_trace_exit
+	;;
+	goto ret_from_kernel
+	;;
+ENDPROC(sys_rt_sigreturn)
+
+/**
+ * __sys_clone: slow path handler for clone
+ *
+ * Save callee registers (from $r18 to $31) since they are needed for
+ * child process when restoring it.
+ * Indeed, when forking we want it to have the same registers contents
+ * as its parent. These registers will then be restored in
+ * ret_from_fork.
+ * This slow path saves them out of the fast path to avoid bloating all syscall
+ * just for one special case.
+ */
+ENTRY(__sys_clone)
+	save_callee
+	;;
+	/*
+	 * Use goto since we want sys_clone to "ret" to the previous caller.
+	 * This allows to simply go back in the normal syscall fastpath
+	 */
+	goto sys_clone
+	;;
+ENDPROC(__sys_clone)
+/***********************************************************************
+*                      Context switch
+***********************************************************************/
+
+#define SAVE_TCA_REG(__reg_num, __task_reg, __zero_reg1, __zero_reg2) \
+	xso (CTX_SWITCH_TCA_REGS + (__reg_num * TCA_REG_SIZE))[__task_reg] = \
+							$a##__reg_num ;\
+	movetq $a##__reg_num##.lo = __zero_reg1, __zero_reg2 ;\
+	movetq $a##__reg_num##.hi = __zero_reg1, __zero_reg2 ;\
+	;;
+
+.macro save_tca_regs task_reg zero_reg1 zero_reg2
+	SAVE_TCA_REG(0, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(1, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(2, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(3, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(4, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(5, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(6, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(7, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(8, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(9, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(10, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(11, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(12, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(13, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(14, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(15, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(16, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(17, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(18, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(19, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(20, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(21, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(22, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(23, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(24, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(25, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(26, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(27, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(28, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(29, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(30, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(31, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(32, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(33, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(34, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(35, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(36, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(37, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(38, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(39, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(40, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(41, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(42, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(43, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(44, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(45, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(46, \task_reg, \zero_reg1, \zero_reg2)
+	SAVE_TCA_REG(47, \task_reg, \zero_reg1, \zero_reg2)
+.endm
+
+#define RESTORE_TCA_REG(__reg_num, __task_reg) \
+	xlo.u $a##__reg_num = (CTX_SWITCH_TCA_REGS + (__reg_num * TCA_REG_SIZE)) \
+								[__task_reg] ;\
+	;;
+
+.macro restore_tca_regs task_reg
+	RESTORE_TCA_REG(0, \task_reg)
+	RESTORE_TCA_REG(1, \task_reg)
+	RESTORE_TCA_REG(2, \task_reg)
+	RESTORE_TCA_REG(3, \task_reg)
+	RESTORE_TCA_REG(4, \task_reg)
+	RESTORE_TCA_REG(5, \task_reg)
+	RESTORE_TCA_REG(6, \task_reg)
+	RESTORE_TCA_REG(7, \task_reg)
+	RESTORE_TCA_REG(8, \task_reg)
+	RESTORE_TCA_REG(9, \task_reg)
+	RESTORE_TCA_REG(10, \task_reg)
+	RESTORE_TCA_REG(11, \task_reg)
+	RESTORE_TCA_REG(12, \task_reg)
+	RESTORE_TCA_REG(13, \task_reg)
+	RESTORE_TCA_REG(14, \task_reg)
+	RESTORE_TCA_REG(15, \task_reg)
+	RESTORE_TCA_REG(16, \task_reg)
+	RESTORE_TCA_REG(17, \task_reg)
+	RESTORE_TCA_REG(18, \task_reg)
+	RESTORE_TCA_REG(19, \task_reg)
+	RESTORE_TCA_REG(20, \task_reg)
+	RESTORE_TCA_REG(21, \task_reg)
+	RESTORE_TCA_REG(22, \task_reg)
+	RESTORE_TCA_REG(23, \task_reg)
+	RESTORE_TCA_REG(24, \task_reg)
+	RESTORE_TCA_REG(25, \task_reg)
+	RESTORE_TCA_REG(26, \task_reg)
+	RESTORE_TCA_REG(27, \task_reg)
+	RESTORE_TCA_REG(28, \task_reg)
+	RESTORE_TCA_REG(29, \task_reg)
+	RESTORE_TCA_REG(30, \task_reg)
+	RESTORE_TCA_REG(31, \task_reg)
+	RESTORE_TCA_REG(32, \task_reg)
+	RESTORE_TCA_REG(33, \task_reg)
+	RESTORE_TCA_REG(34, \task_reg)
+	RESTORE_TCA_REG(35, \task_reg)
+	RESTORE_TCA_REG(36, \task_reg)
+	RESTORE_TCA_REG(37, \task_reg)
+	RESTORE_TCA_REG(38, \task_reg)
+	RESTORE_TCA_REG(39, \task_reg)
+	RESTORE_TCA_REG(40, \task_reg)
+	RESTORE_TCA_REG(41, \task_reg)
+	RESTORE_TCA_REG(42, \task_reg)
+	RESTORE_TCA_REG(43, \task_reg)
+	RESTORE_TCA_REG(44, \task_reg)
+	RESTORE_TCA_REG(45, \task_reg)
+	RESTORE_TCA_REG(46, \task_reg)
+	RESTORE_TCA_REG(47, \task_reg)
+.endm
+
+.text
+/*
+ * When entering in ret_from_kernel_thread, r20 and r21 where set by
+ * copy_thread and have been restored in switch_to.
+ * These registers contains the values needed to call a function
+ * specified by the switch_to caller (or where set by copy_thread).
+ */
+ENTRY(ret_from_kernel_thread)
+	call schedule_tail
+	;;
+	/* Call fn(arg) */
+	copyd $r0 = $r21
+	;;
+	icall $r20
+	;;
+	goto ret_from_kernel
+	;;
+ENDPROC(ret_from_kernel_thread)
+
+/**
+ * Return from fork.
+ * start_thread will set return stack and pc. Then copy_thread will
+ * take care of the copying logic.
+ * $r20 will then contains 0 if tracing disabled (set by copy_thread)
+ * The mechanism is almost the same as for ret_from_kernel_thread.
+ */
+ENTRY(ret_from_fork)
+	call schedule_tail
+	;;
+	/* $r20 contains 0 if tracing disable */
+	cb.deqz $r20? ret_from_kernel
+	;;
+	copyd $r0 = $sp
+	call do_syscall_trace_exit
+	;;
+ret_from_kernel:
+	/*
+	 * When returning from a fork, the child will take this path.
+	 * Since we did not restore callee in return_from_exception, we
+	 * must do it before.
+	 */
+	lo $r28r29r30r31 = PT_Q28[$sp]
+	;;
+	lo $r24r25r26r27 = PT_Q24[$sp]
+	;;
+	lo $r20r21r22r23 = PT_Q20[$sp]
+	;;
+	lq $r18r19 = PT_R18R19[$sp]
+	;;
+#ifdef CONFIG_DEBUG_EXCEPTION_STACK
+	/**
+	* Debug code expect entry stack to be stored at current $sp.
+	* Make some room and store current $sp to avoid triggering false alarm.
+	*/
+	addd $sp = $sp, -STACK_REG_SIZE
+	;;
+	sd 0[$sp] = $sp
+#endif
+	;;
+	goto return_from_exception
+	;;
+ENDPROC(ret_from_fork)
+
+/*
+ * The callee-saved registers must be saved and restored.
+ * When entering:
+ * - r0 = previous task struct
+ * - r1 = next task struct
+ * Moreover, the parameters for function call (given by copy_thread)
+ * are stored in:
+ * - r20 = Func to call
+ * - r21 = Argument for function
+ */
+ENTRY(__switch_to)
+	sd CTX_SWITCH_FP[$r0] = $fp
+	;;
+	/* Save previous task context */
+	so CTX_SWITCH_Q20[$r0] = $r20r21r22r23
+	;;
+	so CTX_SWITCH_Q24[$r0] = $r24r25r26r27
+	get $r16 = $ra
+	;;
+	so CTX_SWITCH_Q28[$r0] = $r28r29r30r31
+	copyd $r17 = $sp
+	get $r2 = $cs
+	;;
+	so CTX_SWITCH_RA_SP_R18_R19[$r0] = $r16r17r18r19
+	/* Extract XMF bit which means coprocessor was used by user */
+	andd $r3 = $r2, KVX_SFR_CS_XMF_MASK
+	;;
+#ifdef CONFIG_ENABLE_TCA
+	make $r4 = 0
+	make $r5 = 0
+	make $r6 = 1
+	cb.dnez $r3? save_tca_registers
+	/* Check if next task needs TCA registers to be restored */
+	ld $r7 = CTX_SWITCH_TCA_REGS_SAVED[$r1]
+	;;
+check_restore_tca:
+	cb.dnez $r7? restore_tca_registers
+	;;
+restore_fast_path:
+#endif
+	/* Restore next task context */
+	lo $r16r17r18r19 = CTX_SWITCH_RA_SP_R18_R19[$r1]
+	;;
+	lo $r20r21r22r23 = CTX_SWITCH_Q20[$r1]
+	;;
+	lo $r24r25r26r27 = CTX_SWITCH_Q24[$r1]
+	copyd $sp = $r17
+	set $ra = $r16
+	;;
+	lo $r28r29r30r31 = CTX_SWITCH_Q28[$r1]
+	set $sr = $r1
+	;;
+	ld $fp = CTX_SWITCH_FP[$r1]
+	;;
+	ret
+	;;
+#ifdef CONFIG_ENABLE_TCA
+save_tca_registers:
+	save_tca_regs $r0 $r4 $r5
+	;;
+	/* Indicates that we saved the TCA context */
+	sb CTX_SWITCH_TCA_REGS_SAVED[$r0] = $r6
+	goto check_restore_tca
+	;;
+restore_tca_registers:
+	restore_tca_regs $r1
+	;;
+	/* Clear TCA registers saved hint */
+	sb CTX_SWITCH_TCA_REGS_SAVED[$r1] = $r5
+	goto restore_fast_path
+	;;
+#endif
+ENDPROC(__switch_to)
+
+/***********************************************************************
+*          		 Misc functions
+***********************************************************************/
+
+/**
+ * Avoid hardcoding trampoline for rt_sigreturn by using this code and
+ * copying it on user trampoline
+ */
+.pushsection .text
+.global user_scall_rt_sigreturn_end
+ENTRY(user_scall_rt_sigreturn)
+	make $r6 = __NR_rt_sigreturn
+	;;
+	scall $r6
+	;;
+user_scall_rt_sigreturn_end:
+ENDPROC(user_scall_rt_sigreturn)
+.popsection
diff --git a/arch/kvx/kernel/sys_kvx.c b/arch/kvx/kernel/sys_kvx.c
new file mode 100644
index 000000000000..b32a821d76c9
--- /dev/null
+++ b/arch/kvx/kernel/sys_kvx.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ *            Guillaume Thouvenin
+ */
+
+#include <linux/syscalls.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+	unsigned long, prot, unsigned long, flags,
+	unsigned long, fd, off_t, off)
+{
+	/* offset must be a multiple of the page size */
+	if (unlikely(offset_in_page(off) != 0))
+		return -EINVAL;
+
+	/* Unlike mmap2 where the offset is in PAGE_SIZE-byte units, here it
+	 * is in bytes. So we need to use PAGE_SHIFT.
+	 */
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+SYSCALL_DEFINE4(cachectl, unsigned long, addr, unsigned long, len,
+		unsigned long, cache, unsigned long, flags)
+{
+	bool wb = !!(flags & CACHECTL_FLAG_OP_WB);
+	bool inval = !!(flags & CACHECTL_FLAG_OP_INVAL);
+
+	if (len == 0)
+		return 0;
+
+	/* Check for overflow */
+	if (addr + len < addr)
+		return -EFAULT;
+
+	if (cache != CACHECTL_CACHE_DCACHE)
+		return -EINVAL;
+
+	if ((flags & CACHECTL_FLAG_OP_MASK) == 0)
+		return -EINVAL;
+
+	if (flags & CACHECTL_FLAG_ADDR_PHYS) {
+		if (!IS_ENABLED(CONFIG_CACHECTL_UNSAFE_PHYS_OPERATIONS))
+			return -EINVAL;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		dcache_wb_inval_phys_range(addr, len, wb, inval);
+		return 0;
+	}
+
+	return dcache_wb_inval_virt_range(addr, len, wb, inval);
+}
diff --git a/arch/kvx/kernel/syscall_table.c b/arch/kvx/kernel/syscall_table.c
new file mode 100644
index 000000000000..7859d25e728d
--- /dev/null
+++ b/arch/kvx/kernel/syscall_table.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * derived from arch/riscv/kernel/syscall_table.c
+ *
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#include <linux/syscalls.h>
+
+#include <asm/syscalls.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call)	[nr] = (call),
+
+void *sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
-- 
2.37.2








More information about the linux-riscv mailing list