[RFC PATCH v3 24/37] kvx: Add memory management
ysionneau at kalrayinc.com
ysionneau at kalrayinc.com
Mon Jul 22 02:41:35 PDT 2024
From: Yann Sionneau <ysionneau at kalrayinc.com>
Add memory management support for kvx, including: cache and tlb
management, page fault handling, ioremap/mmap and streaming dma support.
Co-developed-by: Clement Leger <clement at clement-leger.fr>
Signed-off-by: Clement Leger <clement at clement-leger.fr>
Co-developed-by: Guillaume Thouvenin <thouveng at gmail.com>
Signed-off-by: Guillaume Thouvenin <thouveng at gmail.com>
Co-developed-by: Jean-Christophe Pince <jcpince at gmail.com>
Signed-off-by: Jean-Christophe Pince <jcpince at gmail.com>
Co-developed-by: Jules Maselbas <jmaselbas at zdiv.net>
Signed-off-by: Jules Maselbas <jmaselbas at zdiv.net>
Co-developed-by: Julian Vetter <jvetter at kalrayinc.com>
Signed-off-by: Julian Vetter <jvetter at kalrayinc.com>
Co-developed-by: Julien Hascoet <jhascoet at kalrayinc.com>
Signed-off-by: Julien Hascoet <jhascoet at kalrayinc.com>
Co-developed-by: Louis Morhet <lmorhet at kalrayinc.com>
Signed-off-by: Louis Morhet <lmorhet at kalrayinc.com>
Co-developed-by: Marc Poulhiès <dkm at kataplop.net>
Signed-off-by: Marc Poulhiès <dkm at kataplop.net>
Co-developed-by: Marius Gligor <mgligor at kalrayinc.com>
Signed-off-by: Marius Gligor <mgligor at kalrayinc.com>
Co-developed-by: Vincent Chardon <vincent.chardon at elsys-design.com>
Signed-off-by: Vincent Chardon <vincent.chardon at elsys-design.com>
Signed-off-by: Yann Sionneau <ysionneau at kalrayinc.com>
---
Notes:
V1 -> V2: removed L2 cache management
V2 -> V3:
- replace KVX_PFN by KVX_PTE
- fix dcache inval when virt range spans several VMAs
- replace BUGS with WARN_ON_ONCE
- setup_bootmem: memblock_reserve() all smem at boot
---
arch/kvx/include/asm/cache.h | 44 +++
arch/kvx/include/asm/cacheflush.h | 158 ++++++++++
arch/kvx/include/asm/fixmap.h | 47 +++
arch/kvx/include/asm/hugetlb.h | 36 +++
arch/kvx/include/asm/mem_map.h | 44 +++
arch/kvx/include/asm/mmu.h | 289 +++++++++++++++++
arch/kvx/include/asm/mmu_context.h | 156 ++++++++++
arch/kvx/include/asm/mmu_stats.h | 38 +++
arch/kvx/include/asm/page.h | 172 ++++++++++
arch/kvx/include/asm/page_size.h | 29 ++
arch/kvx/include/asm/pgalloc.h | 70 +++++
arch/kvx/include/asm/pgtable-bits.h | 105 +++++++
arch/kvx/include/asm/pgtable.h | 468 ++++++++++++++++++++++++++++
arch/kvx/include/asm/sparsemem.h | 15 +
arch/kvx/include/asm/symbols.h | 16 +
arch/kvx/include/asm/tlb.h | 24 ++
arch/kvx/include/asm/tlb_defs.h | 132 ++++++++
arch/kvx/include/asm/tlbflush.h | 60 ++++
arch/kvx/include/asm/vmalloc.h | 10 +
arch/kvx/mm/cacheflush.c | 158 ++++++++++
arch/kvx/mm/dma-mapping.c | 83 +++++
arch/kvx/mm/extable.c | 24 ++
arch/kvx/mm/fault.c | 270 ++++++++++++++++
arch/kvx/mm/init.c | 297 ++++++++++++++++++
arch/kvx/mm/mmap.c | 31 ++
arch/kvx/mm/mmu.c | 202 ++++++++++++
arch/kvx/mm/mmu_stats.c | 94 ++++++
arch/kvx/mm/tlb.c | 445 ++++++++++++++++++++++++++
28 files changed, 3517 insertions(+)
create mode 100644 arch/kvx/include/asm/cache.h
create mode 100644 arch/kvx/include/asm/cacheflush.h
create mode 100644 arch/kvx/include/asm/fixmap.h
create mode 100644 arch/kvx/include/asm/hugetlb.h
create mode 100644 arch/kvx/include/asm/mem_map.h
create mode 100644 arch/kvx/include/asm/mmu.h
create mode 100644 arch/kvx/include/asm/mmu_context.h
create mode 100644 arch/kvx/include/asm/mmu_stats.h
create mode 100644 arch/kvx/include/asm/page.h
create mode 100644 arch/kvx/include/asm/page_size.h
create mode 100644 arch/kvx/include/asm/pgalloc.h
create mode 100644 arch/kvx/include/asm/pgtable-bits.h
create mode 100644 arch/kvx/include/asm/pgtable.h
create mode 100644 arch/kvx/include/asm/sparsemem.h
create mode 100644 arch/kvx/include/asm/symbols.h
create mode 100644 arch/kvx/include/asm/tlb.h
create mode 100644 arch/kvx/include/asm/tlb_defs.h
create mode 100644 arch/kvx/include/asm/tlbflush.h
create mode 100644 arch/kvx/include/asm/vmalloc.h
create mode 100644 arch/kvx/mm/cacheflush.c
create mode 100644 arch/kvx/mm/dma-mapping.c
create mode 100644 arch/kvx/mm/extable.c
create mode 100644 arch/kvx/mm/fault.c
create mode 100644 arch/kvx/mm/init.c
create mode 100644 arch/kvx/mm/mmap.c
create mode 100644 arch/kvx/mm/mmu.c
create mode 100644 arch/kvx/mm/mmu_stats.c
create mode 100644 arch/kvx/mm/tlb.c
diff --git a/arch/kvx/include/asm/cache.h b/arch/kvx/include/asm/cache.h
new file mode 100644
index 0000000000000..f95792883622b
--- /dev/null
+++ b/arch/kvx/include/asm/cache.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Julian Vetter
+ */
+
+#ifndef _ASM_KVX_CACHE_H
+#define _ASM_KVX_CACHE_H
+
+/**
+ * On KVX I$ and D$ have the same size (16KB).
+ * Caches are 16KB big, 4-way set associative, true lru, with 64-byte
+ * lines. The D$ is also write-through.
+ */
+#define KVX_ICACHE_WAY_COUNT 4
+#define KVX_ICACHE_SET_COUNT 64
+#define KVX_ICACHE_LINE_SHIFT 6
+#define KVX_ICACHE_LINE_SIZE (1 << KVX_ICACHE_LINE_SHIFT)
+#define KVX_ICACHE_SIZE \
+ (KVX_ICACHE_WAY_COUNT * KVX_ICACHE_SET_COUNT * KVX_ICACHE_LINE_SIZE)
+
+/**
+ * Invalidate the whole I-cache if the size to flush is more than this value
+ */
+#define KVX_ICACHE_INVAL_SIZE (KVX_ICACHE_SIZE)
+
+/* D-Cache */
+#define KVX_DCACHE_WAY_COUNT 4
+#define KVX_DCACHE_SET_COUNT 64
+#define KVX_DCACHE_LINE_SHIFT 6
+#define KVX_DCACHE_LINE_SIZE (1 << KVX_DCACHE_LINE_SHIFT)
+#define KVX_DCACHE_SIZE \
+ (KVX_DCACHE_WAY_COUNT * KVX_DCACHE_SET_COUNT * KVX_DCACHE_LINE_SIZE)
+
+/**
+ * Same for I-cache
+ */
+#define KVX_DCACHE_INVAL_SIZE (KVX_DCACHE_SIZE)
+
+#define L1_CACHE_SHIFT KVX_DCACHE_LINE_SHIFT
+#define L1_CACHE_BYTES KVX_DCACHE_LINE_SIZE
+
+#endif /* _ASM_KVX_CACHE_H */
diff --git a/arch/kvx/include/asm/cacheflush.h b/arch/kvx/include/asm/cacheflush.h
new file mode 100644
index 0000000000000..256a201e423a7
--- /dev/null
+++ b/arch/kvx/include/asm/cacheflush.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Yann Sionneau
+ * Guillaume Thouvenin
+ * Marius Gligor
+ */
+
+#ifndef _ASM_KVX_CACHEFLUSH_H
+#define _ASM_KVX_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+
+#define flush_cache_mm(mm) do { } while (0)
+#define flush_cache_range(vma, start, end) do { } while (0)
+#define flush_cache_dup_mm(mm) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+
+#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vunmap(start, end) do { } while (0)
+
+#define flush_dcache_page(page) do { } while (0)
+
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+
+#define l1_inval_dcache_all __builtin_kvx_dinval
+#define kvx_fence __builtin_kvx_fence
+#define l1_inval_icache_all __builtin_kvx_iinval
+
+int dcache_wb_inval_virt_range(unsigned long vaddr, unsigned long len, bool wb,
+ bool inval);
+void dcache_wb_inval_phys_range(phys_addr_t addr, unsigned long len, bool wb,
+ bool inval);
+
+/*
+ * The L1 is indexed by virtual addresses and as such, invalidation takes
+ * virtual addresses as arguments.
+ */
+static inline
+void l1_inval_dcache_range(unsigned long vaddr, unsigned long size)
+{
+ unsigned long end = vaddr + size;
+
+ /* Then inval L1 */
+ if (size >= KVX_DCACHE_INVAL_SIZE) {
+ __builtin_kvx_dinval();
+ return;
+ }
+
+ vaddr = ALIGN_DOWN(vaddr, KVX_DCACHE_LINE_SIZE);
+ for (; vaddr < end; vaddr += KVX_DCACHE_LINE_SIZE)
+ __builtin_kvx_dinvall((void *) vaddr);
+}
+
+static inline
+void inval_dcache_range(phys_addr_t paddr, unsigned long size)
+{
+ l1_inval_dcache_range((unsigned long) phys_to_virt(paddr), size);
+}
+
+static inline
+void wb_dcache_range(phys_addr_t paddr, unsigned long size)
+{
+ /* Fence to ensure all write are committed */
+ kvx_fence();
+}
+
+static inline
+void wbinval_dcache_range(phys_addr_t paddr, unsigned long size)
+{
+ /* Fence to ensure all write are committed */
+ kvx_fence();
+
+ l1_inval_dcache_range((unsigned long) phys_to_virt(paddr), size);
+}
+
+static inline
+void l1_inval_icache_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ unsigned long size = end - start;
+
+ if (size >= KVX_ICACHE_INVAL_SIZE) {
+ __builtin_kvx_iinval();
+ __builtin_kvx_barrier();
+ return;
+ }
+
+ start = ALIGN_DOWN(start, KVX_ICACHE_LINE_SIZE);
+ for (addr = start; addr < end; addr += KVX_ICACHE_LINE_SIZE)
+ __builtin_kvx_iinvals((void *) addr);
+
+ __builtin_kvx_barrier();
+}
+
+static inline
+void wbinval_icache_range(phys_addr_t paddr, unsigned long size)
+{
+ unsigned long vaddr = (unsigned long) phys_to_virt(paddr);
+
+ /* Fence to ensure all write are committed */
+ kvx_fence();
+
+ l1_inval_icache_range(vaddr, vaddr + size);
+}
+
+static inline
+void sync_dcache_icache(unsigned long start, unsigned long end)
+{
+ /* Fence to ensure all write are committed */
+ kvx_fence();
+ /* Then invalidate the L1 icache */
+ l1_inval_icache_range(start, end);
+}
+
+static inline
+void local_flush_icache_range(unsigned long start, unsigned long end)
+{
+ sync_dcache_icache(start, end);
+}
+
+#ifdef CONFIG_SMP
+void flush_icache_range(unsigned long start, unsigned long end);
+#else
+#define flush_icache_range local_flush_icache_range
+#endif
+
+static inline
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+ unsigned long start = (unsigned long) page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+
+ sync_dcache_icache(start, end);
+}
+
+static inline
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+ unsigned long vaddr, int len)
+{
+ sync_dcache_icache(vaddr, vaddr + len);
+}
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+ do { \
+ memcpy(dst, src, len); \
+ if (vma->vm_flags & VM_EXEC) \
+ flush_icache_user_range(vma, page, vaddr, len); \
+ } while (0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+ memcpy(dst, src, len)
+
+#endif /* _ASM_KVX_CACHEFLUSH_H */
diff --git a/arch/kvx/include/asm/fixmap.h b/arch/kvx/include/asm/fixmap.h
new file mode 100644
index 0000000000000..3863e410d71de
--- /dev/null
+++ b/arch/kvx/include/asm/fixmap.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Marius Gligor
+ */
+
+#ifndef _ASM_KVX_FIXMAP_H
+#define _ASM_KVX_FIXMAP_H
+
+/**
+ * Use the latest available kernel address minus one page.
+ * This is needed since __fix_to_virt returns
+ * (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+ * Due to that, first member will be shifted by 0 and will be equal to
+ * FIXADDR_TOP.
+ * Some other architectures simply add a FIX_HOLE at the beginning of
+ * the fixed_addresses enum (I think ?).
+ */
+#define FIXADDR_TOP (-PAGE_SIZE)
+
+#define ASM_FIX_TO_VIRT(IDX) \
+ (FIXADDR_TOP - ((IDX) << PAGE_SHIFT))
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+enum fixed_addresses {
+ FIX_EARLYCON_MEM_BASE,
+ FIX_GDB_BARE_DISPLACED_MEM_BASE,
+ /* Used to access text early in RW mode (jump label) */
+ FIX_TEXT_PATCH,
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXMAP_PAGE_IO (PAGE_KERNEL_DEVICE)
+
+void __set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t prot);
+
+#include <asm-generic/fixmap.h>
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/kvx/include/asm/hugetlb.h b/arch/kvx/include/asm/hugetlb.h
new file mode 100644
index 0000000000000..a5984e8ede7eb
--- /dev/null
+++ b/arch/kvx/include/asm/hugetlb.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ */
+
+#ifndef _ASM_KVX_HUGETLB_H
+#define _ASM_KVX_HUGETLB_H
+
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#include <asm-generic/hugetlb.h>
+
+#endif /* _ASM_KVX_HUGETLB_H */
diff --git a/arch/kvx/include/asm/mem_map.h b/arch/kvx/include/asm/mem_map.h
new file mode 100644
index 0000000000000..ea90144209ce1
--- /dev/null
+++ b/arch/kvx/include/asm/mem_map.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ */
+
+#ifndef _ASM_KVX_MEM_MAP_H
+#define _ASM_KVX_MEM_MAP_H
+
+#include <linux/const.h>
+#include <linux/sizes.h>
+
+#include <asm/page.h>
+#include <asm/fixmap.h>
+
+/**
+ * kvx memory mapping defines
+ * For more information on memory mapping, please see
+ * Documentation/kvx/kvx-mmu.txt
+ *
+ * All _BASE defines are relative to PAGE_OFFSET
+ */
+
+/* Guard between various memory map zones */
+#define MAP_GUARD_SIZE SZ_1G
+
+/**
+ * Kernel direct memory mapping
+ */
+#define KERNEL_DIRECT_MEMORY_MAP_BASE PAGE_OFFSET
+#define KERNEL_DIRECT_MEMORY_MAP_SIZE UL(0x1000000000)
+#define KERNEL_DIRECT_MEMORY_MAP_END \
+ (KERNEL_DIRECT_MEMORY_MAP_BASE + KERNEL_DIRECT_MEMORY_MAP_SIZE)
+
+/**
+ * Vmalloc mapping (goes from kernel direct memory map up to fixmap start -
+ * guard size)
+ */
+#define KERNEL_VMALLOC_MAP_BASE (KERNEL_DIRECT_MEMORY_MAP_END + MAP_GUARD_SIZE)
+#define KERNEL_VMALLOC_MAP_SIZE \
+ (FIXADDR_START - KERNEL_VMALLOC_MAP_BASE - MAP_GUARD_SIZE)
+
+#endif
diff --git a/arch/kvx/include/asm/mmu.h b/arch/kvx/include/asm/mmu.h
new file mode 100644
index 0000000000000..a9fecb560fe5b
--- /dev/null
+++ b/arch/kvx/include/asm/mmu.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ * Marc Poulhiès
+ */
+
+#ifndef _ASM_KVX_MMU_H
+#define _ASM_KVX_MMU_H
+
+#include <linux/bug.h>
+#include <linux/types.h>
+#include <linux/threads.h>
+
+#include <asm/page.h>
+#include <asm/sfr.h>
+#include <asm/page.h>
+#include <asm/pgtable-bits.h>
+#include <asm/tlb_defs.h>
+
+/* Virtual addresses can use at most 41 bits */
+#define MMU_VIRT_BITS 41
+
+/*
+ * See Documentation/kvx/kvx-mmu.rst for details about the division of the
+ * virtual memory space.
+ */
+#if defined(CONFIG_KVX_4K_PAGES)
+#define MMU_USR_ADDR_BITS 39
+#else
+#error "Only 4Ko page size is supported at this time"
+#endif
+
+typedef struct mm_context {
+ unsigned long end_brk;
+ unsigned long asn[NR_CPUS];
+ unsigned long sigpage;
+} mm_context_t;
+
+struct __packed tlb_entry_low {
+ unsigned int es:2; /* Entry Status */
+ unsigned int cp:2; /* Cache Policy */
+ unsigned int pa:4; /* Protection Attributes */
+ unsigned int r:2; /* Reserved */
+ unsigned int ps:2; /* Page Size */
+ unsigned int fn:28; /* Frame Number */
+};
+
+struct __packed tlb_entry_high {
+ unsigned int asn:9; /* Address Space Number */
+ unsigned int g:1; /* Global Indicator */
+ unsigned int vs:2; /* Virtual Space */
+ unsigned int pn:29; /* Page Number */
+};
+
+struct kvx_tlb_format {
+ union {
+ struct tlb_entry_low tel;
+ uint64_t tel_val;
+ };
+ union {
+ struct tlb_entry_high teh;
+ uint64_t teh_val;
+ };
+};
+
+#define KVX_EMPTY_TLB_ENTRY { .tel_val = 0x0, .teh_val = 0x0 }
+
+/* Bit [0:39] of the TLB format corresponds to TLB Entry low */
+/* Bit [40:80] of the TLB format corresponds to the TLB Entry high */
+#define kvx_mmu_set_tlb_entry(tlbf) do { \
+ kvx_sfr_set(TEL, (uint64_t) tlbf.tel_val); \
+ kvx_sfr_set(TEH, (uint64_t) tlbf.teh_val); \
+} while (0)
+
+#define kvx_mmu_get_tlb_entry(tlbf) do { \
+ tlbf.tel_val = kvx_sfr_get(TEL); \
+ tlbf.teh_val = kvx_sfr_get(TEH); \
+} while (0)
+
+/* Use kvx_mmc_ to read a field from MMC value passed as parameter */
+#define __kvx_mmc(mmc_reg, field) \
+ kvx_sfr_field_val(mmc_reg, MMC, field)
+
+#define kvx_mmc_error(mmc) __kvx_mmc(mmc, E)
+#define kvx_mmc_parity(mmc) __kvx_mmc(mmc, PAR)
+#define kvx_mmc_sb(mmc) __kvx_mmc(mmc, SB)
+#define kvx_mmc_ss(mmc) __kvx_mmc(mmc, SS)
+#define kvx_mmc_sw(mmc) __kvx_mmc(mmc, SW)
+#define kvx_mmc_asn(mmc) __kvx_mmc(mmc, ASN)
+
+#define KVX_TLB_ACCESS_READ 0
+#define KVX_TLB_ACCESS_WRITE 1
+#define KVX_TLB_ACCESS_PROBE 2
+
+#ifdef CONFIG_KVX_DEBUG_TLB_ACCESS
+
+#define KVX_TLB_ACCESS_SIZE (1 << CONFIG_KVX_DEBUG_TLB_ACCESS_BITS)
+#define KVX_TLB_ACCESS_MASK GENMASK((CONFIG_KVX_DEBUG_TLB_ACCESS_BITS - 1), 0)
+#define KVX_TLB_ACCESS_GET_IDX(idx) (idx & KVX_TLB_ACCESS_MASK)
+
+/* This structure is used to make decoding of MMC easier in gdb */
+struct mmc_t {
+ unsigned int asn:9;
+ unsigned int s: 1;
+ unsigned int r1: 4;
+ unsigned int sne: 1;
+ unsigned int spe: 1;
+ unsigned int ptc: 2;
+ unsigned int sw: 4;
+ unsigned int ss: 6;
+ unsigned int sb: 1;
+ unsigned int r2: 1;
+ unsigned int par: 1;
+ unsigned int e: 1;
+};
+
+struct __packed kvx_tlb_access_t {
+ struct kvx_tlb_format entry; /* 128 bits */
+ union {
+ struct mmc_t mmc;
+ uint32_t mmc_val;
+ };
+ uint32_t type;
+};
+
+extern void kvx_update_tlb_access(int type);
+
+#else
+#define kvx_update_tlb_access(type) do {} while (0)
+#endif
+
+static inline void kvx_mmu_readtlb(void)
+{
+ kvx_update_tlb_access(KVX_TLB_ACCESS_READ);
+ asm volatile ("tlbread\n;;");
+}
+
+static inline void kvx_mmu_writetlb(void)
+{
+ kvx_update_tlb_access(KVX_TLB_ACCESS_WRITE);
+ asm volatile ("tlbwrite\n;;");
+}
+
+static inline void kvx_mmu_probetlb(void)
+{
+ kvx_update_tlb_access(KVX_TLB_ACCESS_PROBE);
+ asm volatile ("tlbprobe\n;;");
+}
+
+#define kvx_mmu_add_entry(buffer, way, entry) do { \
+ kvx_sfr_set_field(MMC, SB, buffer); \
+ kvx_sfr_set_field(MMC, SW, way); \
+ kvx_mmu_set_tlb_entry(entry); \
+ kvx_mmu_writetlb(); \
+} while (0)
+
+#define kvx_mmu_remove_ltlb_entry(way) do { \
+ struct kvx_tlb_format __invalid_entry = KVX_EMPTY_TLB_ENTRY; \
+ kvx_mmu_add_entry(MMC_SB_LTLB, way, __invalid_entry); \
+} while (0)
+
+static inline int get_page_size_shift(int ps)
+{
+ /*
+ * Use the same assembly trick using sbmm to directly get the page size
+ * shift using a constant which encodes all page size shifts
+ */
+ return __builtin_kvx_sbmm8(KVX_PS_SHIFT_MATRIX,
+ KVX_SBMM_BYTE_SEL << ps);
+}
+
+/*
+ * 4 bits are used to index the kvx access permissions. Bits are used as
+ * follow:
+ *
+ * +---------------+------------+-------------+------------+
+ * | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
+ * |---------------+------------+-------------+------------|
+ * | _PAGE_GLOBAL | _PAGE_EXEC | _PAGE_WRITE | _PAGE_READ |
+ * +---------------+------------+-------------+------------+
+ *
+ * If _PAGE_GLOBAL is set then the page belongs to the kernel. Otherwise it
+ * belongs to the user. When the page belongs to user-space then give the
+ * same rights to the kernel-space.
+ * In order to quickly compute policy from this value, we use sbmm instruction.
+ * The main interest is to avoid an additionnal load and specifically in the
+ * assembly refill handler.
+ */
+static inline u8 get_page_access_perms(u8 policy)
+{
+ /* If PAGE_READ is unset, there is no permission for this page */
+ if (!(policy & (_PAGE_READ >> _PAGE_PERMS_SHIFT)))
+ return TLB_PA_NA_NA;
+
+ /* Discard the _PAGE_READ bit to get a linear number in [0,7] */
+ policy >>= 1;
+
+ /* Use sbmm to directly get the page perms */
+ return __builtin_kvx_sbmm8(KVX_PAGE_PA_MATRIX,
+ KVX_SBMM_BYTE_SEL << policy);
+}
+
+static inline struct kvx_tlb_format tlb_mk_entry(
+ void *paddr,
+ void *vaddr,
+ unsigned int ps,
+ unsigned int global,
+ unsigned int pa,
+ unsigned int cp,
+ unsigned int asn,
+ unsigned int es)
+{
+ struct kvx_tlb_format entry;
+ u64 mask = ULONG_MAX << get_page_size_shift(ps);
+
+ BUG_ON(ps >= (1 << KVX_SFR_TEL_PS_WIDTH));
+
+ /*
+ * 0 matches the virtual space:
+ * - either we are virtualized and the hypervisor will set it
+ * for us when using writetlb
+ * - Or we are native and the virtual space is 0
+ */
+ entry.teh_val = TLB_MK_TEH_ENTRY((uintptr_t)vaddr & mask, 0, global,
+ asn);
+ entry.tel_val = TLB_MK_TEL_ENTRY((uintptr_t)paddr, ps, es, cp, pa);
+
+ return entry;
+}
+
+static inline unsigned long tlb_entry_phys(struct kvx_tlb_format tlbe)
+{
+ return ((unsigned long) tlbe.tel.fn << KVX_SFR_TEL_FN_SHIFT);
+}
+
+static inline unsigned long tlb_entry_virt(struct kvx_tlb_format tlbe)
+{
+ return ((unsigned long) tlbe.teh.pn << KVX_SFR_TEH_PN_SHIFT);
+}
+
+static inline unsigned long tlb_entry_size(struct kvx_tlb_format tlbe)
+{
+ return BIT(get_page_size_shift(tlbe.tel.ps));
+}
+
+static inline int tlb_entry_overlaps(struct kvx_tlb_format tlbe1,
+ struct kvx_tlb_format tlbe2)
+{
+ unsigned long start1, end1;
+ unsigned long start2, end2;
+
+ start1 = tlb_entry_virt(tlbe1);
+ end1 = start1 + tlb_entry_size(tlbe1);
+
+ start2 = tlb_entry_virt(tlbe2);
+ end2 = start2 + tlb_entry_size(tlbe2);
+
+ return start1 <= end2 && end1 >= start2;
+}
+
+static inline int tlb_entry_match_addr(struct kvx_tlb_format tlbe,
+ unsigned long vaddr)
+{
+ /*
+ * TLB entries store up to 41 bits so the provided address must be
+ * truncated to match teh.pn.
+ */
+ vaddr &= GENMASK(MMU_VIRT_BITS - 1, KVX_SFR_TEH_PN_SHIFT);
+
+ return tlb_entry_virt(tlbe) == vaddr;
+}
+
+extern void kvx_mmu_early_setup(void);
+
+static inline void paging_init(void) {}
+
+void kvx_mmu_ltlb_remove_entry(unsigned long vaddr);
+void kvx_mmu_ltlb_add_entry(unsigned long vaddr, phys_addr_t paddr,
+ pgprot_t flags, unsigned long page_shift);
+
+void kvx_mmu_jtlb_add_entry(unsigned long address, pte_t *ptep,
+ unsigned int asn);
+extern void mmu_early_init(void);
+
+struct mm_struct;
+
+#endif /* _ASM_KVX_MMU_H */
diff --git a/arch/kvx/include/asm/mmu_context.h b/arch/kvx/include/asm/mmu_context.h
new file mode 100644
index 0000000000000..39fa92f1506b2
--- /dev/null
+++ b/arch/kvx/include/asm/mmu_context.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ */
+
+#ifndef __ASM_KVX_MMU_CONTEXT_H
+#define __ASM_KVX_MMU_CONTEXT_H
+
+/*
+ * Management of the Address Space Number:
+ * Coolidge architecture provides a 9-bit ASN to tag TLB entries. This can be
+ * used to allow several entries with the same virtual address (so from
+ * different process) to be in the TLB at the same time. That means that won't
+ * necessarily flush the TLB when a context switch occurs and so it will
+ * improve performances.
+ */
+#include <linux/smp.h>
+
+#include <asm/mmu.h>
+#include <asm/sfr_defs.h>
+#include <asm/tlbflush.h>
+
+#include <asm-generic/mm_hooks.h>
+
+#define MM_CTXT_ASN_MASK GENMASK(KVX_SFR_MMC_ASN_WIDTH - 1, 0)
+#define MM_CTXT_CYCLE_MASK (~MM_CTXT_ASN_MASK)
+#define MM_CTXT_NO_ASN UL(0x0)
+#define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASN_MASK + 1)
+
+#define mm_asn(mm, cpu) ((mm)->context.asn[cpu])
+
+DECLARE_PER_CPU(unsigned long, kvx_asn_cache);
+#define cpu_asn_cache(cpu) per_cpu(kvx_asn_cache, cpu)
+
+static inline void get_new_mmu_context(struct mm_struct *mm, unsigned int cpu)
+{
+ unsigned long asn = cpu_asn_cache(cpu);
+
+ asn++;
+ /* Check if we need to start a new cycle */
+ if ((asn & MM_CTXT_ASN_MASK) == 0) {
+ pr_debug("%s: start new cycle, flush all tlb\n", __func__);
+ local_flush_tlb_all();
+
+ /*
+ * Above check for rollover of 9 bit ASN in 64 bit container.
+ * If the container itself wrapped around, set it to a non zero
+ * "generation" to distinguish from no context
+ */
+ if (asn == 0)
+ asn = MM_CTXT_FIRST_CYCLE;
+ }
+
+ cpu_asn_cache(cpu) = asn;
+ mm_asn(mm, cpu) = asn;
+
+ pr_debug("%s: mm = 0x%llx: cpu[%d], cycle: %lu, asn: %lu\n",
+ __func__, (unsigned long long)mm, cpu,
+ (asn & MM_CTXT_CYCLE_MASK) >> KVX_SFR_MMC_ASN_WIDTH,
+ asn & MM_CTXT_ASN_MASK);
+}
+
+static inline void get_mmu_context(struct mm_struct *mm, unsigned int cpu)
+{
+
+ unsigned long asn = mm_asn(mm, cpu);
+
+ /*
+ * Move to new ASN if it was not from current alloc-cycle/generation.
+ * This is done by ensuring that the generation bits in both
+ * mm->context.asn and cpu_asn_cache counter are exactly same.
+ *
+ * NOTE: this also works for checking if mm has a context since the
+ * first alloc-cycle/generation is always '1'. MM_CTXT_NO_ASN value
+ * contains cycle '0', and thus it will match.
+ */
+ if ((asn ^ cpu_asn_cache(cpu)) & MM_CTXT_CYCLE_MASK)
+ get_new_mmu_context(mm, cpu);
+}
+
+static inline void activate_context(struct mm_struct *mm, unsigned int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ get_mmu_context(mm, cpu);
+
+ kvx_sfr_set_field(MMC, ASN, mm_asn(mm, cpu) & MM_CTXT_ASN_MASK);
+
+ local_irq_restore(flags);
+}
+
+/**
+ * Redefining the generic hooks that are:
+ * - activate_mm
+ * - deactivate_mm
+ * - enter_lazy_tlb
+ * - init_new_context
+ * - destroy_context
+ * - switch_mm
+ */
+
+#define activate_mm(prev, next) switch_mm((prev), (next), NULL)
+#define deactivate_mm(tsk, mm) do { } while (0)
+#define enter_lazy_tlb(mm, tsk) do { } while (0)
+
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ mm_asn(mm, cpu) = MM_CTXT_NO_ASN;
+
+ return 0;
+}
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mm_asn(mm, cpu) = MM_CTXT_NO_ASN;
+ local_irq_restore(flags);
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned int cpu = smp_processor_id();
+
+ /**
+ * Comment taken from arc, but logic is the same for us:
+ *
+ * Note that the mm_cpumask is "aggregating" only, we don't clear it
+ * for the switched-out task, unlike some other arches.
+ * It is used to enlist cpus for sending TLB flush IPIs and not sending
+ * it to CPUs where a task once ran-on, could cause stale TLB entry
+ * re-use, specially for a multi-threaded task.
+ * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps.
+ * For a non-aggregating mm_cpumask, IPI not sent C1, and if T1
+ * were to re-migrate to C1, it could access the unmapped region
+ * via any existing stale TLB entries.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ if (prev != next)
+ activate_context(next, cpu);
+}
+
+
+#endif /* __ASM_KVX_MMU_CONTEXT_H */
diff --git a/arch/kvx/include/asm/mmu_stats.h b/arch/kvx/include/asm/mmu_stats.h
new file mode 100644
index 0000000000000..999352dbc1ce1
--- /dev/null
+++ b/arch/kvx/include/asm/mmu_stats.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_MMU_STATS_H
+#define _ASM_KVX_MMU_STATS_H
+
+#ifdef CONFIG_KVX_MMU_STATS
+#include <linux/percpu.h>
+
+struct mmu_refill_stats {
+ unsigned long count;
+ unsigned long total;
+ unsigned long min;
+ unsigned long max;
+};
+
+enum mmu_refill_type {
+ MMU_REFILL_TYPE_USER,
+ MMU_REFILL_TYPE_KERNEL,
+ MMU_REFILL_TYPE_KERNEL_DIRECT,
+ MMU_REFILL_TYPE_COUNT,
+};
+
+struct mmu_stats {
+ struct mmu_refill_stats refill[MMU_REFILL_TYPE_COUNT];
+ /* keep these fields ordered this way for assembly */
+ unsigned long cycles_between_refill;
+ unsigned long last_refill;
+ unsigned long tlb_flush_all;
+};
+
+DECLARE_PER_CPU(struct mmu_stats, mmu_stats);
+#endif
+
+#endif /* _ASM_KVX_MMU_STATS_H */
diff --git a/arch/kvx/include/asm/page.h b/arch/kvx/include/asm/page.h
new file mode 100644
index 0000000000000..63a3bdd3bfdf6
--- /dev/null
+++ b/arch/kvx/include/asm/page.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ * Marius Gligor
+ */
+
+#ifndef _ASM_KVX_PAGE_H
+#define _ASM_KVX_PAGE_H
+
+#include <linux/const.h>
+
+#define PAGE_SHIFT CONFIG_KVX_PAGE_SHIFT
+#define PAGE_SIZE _BITUL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+#define DDR_PHYS_OFFSET (0x100000000)
+#define PHYS_OFFSET CONFIG_KVX_PHYS_OFFSET
+#define PAGE_OFFSET CONFIG_KVX_PAGE_OFFSET
+#define DDR_VIRT_OFFSET (DDR_PHYS_OFFSET + PAGE_OFFSET)
+
+#define VA_TO_PA_OFFSET (PHYS_OFFSET - PAGE_OFFSET)
+#define PA_TO_VA_OFFSET (PAGE_OFFSET - PHYS_OFFSET)
+
+/*
+ * These macros are specifically written for assembly. They are useful for
+ * converting symbols above PAGE_OFFSET to their physical addresses.
+ */
+#define __PA(x) ((x) + VA_TO_PA_OFFSET)
+#define __VA(x) ((x) + PA_TO_VA_OFFSET)
+
+#if defined(CONFIG_KVX_4K_PAGES)
+/* Maximum usable bit using 4K pages and current page table layout */
+#define VA_MAX_BITS 40
+#define PGDIR_SHIFT 30
+#define PMD_SHIFT 21
+#else
+#error "64K page not supported"
+#endif
+
+/*
+ * Define _SHIFT, _SIZE and _MASK corresponding of the different page
+ * sizes supported by kvx.
+ */
+#define KVX_PAGE_4K_SHIFT 12
+#define KVX_PAGE_4K_SIZE BIT(KVX_PAGE_4K_SHIFT)
+#define KVX_PAGE_4K_MASK (~(KVX_PAGE_4K_SIZE - 1))
+
+#define KVX_PAGE_64K_SHIFT 16
+#define KVX_PAGE_64K_SIZE BIT(KVX_PAGE_64K_SHIFT)
+#define KVX_PAGE_64K_MASK (~(KVX_PAGE_64K_SIZE - 1))
+
+#define KVX_PAGE_2M_SHIFT 21
+#define KVX_PAGE_2M_SIZE BIT(KVX_PAGE_2M_SHIFT)
+#define KVX_PAGE_2M_MASK (~(KVX_PAGE_2M_SIZE - 1))
+
+#define KVX_PAGE_512M_SHIFT 29
+#define KVX_PAGE_512M_SIZE BIT(KVX_PAGE_512M_SHIFT)
+#define KVX_PAGE_512M_MASK (~(KVX_PAGE_512M_SIZE - 1))
+
+/* Encode all page shift into one 32bit constant for sbmm */
+#define KVX_PS_SHIFT_MATRIX ((KVX_PAGE_512M_SHIFT << 24) | \
+ (KVX_PAGE_2M_SHIFT << 16) | \
+ (KVX_PAGE_64K_SHIFT << 8) | \
+ (KVX_PAGE_4K_SHIFT))
+
+/* Encode all page access policy into one 64bit constant for sbmm */
+#define KVX_PAGE_PA_MATRIX ((UL(TLB_PA_NA_RWX) << 56) | \
+ (UL(TLB_PA_NA_RX) << 48) | \
+ (UL(TLB_PA_NA_RW) << 40) | \
+ (UL(TLB_PA_NA_R) << 32) | \
+ (UL(TLB_PA_RWX_RWX) << 24) | \
+ (UL(TLB_PA_RX_RX) << 16) | \
+ (UL(TLB_PA_RW_RW) << 8) | \
+ (UL(TLB_PA_R_R)))
+
+/*
+ * Select a byte using sbmm8. When shifted by one bit left, we get the next
+ * byte.
+ * For instance using this default constant with sbmm yields the value between
+ * first byte of the double word.
+ * If constant is shifted by 1, the value is now 0x0000000000000002ULL and this
+ * yield the second byte and so on, and so on !
+ */
+#define KVX_SBMM_BYTE_SEL 0x01
+
+#ifndef __ASSEMBLY__
+
+#include <linux/string.h>
+
+/* Page Global Directory entry */
+typedef struct {
+ unsigned long pgd;
+} pgd_t;
+
+/* Page Middle Directory entry */
+typedef struct {
+ unsigned long pmd;
+} pmd_t;
+
+/* Page Table entry */
+typedef struct {
+ unsigned long pte;
+} pte_t;
+
+/* Protection bits */
+typedef struct {
+ unsigned long pgprot;
+} pgprot_t;
+
+typedef struct page *pgtable_t;
+
+/**
+ * Macros to access entry values
+ */
+#define pgd_val(x) ((x).pgd)
+#define pmd_val(x) ((x).pmd)
+#define pte_val(x) ((x).pte)
+#define pgprot_val(x) ((x).pgprot)
+
+/**
+ * Macro to create entry from value
+ */
+#define __pgd(x) ((pgd_t) { (x) })
+#define __pmd(x) ((pmd_t) { (x) })
+#define __pte(x) ((pte_t) { (x) })
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+#define pte_pgprot(x) __pgprot(pte_val(x) & ~PFN_PTE_MASK)
+
+#define __pa(x) ((unsigned long)(x) + VA_TO_PA_OFFSET)
+#define __va(x) ((void *)((unsigned long) (x) + PA_TO_VA_OFFSET))
+
+#define phys_to_pfn(phys) (PFN_DOWN(phys))
+#define pfn_to_phys(pfn) (PFN_PHYS(pfn))
+
+#define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
+#define pfn_to_virt(pfn) (__va(pfn_to_phys(pfn)))
+
+#define virt_to_page(vaddr) (pfn_to_page(virt_to_pfn(vaddr)))
+#define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
+
+#define page_to_phys(page) virt_to_phys(page_to_virt(page))
+#define phys_to_page(phys) (pfn_to_page(phys_to_pfn(phys)))
+
+#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+static inline void clear_user_page(void *page, unsigned long vaddr,
+ struct page *pg)
+{
+ clear_page(page);
+}
+
+static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
+ struct page *topage)
+{
+ copy_page(to, from);
+}
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_KVX_PAGE_H */
diff --git a/arch/kvx/include/asm/page_size.h b/arch/kvx/include/asm/page_size.h
new file mode 100644
index 0000000000000..2c2850205b50e
--- /dev/null
+++ b/arch/kvx/include/asm/page_size.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ */
+
+#ifndef _ASM_KVX_PAGE_SIZE_H
+#define _ASM_KVX_PAGE_SIZE_H
+
+#include <asm/tlb_defs.h>
+
+#if defined(CONFIG_HUGETLB_PAGE)
+#define HUGE_PAGE_SIZE (MMC_PMJ_64K | MMC_PMJ_2M | MMC_PMJ_512M)
+#else
+#define HUGE_PAGE_SIZE (0)
+#endif
+
+#if defined(CONFIG_KVX_4K_PAGES)
+#define TLB_DEFAULT_PS TLB_PS_4K
+#define KVX_SUPPORTED_PSIZE (MMC_PMJ_4K | HUGE_PAGE_SIZE)
+#elif defined(CONFIG_KVX_64K_PAGES)
+#define TLB_DEFAULT_PS TLB_PS_64K
+#define KVX_SUPPORTED_PSIZE (MMC_PMJ_64K | HUGE_PAGE_SIZE)
+#else
+#error "Unsupported page size"
+#endif
+
+#endif
diff --git a/arch/kvx/include/asm/pgalloc.h b/arch/kvx/include/asm/pgalloc.h
new file mode 100644
index 0000000000000..c199343600339
--- /dev/null
+++ b/arch/kvx/include/asm/pgalloc.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ */
+
+#ifndef _ASM_KVX_PGALLOC_H
+#define _ASM_KVX_PGALLOC_H
+
+#include <linux/mm.h>
+#include <asm/tlb.h>
+
+#define __HAVE_ARCH_PGD_FREE
+#include <asm-generic/pgalloc.h>
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ free_pages((unsigned long) pgd, PAGES_PER_PGD);
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+
+ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PAGES_PER_PGD);
+ if (unlikely(pgd == NULL))
+ return NULL;
+
+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+
+ /* Copy kernel mappings */
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ init_mm.pgd + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+ return pgd;
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ unsigned long pfn = virt_to_pfn(pmd);
+
+ set_pud(pud, __pud((unsigned long)pfn << PAGE_SHIFT));
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+{
+ unsigned long pfn = virt_to_pfn(pte);
+
+ set_pmd(pmd, __pmd((unsigned long)pfn << PAGE_SHIFT));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
+{
+ unsigned long pfn = virt_to_pfn(page_address(pte));
+
+ set_pmd(pmd, __pmd((unsigned long)pfn << PAGE_SHIFT));
+}
+
+#if CONFIG_PGTABLE_LEVELS > 2
+#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+#endif
+
+#define __pte_free_tlb(tlb, pte, buf) do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \
+ } while (0)
+
+#endif /* _ASM_KVX_PGALLOC_H */
diff --git a/arch/kvx/include/asm/pgtable-bits.h b/arch/kvx/include/asm/pgtable-bits.h
new file mode 100644
index 0000000000000..7e4f20cd1b39c
--- /dev/null
+++ b/arch/kvx/include/asm/pgtable-bits.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ * Julian Vetter
+ */
+
+#ifndef _ASM_KVX_PGTABLE_BITS_H
+#define _ASM_KVX_PGTABLE_BITS_H
+
+/*
+ * Protection bit definition
+ * As we don't have any HW to handle page table walk, we can define
+ * our own PTE format. In order to make things easier, we are trying to match
+ * some parts of $tel and $teh.
+ *
+ * PageSZ must be on bit 10 and 11, because it matches the TEL.PS bits. By doing
+ * this it is easier in assembly to set the TEL.PS to PageSZ. In other words,
+ * KVX_PAGE_SZ_SHIFT == KVX_SFR_TEL_PS_SHIFT. It is checked by using a
+ * BUILD_BUG_ON() in arch/kvx/mm/tlb.c.
+ *
+ * The Huge bit must be somewhere in the first 12 bits to be able to detect it
+ * when reading the PMD entry.
+ *
+ * KV3-1:
+ * +---------+--------+----+--------+---+---+---+---+---+---+------+---+---+
+ * | 63..23 | 22..13 | 12 | 11..10 | 9 | 8 | 7 | 6 | 5 | 4 | 3..2 | 1 | 0 |
+ * +---------+--------+----+--------+---+---+---+---+---+---+------+---+---+
+ * PFN Unused S PageSZ H G X W R D CP A P
+ *
+ * Note: PFN is 40-bits wide. We use 41-bits to ensure that the upper bit is
+ * always set to 0. This is required when shifting the PFN to the right.
+ */
+
+/* The following shifts are used in ASM to easily extract bits */
+#define _PAGE_PERMS_SHIFT 5
+#define _PAGE_GLOBAL_SHIFT 8
+#define _PAGE_HUGE_SHIFT 9
+
+#define _PAGE_PRESENT (1 << 0) /* Present */
+#define _PAGE_ACCESSED (1 << 1) /* Set by tlb refill code on any access */
+/* Bits 2 - 3 are reserved for the cache policy */
+#define _PAGE_DIRTY (1 << 4) /* Set by tlb refill code on any write */
+#define _PAGE_READ (1 << _PAGE_PERMS_SHIFT) /* Readable */
+#define _PAGE_WRITE (1 << 6) /* Writable */
+#define _PAGE_EXEC (1 << 7) /* Executable */
+#define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT) /* Global */
+#define _PAGE_HUGE (1 << _PAGE_HUGE_SHIFT) /* Huge page */
+/* Bits 10 - 11 are reserved for the page size */
+#define _PAGE_SOFT (1 << 12) /* Reserved for software */
+
+#define _PAGE_SZ_64K (TLB_PS_64K << KVX_PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M (TLB_PS_2M << KVX_PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512M (TLB_PS_512M << KVX_PAGE_SZ_SHIFT)
+
+#define _PAGE_SPECIAL _PAGE_SOFT
+
+/*
+ * If _PAGE_PRESENT is clear because the user mapped it with PROT_NONE
+ * pte_present still gives true. Bit[15] of the PTE is used since its unused
+ * for a PTE entry for kv3-1 (see above)
+ */
+#define _PAGE_NONE (1 << 15)
+
+/* Used for swap PTEs only. */
+#define _PAGE_SWP_EXCLUSIVE (1 << 2)
+
+/* Note: mask used in assembly cannot be generated with GENMASK */
+#define PFN_PTE_SHIFT 23
+#define PFN_PTE_MASK (~(((1 << PFN_PTE_SHIFT) - 1)))
+
+#define KVX_PAGE_SZ_SHIFT 10
+#define KVX_PAGE_SZ_MASK KVX_SFR_TEL_PS_MASK
+
+/* Huge page of 64K are hold in PTE table */
+#define KVX_PAGE_64K_NR_CONT (1UL << (KVX_PAGE_64K_SHIFT - PAGE_SHIFT))
+/* Huge page of 512M are hold in PMD table */
+#define KVX_PAGE_512M_NR_CONT (1UL << (KVX_PAGE_512M_SHIFT - PMD_SHIFT))
+
+#define KVX_PAGE_CP_SHIFT 2
+#define KVX_PAGE_CP_MASK KVX_SFR_TEL_CP_MASK
+
+
+#define _PAGE_CACHED (TLB_CP_W_C << KVX_PAGE_CP_SHIFT)
+#define _PAGE_UNCACHED (TLB_CP_U_U << KVX_PAGE_CP_SHIFT)
+#define _PAGE_DEVICE (TLB_CP_D_U << KVX_PAGE_CP_SHIFT)
+
+#define KVX_ACCESS_PERMS_BITS 4
+#define KVX_ACCESS_PERMS_OFFSET _PAGE_PERMS_SHIFT
+#define KVX_ACCESS_PERMS_SIZE (1 << KVX_ACCESS_PERMS_BITS)
+
+#define KVX_ACCESS_PERM_START_BIT KVX_ACCESS_PERMS_OFFSET
+#define KVX_ACCESS_PERM_STOP_BIT \
+ (KVX_ACCESS_PERMS_OFFSET + KVX_ACCESS_PERMS_BITS - 1)
+#define KVX_ACCESS_PERMS_MASK \
+ GENMASK(KVX_ACCESS_PERM_STOP_BIT, KVX_ACCESS_PERM_START_BIT)
+#define KVX_ACCESS_PERMS_INDEX(x) \
+ ((unsigned int)(x & KVX_ACCESS_PERMS_MASK) >> KVX_ACCESS_PERMS_OFFSET)
+
+/* Bits read, write, exec and global are not preserved across pte_modify() */
+#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_READ | _PAGE_WRITE | \
+ _PAGE_EXEC | _PAGE_GLOBAL))
+
+#endif
diff --git a/arch/kvx/include/asm/pgtable.h b/arch/kvx/include/asm/pgtable.h
new file mode 100644
index 0000000000000..f33e81d7c5b1f
--- /dev/null
+++ b/arch/kvx/include/asm/pgtable.h
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ * Marius Gligor
+ * Yann Sionneau
+ */
+
+#ifndef _ASM_KVX_PGTABLE_H
+#define _ASM_KVX_PGTABLE_H
+
+#include <linux/mmzone.h>
+#include <linux/mm_types.h>
+
+#include <asm/page.h>
+#include <asm/pgtable-bits.h>
+
+#include <asm-generic/pgtable-nopud.h>
+
+#include <asm/mem_map.h>
+
+struct mm_struct;
+struct vm_area_struct;
+
+/*
+ * Hugetlb definitions. All sizes are supported (64 KB, 2 MB and 512 MB).
+ */
+#if defined(CONFIG_KVX_4K_PAGES)
+#define HUGE_MAX_HSTATE 3
+#elif defined(CONFIG_KVX_64K_PAGES)
+#define HUGE_MAX_HSTATE 2
+#else
+#error "Unsupported page size"
+#endif
+
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE BIT(HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+extern pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
+ vm_flags_t flags);
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Vmalloc definitions */
+#define VMALLOC_START KERNEL_VMALLOC_MAP_BASE
+#define VMALLOC_END (VMALLOC_START + KERNEL_VMALLOC_MAP_SIZE - 1)
+
+/* Also used by GDB script to go through the page table */
+#define PGDIR_BITS (VA_MAX_BITS - PGDIR_SHIFT)
+#define PMD_BITS (PGDIR_SHIFT - PMD_SHIFT)
+#define PTE_BITS (PMD_SHIFT - PAGE_SHIFT)
+
+/* Size of region mapped by a page global directory */
+#define PGDIR_SIZE BIT(PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+/* Size of region mapped by a page middle directory */
+#define PMD_SIZE BIT(PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE - 1))
+
+/* Number of entries in the page global directory */
+#define PAGES_PER_PGD 2
+#define PTRS_PER_PGD (PAGES_PER_PGD * PAGE_SIZE / sizeof(pgd_t))
+
+/* Number of entries in the page middle directory */
+#define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t))
+
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/* Page protection bits */
+#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_CACHED)
+#define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_GLOBAL | \
+ _PAGE_READ | _PAGE_WRITE)
+#define _PAGE_KERNEL_EXEC (_PAGE_BASE | _PAGE_READ | _PAGE_EXEC | \
+ _PAGE_GLOBAL | _PAGE_WRITE)
+#define _PAGE_KERNEL_DEVICE (_PAGE_KERNEL | _PAGE_DEVICE)
+#define _PAGE_KERNEL_NOCACHE (_PAGE_KERNEL | _PAGE_UNCACHED)
+
+#define PAGE_NONE __pgprot(_PAGE_NONE)
+#define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READ_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
+#define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READ_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \
+ _PAGE_EXEC | _PAGE_WRITE)
+
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL | _PAGE_CACHED)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_NOCACHE __pgprot(_PAGE_KERNEL | _PAGE_UNCACHED)
+#define PAGE_KERNEL_DEVICE __pgprot(_PAGE_KERNEL_DEVICE)
+#define PAGE_KERNEL_RO __pgprot((_PAGE_KERNEL | _PAGE_CACHED) & ~(_PAGE_WRITE))
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_EXEC & ~(_PAGE_WRITE))
+
+#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~KVX_PAGE_CP_MASK) | _PAGE_UNCACHED))
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern struct page *empty_zero_page;
+#define ZERO_PAGE(vaddr) (empty_zero_page)
+
+
+/*
+ * Encode and decode a swap entry. Swap PTEs are all PTEs that are !pte_none()
+ * && !pte_present(). Swap entries are encoded in an arch dependent format:
+ *
+ * +--------+----+-------+------+---+---+---+
+ * | 63..16 | 15 | 14..8 | 7..3 | 2 | 1 | 0 |
+ * +--------+----+-------+------+---+---+---+
+ * offset 0 0 type E 0 0
+ *
+ * This allows for up to 31 swap files and 1PB per swap file.
+ */
+#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_BITS 5
+#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
+#define __SWP_OFFSET_SHIFT 16
+
+#define MAX_SWAPFILES_CHECK() \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
+#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t) \
+ { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+/*
+ * PGD definitions:
+ * - pgd_ERROR
+ */
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * PUD
+ *
+ * As we manage a three level page table the call to set_pud is used to fill
+ * PGD.
+ */
+static inline void set_pud(pud_t *pudp, pud_t pmd)
+{
+ *pudp = pmd;
+}
+
+static inline int pud_none(pud_t pud)
+{
+ return pud_val(pud) == 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+ return pud_none(pud);
+}
+static inline int pud_present(pud_t pud)
+{
+ return pud_val(pud) != 0;
+}
+
+static inline void pud_clear(pud_t *pud)
+{
+ set_pud(pud, __pud(0));
+}
+
+/*
+ * PMD definitions:
+ * - set_pmd
+ * - pmd_present
+ * - pmd_none
+ * - pmd_bad
+ * - pmd_clear
+ * - pmd_page
+ */
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ *pmdp = pmd;
+}
+
+/* Returns 1 if entry is present */
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_val(pmd) != 0;
+}
+
+/* Returns 1 if the corresponding entry has the value 0 */
+static inline int pmd_none(pmd_t pmd)
+{
+ return pmd_val(pmd) == 0;
+}
+
+/* Used to check that a page middle directory entry is valid */
+static inline int pmd_bad(pmd_t pmd)
+{
+ return pmd_none(pmd);
+}
+
+/* Clears the entry to prevent process to use the linear address that
+ * mapped it.
+ */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ set_pmd(pmdp, __pmd(0));
+}
+
+/*
+ * Returns the address of the descriptor of the page table referred by the
+ * PMD entry.
+ */
+static inline struct page *pmd_page(pmd_t pmd)
+{
+ if (pmd_val(pmd) & _PAGE_HUGE)
+ return pfn_to_page(
+ (pmd_val(pmd) & PFN_PTE_MASK) >> PFN_PTE_SHIFT);
+
+ return pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT);
+}
+
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)pfn_to_virt(pud_val(pud) >> PAGE_SHIFT);
+}
+
+static inline struct page *pud_page(pud_t pud)
+{
+ return pfn_to_page(pud_val(pud) >> PAGE_SHIFT);
+}
+
+/*
+ * PTE definitions:
+ * - set_pte
+ * - set_pte_at
+ * - pte_clear
+ * - pte_page
+ * - pte_pfn
+ * - pte_present
+ * - pte_none
+ * - pte_write
+ * - pte_dirty
+ * - pte_young
+ * - pte_special
+ * - pte_mkdirty
+ * - pte_mkwrite_novma
+ * - pte_mkclean
+ * - pte_mkyoung
+ * - pte_mkold
+ * - pte_mkspecial
+ * - pte_wrprotect
+ */
+
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+ *ptep = pteval;
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval)
+{
+ set_pte(ptep, pteval);
+}
+
+#define pte_clear(mm, addr, ptep) set_pte(ptep, __pte(0))
+
+/* Constructs a page table entry */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+ return __pte(((pfn << PFN_PTE_SHIFT) & PFN_PTE_MASK) |
+ pgprot_val(prot));
+}
+
+/* Builds a page table entry by combining a page descriptor and a group of
+ * access rights.
+ */
+#define mk_pte(page, prot) (pfn_pte(page_to_pfn(page), prot))
+
+/* Modifies page access rights */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ return (unsigned long)pfn_to_virt(pmd_val(pmd) >> PAGE_SHIFT);
+}
+
+/* Yields the page frame number (PFN) of a page table entry */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ return ((pte_val(pte) & PFN_PTE_MASK) >> PFN_PTE_SHIFT);
+}
+
+static inline int pte_present(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_NONE));
+}
+
+static inline int pte_none(pte_t pte)
+{
+ return (pte_val(pte) == 0);
+}
+
+static inline int pte_write(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_WRITE;
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_DIRTY;
+}
+
+static inline int pte_young(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+static inline int pte_special(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SPECIAL;
+}
+
+static inline int pte_huge(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_HUGE;
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_WRITE);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_DIRTY));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_ACCESSED));
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_WRITE));
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_HUGE);
+}
+
+static inline pte_t pte_of_pmd(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+#define pmd_pfn(pmd) pte_pfn(pte_of_pmd(pmd))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define pmdp_establish pmdp_establish
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+ return __pmd(xchg(&pmd_val(*pmdp), pmd_val(pmd)));
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_HUGE);
+}
+
+static inline pmd_t pmd_of_pte(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
+
+
+#define pmd_mkclean(pmd) pmd_of_pte(pte_mkclean(pte_of_pmd(pmd)))
+#define pmd_mkdirty(pmd) pmd_of_pte(pte_mkdirty(pte_of_pmd(pmd)))
+#define pmd_mkold(pmd) pmd_of_pte(pte_mkold(pte_of_pmd(pmd)))
+#define pmd_mkwrite_novma(pmd) pmd_of_pte(pte_mkwrite_novma(pte_of_pmd(pmd)))
+#define pmd_mkyoung(pmd) pmd_of_pte(pte_mkyoung(pte_of_pmd(pmd)))
+#define pmd_modify(pmd, prot) pmd_of_pte(pte_modify(pte_of_pmd(pmd), prot))
+#define pmd_wrprotect(pmd) pmd_of_pte(pte_wrprotect(pte_of_pmd(pmd)))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /* Create a huge page in PMD implies a size of 2 MB */
+ return __pmd(pmd_val(pmd) |
+ _PAGE_HUGE | (TLB_PS_2M << KVX_PAGE_SZ_SHIFT));
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~(_PAGE_PRESENT);
+
+ return pmd;
+}
+
+#define pmd_dirty(pmd) pte_dirty(pte_of_pmd(pmd))
+#define pmd_write(pmd) pte_write(pte_of_pmd(pmd))
+#define pmd_young(pmd) pte_young(pte_of_pmd(pmd))
+
+#define mk_pmd(page, prot) pmd_of_pte(mk_pte(page, prot))
+
+static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
+{
+ return __pmd(((pfn << PFN_PTE_SHIFT) & PFN_PTE_MASK) |
+ pgprot_val(prot));
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ *pmdp = pmd;
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* _ASM_KVX_PGTABLE_H */
diff --git a/arch/kvx/include/asm/sparsemem.h b/arch/kvx/include/asm/sparsemem.h
new file mode 100644
index 0000000000000..2f35743f20fb2
--- /dev/null
+++ b/arch/kvx/include/asm/sparsemem.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_SPARSEMEM_H
+#define _ASM_KVX_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS 40
+#define SECTION_SIZE_BITS 30
+#endif /* CONFIG_SPARSEMEM */
+
+#endif /* _ASM_KVX_SPARSEMEM_H */
diff --git a/arch/kvx/include/asm/symbols.h b/arch/kvx/include/asm/symbols.h
new file mode 100644
index 0000000000000..a53c1607979fe
--- /dev/null
+++ b/arch/kvx/include/asm/symbols.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_SYMBOLS_H
+#define _ASM_KVX_SYMBOLS_H
+
+/* Symbols to patch TLB refill handler */
+extern char kvx_perf_tlb_refill[], kvx_std_tlb_refill[];
+
+/* Entry point of the ELF, used to start other PEs in SMP */
+extern int kvx_start[];
+
+#endif
diff --git a/arch/kvx/include/asm/tlb.h b/arch/kvx/include/asm/tlb.h
new file mode 100644
index 0000000000000..190b682e18191
--- /dev/null
+++ b/arch/kvx/include/asm/tlb.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ */
+
+#ifndef _ASM_KVX_TLB_H
+#define _ASM_KVX_TLB_H
+
+struct mmu_gather;
+
+static void tlb_flush(struct mmu_gather *tlb);
+
+int clear_ltlb_entry(unsigned long vaddr);
+
+#include <asm-generic/tlb.h>
+
+static inline unsigned int pgprot_cache_policy(unsigned long flags)
+{
+ return (flags & KVX_PAGE_CP_MASK) >> KVX_PAGE_CP_SHIFT;
+}
+
+#endif /* _ASM_KVX_TLB_H */
diff --git a/arch/kvx/include/asm/tlb_defs.h b/arch/kvx/include/asm/tlb_defs.h
new file mode 100644
index 0000000000000..1c77f7387ecea
--- /dev/null
+++ b/arch/kvx/include/asm/tlb_defs.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Julian Vetter
+ * Guillaume Thouvenin
+ * Marius Gligor
+ */
+
+#ifndef _ASM_KVX_TLB_DEFS_H
+#define _ASM_KVX_TLB_DEFS_H
+
+#include <linux/sizes.h>
+
+#include <asm/sfr.h>
+
+/* Architecture specification */
+#define MMC_SB_JTLB 0
+#define MMC_SB_LTLB 1
+
+#define MMU_LTLB_SETS 1
+#define MMU_LTLB_WAYS 16
+
+#define MMU_JTLB_SETS 64
+#define MMU_JTLB_WAYS_SHIFT 2
+#define MMU_JTLB_WAYS (1 << MMU_JTLB_WAYS_SHIFT)
+
+#define MMU_JTLB_ENTRIES (MMU_JTLB_SETS << MMU_JTLB_WAYS_SHIFT)
+
+/* Set is determined using the 6 lsb of virtual page */
+#define MMU_JTLB_SET_MASK (MMU_JTLB_SETS - 1)
+#define MMU_JTLB_WAY_MASK (MMU_JTLB_WAYS - 1)
+
+/* TLB: Entry Status */
+#define TLB_ES_INVALID 0
+#define TLB_ES_PRESENT 1
+#define TLB_ES_MODIFIED 2
+#define TLB_ES_A_MODIFIED 3
+
+/* TLB: Cache Policy - First value is for data, the second is for instruction
+ * Symbols are
+ * D: device
+ * U: uncached
+ * W: write through
+ * C: cache enabled
+ */
+#define TLB_CP_D_U 0
+#define TLB_CP_U_U 1
+#define TLB_CP_W_C 2
+#define TLB_CP_U_C 3
+
+/* TLB: Protection Attributes: First value is when PM=0, second is when PM=1
+ * Symbols are:
+ * NA: no access
+ * R : read
+ * W : write
+ * X : execute
+ */
+#define TLB_PA_NA_NA 0
+#define TLB_PA_NA_R 1
+#define TLB_PA_NA_RW 2
+#define TLB_PA_NA_RX 3
+#define TLB_PA_NA_RWX 4
+#define TLB_PA_R_R 5
+#define TLB_PA_R_RW 6
+#define TLB_PA_R_RX 7
+#define TLB_PA_R_RWX 8
+#define TLB_PA_RW_RW 9
+#define TLB_PA_RW_RWX 10
+#define TLB_PA_RX_RX 11
+#define TLB_PA_RX_RWX 12
+#define TLB_PA_RWX_RWX 13
+
+/* TLB: Page Size */
+#define TLB_PS_4K 0
+#define TLB_PS_64K 1
+#define TLB_PS_2M 2
+#define TLB_PS_512M 3
+
+#define TLB_G_GLOBAL 1
+#define TLB_G_USE_ASN 0
+
+#define TLB_MK_TEH_ENTRY(_vaddr, _vs, _global, _asn) \
+ (((_vs) << KVX_SFR_TEH_VS_SHIFT) | \
+ ((_global) << KVX_SFR_TEH_G_SHIFT) | \
+ ((_asn) << KVX_SFR_TEH_ASN_SHIFT) | \
+ (((_vaddr) >> KVX_SFR_TEH_PN_SHIFT) << KVX_SFR_TEH_PN_SHIFT))
+
+#define TLB_MK_TEL_ENTRY(_paddr, _ps, _es, _cp, _pa) \
+ (((_es) << KVX_SFR_TEL_ES_SHIFT) | \
+ ((_ps) << KVX_SFR_TEL_PS_SHIFT) | \
+ ((_cp) << KVX_SFR_TEL_CP_SHIFT) | \
+ ((_pa) << KVX_SFR_TEL_PA_SHIFT) | \
+ (((_paddr) >> KVX_SFR_TEL_FN_SHIFT) << KVX_SFR_TEL_FN_SHIFT))
+
+
+/* Refill routine related defines */
+#define REFILL_PERF_ENTRIES 4
+#define REFILL_PERF_PAGE_SIZE SZ_512M
+
+/* paddr will be inserted in assembly code */
+#define REFILL_PERF_TEL_VAL \
+ TLB_MK_TEL_ENTRY(0, TLB_PS_512M, TLB_ES_A_MODIFIED, TLB_CP_W_C, \
+ TLB_PA_NA_RWX)
+/* vaddr will be inserted in assembly code */
+#define REFILL_PERF_TEH_VAL TLB_MK_TEH_ENTRY(0, 0, TLB_G_GLOBAL, 0)
+
+/*
+ * LTLB fixed entry index
+ */
+#define LTLB_ENTRY_KERNEL_TEXT 0
+#define LTLB_ENTRY_GDB_PAGE 1
+#define LTLB_ENTRY_VIRTUAL_SMEM 2
+/* Reserve entries for kernel pagination */
+#define LTLB_KERNEL_RESERVED 3
+/* This define should reflect the maximum number of fixed LTLB entries */
+#define LTLB_ENTRY_FIXED_COUNT (LTLB_KERNEL_RESERVED + REFILL_PERF_ENTRIES)
+#define LTLB_ENTRY_EARLY_SMEM LTLB_ENTRY_FIXED_COUNT
+
+/* MMC: Protection Trap Cause */
+#define MMC_PTC_RESERVED 0
+#define MMC_PTC_READ 1
+#define MMC_PTC_WRITE 2
+#define MMC_PTC_EXECUTE 3
+
+/* MMC: Page size Mask in JTLB */
+#define MMC_PMJ_4K 1
+#define MMC_PMJ_64K 2
+#define MMC_PMJ_2M 4
+#define MMC_PMJ_512M 8
+
+#endif
diff --git a/arch/kvx/include/asm/tlbflush.h b/arch/kvx/include/asm/tlbflush.h
new file mode 100644
index 0000000000000..6c11fe7108078
--- /dev/null
+++ b/arch/kvx/include/asm/tlbflush.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ */
+
+#ifndef _ASM_KVX_TLBFLUSH_H
+#define _ASM_KVX_TLBFLUSH_H
+
+#include <linux/sched.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+
+extern void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr);
+extern void local_flush_tlb_all(void);
+extern void local_flush_tlb_mm(struct mm_struct *mm);
+extern void local_flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end);
+extern void local_flush_tlb_kernel_range(unsigned long start,
+ unsigned long end);
+
+#ifdef CONFIG_SMP
+extern void smp_flush_tlb_all(void);
+extern void smp_flush_tlb_mm(struct mm_struct *mm);
+extern void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+extern void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+static inline void flush_tlb(void)
+{
+ smp_flush_tlb_mm(current->mm);
+}
+
+#define flush_tlb_page smp_flush_tlb_page
+#define flush_tlb_all smp_flush_tlb_all
+#define flush_tlb_mm smp_flush_tlb_mm
+#define flush_tlb_range smp_flush_tlb_range
+#define flush_tlb_kernel_range smp_flush_tlb_kernel_range
+
+#else
+#define flush_tlb_page local_flush_tlb_page
+#define flush_tlb_all local_flush_tlb_all
+#define flush_tlb_mm local_flush_tlb_mm
+#define flush_tlb_range local_flush_tlb_range
+#define flush_tlb_kernel_range local_flush_tlb_kernel_range
+#endif
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr);
+void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep);
+
+#endif /* _ASM_KVX_TLBFLUSH_H */
diff --git a/arch/kvx/include/asm/vmalloc.h b/arch/kvx/include/asm/vmalloc.h
new file mode 100644
index 0000000000000..a07692431108e
--- /dev/null
+++ b/arch/kvx/include/asm/vmalloc.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_VMALLOC_H
+#define _ASM_KVX_VMALLOC_H
+
+#endif /* _ASM_KVX_VMALLOC_H */
diff --git a/arch/kvx/mm/cacheflush.c b/arch/kvx/mm/cacheflush.c
new file mode 100644
index 0000000000000..62ec07266708e
--- /dev/null
+++ b/arch/kvx/mm/cacheflush.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#include <linux/smp.h>
+#include <linux/hugetlb.h>
+#include <linux/mm_types.h>
+
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_SMP
+
+struct flush_data {
+ unsigned long start;
+ unsigned long end;
+};
+
+static inline void ipi_flush_icache_range(void *arg)
+{
+ struct flush_data *ta = arg;
+
+ local_flush_icache_range(ta->start, ta->end);
+}
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+ struct flush_data data = {
+ .start = start,
+ .end = end
+ };
+
+ /* Then invalidate L1 icache on all cpus */
+ on_each_cpu(ipi_flush_icache_range, &data, 1);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#endif /* CONFIG_SMP */
+
+void dcache_wb_inval_phys_range(phys_addr_t addr, unsigned long len, bool wb,
+ bool inval)
+{
+ if (wb && inval) {
+ wbinval_dcache_range(addr, len);
+ } else {
+ if (inval)
+ inval_dcache_range(addr, len);
+ if (wb)
+ wb_dcache_range(addr, len);
+ }
+}
+
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_none(*pgd))
+ return NULL;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return NULL;
+
+ /* checks for huge page at pmd level */
+ if (pmd_leaf(*pmd)) {
+ pte = (pte_t *) pmd;
+ if (!pte_present(*pte))
+ return NULL;
+
+ return pte;
+ }
+
+ pte = pte_offset_map(pmd, addr);
+ if (!pte_present(*pte))
+ return NULL;
+
+ return pte;
+}
+
+static unsigned long dcache_wb_inval_virt_to_phys(struct vm_area_struct *vma,
+ unsigned long vaddr,
+ unsigned long len,
+ bool wb, bool inval)
+{
+ unsigned long pfn, offset, pgsize;
+ pte_t *ptep;
+
+ ptep = get_ptep(vma->vm_mm, vaddr);
+ if (!ptep) {
+ /*
+ * Since we did not found a matching pte, return needed
+ * length to be aligned on next page boundary
+ */
+ offset = (vaddr & (PAGE_SIZE - 1));
+ return PAGE_SIZE - offset;
+ }
+
+ /* Handle page sizes correctly */
+ pgsize = (pte_val(*ptep) & KVX_PAGE_SZ_MASK) >> KVX_PAGE_SZ_SHIFT;
+ pgsize = (1 << get_page_size_shift(pgsize));
+
+ offset = vaddr & (pgsize - 1);
+ len = min(pgsize - offset, len);
+ pfn = pte_pfn(*ptep);
+
+ dcache_wb_inval_phys_range(PFN_PHYS(pfn) + offset, len, wb, inval);
+
+ return len;
+}
+
+int dcache_wb_inval_virt_range(unsigned long vaddr, unsigned long len, bool wb,
+ bool inval)
+{
+ unsigned long end = vaddr + len;
+ unsigned long vma_end;
+ struct vm_area_struct *vma;
+ unsigned long rlen;
+ struct mm_struct *mm = current->mm;
+
+ /* necessary for find_vma */
+ mmap_read_lock(mm);
+
+ /*
+ * Verify that the specified address region actually belongs to this
+ * process.
+ */
+ while (vaddr < end) {
+ vma = find_vma(current->mm, vaddr);
+ if (vma == NULL || vaddr < vma->vm_start) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ vma_end = min(end, vma->vm_end);
+ while (vaddr < vma_end) {
+ rlen = dcache_wb_inval_virt_to_phys(vma, vaddr, len, wb, inval);
+ len -= rlen;
+ vaddr += rlen;
+ }
+ }
+
+ mmap_read_unlock(mm);
+
+ return 0;
+}
diff --git a/arch/kvx/mm/dma-mapping.c b/arch/kvx/mm/dma-mapping.c
new file mode 100644
index 0000000000000..d73fbb1c35896
--- /dev/null
+++ b/arch/kvx/mm/dma-mapping.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ * Jules Maselbas
+ * Julian Vetter
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
+#include "../../../drivers/iommu/dma-iommu.h"
+
+#include <asm/cacheflush.h>
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+ unsigned long addr = (unsigned long) page_to_phys(page);
+
+ /* Flush pending data and invalidate pages */
+ wbinval_dcache_range(addr, size);
+}
+
+/**
+ * The implementation of arch should follow the following rules:
+ * map for_cpu for_device unmap
+ * TO_DEV writeback none writeback none
+ * FROM_DEV invalidate invalidate(*) invalidate invalidate(*)
+ * BIDIR writeback invalidate writeback invalidate
+ *
+ * (*) - only necessary if the CPU speculatively prefetches.
+ *
+ * (see https://lkml.org/lkml/2018/5/18/979)
+ */
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ switch (dir) {
+ case DMA_FROM_DEVICE:
+ inval_dcache_range(paddr, size);
+ break;
+
+ case DMA_TO_DEVICE:
+ case DMA_BIDIRECTIONAL:
+ wb_dcache_range(paddr, size);
+ break;
+
+ default:
+ WARN_ON_ONCE(true);
+ }
+}
+
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ switch (dir) {
+ case DMA_TO_DEVICE:
+ break;
+ case DMA_FROM_DEVICE:
+ break;
+
+ case DMA_BIDIRECTIONAL:
+ inval_dcache_range(paddr, size);
+ break;
+
+ default:
+ WARN_ON_ONCE(true);
+ }
+}
+
+#ifdef CONFIG_IOMMU_DMA
+void arch_teardown_dma_ops(struct device *dev)
+{
+ dev->dma_ops = NULL;
+}
+#endif /* CONFIG_IOMMU_DMA*/
+
+void arch_setup_dma_ops(struct device *dev, bool coherent)
+{
+ dev->dma_coherent = coherent;
+ if (device_iommu_mapped(dev))
+ iommu_setup_dma_ops(dev);
+}
diff --git a/arch/kvx/mm/extable.c b/arch/kvx/mm/extable.c
new file mode 100644
index 0000000000000..8889a1a90a775
--- /dev/null
+++ b/arch/kvx/mm/extable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * derived from arch/riscv/mm/extable.c
+ *
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+
+#include <linux/extable.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+ fixup = search_exception_tables(regs->spc);
+ if (fixup) {
+ regs->spc = fixup->fixup;
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/kvx/mm/fault.c b/arch/kvx/mm/fault.c
new file mode 100644
index 0000000000000..9d8513db57a8d
--- /dev/null
+++ b/arch/kvx/mm/fault.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Guillaume Thouvenin
+ * Clement Leger
+ * Yann Sionneau
+ */
+
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/perf_event.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+
+#include <asm/mmu.h>
+#include <asm/traps.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/sfr_defs.h>
+#include <asm/current.h>
+#include <asm/tlbflush.h>
+
+static int handle_vmalloc_fault(uint64_t ea)
+{
+ /*
+ * Synchronize this task's top level page-table with
+ * the 'reference' page table.
+ * As we only have 2 or 3 level page table we don't need to
+ * deal with other levels.
+ */
+ unsigned long addr = ea & PAGE_MASK;
+ pgd_t *pgd_k, *pgd;
+ p4d_t *p4d_k, *p4d;
+ pud_t *pud_k, *pud;
+ pmd_t *pmd_k, *pmd;
+ pte_t *pte_k;
+
+ pgd = pgd_offset(current->active_mm, ea);
+ pgd_k = pgd_offset_k(ea);
+ if (!pgd_present(*pgd_k))
+ return 1;
+ set_pgd(pgd, *pgd_k);
+
+ p4d = p4d_offset(pgd, ea);
+ p4d_k = p4d_offset(pgd_k, ea);
+ if (!p4d_present(*p4d_k))
+ return 1;
+
+ pud = pud_offset(p4d, ea);
+ pud_k = pud_offset(p4d_k, ea);
+ if (!pud_present(*pud_k))
+ return 1;
+
+ pmd = pmd_offset(pud, ea);
+ pmd_k = pmd_offset(pud_k, ea);
+ if (!pmd_present(*pmd_k))
+ return 1;
+
+ /* Some other architectures set pmd to synchronize them but
+ * as we just synchronized the pgd we don't see how they can
+ * be different. Maybe we miss something so in case we
+ * put a guard here.
+ */
+ if (pmd_val(*pmd) != pmd_val(*pmd_k))
+ pr_err("%s: pmd not synchronized (0x%lx != 0x%lx)\n",
+ __func__, pmd_val(*pmd), pmd_val(*pmd_k));
+
+ pte_k = pte_offset_kernel(pmd_k, ea);
+ if (!pte_present(*pte_k)) {
+ pr_err("%s: PTE not present for 0x%llx\n",
+ __func__, ea);
+ return 1;
+ }
+
+ /* We refill the TLB now to avoid to take another nomapping
+ * trap.
+ */
+ kvx_mmu_jtlb_add_entry(addr, pte_k, 0);
+
+ return 0;
+}
+
+void do_page_fault(struct pt_regs *regs, uint64_t es, uint64_t ea)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long flags, cause, vma_mask;
+ int code = SEGV_MAPERR;
+ vm_fault_t fault;
+
+ cause = kvx_sfr_field_val(es, ES, RWX);
+
+ /* We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ */
+ if (is_vmalloc_addr((void *) ea) && !user_mode(regs)) {
+ if (handle_vmalloc_fault(ea))
+ goto no_context;
+ return;
+ }
+
+ mm = current->mm;
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (unlikely(faulthandler_disabled() || !mm))
+ goto no_context;
+
+ /* By default we retry and fault task can be killed */
+ flags = FAULT_FLAG_DEFAULT;
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, ea);
+
+retry:
+ mmap_read_lock(mm);
+
+ vma = find_vma(mm, ea);
+ if (!vma)
+ goto bad_area;
+ if (likely(vma->vm_start <= ea))
+ goto good_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+ goto bad_area;
+ vma = expand_stack(mm, ea);
+ if (unlikely(!vma))
+ goto bad_area_nosemaphore;
+
+good_area:
+ /* Handle access type */
+ switch (cause) {
+ case KVX_TRAP_RWX_FETCH:
+ vma_mask = VM_EXEC;
+ break;
+ case KVX_TRAP_RWX_READ:
+ vma_mask = VM_READ;
+ break;
+ case KVX_TRAP_RWX_WRITE:
+ vma_mask = VM_WRITE;
+ flags |= FAULT_FLAG_WRITE;
+ break;
+ /* Atomic are both read/write */
+ case KVX_TRAP_RWX_ATOMIC:
+ vma_mask = VM_WRITE | VM_READ;
+ flags |= FAULT_FLAG_WRITE;
+ break;
+ default:
+ panic("%s: unhandled cause %lu", __func__, cause);
+ }
+
+ if ((vma->vm_flags & vma_mask) != vma_mask) {
+ code = SEGV_ACCERR;
+ goto bad_area;
+ }
+
+ /*
+ * If for any reason we can not handle the fault we make sure that
+ * we exit gracefully rather then retry endlessly with the same
+ * result.
+ */
+ fault = handle_mm_fault(vma, ea, flags, regs);
+
+ /*
+ * If we need to retry but a fatal signal is pending, handle the
+ * signal first. We do not need to release the mmap_sem because it
+ * would already be released in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fault_signal_pending(fault, regs))
+ return;
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ return;
+
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ if (fault & VM_FAULT_OOM)
+ goto out_of_memory;
+ else if (fault & VM_FAULT_SIGSEGV)
+ goto bad_area;
+ else if (fault & VM_FAULT_SIGBUS)
+ goto do_sigbus;
+ BUG();
+ }
+
+ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ flags |= FAULT_FLAG_TRIED;
+ /* No need to up_read(&mm->mmap_sem) as we would
+ * have already released it in __lock_page_or_retry().
+ * Look in mm/filemap.c for explanations.
+ */
+ goto retry;
+ }
+
+ /* Fault errors and retry case have been handled nicely */
+ mmap_read_unlock(mm);
+ return;
+
+bad_area:
+ mmap_read_unlock(mm);
+bad_area_nosemaphore:
+
+ if (user_mode(regs)) {
+ user_do_sig(regs, SIGSEGV, code, ea);
+ return;
+ }
+
+no_context:
+ /* Are we prepared to handle this kernel fault?
+ *
+ * (The kernel has valid exception-points in the source
+ * when it accesses user-memory. When it fails in one
+ * of those points, we find it in a table and do a jump
+ * to some fixup code that loads an appropriate error
+ * code)
+ */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ bust_spinlocks(1);
+ if (kvx_sfr_field_val(es, ES, HTC) == KVX_TRAP_PROTECTION)
+ pr_alert(CUT_HERE "Kernel protection trap at virtual address %016llx\n",
+ ea);
+ else {
+ pr_alert(CUT_HERE "Unable to handle kernel %s at virtual address %016llx\n",
+ (ea < PAGE_SIZE) ? "NULL pointer dereference" :
+ "paging request", ea);
+ }
+ die(regs, ea, "Oops");
+ bust_spinlocks(0);
+ make_task_dead(SIGKILL);
+
+out_of_memory:
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
+ mmap_read_unlock(mm);
+ if (!user_mode(regs))
+ goto no_context;
+ pagefault_out_of_memory();
+ return;
+
+do_sigbus:
+ mmap_read_unlock(mm);
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs))
+ goto no_context;
+
+ user_do_sig(regs, SIGBUS, BUS_ADRERR, ea);
+
+ return;
+
+}
+
+void do_writetoclean(struct pt_regs *regs, uint64_t es, uint64_t ea)
+{
+ panic("%s not implemented", __func__);
+}
diff --git a/arch/kvx/mm/init.c b/arch/kvx/mm/init.c
new file mode 100644
index 0000000000000..c68468b8957ea
--- /dev/null
+++ b/arch/kvx/mm/init.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ */
+
+/* Memblock header depends on types.h but does not include it ! */
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/of_fdt.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/tlb_defs.h>
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+
+/*
+ * On kvx, memory map contains the first 2G of DDR being aliased.
+ * Full contiguous DDR is located at @[4G - 68G].
+ * However, to access this DDR in 32bit mode, the first 2G of DDR are
+ * mirrored from 4G to 2G.
+ * These first 2G are accessible from all DMAs (included 32 bits one).
+ *
+ * Hence, the memory map is the following:
+ *
+ * (68G) 0x1100000000-> +-------------+
+ * | |
+ * 66G |(ZONE_NORMAL)|
+ * | |
+ * (6G) 0x180000000-> +-------------+
+ * | |
+ * 2G |(ZONE_DMA32) |
+ * | |
+ * (4G) 0x100000000-> +-------------+ +--+
+ * | | |
+ * 2G | (Alias) | | 2G Alias
+ * | | |
+ * (2G) 0x80000000-> +-------------+ <--+
+ *
+ * The translation of 64 bits -> 32 bits can then be done using dma-ranges property
+ * in device-trees.
+ */
+
+#define DDR_64BIT_START (4ULL * SZ_1G)
+#define DDR_32BIT_ALIAS_SIZE (2ULL * SZ_1G)
+
+#define MAX_DMA32_PFN PHYS_PFN(DDR_64BIT_START + DDR_32BIT_ALIAS_SIZE)
+
+#define SMEM_GLOBAL_START (16 * 1024 * 1024)
+#define SMEM_GLOBAL_END (20 * 1024 * 1024)
+#define MAX_SMEM_MEMBLOCKS (32)
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+
+/*
+ * empty_zero_page is a special page that is used for zero-initialized data and
+ * COW.
+ */
+struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern char _start[];
+extern char __kernel_smem_code_start[];
+extern char __kernel_smem_code_end[];
+
+static void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES];
+
+ memset(zones_size, 0, sizeof(zones_size));
+
+ zones_size[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn);
+ zones_size[ZONE_NORMAL] = max_low_pfn;
+
+ free_area_init(zones_size);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+static void __init setup_initrd(void)
+{
+ u64 base = phys_initrd_start;
+ u64 end = phys_initrd_start + phys_initrd_size;
+
+ if (phys_initrd_size == 0) {
+ pr_info("initrd not found or empty");
+ return;
+ }
+
+ if (base < memblock_start_of_DRAM() || end > memblock_end_of_DRAM()) {
+ pr_err("initrd not in accessible memory, disabling it");
+ phys_initrd_size = 0;
+ return;
+ }
+
+ pr_info("initrd: 0x%llx - 0x%llx\n", base, end);
+
+ memblock_reserve(phys_initrd_start, phys_initrd_size);
+
+ /* the generic initrd code expects virtual addresses */
+ initrd_start = (unsigned long) __va(base);
+ initrd_end = initrd_start + phys_initrd_size;
+}
+#endif
+
+static phys_addr_t memory_limit = PHYS_ADDR_MAX;
+
+static int __init early_mem(char *p)
+{
+ if (!p)
+ return 1;
+
+ memory_limit = memparse(p, &p) & PAGE_MASK;
+ pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);
+
+ return 0;
+}
+early_param("mem", early_mem);
+
+static void __init setup_bootmem(void)
+{
+ phys_addr_t start, end = 0;
+ u64 i;
+ struct memblock_region *region = NULL;
+ struct memblock_region to_be_reserved[MAX_SMEM_MEMBLOCKS] = {};
+ struct memblock_region *reserve = to_be_reserved;
+
+ init_mm.start_code = (unsigned long)_stext;
+ init_mm.end_code = (unsigned long)_etext;
+ init_mm.end_data = (unsigned long)_edata;
+ init_mm.brk = (unsigned long)_end;
+
+ memblock_reserve(__pa(_start), __pa(_end) - __pa(_start));
+
+
+ for_each_mem_range(i, &start, &end) {
+ pr_info("%15s: memory : 0x%lx - 0x%lx\n", __func__,
+ (unsigned long)start,
+ (unsigned long)end);
+ }
+
+ /* min_low_pfn is the lowest PFN available in the system */
+ min_low_pfn = PFN_UP(memblock_start_of_DRAM());
+
+ /* max_low_pfn indicates the end if NORMAL zone */
+ max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+
+ /* Set the maximum number of pages in the system */
+ set_max_mapnr(max_low_pfn - min_low_pfn);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ setup_initrd();
+#endif
+
+ if (memory_limit != PHYS_ADDR_MAX)
+ memblock_mem_limit_remove_map(memory_limit);
+
+ /* Don't reserve the device tree if its builtin */
+ if (!is_kernel_rodata((unsigned long) initial_boot_params))
+ early_init_fdt_reserve_self();
+ early_init_fdt_scan_reserved_mem();
+
+ start = SMEM_GLOBAL_START;
+ /*
+ * make sure all smem is reserved
+ * It seems that it's not OK to call memblock_reserve() from
+ * for_each_reserved_mem_region() {} loop, so let's store
+ * the "to be reserved" regions in the to_be_reserved array.
+ */
+ for_each_reserved_mem_region(region) {
+ if (region->base >= SMEM_GLOBAL_END)
+ break; /* we finished to scan the smem area */
+
+ if (region->base > SMEM_GLOBAL_START && region->base != start) {
+ if (reserve > &to_be_reserved[MAX_SMEM_MEMBLOCKS - 1]) {
+ pr_crit("Impossible to mark the whole smem as reserved memory.\n");
+ pr_crit("It would take more than the maximum of %d memblocks.\n",
+ MAX_SMEM_MEMBLOCKS);
+ break;
+ }
+ reserve->base = start;
+ reserve->size = region->base - start;
+ reserve++;
+ }
+
+ start = region->base + region->size;
+ }
+
+ if (start < SMEM_GLOBAL_END)
+ memblock_reserve(start, SMEM_GLOBAL_END - start);
+
+ /* Now let's reserve the smem memblocks for real */
+ for (region = to_be_reserved; region->size != 0; region++)
+ memblock_reserve(region->base, region->size);
+
+ memblock_allow_resize();
+ memblock_dump_all();
+}
+
+static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss __maybe_unused;
+
+void __init early_fixmap_init(void)
+{
+ unsigned long vaddr;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ /*
+ * Fixed mappings:
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+ pgd = pgd_offset_pgd(swapper_pg_dir, vaddr);
+ set_pgd(pgd, __pgd(__pa_symbol(fixmap_pmd)));
+
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
+ pmd = pmd_offset(pud, vaddr);
+ set_pmd(pmd, __pmd(__pa_symbol(fixmap_pte)));
+}
+
+void __init setup_arch_memory(void)
+{
+ setup_bootmem();
+ sparse_init();
+ zone_sizes_init();
+}
+
+void __init mem_init(void)
+{
+ memblock_free_all();
+
+ /* allocate the zero page */
+ empty_zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!empty_zero_page)
+ panic("Failed to allocate the empty_zero_page");
+}
+
+void free_initmem(void)
+{
+#ifdef CONFIG_POISON_INITMEM
+ free_initmem_default(0x0);
+#else
+ free_initmem_default(-1);
+#endif
+}
+
+void __set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
+
+
+ BUG_ON(idx >= __end_of_fixed_addresses);
+
+ pte = &fixmap_pte[pte_index(addr)];
+
+ if (pgprot_val(flags)) {
+ set_pte(pte, pfn_pte(phys_to_pfn(phys), flags));
+ } else {
+ /* Remove the fixmap */
+ pte_clear(&init_mm, addr, pte);
+ }
+ local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+}
+
+static const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_READ,
+ [VM_WRITE] = PAGE_READ,
+ [VM_WRITE | VM_READ] = PAGE_READ,
+ [VM_EXEC] = PAGE_READ_EXEC,
+ [VM_EXEC | VM_READ] = PAGE_READ_EXEC,
+ [VM_EXEC | VM_WRITE] = PAGE_READ_EXEC,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_READ_EXEC,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_READ,
+ [VM_SHARED | VM_WRITE] = PAGE_READ_WRITE,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_READ_WRITE,
+ [VM_SHARED | VM_EXEC] = PAGE_READ_EXEC,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READ_EXEC,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_READ_WRITE_EXEC,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_READ_WRITE_EXEC
+};
+DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/kvx/mm/mmap.c b/arch/kvx/mm/mmap.c
new file mode 100644
index 0000000000000..a2225db644384
--- /dev/null
+++ b/arch/kvx/mm/mmap.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * derived from arch/arm64/mm/mmap.c
+ *
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifdef CONFIG_STRICT_DEVMEM
+
+#include <linux/mm.h>
+#include <linux/ioport.h>
+
+#include <asm/page.h>
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number. We mimic x86 here by
+ * disallowing access to system RAM as well as device-exclusive MMIO regions.
+ * This effectively disable read()/write() on /dev/mem.
+ */
+int devmem_is_allowed(unsigned long pfn)
+{
+ if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+ return 0;
+ if (!page_is_ram(pfn))
+ return 1;
+ return 0;
+}
+
+#endif
diff --git a/arch/kvx/mm/mmu.c b/arch/kvx/mm/mmu.c
new file mode 100644
index 0000000000000..9cb11bd2dfdf7
--- /dev/null
+++ b/arch/kvx/mm/mmu.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ * Vincent Chardon
+ * Jules Maselbas
+ */
+
+#include <linux/cache.h>
+#include <linux/types.h>
+#include <linux/irqflags.h>
+#include <linux/printk.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+
+#include <asm/mmu.h>
+#include <asm/tlb.h>
+#include <asm/page_size.h>
+#include <asm/mmu_context.h>
+
+#define DUMP_LTLB 0
+#define DUMP_JTLB 1
+
+DEFINE_PER_CPU_ALIGNED(uint8_t[MMU_JTLB_SETS], jtlb_current_set_way);
+static struct kvx_tlb_format ltlb_entries[MMU_LTLB_WAYS];
+static unsigned long ltlb_entries_bmp;
+
+static int kvx_mmu_ltlb_overlaps(struct kvx_tlb_format tlbe)
+{
+ int bit = LTLB_ENTRY_FIXED_COUNT;
+
+ for_each_set_bit_from(bit, <lb_entries_bmp, MMU_LTLB_WAYS) {
+ if (tlb_entry_overlaps(tlbe, ltlb_entries[bit]))
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * kvx_mmu_ltlb_add_entry - Add a kernel entry in the LTLB
+ *
+ * In order to lock some entries in the LTLB and be always mapped, this
+ * function can be called with a physical address, a virtual address and
+ * protection attribute to add an entry into the LTLB. This is mainly for
+ * performances since there won't be any NOMAPPING traps for these pages.
+ *
+ * @vaddr: Virtual address for the entry (must be aligned according to tlb_ps)
+ * @paddr: Physical address for the entry (must be aligned according to tlb_ps)
+ * @flags: Protection attributes
+ * @tlb_ps: Page size attribute for TLB (TLB_PS_*)
+ */
+void kvx_mmu_ltlb_add_entry(unsigned long vaddr, phys_addr_t paddr,
+ pgprot_t flags, unsigned long tlb_ps)
+{
+ unsigned int cp;
+ unsigned int idx;
+ unsigned long irqflags;
+ struct kvx_tlb_format tlbe;
+ u64 page_size = BIT(get_page_size_shift(tlb_ps));
+
+ BUG_ON(!IS_ALIGNED(vaddr, page_size) || !IS_ALIGNED(paddr, page_size));
+
+ cp = pgprot_cache_policy(pgprot_val(flags));
+
+ tlbe = tlb_mk_entry(
+ (void *) paddr,
+ (void *) vaddr,
+ tlb_ps,
+ TLB_G_GLOBAL,
+ TLB_PA_NA_RW,
+ cp,
+ 0,
+ TLB_ES_A_MODIFIED);
+
+ local_irq_save(irqflags);
+
+ if (IS_ENABLED(CONFIG_KVX_DEBUG_TLB_WRITE) &&
+ kvx_mmu_ltlb_overlaps(tlbe))
+ panic("VA %lx overlaps with an existing LTLB mapping", vaddr);
+
+ idx = find_next_zero_bit(<lb_entries_bmp, MMU_LTLB_WAYS,
+ LTLB_ENTRY_FIXED_COUNT);
+ /* This should never happen */
+ BUG_ON(idx >= MMU_LTLB_WAYS);
+ __set_bit(idx, <lb_entries_bmp);
+ ltlb_entries[idx] = tlbe;
+ kvx_mmu_add_entry(MMC_SB_LTLB, idx, tlbe);
+
+ if (kvx_mmc_error(kvx_sfr_get(MMC)))
+ panic("Failed to write entry to the LTLB");
+
+ local_irq_restore(irqflags);
+}
+
+void kvx_mmu_ltlb_remove_entry(unsigned long vaddr)
+{
+ int ret, bit = LTLB_ENTRY_FIXED_COUNT;
+ struct kvx_tlb_format tlbe;
+
+ for_each_set_bit_from(bit, <lb_entries_bmp, MMU_LTLB_WAYS) {
+ tlbe = ltlb_entries[bit];
+ if (tlb_entry_match_addr(tlbe, vaddr)) {
+ __clear_bit(bit, <lb_entries_bmp);
+ break;
+ }
+ }
+
+ if (bit == MMU_LTLB_WAYS)
+ panic("Trying to remove non-existent LTLB entry for addr %lx\n",
+ vaddr);
+
+ ret = clear_ltlb_entry(vaddr);
+ if (ret)
+ panic("Failed to remove LTLB entry for addr %lx\n", vaddr);
+}
+
+/**
+ * kvx_mmu_jtlb_add_entry - Add an entry into JTLB
+ *
+ * JTLB is used both for kernel and user entries.
+ *
+ * @address: Virtual address for the entry (must be aligned according to tlb_ps)
+ * @ptep: pte entry pointer
+ * @asn: ASN (if pte entry is not global)
+ */
+void kvx_mmu_jtlb_add_entry(unsigned long address, pte_t *ptep,
+ unsigned int asn)
+{
+ unsigned int shifted_addr, set, way;
+ unsigned long flags, pte_val;
+ struct kvx_tlb_format tlbe;
+ unsigned int pa, cp, ps;
+ phys_addr_t pfn;
+
+ pte_val = pte_val(*ptep);
+
+ pfn = (phys_addr_t)pte_pfn(*ptep);
+
+ asn &= MM_CTXT_ASN_MASK;
+
+ /* Set page as accessed */
+ pte_val(*ptep) |= _PAGE_ACCESSED;
+
+ BUILD_BUG_ON(KVX_PAGE_SZ_SHIFT != KVX_SFR_TEL_PS_SHIFT);
+
+ ps = (pte_val & KVX_PAGE_SZ_MASK) >> KVX_PAGE_SZ_SHIFT;
+ pa = get_page_access_perms(KVX_ACCESS_PERMS_INDEX(pte_val));
+ cp = pgprot_cache_policy(pte_val);
+
+ tlbe = tlb_mk_entry(
+ (void *)pfn_to_phys(pfn),
+ (void *)address,
+ ps,
+ (pte_val & _PAGE_GLOBAL) ? TLB_G_GLOBAL : TLB_G_USE_ASN,
+ pa,
+ cp,
+ asn,
+ TLB_ES_A_MODIFIED);
+
+ shifted_addr = address >> get_page_size_shift(ps);
+ set = shifted_addr & MMU_JTLB_SET_MASK;
+
+ local_irq_save(flags);
+
+ if (IS_ENABLED(CONFIG_KVX_DEBUG_TLB_WRITE) &&
+ kvx_mmu_ltlb_overlaps(tlbe))
+ panic("VA %lx overlaps with an existing LTLB mapping", address);
+
+ way = get_cpu_var(jtlb_current_set_way)[set]++;
+ put_cpu_var(jtlb_current_set_way);
+
+ way &= MMU_JTLB_WAY_MASK;
+
+ kvx_mmu_add_entry(MMC_SB_JTLB, way, tlbe);
+
+ if (IS_ENABLED(CONFIG_KVX_DEBUG_TLB_WRITE) &&
+ kvx_mmc_error(kvx_sfr_get(MMC)))
+ panic("Failed to write entry to the JTLB (in update_mmu_cache)");
+
+ local_irq_restore(flags);
+}
+
+void __init kvx_mmu_early_setup(void)
+{
+ int bit;
+ struct kvx_tlb_format tlbe;
+
+ kvx_mmu_remove_ltlb_entry(LTLB_ENTRY_EARLY_SMEM);
+
+ if (raw_smp_processor_id() != 0) {
+ /* Apply existing ltlb entries starting from first one free */
+ bit = LTLB_ENTRY_FIXED_COUNT;
+ for_each_set_bit_from(bit, <lb_entries_bmp, MMU_LTLB_WAYS) {
+ tlbe = ltlb_entries[bit];
+ kvx_mmu_add_entry(MMC_SB_LTLB, bit, tlbe);
+ }
+ }
+}
diff --git a/arch/kvx/mm/mmu_stats.c b/arch/kvx/mm/mmu_stats.c
new file mode 100644
index 0000000000000..e70b7e2dcab1a
--- /dev/null
+++ b/arch/kvx/mm/mmu_stats.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+
+#include <asm/mmu_stats.h>
+
+static struct dentry *mmu_stats_debufs;
+
+static const char *mmu_refill_types_name[MMU_REFILL_TYPE_COUNT] = {
+ [MMU_REFILL_TYPE_USER] = "User",
+ [MMU_REFILL_TYPE_KERNEL] = "Kernel",
+ [MMU_REFILL_TYPE_KERNEL_DIRECT] = "Kernel Direct"
+};
+
+DEFINE_PER_CPU(struct mmu_stats, mmu_stats);
+
+static int mmu_stats_show(struct seq_file *m, void *v)
+{
+ int cpu, type;
+ unsigned long avg = 0, total_refill, efficiency, total_cycles;
+ struct mmu_stats *stats;
+ struct mmu_refill_stats *ref_stat;
+
+ total_cycles = get_cycles();
+ for_each_present_cpu(cpu) {
+ stats = &per_cpu(mmu_stats, cpu);
+ total_refill = 0;
+
+ seq_printf(m, " - CPU %d\n", cpu);
+ for (type = 0; type < MMU_REFILL_TYPE_COUNT; type++) {
+ ref_stat = &stats->refill[type];
+ total_refill += ref_stat->count;
+ if (ref_stat->count)
+ avg = ref_stat->total / ref_stat->count;
+ else
+ avg = 0;
+
+ seq_printf(m,
+ " - %s refill stats:\n"
+ " - count: %lu\n"
+ " - min: %lu\n"
+ " - avg: %lu\n"
+ " - max: %lu\n",
+ mmu_refill_types_name[type],
+ ref_stat->count,
+ ref_stat->min,
+ avg,
+ ref_stat->max
+ );
+ }
+
+ if (total_refill)
+ avg = stats->cycles_between_refill / total_refill;
+ else
+ avg = 0;
+
+ seq_printf(m, " - Average cycles between refill: %lu\n", avg);
+ seq_printf(m, " - tlb_flush_all calls: %lu\n",
+ stats->tlb_flush_all);
+ efficiency = stats->cycles_between_refill * 100 /
+ stats->last_refill;
+ seq_printf(m, " - Efficiency: %lu%%\n", efficiency);
+ }
+
+ return 0;
+}
+
+static int mmu_stats_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, mmu_stats_show, NULL);
+}
+
+static const struct file_operations mmu_stats_fops = {
+ .open = mmu_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init mmu_stats_debufs_init(void)
+{
+ mmu_stats_debufs = debugfs_create_dir("kvx_mmu_debug", NULL);
+
+ debugfs_create_file("mmu_stats", 0444, mmu_stats_debufs, NULL,
+ &mmu_stats_fops);
+
+ return 0;
+}
+subsys_initcall(mmu_stats_debufs_init);
diff --git a/arch/kvx/mm/tlb.c b/arch/kvx/mm/tlb.c
new file mode 100644
index 0000000000000..a606dc89bba4e
--- /dev/null
+++ b/arch/kvx/mm/tlb.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ * Guillaume Thouvenin
+ */
+
+#include <linux/mmu_context.h>
+#include <linux/sched.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb_defs.h>
+#include <asm/page_size.h>
+#include <asm/mmu_stats.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+/*
+ * When in kernel, use dummy ASN 42 to be able to catch any problem easily if
+ * ASN is not restored properly.
+ */
+#define KERNEL_DUMMY_ASN 42
+
+/* Threshold of page count above which we will regenerate a new ASN */
+#define ASN_FLUSH_PAGE_THRESHOLD (MMU_JTLB_ENTRIES)
+
+/* Threshold of page count above which we will flush the whole JTLB */
+#define FLUSH_ALL_PAGE_THRESHOLD (MMU_JTLB_ENTRIES)
+
+DEFINE_PER_CPU(unsigned long, kvx_asn_cache) = MM_CTXT_FIRST_CYCLE;
+
+#ifdef CONFIG_KVX_DEBUG_TLB_ACCESS
+
+static DEFINE_PER_CPU_ALIGNED(struct kvx_tlb_access_t[KVX_TLB_ACCESS_SIZE],
+ kvx_tlb_access_rb);
+/* Lower bits hold the index and upper ones hold the number of wrapped */
+static DEFINE_PER_CPU(unsigned int, kvx_tlb_access_idx);
+
+void kvx_update_tlb_access(int type)
+{
+ unsigned int *idx_ptr = &get_cpu_var(kvx_tlb_access_idx);
+ unsigned int idx;
+ struct kvx_tlb_access_t *tab = get_cpu_var(kvx_tlb_access_rb);
+
+ idx = KVX_TLB_ACCESS_GET_IDX(*idx_ptr);
+
+ kvx_mmu_get_tlb_entry(tab[idx].entry);
+ tab[idx].mmc_val = kvx_sfr_get(MMC);
+ tab[idx].type = type;
+
+ (*idx_ptr)++;
+ put_cpu_var(kvx_tlb_access_rb);
+ put_cpu_var(kvx_tlb_access_idx);
+};
+
+#endif
+
+/**
+ * clear_tlb_entry() - clear an entry in TLB if it exists
+ * @addr: the address used to set TEH.PN
+ * @global: is page global or not
+ * @asn: ASN used if page is not global
+ * @tlb_type: tlb type (MMC_SB_LTLB or MMC_SB_JTLB)
+ *
+ * Preemption must be disabled when calling this function. There is no need to
+ * invalidate micro TLB because it is invalidated when we write TLB.
+ *
+ * Return: 0 if TLB entry was found and deleted properly, -ENOENT if not found
+ * -EINVAL if found but in incorrect TLB.
+ *
+ */
+static int clear_tlb_entry(unsigned long addr,
+ unsigned int global,
+ unsigned int asn,
+ unsigned int tlb_type)
+{
+ struct kvx_tlb_format entry;
+ unsigned long mmc_val;
+ int saved_asn, ret = 0;
+
+ /* Sanitize ASN */
+ asn &= MM_CTXT_ASN_MASK;
+
+ /* Before probing we need to save the current ASN */
+ mmc_val = kvx_sfr_get(MMC);
+ saved_asn = kvx_sfr_field_val(mmc_val, MMC, ASN);
+ kvx_sfr_set_field(MMC, ASN, asn);
+
+ /* Probe is based on PN and ASN. So ES can be anything */
+ entry = tlb_mk_entry(0, (void *)addr, 0, global, 0, 0, 0,
+ TLB_ES_INVALID);
+ kvx_mmu_set_tlb_entry(entry);
+
+ kvx_mmu_probetlb();
+
+ mmc_val = kvx_sfr_get(MMC);
+
+ if (kvx_mmc_error(mmc_val)) {
+ if (kvx_mmc_parity(mmc_val)) {
+ /*
+ * This should never happens unless you are bombared by
+ * streams of charged particules. If it happens just
+ * flush the JTLB and let's continue (but check your
+ * environment you are probably not in a safe place).
+ */
+ WARN(1, "%s: parity error during lookup (addr 0x%lx, asn %u). JTLB will be flushed\n",
+ __func__, addr, asn);
+ kvx_sfr_set_field(MMC, PAR, 0);
+ local_flush_tlb_all();
+ }
+
+ /*
+ * else there is no matching entry so just clean the error and
+ * restore the ASN before returning.
+ */
+ kvx_sfr_set_field(MMC, E, 0);
+ ret = -ENOENT;
+ goto restore_asn;
+ }
+
+ /* We surely don't want to flush another TLB type or we are fried */
+ if (kvx_mmc_sb(mmc_val) != tlb_type) {
+ ret = -EINVAL;
+ goto restore_asn;
+ }
+
+ /*
+ * At this point the probe found an entry. TEL and TEH have correct
+ * values so we just need to set the entry status to invalid to clear
+ * the entry.
+ */
+ kvx_sfr_set_field(TEL, ES, TLB_ES_INVALID);
+
+ kvx_mmu_writetlb();
+
+ /* Need to read MMC SFR again */
+ mmc_val = kvx_sfr_get(MMC);
+ if (kvx_mmc_error(mmc_val))
+ panic("%s: Failed to clear entry (addr 0x%lx, asn %u)",
+ __func__, addr, asn);
+ else
+ pr_debug("%s: Entry (addr 0x%lx, asn %u) cleared\n",
+ __func__, addr, asn);
+
+restore_asn:
+ kvx_sfr_set_field(MMC, ASN, saved_asn);
+
+ return ret;
+}
+
+static void clear_jtlb_entry(unsigned long addr,
+ unsigned int global,
+ unsigned int asn)
+{
+ clear_tlb_entry(addr, global, asn, MMC_SB_JTLB);
+}
+
+/**
+ * clear_ltlb_entry() - Remove a LTLB entry
+ * @vaddr: Virtual address to be matched against LTLB entries
+ *
+ * Return: Same value as clear_tlb_entry
+ */
+int clear_ltlb_entry(unsigned long vaddr)
+{
+ return clear_tlb_entry(vaddr, TLB_G_GLOBAL, KERNEL_DUMMY_ASN,
+ MMC_SB_LTLB);
+}
+
+/* If mm is current we just need to assign the current task a new ASN. By
+ * doing this all previous TLB entries with the former ASN will be invalidated.
+ * if mm is not the current one we invalidate the context and when this other
+ * mm will be swapped in then a new context will be generated.
+ */
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ int cpu = smp_processor_id();
+
+ destroy_context(mm);
+ if (current->active_mm == mm)
+ activate_context(mm, cpu);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ int cpu = smp_processor_id();
+ unsigned int current_asn;
+ struct mm_struct *mm;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ mm = vma->vm_mm;
+ current_asn = mm_asn(mm, cpu);
+
+ /* If mm has no context there is nothing to do */
+ if (current_asn != MM_CTXT_NO_ASN)
+ clear_jtlb_entry(addr, TLB_G_USE_ASN, current_asn);
+
+ local_irq_restore(flags);
+}
+
+void local_flush_tlb_all(void)
+{
+ struct kvx_tlb_format tlbe = KVX_EMPTY_TLB_ENTRY;
+ int set, way;
+ unsigned long flags;
+#ifdef CONFIG_KVX_MMU_STATS
+ struct mmu_stats *stats = &per_cpu(mmu_stats, smp_processor_id());
+
+ stats->tlb_flush_all++;
+#endif
+
+ local_irq_save(flags);
+
+ /* Select JTLB and prepare TEL (constant) */
+ kvx_sfr_set(TEL, (uint64_t) tlbe.tel_val);
+ kvx_sfr_set_field(MMC, SB, MMC_SB_JTLB);
+
+ for (set = 0; set < MMU_JTLB_SETS; set++) {
+ tlbe.teh.pn = set;
+ for (way = 0; way < MMU_JTLB_WAYS; way++) {
+ /* Set is selected automatically according to the
+ * virtual address.
+ * With 4K pages the set is the value of the 6 lower
+ * significant bits of the page number.
+ */
+ kvx_sfr_set(TEH, (uint64_t) tlbe.teh_val);
+ kvx_sfr_set_field(MMC, SW, way);
+ kvx_mmu_writetlb();
+
+ if (kvx_mmc_error(kvx_sfr_get(MMC)))
+ panic("Failed to initialize JTLB[s:%02d w:%d]",
+ set, way);
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ const unsigned int cpu = smp_processor_id();
+ unsigned long flags;
+ unsigned int current_asn;
+ unsigned long pages = (end - start) >> PAGE_SHIFT;
+
+ if (pages > ASN_FLUSH_PAGE_THRESHOLD) {
+ local_flush_tlb_mm(vma->vm_mm);
+ return;
+ }
+
+ start &= PAGE_MASK;
+
+ local_irq_save(flags);
+
+ current_asn = mm_asn(vma->vm_mm, cpu);
+ if (current_asn != MM_CTXT_NO_ASN) {
+ while (start < end) {
+ clear_jtlb_entry(start, TLB_G_USE_ASN, current_asn);
+ start += PAGE_SIZE;
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * local_flush_tlb_kernel_range() - flush kernel TLB entries
+ * @start: start kernel virtual address
+ * @end: end kernel virtual address
+ *
+ * Return: nothing
+ */
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long flags;
+ unsigned long pages = (end - start) >> PAGE_SHIFT;
+
+ if (pages > FLUSH_ALL_PAGE_THRESHOLD) {
+ local_flush_tlb_all();
+ return;
+ }
+
+ start &= PAGE_MASK;
+
+ local_irq_save(flags);
+
+ while (start < end) {
+ clear_jtlb_entry(start, TLB_G_GLOBAL, KERNEL_DUMMY_ASN);
+ start += PAGE_SIZE;
+ }
+
+ local_irq_restore(flags);
+}
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ pte_t pte = __pte(pmd_val(*pmd));
+
+ /* THP PMD accessors are implemented in terms of PTE */
+ update_mmu_cache(vma, addr, &pte);
+}
+
+void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ struct mm_struct *mm;
+ unsigned long asn;
+ int cpu = smp_processor_id();
+
+ if (unlikely(ptep == NULL))
+ panic("pte should not be NULL\n");
+
+ /* Flush any previous TLB entries */
+ flush_tlb_page(vma, address);
+
+ /* No need to add the TLB entry until the process that owns the memory
+ * is running.
+ */
+ mm = current->active_mm;
+ if (vma && (mm != vma->vm_mm))
+ return;
+
+ /*
+ * Get the ASN:
+ * ASN can have no context if address belongs to kernel space. In
+ * fact as pages are global for kernel space the ASN is ignored
+ * and can be equal to any value.
+ */
+ asn = mm_asn(mm, cpu);
+
+#ifdef CONFIG_KVX_DEBUG_ASN
+ /*
+ * For addresses that belong to user space, the value of the ASN
+ * must match the mmc.asn and be non zero
+ */
+ if (address < PAGE_OFFSET) {
+ unsigned int mmc_asn = kvx_mmc_asn(kvx_sfr_get(MMC));
+
+ if (asn == MM_CTXT_NO_ASN)
+ panic("%s: ASN [%lu] is not properly set for address 0x%lx on CPU %d\n",
+ __func__, asn, address, cpu);
+
+ if ((asn & MM_CTXT_ASN_MASK) != mmc_asn)
+ panic("%s: ASN not synchronized with MMC: asn:%lu != mmc.asn:%u\n",
+ __func__, (asn & MM_CTXT_ASN_MASK), mmc_asn);
+ }
+#endif
+
+ kvx_mmu_jtlb_add_entry(address, ptep, asn);
+}
+
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ for (;;) {
+ update_mmu_cache(vma, addr, ptep);
+ if (--nr == 0)
+ break;
+ addr += PAGE_SIZE;
+ ptep++;
+ }
+}
+
+#ifdef CONFIG_SMP
+
+struct tlb_args {
+ struct vm_area_struct *ta_vma;
+ unsigned long ta_start;
+ unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+ struct tlb_args *ta = arg;
+
+ local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+void
+smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct tlb_args ta = {
+ .ta_vma = vma,
+ .ta_start = addr
+ };
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+EXPORT_SYMBOL(smp_flush_tlb_page);
+
+void
+smp_flush_tlb_mm(struct mm_struct *mm)
+{
+ on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
+ mm, 1);
+}
+EXPORT_SYMBOL(smp_flush_tlb_mm);
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+ struct tlb_args *ta = arg;
+
+ local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+void
+smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct tlb_args ta = {
+ .ta_vma = vma,
+ .ta_start = start,
+ .ta_end = end
+ };
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+EXPORT_SYMBOL(smp_flush_tlb_range);
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+ struct tlb_args *ta = arg;
+
+ local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void
+smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ struct tlb_args ta = {
+ .ta_start = start,
+ .ta_end = end
+ };
+
+ on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
+EXPORT_SYMBOL(smp_flush_tlb_kernel_range);
+
+#endif
--
2.45.2
More information about the linux-riscv
mailing list