[PATCH 4/4] WIP: break before make and don't invalidate uncached regions being remapped
Ahmad Fatoum
a.fatoum at pengutronix.de
Wed May 21 10:41:04 PDT 2025
This still needs to be split up and cleaned up, but I am including it
anyway to show what remains to be done, here for arm32 and for arm64
as well.
Signed-off-by: Ahmad Fatoum <a.fatoum at pengutronix.de>
---
arch/arm/cpu/mmu_32.c | 253 +++++++++++++++++++++++++++++++++---------
1 file changed, 201 insertions(+), 52 deletions(-)
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 9f50194c7c2b..d4ed298ac64f 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -70,6 +70,45 @@ static void set_pte(uint32_t *pt, uint32_t val)
WRITE_ONCE(*pt, val);
}
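+/*
+ * set_pte_range - install @count consecutive page table entries
+ *
+ * The entries are assumed to advance @phys in PAGE_SIZE steps. If
+ * @break_before_make is set, every entry whose value actually changes is
+ * first replaced by a faulting entry and the TLB is invalidated before the
+ * new value is written, so no stale translation can be used while the
+ * update is in progress.
+ */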
+static void set_pte_range(uint32_t *virt, phys_addr_t phys,
+ size_t count, uint32_t flags,
+ bool break_before_make)
+{
+ bool made = false;
+
+ if (!break_before_make)
+ goto write_attrs;
+
+ if ((flags & PTE_TYPE_MASK) == PTE_TYPE_FAULT)
+ phys = 0;
+
+ for (int i = 0; i < count; i++) {
+ if (READ_ONCE(virt[i]) == ((phys + i * PAGE_SIZE) | flags))
+ continue;
+ set_pte(&virt[i], PTE_TYPE_FAULT);
+ made = true;
+ }
+
+	if (made) {
+		dma_flush_range(virt, count * sizeof(u32));
+		tlb_invalidate();
+	}
+
+write_attrs:
+ for (int i = 0; i < count; i++, phys += PAGE_SIZE)
+ set_pte(&virt[i], phys | flags);
+
+ dma_flush_range(virt, count * sizeof(u32));
+
+#if 0
+	pr_notice("%s(0x%08x+0x%zx, flags=0x%x%s)\n", __func__,
+		  (unsigned)virt, count * PAGE_SIZE, flags,
+		  made ? " [BBM]" : break_before_make ? " [BBM, but unneeded]" : "");
+#endif
+}
+
#ifdef __PBL__
static uint32_t *alloc_pte(void)
{
@@ -89,30 +128,47 @@ static uint32_t *alloc_pte(void)
}
#endif
-static u32 *find_pte(unsigned long adr)
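+/*
+ * __find_pte - look up the descriptor mapping @adr
+ *
+ * Returns the first level (section) descriptor and sets *level to 1 if no
+ * second level table exists, otherwise returns the second level PTE and
+ * sets *level to 2.
+ */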
+static u32 *__find_pte(uint32_t *ttb, unsigned long adr, int *level)
{
+ u32 *pgd = (u32 *)&ttb[pgd_index(adr)];
u32 *table;
- uint32_t *ttb = get_ttb();
- if (!pgd_type_table(ttb[pgd_index(adr)]))
- return NULL;
+ if (!pgd_type_table(*pgd)) {
+ *level = 1;
+ return pgd;
+ }
+
+ *level = 2;
/* find the coarse page table base address */
- table = (u32 *)(ttb[pgd_index(adr)] & ~0x3ff);
+ table = (u32 *)(*pgd & ~0x3ff);
/* find second level descriptor */
return &table[(adr >> PAGE_SHIFT) & 0xff];
}
+static u32 *find_pte(unsigned long adr)
+{
+ int level;
+ u32 *pte = __find_pte(get_ttb(), adr, &level);
+
+ return level == 2 ? pte : NULL;
+}
+
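+/* Clean + invalidate [start, end) in the data and, if present, outer cache */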
+static void dma_flush_range_end(unsigned long start, unsigned long end)
+{
+ __dma_flush_range(start, end);
+
+ if (outer_cache.flush_range)
+ outer_cache.flush_range(start, end);
+}
+
void dma_flush_range(void *ptr, size_t size)
{
unsigned long start = (unsigned long)ptr;
unsigned long end = start + size;
- __dma_flush_range(start, end);
-
- if (outer_cache.flush_range)
- outer_cache.flush_range(start, end);
+ dma_flush_range_end(start, end);
}
void dma_inv_range(void *ptr, size_t size)
@@ -132,11 +188,11 @@ void dma_inv_range(void *ptr, size_t size)
* Not yet exported, but may be later if someone finds use for it.
*/
static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
- uint32_t flags)
+ uint32_t flags, bool break_before_make)
{
uint32_t *ttb = get_ttb();
u32 *table;
- int i, ttb_idx;
+ int ttb_idx;
virt = ALIGN_DOWN(virt, PGDIR_SIZE);
phys = ALIGN_DOWN(phys, PGDIR_SIZE);
@@ -145,16 +201,11 @@ static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
ttb_idx = pgd_index(virt);
- for (i = 0; i < PTRS_PER_PTE; i++) {
- set_pte(&table[i], phys | PTE_TYPE_SMALL | flags);
- virt += PAGE_SIZE;
- phys += PAGE_SIZE;
- }
- dma_flush_range(table, PTRS_PER_PTE * sizeof(u32));
+ set_pte_range(table, phys, PTRS_PER_PTE, PTE_TYPE_SMALL | flags,
+ break_before_make);
- // TODO break-before-make missing
- set_pte(&ttb[ttb_idx], (unsigned long)table | PMD_TYPE_TABLE);
- dma_flush_range(&ttb[ttb_idx], sizeof(u32));
+ set_pte_range(&ttb[ttb_idx], (unsigned long)table, 1,
+ PMD_TYPE_TABLE, break_before_make);
return table;
}
@@ -243,6 +294,22 @@ static uint32_t get_pte_flags(int map_type)
}
}
+static const char *map_type_tostr(int map_type)
+{
+ switch (map_type) {
+ case MAP_CACHED:
+ return "CACHED";
+ case MAP_UNCACHED:
+ return "UNCACHED";
+ case ARCH_MAP_WRITECOMBINE:
+ return "WRITECOMBINE";
+ case MAP_FAULT:
+ return "FAULT";
+ default:
+ return "<unknown>";
+ }
+}
+
static uint32_t get_pmd_flags(int map_type)
{
return pte_flags_to_pmd(get_pte_flags(map_type));
@@ -250,6 +317,7 @@ static uint32_t get_pmd_flags(int map_type)
static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t size, unsigned map_type)
{
+ bool mmu_on;
u32 virt_addr = (u32)_virt_addr;
u32 pte_flags, pmd_flags;
uint32_t *ttb = get_ttb();
@@ -262,6 +330,13 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
size = PAGE_ALIGN(size);
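+	/*
+	 * Mappings that may be live in the TLB need break-before-make;
+	 * with the MMU still disabled, the tables can be rewritten directly.
+	 */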
+ mmu_on = get_cr() & CR_M;
+
+	pr_info("[MMU %s] remapping 0x%08x+0x%zx: phys 0x%08lx, type %s\n",
+		mmu_on ? " ON" : "OFF",
+		virt_addr, size, (ulong)phys_addr,
+		map_type_tostr(map_type));
+
while (size) {
const bool pgdir_size_aligned = IS_ALIGNED(virt_addr, PGDIR_SIZE);
u32 *pgd = (u32 *)&ttb[pgd_index(virt_addr)];
@@ -270,22 +345,20 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
if (size >= PGDIR_SIZE && pgdir_size_aligned &&
IS_ALIGNED(phys_addr, PGDIR_SIZE) &&
!pgd_type_table(*pgd)) {
- u32 val;
+ u32 flags;
/*
* TODO: Add code to discard a page table and
* replace it with a section
*/
chunk = PGDIR_SIZE;
- val = phys_addr | pmd_flags;
+ flags = pmd_flags;
if (map_type != MAP_FAULT)
- val |= PMD_TYPE_SECT;
- // TODO break-before-make missing
- set_pte(pgd, val);
- dma_flush_range(pgd, sizeof(*pgd));
+ flags |= PMD_TYPE_SECT;
+ set_pte_range(pgd, phys_addr, 1, flags, mmu_on);
} else {
unsigned int num_ptes;
u32 *table = NULL;
- unsigned int i;
+ u32 flags;
u32 *pte;
/*
* We only want to cover pages up until next
@@ -313,24 +386,16 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
* create a new page table for it
*/
table = arm_create_pte(virt_addr, phys_addr,
- pmd_flags_to_pte(*pgd));
+ pmd_flags_to_pte(*pgd), mmu_on);
pte = find_pte(virt_addr);
BUG_ON(!pte);
}
- for (i = 0; i < num_ptes; i++) {
- u32 val;
+ flags = pte_flags;
+ if (map_type != MAP_FAULT)
+ flags |= PTE_TYPE_SMALL;
- val = phys_addr + i * PAGE_SIZE;
- val |= pte_flags;
- if (map_type != MAP_FAULT)
- val |= PTE_TYPE_SMALL;
-
- // TODO break-before-make missing
- set_pte(&pte[i], val);
- }
-
- dma_flush_range(pte, num_ptes * sizeof(u32));
+ set_pte_range(pte, phys_addr, num_ptes, flags, mmu_on);
}
virt_addr += chunk;
@@ -345,12 +410,99 @@ static void early_remap_range(u32 addr, size_t size, unsigned map_type)
__arch_remap_range((void *)addr, addr, size, map_type);
}
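+/* Address range covered by a single descriptor at the given table level */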
+static size_t granule_size(int level)
+{
+ switch (level) {
+ default:
+ case 1:
+ return PGDIR_SIZE;
+ case 2:
+ return PAGE_SIZE;
+ }
+}
+
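+/* Test the cacheable attribute bit matching the descriptor's level */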
+static bool pte_is_cacheable(uint32_t pte, int level)
+{
+ return (level == 2 && (pte & PTE_CACHEABLE)) ||
+ (level == 1 && (pte & PMD_SECT_CACHEABLE));
+}
+
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size: Size of the range in bytes.
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+static void flush_cacheable_pages(void *start, size_t size)
+{
+ u32 flush_start = ~0UL, flush_end = ~0UL;
+ u32 region_start, region_end;
+ size_t block_size;
+ u32 *ttb;
+
+ region_start = PAGE_ALIGN_DOWN((ulong)start);
+	region_end = PAGE_ALIGN((ulong)start + size);
+
+ ttb = get_ttb();
+
+ /*
+ * TODO: This loop could be made more optimal by inlining the page walk,
+ * so we need not restart address translation from the top every time.
+ *
+ * The hope is that with the page tables being cached and the
+ * windows being remapped being small, the overhead compared to
+ * actually flushing the ranges isn't too significant.
+ */
+ for (u32 addr = region_start; addr < region_end; addr += block_size) {
+ int level;
+ u32 *pte = __find_pte(ttb, addr, &level);
+
+		block_size = granule_size(level);
+		/* when addr is in the middle of a block, only step to its end */
+		block_size -= addr & (block_size - 1);
+
+ if (!pte || !pte_is_cacheable(*pte, level))
+ continue;
+
+ if (flush_end == addr) {
+ /*
+ * While it's safe to flush the whole block_size,
+			 * there is no point in flushing beyond region_end.
+ */
+ flush_end = min(flush_end + block_size, region_end);
+ continue;
+ }
+
+ /*
+ * We don't have a previous contiguous flush area to append to.
+ * If we recorded any area before, let's flush it now
+ */
+ if (flush_start != ~0U) {
+ pr_notice("flushing %x-%x\n", flush_start, flush_end);
+ dma_flush_range_end(flush_start, flush_end);
+ }
+
+ /* and start the new contiguous flush area with this page */
+ flush_start = addr;
+ flush_end = min(flush_start + block_size, region_end);
+ }
+
+ /* The previous loop won't flush the last cached range, so do it here */
+ if (flush_start != ~0UL) {
+ pr_notice("flushing %x-%x\n", flush_start, flush_end);
+ dma_flush_range_end(flush_start, flush_end);
+ }
+}
+
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned map_type)
{
- __arch_remap_range(virt_addr, phys_addr, size, map_type);
- if (map_type == MAP_UNCACHED)
- dma_inv_range(virt_addr, size);
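+	/*
+	 * Instead of invalidating the whole range after the remap, flush only
+	 * those pages that are currently mapped cacheable, before their
+	 * attributes change; regions that are already uncached are left alone.
+	 */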
+ if (map_type != MAP_CACHED)
+ flush_cacheable_pages(virt_addr, size);
+
+ __arch_remap_range(virt_addr, phys_addr, size, map_type);
return 0;
}
@@ -377,13 +529,11 @@ static inline void create_flat_mapping(void)
void *map_io_sections(unsigned long phys, void *_start, size_t size)
{
- unsigned long start = (unsigned long)_start, sec;
+ unsigned long start = (unsigned long)_start;
uint32_t *ttb = get_ttb();
- for (sec = start; sec < start + size; sec += PGDIR_SIZE, phys += PGDIR_SIZE) {
- // TODO break-before-make missing
- set_pte(&ttb[pgd_index(sec)], phys | get_pmd_flags(MAP_UNCACHED));
- }
+	/* set_pte_range() steps phys by PAGE_SIZE, so map one section per call */
+	for (unsigned long sec = start; sec < start + size;
+	     sec += PGDIR_SIZE, phys += PGDIR_SIZE)
+		set_pte_range(&ttb[pgd_index(sec)], phys, 1,
+			      get_pmd_flags(MAP_UNCACHED), true);
dma_flush_range(ttb, 0x4000);
tlb_invalidate();
@@ -424,11 +574,10 @@ static void create_vector_table(unsigned long adr)
vectors = xmemalign(PAGE_SIZE, PAGE_SIZE);
pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n",
vectors, adr);
- arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED));
+ arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED), true);
pte = find_pte(adr);
- // TODO break-before-make missing
- set_pte(pte, (u32)vectors | PTE_TYPE_SMALL |
- get_pte_flags(MAP_CACHED));
+ set_pte_range(pte, (u32)vectors, 1, PTE_TYPE_SMALL |
+ get_pte_flags(MAP_CACHED), true);
}
arm_fixup_vectors();
--
2.39.5