[PATCH 04/17] KVM: arm64: Add a heap allocator for the pKVM hyp

Vincent Donnefort vdonnefort at google.com
Wed May 20 08:26:37 PDT 2026


Currently, memory used by the hypervisor comes from donations that are
embedded within HVCs, e.g. __pkvm_init_vm()'s hyp_vm.

This is cumbersome: the host needs to know the size of those
structs, and the memory must be page-aligned and physically contiguous,
which may be difficult to satisfy when host memory is highly fragmented.

Create a heap allocator to manage VA-contiguous memory. This allocator
grows upward, recycles unused chunks of memory and provides a simple API
to allocate and free:

  hyp_alloc(size), hyp_free(addr)

This heap allocator also manages the underlying physical memory,
allowing the host to top up the allocator's pool and reclaim memory.

  hyp_alloc_topup(), hyp_alloc_reclaim().

Pages remain mapped in the allocator's VA-space as long as they are not
reclaimed.

When the allocator runs out of memory, hyp_alloc() fails and
hyp_alloc_errno() returns -ENOMEM to signal that a top-up is required.

Additionally, harden the allocator by hashing chunk headers to detect
metadata corruption.

Signed-off-by: Vincent Donnefort <vdonnefort at google.com>

diff --git a/arch/arm64/kvm/hyp/include/nvhe/alloc.h b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
new file mode 100644
index 000000000000..8f87a63f8946
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_NVHE_ALLOC__
+#define __KVM_NVHE_ALLOC__
+#include <linux/types.h>
+
+#include <asm/kvm_host.h>
+
+/* Allocation API: hyp_alloc() returns NULL on failure, see hyp_alloc_errno() */
+void *hyp_alloc(size_t size);
+int hyp_alloc_errno(void);
+u32 hyp_alloc_topup_needed(void);
+void hyp_free(void *addr);
+
+/* Setup and management of the memory backing the allocator */
+int hyp_alloc_init(size_t size);
+int hyp_alloc_topup(struct kvm_hyp_memcache *host_mc);
+unsigned long hyp_alloc_reclaimable(void);
+void hyp_alloc_reclaim(struct kvm_hyp_memcache *host_mc, unsigned long target);
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 62cdfbff7562..66362bfa7061 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -23,7 +23,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 CFLAGS_switch.nvhe.o += -Wno-override-init
 
 hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
+	 hyp-main.o hyp-smp.o psci-relay.o alloc.o early_alloc.o page_alloc.o \
 	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
diff --git a/arch/arm64/kvm/hyp/nvhe/alloc.c b/arch/arm64/kvm/hyp/nvhe/alloc.c
new file mode 100644
index 000000000000..183336f297c3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/alloc.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Google LLC
+ * Author: Vincent Donnefort <vdonnefort at google.com>
+ *
+ * This heap allocator manages a reserved VA space range, dynamically mapping
+ * and unmapping physical pages on-demand to minimise the pKVM hypervisor
+ * footprint. As memory is reclaimed and relinquished to the host, unmapped
+ * holes are introduced within the VA space. To prevent orphan mapped regions,
+ * neighboring unused chunks cannot be merged if they are separated by an
+ * unmapped region.
+ *
+ */
+
+#include <nvhe/alloc.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/mm.h>
+#include <nvhe/spinlock.h>
+
+#include <linux/build_bug.h>
+#include <linux/hash.h>
+
+#define MIN_ALLOC_SIZE 8UL /* Must be a power of two */
+
+/**
+ * struct chunk_hdr - Chunk header
+ * @next:       Byte offset from this header up to the next one, 0 if none.
+ * @prev:       Byte offset from this header down to the previous one, 0 if none.
+ * @__unmapped: Byte offset to the unmapped page boundary (0 if none), with the
+ *              allocation state flag multiplexed into bit 0 (USED_BIT_MASK).
+ * @hash:       Hash of @next, @prev and @__unmapped, see chunk_hash_compute().
+ * @data:       Start of the memory handed out to the caller of the allocator.
+ */
+struct chunk_hdr {
+	u32			next;
+	u32			prev;
+#define USED_BIT_MASK		1U
+	u32			__unmapped;
+	u32			hash;
+	char			data[];
+} __aligned(MIN_ALLOC_SIZE);
+
+/**
+ * struct hyp_allocator - Heap allocator
+ * @start:		Start of the allocator's reserved virtual address range.
+ * @end:		End of the allocator's reserved virtual address range.
+ * @last_used:		Pointer to the end of the last used chunk. This is
+ *			necessary for the last chunk in the list as the
+ *			allocated size of a chunk is derived from the next one.
+ * @first_unmapped:	Pointer to the first unmapped page in the
+ *			allocator's range. This is only necessary and
+ *			updated when no chunk is in the list.
+ * @head:		Head of the chunk list, NULL when the list is empty.
+ * @tail:		Tail of the chunk list, NULL when the list is empty.
+ * @mc:			Memcache containing pre-allocated pages for mapping.
+ * @lock:		Spinlock protecting the allocator state.
+ * @errno:		Per-CPU error code of the last failed allocation,
+ *			cleared when read by hyp_allocator_errno().
+ * @topup_needed:	Per-CPU count of pages the memcache was short of,
+ *			cleared when read by hyp_allocator_topup_needed().
+ */
+struct hyp_allocator {
+	void			*start;
+	void			*end;
+	void			*last_used;
+	void			*first_unmapped;
+	struct chunk_hdr	*head;
+	struct chunk_hdr	*tail;
+	struct kvm_hyp_memcache	mc;
+	hyp_spinlock_t		lock;
+	int __percpu		*errno;
+	u32 __percpu		*topup_needed;
+};
+
+/*
+ * Compute the hash protecting @chunk's metadata. The @hash field itself is not
+ * part of the hashed data.
+ */
+static u32 chunk_hash_compute(const struct chunk_hdr *chunk)
+{
+	/* The first 64 bits of the header hold both @next and @prev */
+	u64 links = *(const u64 *)chunk;
+
+	BUILD_BUG_ON(sizeof(*chunk) != 16);
+
+	return hash_64(links, 32) ^ hash_32(chunk->__unmapped, 32);
+}
+
+/* Refresh the hash stored in @chunk. A NULL @chunk is silently ignored. */
+static void chunk_set_hash(struct chunk_hdr *chunk)
+{
+	if (!chunk)
+		return;
+
+	chunk->hash = chunk_hash_compute(chunk);
+}
+
+/* Warn if the hash stored in @chunk is stale. A NULL @chunk is ignored. */
+static void chunk_check_hash(const struct chunk_hdr *chunk)
+{
+	if (!chunk)
+		return;
+
+	WARN_ON(chunk_hash_compute(chunk) != chunk->hash);
+}
+
+/* Tell whether the allocation state flag of @chunk is set. */
+static bool chunk_is_used(const struct chunk_hdr *chunk)
+{
+	return (chunk->__unmapped & USED_BIT_MASK) != 0;
+}
+
+/* Flag @chunk as allocated. The hash is not updated here. */
+static void chunk_set_used(struct chunk_hdr *chunk)
+{
+	chunk->__unmapped = chunk->__unmapped | USED_BIT_MASK;
+}
+
+/* Flag @chunk as free. The hash is not updated here. */
+static void chunk_set_unused(struct chunk_hdr *chunk)
+{
+	chunk->__unmapped = chunk->__unmapped & ~USED_BIT_MASK;
+}
+
+/*
+ * Return the address of the unmapped page boundary recorded in @chunk, or NULL
+ * when no offset is recorded.
+ */
+static void *chunk_unmapped(const struct chunk_hdr *chunk)
+{
+	/* Strip the allocation state flag to get the bare offset */
+	u32 off = chunk->__unmapped & ~USED_BIT_MASK;
+
+	return off ? (void *)chunk + off : NULL;
+}
+
+/* Store the @unmapped offset in @chunk while keeping its used flag intact. */
+static void __chunk_set_unmapped(struct chunk_hdr *chunk, u32 unmapped)
+{
+	u32 flag = chunk_is_used(chunk) ? USED_BIT_MASK : 0;
+
+	chunk->__unmapped = unmapped | flag;
+}
+
+/*
+ * Record @unmapped as the unmapped page boundary of @chunk. A NULL @unmapped
+ * clears the record. Warns on an address that is not page-aligned or that
+ * lies below @chunk.
+ */
+static void chunk_set_unmapped(struct chunk_hdr *chunk, void *unmapped)
+{
+	u32 offset = 0;
+
+	WARN_ON(!PAGE_ALIGNED(unmapped));
+
+	if (unmapped) {
+		WARN_ON((void *)chunk > unmapped);
+		offset = unmapped - (void *)chunk;
+	}
+
+	__chunk_set_unmapped(chunk, offset);
+}
+
+/* Return the address of the data area that follows @chunk's header. */
+static void *chunk_data(const struct chunk_hdr *chunk)
+{
+	return (void *)chunk->data;
+}
+
+/* Next chunk lookup without hash verification. NULL on a zero offset. */
+static struct chunk_hdr *__chunk_next(const struct chunk_hdr *chunk)
+{
+	u32 off = chunk->next;
+
+	return off ? (struct chunk_hdr *)((void *)chunk + off) : NULL;
+}
+
+/* Previous chunk lookup without hash verification. NULL on a zero offset. */
+static struct chunk_hdr *__chunk_prev(const struct chunk_hdr *chunk)
+{
+	u32 off = chunk->prev;
+
+	return off ? (struct chunk_hdr *)((void *)chunk - off) : NULL;
+}
+
+/*
+ * Point @chunk's next link at @next, or clear it when @next is NULL. Nothing
+ * is done for a NULL @chunk. The hash is not updated here.
+ */
+static void chunk_set_next(struct chunk_hdr *chunk, struct chunk_hdr *next)
+{
+	if (!chunk)
+		return;
+
+	if (!next) {
+		chunk->next = 0;
+		return;
+	}
+
+	/* Links are unsigned offsets: @next must sit above @chunk */
+	WARN_ON(chunk > next);
+	chunk->next = (void *)next - (void *)chunk;
+}
+
+/*
+ * Point @chunk's previous link at @prev, or clear it when @prev is NULL.
+ * Nothing is done for a NULL @chunk. The hash is not updated here.
+ */
+static void chunk_set_prev(struct chunk_hdr *chunk, struct chunk_hdr *prev)
+{
+	if (!chunk)
+		return;
+
+	if (!prev) {
+		chunk->prev = 0;
+		return;
+	}
+
+	/* Links are unsigned offsets: @prev must sit below @chunk */
+	WARN_ON(chunk < prev);
+	chunk->prev = (void *)chunk - (void *)prev;
+}
+
+/* Return the chunk following @chunk, if any, after checking its hash. */
+static struct chunk_hdr *chunk_get_next(const struct chunk_hdr *chunk)
+{
+	struct chunk_hdr *neigh = __chunk_next(chunk);
+
+	chunk_check_hash(neigh);
+
+	return neigh;
+}
+
+/* Return the chunk preceding @chunk, if any, after checking its hash. */
+static struct chunk_hdr *chunk_get_prev(const struct chunk_hdr *chunk)
+{
+	struct chunk_hdr *neigh = __chunk_prev(chunk);
+
+	chunk_check_hash(neigh);
+
+	return neigh;
+}
+
+/* Check the hash of @chunk and hand the same pointer back. */
+static struct chunk_hdr *chunk_get(struct chunk_hdr *chunk)
+{
+	chunk_check_hash(chunk);
+
+	return chunk;
+}
+
+/* Size of the header that precedes the data of every chunk */
+#define chunk_hdr_size() \
+	offsetof(struct chunk_hdr, data)
+
+/* Smallest footprint of a chunk: its header plus a minimal allocation */
+#define chunk_min_size() \
+	(chunk_hdr_size() + MIN_ALLOC_SIZE)
+
+/*
+ * Return the number of bytes between @chunk's data and the next chunk header,
+ * or the end of the allocator's range for the last chunk in the list.
+ */
+static size_t chunk_data_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+	struct chunk_hdr *next = chunk_get_next(chunk);
+	void *limit = next ? (void *)next : allocator->end;
+
+	return limit - chunk_data(chunk);
+}
+
+/*
+ * Return the number of data bytes of @chunk that precede its unmapped
+ * boundary, or its whole data size when no boundary is recorded.
+ */
+static size_t chunk_mapped_data_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+	void *boundary = chunk_unmapped(chunk);
+
+	if (boundary)
+		return boundary - chunk_data(chunk);
+
+	return chunk_data_size(chunk, allocator);
+}
+
+/*
+ * Return the number of data bytes in use in @chunk: 0 for an unused chunk, the
+ * mapped data size for a used chunk followed by another one, and the distance
+ * to allocator->last_used for a used chunk at the end of the list.
+ */
+static size_t chunk_used_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+	struct chunk_hdr *next = chunk_get_next(chunk);
+
+	if (!chunk_is_used(chunk))
+		return 0;
+
+	/* The last chunk has no successor to derive its size from */
+	if (!next)
+		return allocator->last_used - chunk_data(chunk);
+
+	return chunk_mapped_data_size(chunk, allocator);
+}
+
+/*
+ * Link @chunk right after @prev, in front of @prev's current successor. With a
+ * NULL @prev, @chunk ends up with no neighbour. The hashes of the neighbours
+ * are refreshed, the hash of @chunk is left to the caller.
+ */
+static void chunk_list_insert(struct chunk_hdr *chunk, struct chunk_hdr *prev)
+{
+	struct chunk_hdr *next;
+
+	WARN_ON(!chunk);
+
+	next = prev ? chunk_get_next(prev) : NULL;
+
+	/* The setters and chunk_set_hash() do nothing on a NULL chunk */
+	chunk_set_next(prev, chunk);
+	chunk_set_hash(prev);
+
+	chunk_set_prev(next, chunk);
+	chunk_set_hash(next);
+
+	chunk_set_next(chunk, next);
+	chunk_set_prev(chunk, prev);
+}
+
+/*
+ * Unlink @chunk by connecting its neighbours to each other and refreshing
+ * their hashes. The links stored in @chunk itself are left untouched.
+ */
+static void chunk_list_del(struct chunk_hdr *chunk)
+{
+	struct chunk_hdr *before, *after;
+
+	WARN_ON(!chunk);
+
+	before = chunk_get_prev(chunk);
+	after = chunk_get_next(chunk);
+
+	/* The setters and chunk_set_hash() do nothing on a NULL chunk */
+	chunk_set_next(before, after);
+	chunk_set_hash(before);
+
+	chunk_set_prev(after, before);
+	chunk_set_hash(after);
+}
+
+/*
+ * Return a fixed-up start address for chunk creation:
+ *  - the next page boundary when a header placed at @start would cross it;
+ *  - chunk_min_size() bytes into the page when @start is closer than that to
+ *    the page start;
+ *  - @start itself otherwise.
+ * This is intended to prevent orphan mapped regions during chunk memory
+ * reclaim.
+ */
+static void *chunk_start(void *start)
+{
+	void *page_up = PTR_ALIGN(start, PAGE_SIZE);
+	void *page_down = PTR_ALIGN_DOWN(start, PAGE_SIZE);
+
+	if (page_up - start < chunk_hdr_size())
+		return page_up;
+
+	if (start - page_down < chunk_min_size())
+		return page_down + chunk_min_size();
+
+	return start;
+}
+
+/*
+ * Make sure the memory up to @end is backed by pages, mapping pages popped
+ * from the allocator's memcache from the current unmapped boundary onwards.
+ *
+ * The boundary is read from @chunk, or from allocator->first_unmapped when
+ * @chunk is NULL, and is written back after each mapped page so that progress
+ * is kept when a later page cannot be mapped.
+ *
+ * Return: 0 on success, -ENOMEM when the memcache is empty (the per-CPU
+ * topup_needed counter is then set to the number of pages still missing), or
+ * the error returned by __hyp_allocator_map().
+ */
+static int hyp_allocator_map(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+			     struct chunk_hdr *next,
+			     void *addr, void *end)
+{
+	void *unmapped = chunk ? chunk_unmapped(chunk) : allocator->first_unmapped;
+
+	/*
+	 * hyp_allocator_can_create_chunk() already validates addr/end
+	 * belong to the chunk.
+	 */
+	WARN_ON(end <= addr);
+
+	/* The chunk does not span an unmapped region */
+	if (!unmapped)
+		return 0;
+
+	while (unmapped < end) {
+		void *page = pop_hyp_memcache(&allocator->mc, hyp_phys_to_virt);
+		int ret;
+
+		if (!page) {
+			/* Tell the caller how many pages are still to be provided */
+			end = PTR_ALIGN(end, PAGE_SIZE);
+			*this_cpu_ptr(allocator->topup_needed) =
+				(unsigned long)(end - unmapped) >> PAGE_SHIFT;
+			return -ENOMEM;
+		}
+
+		ret = __hyp_allocator_map(unmapped, hyp_virt_to_phys(page));
+		if (ret) {
+			/* The page was not consumed, return it to the memcache */
+			push_hyp_memcache(&allocator->mc, page, hyp_virt_to_phys);
+			return ret;
+		}
+
+		unmapped += PAGE_SIZE;
+
+		/*
+		 * Reset the unmap field if we've reached the next chunk or the
+		 * allocator boundary.
+		 */
+		if (unmapped == (next ?: allocator->end))
+			unmapped = 0;
+
+		if (chunk) {
+			chunk_set_unmapped(chunk, unmapped);
+			chunk_set_hash(chunk);
+		} else {
+			allocator->first_unmapped = unmapped;
+		}
+
+		/* A zero boundary would otherwise keep satisfying the loop test */
+		if (!unmapped)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Push the pages backing [@addr, @end) into the allocator's memcache and
+ * remove their mappings. @addr then becomes the unmapped boundary of @chunk,
+ * or allocator->first_unmapped when @chunk is NULL.
+ */
+static void hyp_allocator_unmap(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+				void *addr, void *end)
+{
+	void *unmap = addr;
+
+	/*
+	 * hyp_allocator_chunk_reclaimable() already computes valid addr/end, no
+	 * need to check them again
+	 */
+	WARN_ON(end <= addr);
+
+	/* Collect the backing pages while the mappings are still in place */
+	while (unmap < end) {
+		/* presumably the PA currently mapped at this VA - TODO confirm */
+		phys_addr_t pa = __pkvm_private_range_pa((void *)unmap);
+		void *page = hyp_phys_to_virt(pa);
+
+		push_hyp_memcache(&allocator->mc, page, hyp_virt_to_phys);
+		unmap += PAGE_SIZE;
+	}
+
+	pkvm_remove_mappings((void *)addr, (void *)(end));
+
+	if (chunk) {
+		chunk_set_unmapped(chunk, addr);
+		chunk_set_hash(chunk);
+	} else {
+		allocator->first_unmapped = addr;
+	}
+}
+
+/*
+ * Tell whether a chunk whose header starts at @addr and whose data finishes at
+ * @end can be installed between @prev and @next. With a NULL @prev only
+ * allocator->start is accepted for @addr.
+ */
+static bool hyp_allocator_can_create_chunk(struct hyp_allocator *allocator,
+					   const struct chunk_hdr *prev,
+					   const struct chunk_hdr *next,
+					   void *addr, void *end)
+{
+	void *page_up = PTR_ALIGN(addr, PAGE_SIZE);
+	void *page_down = PTR_ALIGN_DOWN(addr, PAGE_SIZE);
+	void *boundary;
+
+	/* Must fit in the allocator's range */
+	if (addr < allocator->start || end > allocator->end)
+		return false;
+
+	/* First chunk created must be installed at allocator->start */
+	if (!prev)
+		return addr == allocator->start;
+
+	/* Must not overwrite the next chunk */
+	if (next && end > (void *)next)
+		return false;
+
+	/* Must not overwrite the previous chunk */
+	if (addr < (chunk_data(prev) + chunk_used_size(prev, allocator)))
+		return false;
+
+	/* Header must not cross page boundaries */
+	if (page_up != addr && (page_up - addr) < chunk_hdr_size())
+		return false;
+
+	/* Must leave a minimum distance from a page-start to maximise reclaim */
+	if (page_down != addr && (addr - page_down) < chunk_min_size())
+		return false;
+
+	/* Must never create an orphan mapped region */
+	boundary = chunk_unmapped(prev);
+
+	return !boundary || addr <= boundary;
+}
+
+/*
+ * Tries to create a new chunk in the allocator whose header starts at @addr and
+ * whose data finishes at @end.
+ *
+ * The chunk is linked after @prev, or becomes the only chunk of the list when
+ * @prev is NULL. It is flagged used or unused according to @used.
+ *
+ * Return: the new chunk, ERR_PTR(-E2BIG) when @end is past the allocator's
+ * range, ERR_PTR(-EINVAL) when hyp_allocator_can_create_chunk() refuses the
+ * placement, or the ERR_PTR() of the hyp_allocator_map() error.
+ */
+static struct chunk_hdr *hyp_allocator_create_chunk(struct hyp_allocator *allocator,
+						    struct chunk_hdr *prev, void *addr,
+						    void *end, bool used)
+{
+	struct chunk_hdr *next, *chunk = addr;
+	void *unmapped;
+	int ret;
+
+	if (end > allocator->end)
+		return ERR_PTR(-E2BIG);
+
+	next = prev ? chunk_get_next(prev) : NULL;
+	if (!hyp_allocator_can_create_chunk(allocator, prev, next, addr, end))
+		return ERR_PTR(-EINVAL);
+
+	/* The header is written below, its memory must be mapped first */
+	ret = hyp_allocator_map(allocator, prev, next, addr, end);
+	if (ret)
+		return ERR_PTR(ret);
+
+	memset(chunk, 0, sizeof(*chunk));
+	if (used)
+		chunk_set_used(chunk);
+	else
+		chunk_set_unused(chunk);
+
+	/* First chunk, first allocation */
+	if (!prev) {
+		chunk_set_unmapped(chunk, allocator->first_unmapped);
+		chunk_list_insert(chunk, NULL);
+		chunk_set_hash(chunk);
+
+		allocator->last_used = end;
+		allocator->head = allocator->tail = chunk;
+		return chunk;
+	}
+
+	/* Last chunk in the list */
+	if (!next) {
+		allocator->last_used = end;
+		allocator->tail = chunk;
+	}
+
+	/* Inherit prev's unmapped region */
+	unmapped = chunk_unmapped(prev);
+	chunk_set_unmapped(chunk, unmapped);
+	chunk_list_insert(chunk, prev);
+	chunk_set_hash(chunk);
+
+	/* The unmapped region now belongs to the new chunk only */
+	chunk_set_unmapped(prev, 0);
+	chunk_set_hash(prev);
+
+	return chunk;
+}
+
+/*
+ * Tell whether the unused @chunk, sitting between @prev and @next, can be
+ * removed from the list so that its range is absorbed by @prev.
+ */
+static bool hyp_allocator_can_destroy_chunk(struct hyp_allocator *allocator,
+					    const struct chunk_hdr *prev,
+					    const struct chunk_hdr *next,
+					    const struct chunk_hdr *chunk)
+{
+	/* Only unused chunks can go */
+	if (chunk_is_used(chunk))
+		return false;
+
+	/* Last chunk in the allocator */
+	if (!prev)
+		return true;
+
+	/* Can't merge down unless we are the last one in the list */
+	if (next && chunk_is_used(prev))
+		return false;
+
+	/* Must never create an orphan mapped region */
+	return !chunk_unmapped(prev);
+}
+
+/*
+ * Remove @chunk from the list. Its unmapped boundary is handed over to @prev,
+ * or to allocator->first_unmapped when @prev is NULL, in which case the list
+ * is emptied.
+ *
+ * Return: 0 on success, -EINVAL when hyp_allocator_can_destroy_chunk() refuses
+ * the removal.
+ */
+static int hyp_allocator_destroy_chunk(struct hyp_allocator *allocator,
+				       struct chunk_hdr *prev,
+				       struct chunk_hdr *chunk)
+{
+	struct chunk_hdr *next;
+
+	next = prev ? chunk_get_next(chunk) : NULL;
+	if (!hyp_allocator_can_destroy_chunk(allocator, prev, next, chunk))
+		return -EINVAL;
+
+	/* Last chunk in the allocator */
+	if (!prev) {
+		allocator->first_unmapped = chunk_unmapped(chunk);
+		allocator->head = allocator->tail = NULL;
+		return 0;
+	}
+
+	/* Last chunk in the list */
+	if (!next) {
+		/* @prev's data now stops where the destroyed header started */
+		allocator->last_used = chunk;
+		allocator->tail = prev;
+	}
+
+	chunk_set_unmapped(prev, chunk_unmapped(chunk));
+	chunk_set_hash(prev);
+	chunk_list_del(chunk);
+
+	return 0;
+}
+
+/*
+ * Return the best unused chunk for recycling: the one that needs the fewest
+ * additional pages mapped to hold @size bytes and, among those, the smallest.
+ * Returns NULL when no unused chunk can hold @size bytes.
+ */
+static struct chunk_hdr *hyp_allocator_find_efficient_chunk(struct hyp_allocator *allocator,
+							    size_t size)
+{
+	struct chunk_hdr *chunk, *best_chunk = NULL;
+	size_t best_data_size = SIZE_MAX;
+	size_t best_missing = SIZE_MAX;
+
+	chunk = allocator->head;
+	while (chunk) {
+		size_t missing, mapped, data_size;
+
+		if (chunk_is_used(chunk))
+			goto next;
+
+		data_size = chunk_data_size(chunk, allocator);
+		if (data_size < size)
+			goto next;
+
+		/* Number of pages to map before @size bytes are usable */
+		mapped = chunk_mapped_data_size(chunk, allocator);
+		missing = (size > mapped) ? DIV_ROUND_UP(size - mapped, PAGE_SIZE) : 0;
+		if (missing > best_missing)
+			goto next;
+
+		/*
+		 * The size only breaks ties. A chunk needing fewer pages must
+		 * win even when it is bigger, otherwise the result depends on
+		 * the list order and memcache pages may be consumed needlessly.
+		 */
+		if (missing == best_missing && data_size >= best_data_size)
+			goto next;
+
+		best_missing = missing;
+		best_data_size = data_size;
+		best_chunk = chunk;
+
+next:
+		chunk = chunk_get_next(chunk);
+	}
+
+	return best_chunk;
+}
+
+/*
+ * Turn the unused @chunk into a used chunk holding @size bytes. When @chunk is
+ * followed by another chunk, the space left after @size bytes is split off into
+ * a new unused chunk if hyp_allocator_can_create_chunk() allows it.
+ *
+ * Return: @chunk on success, or the ERR_PTR() of the hyp_allocator_map() error,
+ * in which case @chunk stays unused.
+ */
+static struct chunk_hdr *hyp_allocator_reuse_chunk(struct hyp_allocator *allocator,
+						   struct chunk_hdr *chunk, size_t size)
+{
+	struct chunk_hdr *next = chunk_get_next(chunk);
+	void *start, *end, *split, *split_end;
+	int ret;
+
+	start = chunk_data(chunk);
+	end = start + size;
+
+	/* Last chunk in the list, no need to split */
+	if (!next) {
+		split = split_end = NULL;
+		/*
+		 * Updated before the mapping below can fail. This is harmless
+		 * as chunk_used_size() returns 0 while the chunk is unused.
+		 */
+		allocator->last_used = chunk_data(chunk) + size;
+	} else {
+		split = chunk_start(end);
+		split_end = split + chunk_min_size();
+
+		if (!hyp_allocator_can_create_chunk(allocator, chunk, next, split, split_end))
+			split = split_end = NULL;
+	}
+
+	/* Batch the mapping of the reused chunk and the split */
+	ret = hyp_allocator_map(allocator, chunk, next, chunk_data(chunk), split ? split_end : end);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Validated and mapped above, the creation is not expected to fail */
+	if (split)
+		WARN_ON(IS_ERR_OR_NULL(
+			hyp_allocator_create_chunk(allocator, chunk, split, split_end, false)));
+
+	chunk_set_used(chunk);
+	chunk_set_hash(chunk);
+
+	return chunk;
+}
+
+/*
+ * Allocate @size bytes, raised to at least MIN_ALLOC_SIZE and rounded up to a
+ * multiple of it. An unused chunk is recycled when one fits, else a new chunk
+ * is created after the tail of the list.
+ *
+ * Return: a pointer to the zeroed memory, or NULL with the reason stored in the
+ * per-CPU errno of @allocator.
+ */
+static void *hyp_allocator_alloc(struct hyp_allocator *allocator, size_t size)
+{
+	struct chunk_hdr *chunk = ERR_PTR(-E2BIG);
+	void *start, *end;
+
+	size = max(size, MIN_ALLOC_SIZE);
+
+	/* Ensure we do not overflow ALIGN(MIN_ALLOC_SIZE) */
+	if (size > U32_MAX)
+		goto errno;
+
+	size = ALIGN(size, MIN_ALLOC_SIZE);
+	if (size > (allocator->end - allocator->start - chunk_hdr_size()))
+		goto errno;
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+	/* The allocator can modify the hyp stage-1 */
+	if (WARN_ON(hyp_spin_is_locked(&pkvm_pgd_lock))) {
+		chunk = ERR_PTR(-EINVAL);
+		goto errno;
+	}
+#endif
+	hyp_spin_lock(&allocator->lock);
+
+	/* First allocation */
+	if (!allocator->head) {
+		start = allocator->start;
+		end = start + chunk_hdr_size() + size;
+		chunk = hyp_allocator_create_chunk(allocator, NULL, start, end, true);
+		goto unlock;
+	}
+
+	chunk = hyp_allocator_find_efficient_chunk(allocator, size);
+
+	/* Nothing found, create a new chunk at the end in the list */
+	if (!chunk) {
+		start = chunk_start(chunk_data(allocator->tail) +
+				    chunk_used_size(allocator->tail, allocator));
+		end = start + chunk_hdr_size() + size;
+		chunk = hyp_allocator_create_chunk(allocator, allocator->tail, start, end, true);
+		goto unlock;
+	}
+
+	chunk = hyp_allocator_reuse_chunk(allocator, chunk, size);
+
+unlock:
+	hyp_spin_unlock(&allocator->lock);
+
+errno:
+	if (IS_ERR_OR_NULL(chunk)) {
+		int errno = IS_ERR(chunk) ? PTR_ERR(chunk) : -EINVAL;
+
+		*this_cpu_ptr(allocator->errno) = errno;
+		return NULL;
+	}
+
+	/* The chunk is flagged used by now, it is zeroed without the lock held */
+	memset(chunk_data(chunk), 0, size);
+	return chunk_data(chunk);
+}
+
+/*
+ * Release the allocation whose data starts at @data. A NULL @data is ignored.
+ * The chunk is flagged unused, then the following chunk is merged into it and
+ * the chunk itself into the previous one, whenever
+ * hyp_allocator_destroy_chunk() accepts it.
+ */
+static void hyp_allocator_free(struct hyp_allocator *allocator, void *data)
+{
+	struct chunk_hdr *chunk, *next, *prev;
+
+	if (!data)
+		return;
+
+	WARN_ON(!IS_ALIGNED((unsigned long)data, MIN_ALLOC_SIZE));
+	WARN_ON(data >= allocator->end || data < allocator->start + chunk_hdr_size());
+
+	hyp_spin_lock(&allocator->lock);
+
+	chunk = chunk_get(container_of(data, struct chunk_hdr, data));
+	WARN_ON(!chunk_is_used(chunk));
+	chunk_set_unused(chunk);
+	chunk_set_hash(chunk);
+
+	/* Best effort: a refused merge leaves the chunks as they are */
+	next = chunk_get_next(chunk);
+	if (next)
+		hyp_allocator_destroy_chunk(allocator, chunk, next);
+
+	prev = chunk_get_prev(chunk);
+	if (prev)
+		hyp_allocator_destroy_chunk(allocator, prev, chunk);
+
+	hyp_spin_unlock(&allocator->lock);
+}
+
+/*
+ * Compute the page-aligned region of @chunk that can be unmapped.
+ *
+ * When pages can be reclaimed and the pointers are not NULL, the bounds of the
+ * region are stored in @__addr and @__end.
+ *
+ * Return: the number of reclaimable pages, 0 if none.
+ */
+static unsigned long hyp_allocator_chunk_reclaimable(struct hyp_allocator *allocator,
+						     const struct chunk_hdr *chunk,
+						     u64 *__addr, u64 *__end)
+{
+	struct chunk_hdr *next;
+	void *addr, *end;
+
+	/* Last chunk in the allocator */
+	if (chunk == allocator->head && chunk == allocator->tail && !chunk_is_used(chunk)) {
+		/* Nothing is left to keep, the page of the header can go too */
+		addr = (void *)chunk;
+		end = chunk_unmapped(chunk);
+		if (!end)
+			end = allocator->end;
+		goto end;
+	}
+
+	next = chunk_get_next(chunk);
+
+	/* Last chunk in the list we can reclaim, even if used */
+	if (!next) {
+		addr = chunk_data(chunk) + chunk_used_size(chunk, allocator);
+		addr = PTR_ALIGN(addr, PAGE_SIZE);
+		end = chunk_unmapped(chunk);
+		if (!end)
+			end = allocator->end;
+		goto end;
+	}
+
+	if (chunk_is_used(chunk))
+		return 0;
+
+	/* Keep the page of this header and the page of the next one */
+	addr = PTR_ALIGN(chunk_data(chunk), PAGE_SIZE);
+	end = chunk_unmapped(chunk);
+	if (!end)
+		end = PTR_ALIGN_DOWN(next, PAGE_SIZE);
+
+end:
+	if (addr >= end)
+		return 0;
+
+	if (__end)
+		*__end = (u64)end;
+	if (__addr)
+		*__addr = (u64)addr;
+
+	return (end - addr) >> PAGE_SHIFT;
+}
+
+/*
+ * Unmap [@addr, @end) from @chunk. When @addr is the start of the allocator's
+ * range the chunk list is emptied. Otherwise, a chunk followed by another one
+ * and without unmapped region is first split at @end, unless the next chunk
+ * already starts there.
+ */
+static void hyp_allocator_reclaim_chunk(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+					void *addr, void *end)
+{
+	struct chunk_hdr *next;
+
+	WARN_ON(end <= addr);
+
+	/* We are about to destroy the last chunk in the allocator */
+	if (addr == allocator->start) {
+		allocator->tail = allocator->head = chunk = NULL;
+		goto unmap;
+	}
+
+	next = chunk_get_next(chunk);
+
+	/*
+	 * Split the reclaimed chunk at the next page boundary, this ensures no
+	 * orphan mapped region is created. Splitting at the page boundary is
+	 * always possible because chunks always leave a minimum distance to the
+	 * page start.
+	 *
+	 *  +--------------+
+	 *  |______________|
+	 *  |______________|<- Next chunk
+	 *  |_ _ _ __ _ _ _|
+	 *  |              |<- Page-aligned split
+	 *  +--------------+
+	 *  +--------------+
+	 *  |              |
+	 *  |              |<- Page reclaimed
+	 *  |              |
+	 *  |              |
+	 *  +--------------+
+	 *  +--------------+
+	 *  |              |
+	 *  |______________|
+	 *  |______________|<- Chunk to split
+	 *  |              |
+	 *  +--------------+
+	 */
+	if (next && !chunk_unmapped(chunk) && next != end)
+		WARN_ON(IS_ERR_OR_NULL(hyp_allocator_create_chunk(allocator, chunk, end, next,
+								  false)));
+unmap:
+	hyp_allocator_unmap(allocator, chunk, addr, end);
+}
+
+/*
+ * Return the best reclaimable chunk, that is the one with the biggest
+ * reclaimable region, the highest chunk in the list winning on a tie. The
+ * bounds of that region are stored in @addr and @end. Returns NULL, leaving
+ * @addr and @end untouched, when nothing can be reclaimed.
+ */
+static struct chunk_hdr *hyp_allocator_find_reclaimable_chunk(struct hyp_allocator *allocator,
+							      u64 *addr, u64 *end)
+{
+	struct chunk_hdr *chunk, *best_chunk = NULL;
+	unsigned long best_reclaimable = 0;
+
+	for (chunk = allocator->head; chunk; chunk = chunk_get_next(chunk)) {
+		u64 __addr, __end;
+		unsigned long reclaimable = hyp_allocator_chunk_reclaimable(allocator, chunk,
+									    &__addr, &__end);
+
+		/* Favour the top biggest chunks */
+		if (!reclaimable || reclaimable < best_reclaimable)
+			continue;
+
+		best_reclaimable = reclaimable;
+		best_chunk = chunk;
+		*addr = __addr;
+		*end = __end;
+	}
+
+	return best_chunk;
+}
+
+/*
+ * Move up to @target pages from the allocator's memcache to @host_mc. Each
+ * page is zeroed and flushed, then handed to the host with
+ * __pkvm_hyp_donate_host().
+ *
+ * Return: the number of pages moved.
+ */
+static unsigned long hyp_allocator_drain_memcache(struct hyp_allocator *allocator,
+						  struct kvm_hyp_memcache *host_mc,
+						  unsigned long target)
+{
+	struct kvm_hyp_memcache *mc = &allocator->mc;
+	unsigned long drained = 0;
+
+	while (target && mc->nr_pages) {
+		void *page = pop_hyp_memcache(mc, hyp_phys_to_virt);
+
+		/* Do not leak hypervisor data to the host */
+		memset(page, 0, PAGE_SIZE);
+		kvm_flush_dcache_to_poc(page, PAGE_SIZE);
+		/*
+		 * NOTE(review): if push_hyp_memcache() writes into the page,
+		 * that write happens after the flush above - confirm this is
+		 * intended.
+		 */
+		push_hyp_memcache(host_mc, page, hyp_virt_to_phys);
+		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(page), 1));
+
+		target--;
+		drained++;
+	}
+
+	return drained;
+}
+
+/*
+ * Give up to @target pages back to the host through @host_mc. Pages sitting in
+ * the allocator's memcache are used first, then chunks are unmapped, best
+ * reclaimable chunk first, until @target is met or nothing is left to reclaim.
+ */
+static void hyp_allocator_reclaim(struct hyp_allocator *allocator, struct kvm_hyp_memcache *host_mc,
+				  unsigned long target)
+{
+	if (!target)
+		return;
+
+	hyp_spin_lock(&allocator->lock);
+
+	target -= hyp_allocator_drain_memcache(allocator, host_mc, target);
+	if (!target)
+		goto unlock;
+
+	do {
+		unsigned long reclaimable;
+		struct chunk_hdr *chunk;
+		u64 addr, end;
+
+		chunk = hyp_allocator_find_reclaimable_chunk(allocator, &addr, &end);
+		if (!chunk)
+			break;
+
+		/* Reclaim from the top of the region, no more than requested */
+		reclaimable = min((end - addr) >> PAGE_SHIFT, target);
+		addr = end - (reclaimable << PAGE_SHIFT);
+		hyp_allocator_reclaim_chunk(allocator, chunk, (void *)addr, (void *)end);
+
+		target -= reclaimable;
+	} while (target);
+
+	/* The unmapped pages landed in the allocator's memcache, hand them over */
+	hyp_allocator_drain_memcache(allocator, host_mc, ULONG_MAX);
+
+unlock:
+	hyp_spin_unlock(&allocator->lock);
+}
+
+/*
+ * Return the number of pages that could be unmapped from the chunks of
+ * @allocator, as computed by hyp_allocator_chunk_reclaimable() for each chunk.
+ */
+static unsigned long hyp_allocator_reclaimable(struct hyp_allocator *allocator)
+{
+	struct chunk_hdr *chunk;
+	unsigned long total = 0;
+
+	hyp_spin_lock(&allocator->lock);
+
+	for (chunk = allocator->head; chunk; chunk = chunk_get_next(chunk))
+		total += hyp_allocator_chunk_reclaimable(allocator, chunk, NULL, NULL);
+
+	hyp_spin_unlock(&allocator->lock);
+
+	return total;
+}
+
+/*
+ * Refill the allocator's memcache from @host_mc, asking for as many pages as
+ * both memcaches hold together. Returns what refill_memcache() returns.
+ */
+static int hyp_allocator_topup(struct hyp_allocator *allocator,
+			       struct kvm_hyp_memcache *host_mc)
+{
+	struct kvm_hyp_memcache *alloc_mc = &allocator->mc;
+	int ret;
+
+	hyp_spin_lock(&allocator->lock);
+
+	ret = refill_memcache(alloc_mc,
+			      host_mc->nr_pages + alloc_mc->nr_pages,
+			      host_mc);
+
+	hyp_spin_unlock(&allocator->lock);
+
+	return ret;
+}
+
+/* Read and reset the topup_needed counter of the current CPU. */
+static u32 hyp_allocator_topup_needed(struct hyp_allocator *allocator)
+{
+	u32 *slot = this_cpu_ptr(allocator->topup_needed);
+	u32 pending = *slot;
+
+	*slot = 0;
+
+	return pending;
+}
+
+static int hyp_allocator_errno(struct hyp_allocator *allocator)
+{
+	int *errno = this_cpu_ptr(allocator->errno);
+	int ret = *errno;
+
+	*errno = 0;
+
+	return ret;
+}
+
+
+/*
+ * Reserve a private VA range of @size bytes, rounded up to a whole number of
+ * pages, for @allocator. No page is mapped at this point.
+ *
+ * Return: 0 on success, -EINVAL if the rounded size is zero or above U32_MAX,
+ * or the error returned by pkvm_alloc_private_va_range().
+ */
+static int hyp_allocator_init(struct hyp_allocator *allocator, size_t size)
+{
+	unsigned long va;
+	int ret;
+
+	size = PAGE_ALIGN(size);
+
+	/* constrained by chunk_hdr u32 types */
+	if (!size || size > U32_MAX)
+		return -EINVAL;
+
+	ret = pkvm_alloc_private_va_range(size, &va);
+	if (ret)
+		return ret;
+
+	allocator->start = (void *)va;
+	allocator->first_unmapped = allocator->start;
+	allocator->end = allocator->start + size;
+	hyp_spin_lock_init(&allocator->lock);
+
+	return 0;
+}
+
+/* Per-CPU storage behind the errno and topup_needed fields set below */
+static DEFINE_PER_CPU(int, __hyp_allocator_errno);
+static DEFINE_PER_CPU(u32, __hyp_allocator_topup_needed);
+
+/* The allocator instance that every hyp_alloc*() entry point operates on */
+static struct hyp_allocator hyp_allocator = {
+	.errno = &__hyp_allocator_errno,
+	.topup_needed = &__hyp_allocator_topup_needed,
+};
+
+/**
+ * hyp_alloc() - Allocate memory from the heap allocator
+ *
+ * @size:	Allocation size in bytes.
+ *
+ * The returned memory is zeroed. On failure, the reason can be read with
+ * hyp_alloc_errno() from the same CPU.
+ *
+ * Return: A pointer to the allocated memory on success, else NULL.
+ */
+void *hyp_alloc(size_t size)
+{
+	return hyp_allocator_alloc(&hyp_allocator, size);
+}
+
+/**
+ * hyp_free() - Free memory allocated with hyp_alloc()
+ *
+ * @data:	Address returned by the original hyp_alloc(). NULL is accepted
+ *		and ignored.
+ *
+ * The use of any other address than one returned by hyp_alloc() will cause a
+ * hypervisor panic.
+ */
+void hyp_free(void *data)
+{
+	hyp_allocator_free(&hyp_allocator, data);
+}
+
+/**
+ * hyp_alloc_errno() - Read the errno on allocation error
+ *
+ * Get the return code of the last allocation failure on the current CPU.
+ * Reading the code resets it to 0.
+ *
+ * Return: -ENOMEM if the allocator needs a refill from the host, -E2BIG if
+ * there is no VA space left, another negative error code (e.g. -EINVAL) for
+ * other failures, else 0.
+ */
+int hyp_alloc_errno(void)
+{
+	return hyp_allocator_errno(&hyp_allocator);
+}
+
+/**
+ * hyp_alloc_init() - Initialise the heap allocator
+ *
+ * @size:	Size in bytes of the VA range to reserve for the allocator. It is
+ *		rounded up to a whole number of pages.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int hyp_alloc_init(size_t size)
+{
+	return hyp_allocator_init(&hyp_allocator, size);
+}
+
+/**
+ * hyp_alloc_reclaim() - Give pages of the heap allocator back to the host
+ *
+ * @mc:		Memcache receiving the reclaimed pages.
+ * @target:	Maximum number of pages to reclaim.
+ */
+void hyp_alloc_reclaim(struct kvm_hyp_memcache *mc, unsigned long target)
+{
+	hyp_allocator_reclaim(&hyp_allocator, mc, target);
+}
+
+/**
+ * hyp_alloc_reclaimable() - Count the pages that can be unmapped from chunks
+ *
+ * Return: The number of pages that can be reclaimed from the chunks of the
+ * heap allocator.
+ */
+unsigned long hyp_alloc_reclaimable(void)
+{
+	return hyp_allocator_reclaimable(&hyp_allocator);
+}
+
+/**
+ * hyp_alloc_topup() - Refill the heap allocator's pool of pages
+ *
+ * @host_mc:	Memcache holding the pages provided by the host.
+ *
+ * Return: The value returned by refill_memcache().
+ */
+int hyp_alloc_topup(struct kvm_hyp_memcache *host_mc)
+{
+	return hyp_allocator_topup(&hyp_allocator, host_mc);
+}
+
+/**
+ * hyp_alloc_topup_needed() - Read the number of pages missing for a top-up
+ *
+ * Get the number of pages the last allocation that failed with -ENOMEM on the
+ * current CPU was short of. Reading the counter resets it to 0.
+ *
+ * Return: The number of pages needed, 0 if none.
+ */
+u32 hyp_alloc_topup_needed(void)
+{
+	return hyp_allocator_topup_needed(&hyp_allocator);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index d461981616d9..95ce7496e67f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -10,6 +10,7 @@
 #include <asm/kvm_pgtable.h>
 #include <asm/kvm_pkvm.h>
 
+#include <nvhe/alloc.h>
 #include <nvhe/early_alloc.h>
 #include <nvhe/ffa.h>
 #include <nvhe/gfp.h>
@@ -363,6 +364,10 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_bas
 	if (ret)
 		return ret;
 
+	ret = hyp_alloc_init(SZ_128M);
+	if (ret)
+		return ret;
+
 	update_nvhe_init_params();
 
 	/* Jump in the idmap page to switch to the new page-tables */
-- 
2.54.0.631.ge1b05301d1-goog




More information about the linux-arm-kernel mailing list