[PATCH v3 2/3] RISC-V: KVM: Allow splitting huge pages to arbitrary level
Anup Patel
anup at brainfault.org
Sun Mar 29 23:25:56 PDT 2026
On Mon, Mar 16, 2026 at 11:49 AM <wang.yechao255 at zte.com.cn> wrote:
>
> From: Wang Yechao <wang.yechao255 at zte.com.cn>
>
> This patch introduces the function kvm_riscv_gstage_split_huge().
> It splits the huge page covering a given guest physical address down
> to a specified target level (e.g., from 1G to 2M or 4K). The caller
> provides a memory cache for allocating any intermediate page tables
> and may request a TLB flush after the split.
>
> This functionality will be used by subsequent patches to split huge
> pages before handling the write-protection fault, or for other operations
> that require page-level granularity.
>
> Signed-off-by: Wang Yechao <wang.yechao255 at zte.com.cn>
> ---
> arch/riscv/include/asm/kvm_gstage.h | 4 ++
> arch/riscv/kvm/gstage.c | 69 +++++++++++++++++++++++++++++
> 2 files changed, 73 insertions(+)
>
> diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
> index 595e2183173e..373748c6745e 100644
> --- a/arch/riscv/include/asm/kvm_gstage.h
> +++ b/arch/riscv/include/asm/kvm_gstage.h
> @@ -53,6 +53,10 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
> bool page_rdonly, bool page_exec,
> struct kvm_gstage_mapping *out_map);
>
> +int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
> + struct kvm_mmu_memory_cache *pcache,
> + gpa_t addr, u32 target_level, bool flush);
> +
> enum kvm_riscv_gstage_op {
> GSTAGE_OP_NOP = 0, /* Nothing */
> GSTAGE_OP_CLEAR, /* Clear/Unmap */
> diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
> index d2001d508046..5356abb18932 100644
> --- a/arch/riscv/kvm/gstage.c
> +++ b/arch/riscv/kvm/gstage.c
> @@ -209,6 +209,75 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
> return kvm_riscv_gstage_set_pte(gstage, pcache, out_map);
> }
>
> +static inline unsigned long make_child_pte(unsigned long huge_pte, int index,
> + unsigned long child_page_size)
> +{
> + unsigned long child_pte = huge_pte;
> + unsigned long child_pfn_offset;
> +
> + /*
> + * The child_pte already has the base address of the huge page being
> + * split. So we just have to OR in the offset to the page at the next
> + * lower level for the given index.
> + */
> + child_pfn_offset = index * (child_page_size / PAGE_SIZE);
> + child_pte |= pte_val(pfn_pte(child_pfn_offset, __pgprot(0)));
> +
> + return child_pte;
> +}
> +
> +int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
> + struct kvm_mmu_memory_cache *pcache,
> + gpa_t addr, u32 target_level, bool flush)
> +{
> + u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
> + pte_t *next_ptep = (pte_t *)gstage->pgd;
> + pte_t *ptep;
> + unsigned long huge_pte, child_pte;
> + unsigned long child_page_size;
> + int i, ret;
Declare local variables in inverted pyramid fashion (longest declaration line first, shortest last).
> +
> + while(current_level > target_level) {
> + ptep = (pte_t *)&next_ptep[gstage_pte_index(addr, current_level)];
> +
> + if (!pte_val(ptep_get(ptep)))
> + break;
> +
> + if (!gstage_pte_leaf(ptep)) {
> + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> + current_level--;
> + continue;
> + }
> +
> + huge_pte = pte_val(ptep_get(ptep));
> +
> + ret = gstage_level_to_page_size(current_level - 1, &child_page_size);
> + if (ret)
> + return ret;
> +
> + if (!pcache)
> + return -ENOMEM;
This check needs to be outside the while-loop.
> + next_ptep = kvm_mmu_memory_cache_alloc(pcache);
> + if (!next_ptep)
> + return -ENOMEM;
> +
> + for (i = 0; i < PTRS_PER_PTE; i++) {
> + child_pte = make_child_pte(huge_pte, i, child_page_size);
> + set_pte((pte_t *)&next_ptep[i], __pte(child_pte));
> + }
> +
> + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
> + __pgprot(_PAGE_TABLE)));
> +
> + if (flush)
> + gstage_tlb_flush(gstage, current_level, addr);
> +
> + current_level--;
> + }
> +
> + return 0;
> +}
> +
> void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
> pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op)
> {
> --
> 2.27.0
The kvm_riscv_gstage_split_huge() function introduced here
is only used in PATCH3, so it is better to squash this patch into PATCH3.
Regards,
Anup
More information about the linux-riscv
mailing list