[PATCH 5/5] KVM: riscv: Fast-path dirty logging write faults
Jinyu Tang
tjytimi at 163.com
Sun May 17 08:34:27 PDT 2026
With dirty logging enabled, guest writes often fault on an existing 4K
G-stage leaf that was write-protected only for dirty tracking. The slow
path still performs the full fault handling flow and takes mmu_lock for
write, even though the page-table shape does not change.
x86 handles the analogous case in its fast page fault path by atomically
making a writable SPTE writable again when the fault is only a
write-protection fault. Add the same style of fast path for RISC-V. If a
write fault hits an existing 4K leaf in a writable dirty-log memslot,
mark the page dirty and atomically set the PTE writable and dirty under
the read side of mmu_lock.
The dirty bitmap is updated before the PTE becomes writable again. The
PTE D bit is also set so systems that trap on a clear D bit do not fall
back to the slow path for a writable but clean PTE.
Signed-off-by: Jinyu Tang <tjytimi at 163.com>
---
arch/riscv/kvm/mmu.c | 75 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 48f16e52f..980059e09 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -419,6 +419,77 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
return PAGE_SIZE;
}
+static bool kvm_riscv_mmu_dirty_log_write_fault_fast(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ gpa_t gpa,
+ struct kvm_gstage_mapping *out_map)
+{
+ struct kvm_gstage gstage;
+ unsigned long mmu_seq;
+ pte_t old_pte, new_pte;
+ pte_t *ptep;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ u32 ptep_level;
+ bool dirty_marked = false;
+ bool ret;
+
+ kvm_riscv_gstage_init(&gstage, kvm);
+ mmu_seq = kvm->mmu_invalidate_seq;
+
+ read_lock(&kvm->mmu_lock);
+
+ if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
+ ret = false;
+ goto out_unlock;
+ }
+
+ if (!kvm_riscv_gstage_get_leaf(&gstage, gpa, &ptep, &ptep_level) ||
+ ptep_level) {
+ ret = false;
+ goto out_unlock;
+ }
+
+ for (;;) {
+ old_pte = ptep_get(ptep);
+ if (!(pte_val(old_pte) & _PAGE_LEAF)) {
+ ret = false;
+ break;
+ }
+
+ if (!dirty_marked) {
+ mark_page_dirty_in_slot(kvm, memslot, gfn);
+ dirty_marked = true;
+ }
+
+ if ((pte_val(old_pte) & (_PAGE_WRITE | _PAGE_DIRTY)) ==
+ (_PAGE_WRITE | _PAGE_DIRTY)) {
+ new_pte = old_pte;
+ ret = true;
+ break;
+ }
+
+ new_pte = pte_mkdirty(pte_mkwrite_novma(old_pte));
+
+ if (kvm_riscv_gstage_try_update_pte(&gstage, ptep_level, gpa,
+ ptep, old_pte, new_pte)) {
+ ret = true;
+ break;
+ }
+ cpu_relax();
+ }
+
+out_unlock:
+ read_unlock(&kvm->mmu_lock);
+
+ if (ret) {
+ out_map->addr = gpa & PAGE_MASK;
+ out_map->level = 0;
+ out_map->pte = new_pte;
+ }
+
+ return ret;
+}
+
int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write,
struct kvm_gstage_mapping *out_map)
@@ -442,6 +513,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
/* Setup initial state of output mapping */
memset(out_map, 0, sizeof(*out_map));
+ if (is_write && logging &&
+ kvm_riscv_mmu_dirty_log_write_fault_fast(kvm, memslot, gpa, out_map))
+ return 0;
+
/* We need minimum second+third level pages */
ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels);
if (ret) {
--
2.43.0
More information about the linux-riscv
mailing list