[PATCH 10/12] KVM: arm64: nv: Add SW walker for AT S1 emulation
Alexandru Elisei
alexandru.elisei at arm.com
Thu Jul 25 07:16:12 PDT 2024
Hi Marc,
On Mon, Jul 08, 2024 at 05:57:58PM +0100, Marc Zyngier wrote:
> In order to plug the brokenness of our current AT implementation,
> we need a SW walker that is going to... err.. walk the S1 tables
> and tell us what it finds.
>
> Of course, it builds on top of our S2 walker and shares similar
> concepts. The beauty of it is that since it uses kvm_read_guest(),
> it is able to bring back pages that have been otherwise evicted.
>
> This is then plugged in the two AT S1 emulation functions as
> a "slow path" fallback. I'm not sure it is that slow, but hey.
>
> Signed-off-by: Marc Zyngier <maz at kernel.org>
> [..]
> +static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> +{
> + bool perm_fail, ur, uw, ux, pr, pw, pan;
> + struct s1_walk_result wr = {};
> + struct s1_walk_info wi = {};
> + int ret, idx, el;
> +
> + /*
> + * We only get here from guest EL2, so the translation regime
> + * AT applies to is solely defined by {E2H,TGE}.
> + */
> + el = (vcpu_el2_e2h_is_set(vcpu) &&
> + vcpu_el2_tge_is_set(vcpu)) ? 2 : 1;
> +
> + ret = setup_s1_walk(vcpu, &wi, &wr, vaddr, el);
> + if (ret)
> + goto compute_par;
> +
> + if (wr.level == S1_MMU_DISABLED)
> + goto compute_par;
> +
> + idx = srcu_read_lock(&vcpu->kvm->srcu);
> +
> + ret = walk_s1(vcpu, &wi, &wr, vaddr);
> +
> + srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +
> + if (ret)
> + goto compute_par;
> +
> + /* FIXME: revisit when adding indirect permission support */
> + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3) &&
> + !wi.nvhe) {
> + u64 sctlr;
> +
> + if (el == 1)
> + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
> + else
> + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
> +
> + ux = (sctlr & SCTLR_EL1_EPAN) && !(wr.desc & PTE_UXN);
> + } else {
> + ux = false;
> + }
> +
> + pw = !(wr.desc & PTE_RDONLY);
> +
> + if (wi.nvhe) {
> + ur = uw = false;
> + pr = true;
> + } else {
> + if (wr.desc & PTE_USER) {
> + ur = pr = true;
> + uw = pw;
> + } else {
> + ur = uw = false;
> + pr = true;
> + }
> + }
> +
> + /* Apply the Hierarchical Permission madness */
> + if (wi.nvhe) {
> + wr.APTable &= BIT(1);
> + wr.PXNTable = wr.UXNTable;
> + }
> +
> + ur &= !(wr.APTable & BIT(0));
> + uw &= !(wr.APTable != 0);
> + ux &= !wr.UXNTable;
> +
> + pw &= !(wr.APTable & BIT(1));
> +
> + pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
> +
> + perm_fail = false;
> +
> + switch (op) {
> + case OP_AT_S1E1RP:
> + perm_fail |= pan && (ur || uw || ux);
> + fallthrough;
> + case OP_AT_S1E1R:
> + case OP_AT_S1E2R:
> + perm_fail |= !pr;
> + break;
> + case OP_AT_S1E1WP:
> + perm_fail |= pan && (ur || uw || ux);
> + fallthrough;
> + case OP_AT_S1E1W:
> + case OP_AT_S1E2W:
> + perm_fail |= !pw;
> + break;
> + case OP_AT_S1E0R:
> + perm_fail |= !ur;
> + break;
> + case OP_AT_S1E0W:
> + perm_fail |= !uw;
> + break;
> + default:
> + BUG();
> + }
> +
> + if (perm_fail) {
> + struct s1_walk_result tmp;
I was wondering if you would consider initializing 'tmp' to the empty
struct here. That would be consistent with the initialization of 'wr' in
the !perm_fail case, and I think it would make the code more robust with
respect to future changes to compute_par_s1() and the fields it accesses.
Thanks,
Alex
> +
> + tmp.failed = true;
> + tmp.fst = ESR_ELx_FSC_PERM | wr.level;
> + tmp.s2 = false;
> + tmp.ptw = false;
> +
> + wr = tmp;
> + }
> +
> +compute_par:
> + return compute_par_s1(vcpu, &wr);
> +}