[PATCH 8/8] KVM: arm64: Avoid accessing ICH registers

Christoffer Dall christoffer.dall at linaro.org
Wed Feb 10 04:45:13 PST 2016


On Mon, Feb 08, 2016 at 11:40:22AM +0000, Marc Zyngier wrote:
> Just like on GICv2, we're a bit hammer-happy with GICv3, and access
> them more often than we should.
> 
> Adopt a policy similar to what we do for GICv2, only save/restoring
> the minimal set of registers. As we don't access the registers
> linearly anymore (we may skip some), the convoluted accessors become
> slightly simpler, and we can drop the ugly indexing macro that
> tended to confuse the reviewers.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> ---
>  arch/arm64/kvm/hyp/vgic-v3-sr.c | 288 ++++++++++++++++++++++++----------------
>  include/kvm/arm_vgic.h          |   6 -
>  virt/kvm/arm/vgic-v3.c          |   4 +-
>  3 files changed, 176 insertions(+), 122 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
> index 9142e082..d3813f5 100644
> --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
> +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
> @@ -39,12 +39,104 @@
>  		asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
>  	} while (0)
>  
> -/* vcpu is already in the HYP VA space */
> +static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
> +{
> +	switch (lr & 0xf) {
> +	case 0:
> +		return read_gicreg(ICH_LR0_EL2);
> +	case 1:
> +		return read_gicreg(ICH_LR1_EL2);
> +	case 2:
> +		return read_gicreg(ICH_LR2_EL2);
> +	case 3:
> +		return read_gicreg(ICH_LR3_EL2);
> +	case 4:
> +		return read_gicreg(ICH_LR4_EL2);
> +	case 5:
> +		return read_gicreg(ICH_LR5_EL2);
> +	case 6:
> +		return read_gicreg(ICH_LR6_EL2);
> +	case 7:
> +		return read_gicreg(ICH_LR7_EL2);
> +	case 8:
> +		return read_gicreg(ICH_LR8_EL2);
> +	case 9:
> +		return read_gicreg(ICH_LR9_EL2);
> +	case 10:
> +		return read_gicreg(ICH_LR10_EL2);
> +	case 11:
> +		return read_gicreg(ICH_LR11_EL2);
> +	case 12:
> +		return read_gicreg(ICH_LR12_EL2);
> +	case 13:
> +		return read_gicreg(ICH_LR13_EL2);
> +	case 14:
> +		return read_gicreg(ICH_LR14_EL2);
> +	case 15:
> +		return read_gicreg(ICH_LR15_EL2);
> +	}
> +
> +	unreachable();
> +}
> +
> +static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
> +{
> +	switch (lr & 0xf) {
> +	case 0:
> +		write_gicreg(val, ICH_LR0_EL2);
> +		break;
> +	case 1:
> +		write_gicreg(val, ICH_LR1_EL2);
> +		break;
> +	case 2:
> +		write_gicreg(val, ICH_LR2_EL2);
> +		break;
> +	case 3:
> +		write_gicreg(val, ICH_LR3_EL2);
> +		break;
> +	case 4:
> +		write_gicreg(val, ICH_LR4_EL2);
> +		break;
> +	case 5:
> +		write_gicreg(val, ICH_LR5_EL2);
> +		break;
> +	case 6:
> +		write_gicreg(val, ICH_LR6_EL2);
> +		break;
> +	case 7:
> +		write_gicreg(val, ICH_LR7_EL2);
> +		break;
> +	case 8:
> +		write_gicreg(val, ICH_LR8_EL2);
> +		break;
> +	case 9:
> +		write_gicreg(val, ICH_LR9_EL2);
> +		break;
> +	case 10:
> +		write_gicreg(val, ICH_LR10_EL2);
> +		break;
> +	case 11:
> +		write_gicreg(val, ICH_LR11_EL2);
> +		break;
> +	case 12:
> +		write_gicreg(val, ICH_LR12_EL2);
> +		break;
> +	case 13:
> +		write_gicreg(val, ICH_LR13_EL2);
> +		break;
> +	case 14:
> +		write_gicreg(val, ICH_LR14_EL2);
> +		break;
> +	case 15:
> +		write_gicreg(val, ICH_LR15_EL2);
> +		break;
> +	}
> +}
> +
>  void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  	u64 val;
> -	u32 max_lr_idx, nr_pri_bits;
>  
>  	/*
>  	 * Make sure stores to the GIC via the memory mapped interface
> @@ -53,68 +145,50 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>  	dsb(st);
>  
>  	cpu_if->vgic_vmcr  = read_gicreg(ICH_VMCR_EL2);
> -	cpu_if->vgic_misr  = read_gicreg(ICH_MISR_EL2);
> -	cpu_if->vgic_eisr  = read_gicreg(ICH_EISR_EL2);
> -	cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
>  
> -	write_gicreg(0, ICH_HCR_EL2);
> -	val = read_gicreg(ICH_VTR_EL2);
> -	max_lr_idx = vtr_to_max_lr_idx(val);
> -	nr_pri_bits = vtr_to_nr_pri_bits(val);
> +	if (vcpu->arch.vgic_cpu.live_lrs) {
> +		int i;
> +		u32 max_lr_idx, nr_pri_bits;
>  
> -	switch (max_lr_idx) {
> -	case 15:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
> -	case 14:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
> -	case 13:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
> -	case 12:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
> -	case 11:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
> -	case 10:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
> -	case 9:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
> -	case 8:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
> -	case 7:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
> -	case 6:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
> -	case 5:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
> -	case 4:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
> -	case 3:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
> -	case 2:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
> -	case 1:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
> -	case 0:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
> -	}
> +		cpu_if->vgic_misr  = read_gicreg(ICH_MISR_EL2);
> +		cpu_if->vgic_eisr  = read_gicreg(ICH_EISR_EL2);
> +		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
>  
> -	switch (nr_pri_bits) {
> -	case 7:
> -		cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
> -		cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
> -	case 6:
> -		cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
> -	default:
> -		cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
> -	}
> +		write_gicreg(0, ICH_HCR_EL2);
> +		val = read_gicreg(ICH_VTR_EL2);

can't we cache the read of ICH_VTR_EL2 then?

> +		max_lr_idx = vtr_to_max_lr_idx(val);
> +		nr_pri_bits = vtr_to_nr_pri_bits(val);
>  
> -	switch (nr_pri_bits) {
> -	case 7:
> -		cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
> -		cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
> -	case 6:
> -		cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
> -	default:
> -		cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
> +		for (i = 0; i <= max_lr_idx; i++) {
> +			if (vcpu->arch.vgic_cpu.live_lrs & (1UL << i))
> +				cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
> +		}
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
> +			cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
> +		case 6:
> +			cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
> +		default:
> +			cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
> +		}
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
> +			cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
> +		case 6:
> +			cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
> +		default:
> +			cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
> +		}
> +
> +		vcpu->arch.vgic_cpu.live_lrs = 0;
> +	} else {
> +		cpu_if->vgic_misr  = 0;
> +		cpu_if->vgic_eisr  = 0;
> +		cpu_if->vgic_elrsr = 0xffff;
>  	}
>  
>  	val = read_gicreg(ICC_SRE_EL2);
> @@ -128,6 +202,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
>  	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  	u64 val;
>  	u32 max_lr_idx, nr_pri_bits;
> +	u16 live_lrs = 0;
> +	int i;
>  
>  	/*
>  	 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
> @@ -140,68 +216,51 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
>  	write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
>  	isb();
>  
> -	write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> -	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
> -
>  	val = read_gicreg(ICH_VTR_EL2);

same as above

>  	max_lr_idx = vtr_to_max_lr_idx(val);
>  	nr_pri_bits = vtr_to_nr_pri_bits(val);
>  
> -	switch (nr_pri_bits) {
> -	case 7:
> -		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
> -		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
> -	case 6:
> -		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
> -	default:
> -		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
> -	}	 	                           
> -		 	                           
> -	switch (nr_pri_bits) {
> -	case 7:
> -		 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
> -		 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
> -	case 6:
> -		 write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
> -	default:
> -		 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
> +	for (i = 0; i <= max_lr_idx; i++) {
> +		if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
> +			live_lrs |= (1 << i);
>  	}
>  
> -	switch (max_lr_idx) {
> -	case 15:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
> -	case 14:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
> -	case 13:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
> -	case 12:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
> -	case 11:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
> -	case 10:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
> -	case 9:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
> -	case 8:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
> -	case 7:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
> -	case 6:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
> -	case 5:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
> -	case 4:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
> -	case 3:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
> -	case 2:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
> -	case 1:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
> -	case 0:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
> +	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);

also here you may be able to optimize and cache the last seen in-ardware VMCR.

> +
> +	if (live_lrs) {
> +		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
> +			write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
> +		case 6:
> +			write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
> +		default:
> +			write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
> +		}
> +		 	                           

nit: trailing white space

> +		switch (nr_pri_bits) {
> +		case 7:
> +			write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
> +			write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
> +		case 6:
> +			write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
> +		default:
> +			write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
> +		}
> +
> +		for (i = 0; i <= max_lr_idx; i++) {
> +			val = 0;
> +
> +			if (live_lrs & (1 << i))
> +				val = cpu_if->vgic_lr[i];
> +
> +			__gic_v3_set_lr(val, i);
> +		}
>  	}
>  
> +
>  	/*
>  	 * Ensures that the above will have reached the
>  	 * (re)distributors. This ensure the guest will read the
> @@ -209,6 +268,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
>  	 */
>  	isb();
>  	dsb(sy);
> +	vcpu->arch.vgic_cpu.live_lrs = live_lrs;
>  
>  	/*
>  	 * Prevent the guest from touching the GIC system registers if
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index f473fd6..281caf8 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -279,12 +279,6 @@ struct vgic_v2_cpu_if {
>  	u32		vgic_lr[VGIC_V2_MAX_LRS];
>  };
>  
> -/*
> - * LRs are stored in reverse order in memory. make sure we index them
> - * correctly.
> - */
> -#define VGIC_V3_LR_INDEX(lr)		(VGIC_V3_MAX_LRS - 1 - lr)
> -
>  struct vgic_v3_cpu_if {
>  #ifdef CONFIG_KVM_ARM_VGIC_V3
>  	u32		vgic_hcr;
> diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
> index 453eafd..11b5ff6 100644
> --- a/virt/kvm/arm/vgic-v3.c
> +++ b/virt/kvm/arm/vgic-v3.c
> @@ -42,7 +42,7 @@ static u32 ich_vtr_el2;
>  static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
>  {
>  	struct vgic_lr lr_desc;
> -	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)];
> +	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
>  
>  	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
>  		lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
> @@ -106,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
>  		lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
>  	}
>  
> -	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val;
> +	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val;
>  
>  	if (!(lr_desc.state & LR_STATE_MASK))
>  		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
> -- 
> 2.1.4
> 

Ignoring potential further optimizations:

Reviewed-by: Christoffer Dall <christoffer.dall at linaro.org>



More information about the linux-arm-kernel mailing list