[PATCH v3 41/41] KVM: arm/arm64: Avoid VGICv3 save/restore on VHE with no IRQs
Tomasz Nowicki
tnowicki at caviumnetworks.com
Mon Feb 5 05:29:50 PST 2018
Hi Christoffer,
On 12.01.2018 13:07, Christoffer Dall wrote:
> We can finally get rid of all calls to the VGICv3 save/restore
> functions when the AP lists are empty on VHE systems. This requires
> carefully factoring out trap configuration from saving and restoring
> state, and carefully choosing what to do on the VHE and non-VHE paths.
>
> One of the challenges is that we cannot save/restore the VMCR lazily,
> because when emulating GICv2-on-GICv3 we can only write the VMCR while
> ICC_SRE_EL1.SRE is cleared; otherwise all Group-0 interrupts end up
> being delivered as FIQ.
>
> To solve this problem, while still keeping the fast path of exiting a
> VM with no interrupts pending fast (which also optimizes the latency
> for actually delivering virtual interrupts that come from physical
> interrupts), we orchestrate a dance: traps are only
> activated/deactivated in vgic load/put on VHE systems (which can have
> ICC_SRE_EL1.SRE cleared while running in the host), and the
> configuration is done on every round trip on non-VHE systems.
>
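The SRE/VMCR ordering took me a moment to untangle, so restating it
for myself as a stand-alone toy in plain C, with variables standing in
for the registers (names and values made up, nothing below is from the
patch):

/*
 * Toy restatement of the GICv2-on-GICv3 ordering rule: clear and
 * synchronize ICC_SRE_EL1.SRE before touching ICH_VMCR_EL2, or
 * Group-0 interrupts end up taken as FIQ. Not kernel code.
 */
#include <stdio.h>

static int icc_sre_el1;             /* SRE=1 would force VFIQEn to 1 */
static unsigned long ich_vmcr_el2;

static void isb(void) { }           /* stands in for the real barrier */

static void write_guest_vmcr(unsigned long vmcr)
{
        icc_sre_el1 = 0;            /* SRE must be clear first ...   */
        isb();                      /* ... and synchronized ...      */
        ich_vmcr_el2 = vmcr;        /* ... before VMCR is written    */
        printf("VMCR=%#lx written with SRE=%d\n",
               ich_vmcr_el2, icc_sre_el1);
}

int main(void)
{
        write_guest_vmcr(0x4c0000); /* arbitrary example value */
        return 0;
}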
> Signed-off-by: Christoffer Dall <christoffer.dall at linaro.org>
> ---
> arch/arm/include/asm/kvm_hyp.h | 2 +
> arch/arm/kvm/hyp/switch.c | 8 ++-
> arch/arm64/include/asm/kvm_hyp.h | 2 +
> arch/arm64/kvm/hyp/switch.c | 8 ++-
> virt/kvm/arm/hyp/vgic-v3-sr.c | 121 +++++++++++++++++++++++++--------------
> virt/kvm/arm/vgic/vgic-v3.c | 6 ++
> virt/kvm/arm/vgic/vgic.c | 7 +--
> 7 files changed, 103 insertions(+), 51 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
> index b3dd4f4304f5..d01676e5b816 100644
> --- a/arch/arm/include/asm/kvm_hyp.h
> +++ b/arch/arm/include/asm/kvm_hyp.h
> @@ -109,6 +109,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
>
> void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
> void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
> +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
> +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
> void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
> void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
> index 214187446e63..337c76230885 100644
> --- a/arch/arm/kvm/hyp/switch.c
> +++ b/arch/arm/kvm/hyp/switch.c
> @@ -89,14 +89,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
>
> static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
> {
> - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> __vgic_v3_save_state(vcpu);
> + __vgic_v3_deactivate_traps(vcpu);
> + }
> }
>
> static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
> {
> - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> + __vgic_v3_activate_traps(vcpu);
> __vgic_v3_restore_state(vcpu);
> + }
> }
>
> static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index 693d29f0036d..af7cf0faf58f 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -125,6 +125,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
>
> void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
> void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
> +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
> +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
> void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
> void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
> int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 9187afca181a..901a111fb509 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -194,14 +194,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
>
> static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
> {
> - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> __vgic_v3_save_state(vcpu);
> + __vgic_v3_deactivate_traps(vcpu);
> + }
> }
>
> static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
> {
> - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> + __vgic_v3_activate_traps(vcpu);
> __vgic_v3_restore_state(vcpu);
> + }
> }
>
> static bool __hyp_text __true_value(void)
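So on non-VHE the trap configuration is now redone on every round
trip. Just to restate the two wrappers above as the effective
per-world-switch sequence (with the guest run in between):

        __vgic_v3_activate_traps(vcpu);    /* entry: SRE/VMCR first ... */
        __vgic_v3_restore_state(vcpu);     /* ... then the LRs          */
        /* ... run the guest ... */
        __vgic_v3_save_state(vcpu);        /* exit: LRs first ...       */
        __vgic_v3_deactivate_traps(vcpu);  /* ... then SRE back for the host */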
> diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
> index 811b42c8441d..e5f3bc7582b6 100644
> --- a/virt/kvm/arm/hyp/vgic-v3-sr.c
> +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
> @@ -208,15 +208,15 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
> {
> struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
> u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
> - u64 val;
>
> /*
> * Make sure stores to the GIC via the memory mapped interface
> - * are now visible to the system register interface.
> + * are now visible to the system register interface when reading the
> + * LRs, and when reading back the VMCR on non-VHE systems.
> */
> - if (!cpu_if->vgic_sre) {
> - dsb(st);
> - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
> + if (used_lrs || !has_vhe()) {
> + if (!cpu_if->vgic_sre)
> + dsb(st);
> }
>
> if (used_lrs) {
> @@ -225,7 +225,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>
> elrsr = read_gicreg(ICH_ELSR_EL2);
>
> - write_gicreg(0, ICH_HCR_EL2);
> + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
>
> for (i = 0; i < used_lrs; i++) {
> if (elrsr & (1 << i))
> @@ -235,19 +235,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>
> __gic_v3_set_lr(0, i);
> }
> - } else {
> - if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
> - cpu_if->its_vpe.its_vm)
> - write_gicreg(0, ICH_HCR_EL2);
> - }
> -
> - val = read_gicreg(ICC_SRE_EL2);
> - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
> -
> - if (!cpu_if->vgic_sre) {
> - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
> - isb();
> - write_gicreg(1, ICC_SRE_EL1);
> }
> }
>
> @@ -257,6 +244,31 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
> u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
> int i;
>
> + if (used_lrs) {
> + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> +
> + for (i = 0; i < used_lrs; i++)
> + __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
> + }
> +
> + /*
> + * Ensure that writes to the LRs, and on non-VHE systems that the
> + * write to the VMCR in __vgic_v3_activate_traps(), have reached
> + * the (re)distributors. This ensures the guest will read the
> + * correct values from the memory-mapped interface.
> + */
> + if (used_lrs || !has_vhe()) {
> + if (!cpu_if->vgic_sre) {
> + isb();
> + dsb(sy);
> + }
> + }
> +}
> +
> +void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
> +
> /*
> * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
> * Group0 interrupt (as generated in GICv2 mode) to be
> @@ -264,47 +276,70 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
> * consequences. So we must make sure that ICC_SRE_EL1 has
> * been actually programmed with the value we want before
> * starting to mess with the rest of the GIC, and VMCR_EL2 in
> - * particular.
> + * particular. This logic must be called before
> + * __vgic_v3_restore_state().
> */
> if (!cpu_if->vgic_sre) {
> write_gicreg(0, ICC_SRE_EL1);
> isb();
> write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
> - }
>
> - if (used_lrs) {
> - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
>
> - for (i = 0; i < used_lrs; i++)
> - __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
> - } else {
> - /*
> - * If we need to trap system registers, we must write
> - * ICH_HCR_EL2 anyway, even if no interrupts are being
> - * injected. Same thing if GICv4 is used, as VLPI
> - * delivery is gated by ICH_HCR_EL2.En.
> - */
> - if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
> - cpu_if->its_vpe.its_vm)
> - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> + if (has_vhe()) {
> + /*
> + * Ensure that the write to the VMCR will have reached
> + * the (re)distributors. This ensures the guest will
> + * read the correct values from the memory-mapped
> + * interface.
> + */
> + isb();
> + dsb(sy);
> + }
> }
>
> /*
> - * Ensures that the above will have reached the
> - * (re)distributors. This ensure the guest will read the
> - * correct values from the memory-mapped interface.
> + * Prevent the guest from touching the GIC system registers if
> + * SRE isn't enabled for GICv3 emulation.
> + */
> + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
> + ICC_SRE_EL2);
> +
> + /*
> + * If we need to trap system registers, we must write
> + * ICH_HCR_EL2 anyway, even if no interrupts are being
> + * injected.
> */
> + if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
> + cpu_if->its_vpe.its_vm)
> + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> +}
> +
> +void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
> + u64 val;
> +
> if (!cpu_if->vgic_sre) {
> + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
> + }
> +
> +
Nit: please remove one of the two blank lines here.
> + val = read_gicreg(ICC_SRE_EL2);
> + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
> +
> + if (!cpu_if->vgic_sre) {
> + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
> isb();
> - dsb(sy);
> + write_gicreg(1, ICC_SRE_EL1);
> }
>
> /*
> - * Prevent the guest from touching the GIC system registers if
> - * SRE isn't enabled for GICv3 emulation.
> + * If we were trapping system registers, we enabled the VGIC even if
> + * no interrupts were being injected, and we disable it again here.
> */
> - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
> - ICC_SRE_EL2);
> + if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
> + cpu_if->its_vpe.its_vm)
> + write_gicreg(0, ICH_HCR_EL2);
> }
>
> void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 4bafcd1e6bb8..4200657694f0 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -590,6 +590,9 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
> kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
>
> kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
> +
> + if (has_vhe())
> + __vgic_v3_activate_traps(vcpu);
> }
>
> void vgic_v3_put(struct kvm_vcpu *vcpu)
> @@ -600,4 +603,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
> cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
>
> kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
> +
> + if (has_vhe())
> + __vgic_v3_deactivate_traps(vcpu);
> }
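And on VHE the trap state now spans the whole vcpu-scheduled section,
which is the point of the series. The timeline as I read it (my
summary, not code from the patch):

        vcpu_load()
          vgic_v3_load()
            __vgic_v3_activate_traps()    /* traps configured once      */
        /* ... any number of guest entries/exits; the VGIC is not
           touched on exit/entry while the AP list is empty ... */
        vcpu_put()
          vgic_v3_put()
            __vgic_v3_deactivate_traps()  /* host configuration restored */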
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index d0a19a8c196a..0d95d7b55567 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -763,14 +763,14 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> {
> struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>
> - vgic_save_state(vcpu);
> -
> WARN_ON(vgic_v4_sync_hwstate(vcpu));
>
> /* An empty ap_list_head implies used_lrs == 0 */
> if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
> return;
>
> + vgic_save_state(vcpu);
> +
> if (vgic_cpu->used_lrs)
> vgic_fold_lr_state(vcpu);
> vgic_prune_ap_list(vcpu);
> @@ -799,7 +799,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> * this.
> */
> if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
> - goto out;
> + return;
>
> DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
>
> @@ -807,7 +807,6 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> vgic_flush_lr_state(vcpu);
> spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
>
> -out:
> vgic_restore_state(vcpu);
> }
>
>
The idea makes a lot of sense to me.
However, __vgic_v3_save_state() now depends indirectly on
__vgic_v3_deactivate_traps(): it issues the dsb(st) for the non-VHE,
!cpu_if->vgic_sre case that the later VMCR read relies on. That's fine,
since the comment explains it well, but we will have to be careful with
any future changes here. The only alternative I can see is a separate
set of functions for the non-VHE and VHE cases, but that would mean
massive code duplication... so I think we have to live with this;
performance is the top priority.
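To spell out the dependency I mean in code form, for non-VHE with
!cpu_if->vgic_sre the contract is effectively:

        __vgic_v3_save_state(vcpu);        /* issues the dsb(st) ...    */
        __vgic_v3_deactivate_traps(vcpu);  /* ... that this VMCR read
                                            * relies on                 */

Nothing ties the two together except the call order in the
__vgic_save_state() wrapper, hence my remark about future changes.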
For the whole series:
Reviewed-by: Tomasz Nowicki <Tomasz.Nowicki at caviumnetworks.com>
Thanks,
Tomasz