[PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
Marc Zyngier
marc.zyngier at arm.com
Thu Sep 3 10:06:39 PDT 2015
On 30/08/15 14:54, Christoffer Dall wrote:
> The arch timer currently uses edge-triggered semantics in the sense that
> the line is never sampled by the vgic and lowering the line from the
> timer to the vgic doesn't have any affect on the pending state of
> virtual interrupts in the vgic. This means that we do not support a
> guest with the otherwise valid behavior of (1) disable interrupts (2)
> enable the timer (3) disable the timer (4) enable interrupts. Such a
> guest would validly not expect to see any interrupts on real hardware,
> but will see interrupts on KVM.
>
> This patches fixes this shortcoming through the following series of
> changes.
>
> First, we change the flow of the timer/vgic sync/flush operations. Now
> the timer is always flushed/synced before the vgic, because the vgic
> samples the state of the timer output. This has the implication that we
> move the timer operations in to non-preempible sections, but that is
> fine after the previous commit getting rid of hrtimer schedules on every
> entry/exit.
>
> Second, we change the internal behavior of the timer, letting the timer
> keep track of its previous output state, and only lower/raise the line
> to the vgic when the state changes. Note that in theory this could have
> been accomplished more simply by signalling the vgic every time the
> state *potentially* changed, but we don't want to be hitting the vgic
> more often than necessary.
>
> Third, we get rid of the use of the map->active field in the vgic and
> instead simply set the interrupt as active on the physical distributor
> whenever we signal a mapped interrupt to the guest, and we reset the
> active state when we sync back the HW state from the vgic.
>
> Fourth, and finally, we now initialize the timer PPIs (and all the other
> unused PPIs for now), to be level-triggered, and modify the sync code to
> sample the line state on HW sync and re-inject a new interrupt if it is
> still pending at that time.
>
> Signed-off-by: Christoffer Dall <christoffer.dall at linaro.org>
> ---
> arch/arm/kvm/arm.c | 11 +++++--
> include/kvm/arm_arch_timer.h | 2 +-
> include/kvm/arm_vgic.h | 3 --
> virt/kvm/arm/arch_timer.c | 68 +++++++++++++++++++++++++++++++-------------
> virt/kvm/arm/vgic.c | 67 +++++++++++++++----------------------------
> 5 files changed, 81 insertions(+), 70 deletions(-)
>
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index bdf8871..102a4aa 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>
> if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
> local_irq_enable();
> + kvm_timer_sync_hwstate(vcpu);
> kvm_vgic_sync_hwstate(vcpu);
> preempt_enable();
> - kvm_timer_sync_hwstate(vcpu);
> continue;
> }
>
> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> kvm_guest_exit();
> trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
>
> + /*
> + * We must sync the timer state before the vgic state so that
> + * the vgic can properly sample the updated state of the
> + * interrupt line.
> + */
> + kvm_timer_sync_hwstate(vcpu);
> +
> kvm_vgic_sync_hwstate(vcpu);
>
> preempt_enable();
>
> - kvm_timer_sync_hwstate(vcpu);
> -
> ret = handle_exit(vcpu, run, ret);
> }
>
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index ef14cc1..1800227 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -51,7 +51,7 @@ struct arch_timer_cpu {
> bool armed;
>
> /* Timer IRQ */
> - const struct kvm_irq_level *irq;
> + struct kvm_irq_level irq;
>
> /* VGIC mapping */
> struct irq_phys_map *map;
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index d901f1a..99011a0 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -163,7 +163,6 @@ struct irq_phys_map {
> u32 virt_irq;
> u32 phys_irq;
> u32 irq;
> - bool active;
> };
>
> struct irq_phys_map_entry {
> @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> int virt_irq, int irq);
> int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
> -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
> -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
>
> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
> #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 018f3d6..747302f 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
> }
> }
>
> -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
> -{
> - int ret;
> - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> -
> - kvm_vgic_set_phys_irq_active(timer->map, true);
> - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
> - timer->map,
> - timer->irq->level);
> - WARN_ON(ret);
> -}
> -
> static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
> {
> struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
> @@ -116,8 +104,7 @@ static bool kvm_timer_irq_enabled(struct kvm_vcpu *vcpu)
> struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>
> return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
> - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
> - !kvm_vgic_get_phys_irq_active(timer->map);
> + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
> }
>
> bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
> @@ -134,6 +121,45 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
> return cval <= now;
> }
>
> +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu)
> +{
> + int ret;
> + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> + BUG_ON(!vgic_initialized(vcpu->kvm));
> +
> + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
> + timer->map,
> + timer->irq.level);
> + WARN_ON(ret);
> +}
> +
> +/*
> + * Check if there was a change in the timer state (should we raise or lower
> + * the line level to the GIC).
> + */
> +static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
> +{
> + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> + /*
> + * If userspace modified the timer registers via SET_ONE_REG before
> + * the vgic was initialized, we mustn't set the timer->irq.level value
> + * because the guest would never see the interrupt. Instead wait
> + * until we call this funciton from kvm_timer_flush_hwstate.
> + */
> + if (!vgic_initialized(vcpu->kvm))
> + return;
> +
> + if (kvm_timer_should_fire(vcpu) && !timer->irq.level) {
> + timer->irq.level = 1;
> + kvm_timer_update_irq(vcpu);
> + } else if (!kvm_timer_should_fire(vcpu) && timer->irq.level) {
> + timer->irq.level = 0;
> + kvm_timer_update_irq(vcpu);
> + }
> +}
> +
It took me ages to parse this, so I rewrote it to match my understanding:
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 8a0fdfc..a722f0f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -121,13 +121,14 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
return cval <= now;
}
-static void kvm_timer_update_irq(struct kvm_vcpu *vcpu)
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_state)
{
int ret;
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
BUG_ON(!vgic_initialized(vcpu->kvm));
+ timer->irq.level = new_state;
ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
timer->map,
timer->irq.level);
@@ -151,13 +152,8 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return;
- if (kvm_timer_should_fire(vcpu) && !timer->irq.level) {
- timer->irq.level = 1;
- kvm_timer_update_irq(vcpu);
- } else if (!kvm_timer_should_fire(vcpu) && timer->irq.level) {
- timer->irq.level = 0;
- kvm_timer_update_irq(vcpu);
- }
+ if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+ kvm_timer_update_irq(vcpu, !timer->irq.level);
}
/*
Did I get it right?
> /*
> * Schedule the background timer before calling kvm_vcpu_block, so that this
> * thread is removed from its waitqueue and made runnable when there's a timer
> @@ -191,8 +217,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
> * If the timer expired while we were not scheduled, now is the time
> * to inject it.
> */
> - if (kvm_timer_should_fire(vcpu))
> - kvm_timer_inject_irq(vcpu);
> + kvm_timer_update_state(vcpu);
> }
>
> /**
> @@ -208,8 +233,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
>
> BUG_ON(timer_is_armed(timer));
>
> - if (kvm_timer_should_fire(vcpu))
> - kvm_timer_inject_irq(vcpu);
> + /*
> + * The guest could have modified the timer registers or the timer
> + * could have expired, update the timer state.
> + */
> + kvm_timer_update_state(vcpu);
> }
>
> int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
> @@ -224,7 +252,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
> * kvm_vcpu_set_target(). To handle this, we determine
> * vcpu timer irq number when the vcpu is reset.
> */
> - timer->irq = irq;
> + timer->irq.irq = irq->irq;
>
> /*
> * Tell the VGIC that the virtual interrupt is tied to a
> @@ -269,6 +297,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
> default:
> return -1;
> }
> +
> + kvm_timer_update_state(vcpu);
> return 0;
> }
>
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 9ed8d53..f4ea950 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -1422,34 +1422,43 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
> /*
> * Save the physical active state, and reset it to inactive.
> *
> - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
> + * Return true if there's a pending level triggered interrupt line to queue.
> */
> -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
> +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
> {
> struct irq_phys_map *map;
> + bool phys_active;
> int ret;
>
> if (!(vlr.state & LR_HW))
> return 0;
>
> map = vgic_irq_map_search(vcpu, vlr.irq);
> - BUG_ON(!map || !map->active);
> + BUG_ON(!map);
>
> ret = irq_get_irqchip_state(map->irq,
> IRQCHIP_STATE_ACTIVE,
> - &map->active);
> + &phys_active);
>
> WARN_ON(ret);
>
> - if (map->active) {
> + if (phys_active) {
> + /*
> + * Interrupt still marked as active on the physical
> + * distributor, so guest did not EOI it yet. Reset to
> + * non-active so that other VMs can see interrupts from this
> + * device.
> + */
> ret = irq_set_irqchip_state(map->irq,
> IRQCHIP_STATE_ACTIVE,
> false);
> WARN_ON(ret);
> - return 0;
> + return false;
> }
>
> - return 1;
> + /* Mapped edge-triggered interrupts not yet supported. */
> + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
Hmmm. What are we missing?
> + return process_level_irq(vcpu, lr, vlr);
> }
>
> /* Sync back the VGIC state after a guest run */
> @@ -1474,18 +1483,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> continue;
>
> vlr = vgic_get_lr(vcpu, lr);
> - if (vgic_sync_hwirq(vcpu, vlr)) {
> - /*
> - * So this is a HW interrupt that the guest
> - * EOI-ed. Clean the LR state and allow the
> - * interrupt to be sampled again.
> - */
> - vlr.state = 0;
> - vlr.hwirq = 0;
> - vgic_set_lr(vcpu, lr, vlr);
> - vgic_irq_clear_queued(vcpu, vlr.irq);
> - set_bit(lr, elrsr_ptr);
> - }
> + if (vgic_sync_hwirq(vcpu, lr, vlr))
> + level_pending = true;
>
> if (!test_bit(lr, elrsr_ptr))
> continue;
> @@ -1861,30 +1860,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
> }
>
> /**
> - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
> - *
> - * Return the logical active state of a mapped interrupt. This doesn't
> - * necessarily reflects the current HW state.
> - */
> -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
> -{
> - BUG_ON(!map);
> - return map->active;
> -}
> -
> -/**
> - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
> - *
> - * Set the logical active state of a mapped interrupt. This doesn't
> - * immediately affects the HW state.
> - */
> -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
> -{
> - BUG_ON(!map);
> - map->active = active;
> -}
> -
> -/**
> * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
> * @vcpu: The VCPU pointer
> * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
> @@ -2112,10 +2087,14 @@ int vgic_init(struct kvm *kvm)
> if (i < VGIC_NR_SGIS)
> vgic_bitmap_set_irq_val(&dist->irq_enabled,
> vcpu->vcpu_id, i, 1);
> - if (i < VGIC_NR_PRIVATE_IRQS)
> + if (i < VGIC_NR_SGIS)
> vgic_bitmap_set_irq_val(&dist->irq_cfg,
> vcpu->vcpu_id, i,
> VGIC_CFG_EDGE);
> + else if (i < VGIC_NR_PRIVATE_IRQS) /* PPIs */
> + vgic_bitmap_set_irq_val(&dist->irq_cfg,
> + vcpu->vcpu_id, i,
> + VGIC_CFG_LEVEL);
> }
>
> vgic_enable(vcpu);
>
My only real objection to this patch is that it puts my brain upside down.
Hopefully that won't last.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
More information about the linux-arm-kernel
mailing list