[PATCH v3 02/14] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode
Reiji Watanabe
reijiw at google.com
Fri Nov 11 23:55:38 PST 2022
Hi Marc,
On Mon, Nov 7, 2022 at 12:54 AM Marc Zyngier <maz at kernel.org> wrote:
>
> Ricardo recently pointed out that the PMU chained counter emulation
> in KVM wasn't quite behaving like the one on actual hardware, in
> the sense that a chained counter would expose an overflow on
> both halves of a chained counter, while KVM would only expose the
> overflow on the top half.
>
> The difference is subtle, but significant. What does the architecture
> say (DDI0487 H.a):
>
> - Up to PMUv3p4, all counters but the cycle counter are 32bit
>
> - A 32bit counter that overflows generates a CHAIN event on the
> adjacent counter after exposing its own overflow status
>
> - The CHAIN event is accounted if the counter is correctly
> configured (CHAIN event selected and counter enabled)
>
> This all means that our current implementation (which uses 64bit
> perf events) prevents us from emulating this overflow on the lower half.
>
> How to fix this? By implementing the above, to the letter.
>
> This largely results in code deletion, removing the notions of
> "counter pair", "chained counters", and "canonical counter".
> The code is further restructured to make the CHAIN handling similar
> to SWINC, as the two are now extremely similar in behaviour.
>
> Reported-by: Ricardo Koller <ricarkol at google.com>
> Signed-off-by: Marc Zyngier <maz at kernel.org>
> ---
> arch/arm64/kvm/pmu-emul.c | 312 ++++++++++----------------------------
> include/kvm/arm_pmu.h | 2 -
> 2 files changed, 83 insertions(+), 231 deletions(-)
>
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index 0003c7d37533..a38b3127f649 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -15,16 +15,14 @@
> #include <kvm/arm_pmu.h>
> #include <kvm/arm_vgic.h>
>
> +#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
Although this isn't new code (just a rename of the existing macro),
wouldn't it be nicer to have armv8pmu_event_is_64bit()
(in arch/arm64/kernel/perf_event.c) use the macro as well?
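Something along these lines, perhaps (just a sketch, assuming the
#define is moved to a header both files can include rather than
being duplicated):

	static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
	{
		/* Same bit as today (config1 & 0x1), spelled with the macro */
		return event->attr.config1 & PERF_ATTR_CFG1_COUNTER_64BIT;
	}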
> +
> DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
>
> static LIST_HEAD(arm_pmus);
> static DEFINE_MUTEX(arm_pmus_lock);
>
> static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
> -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
> -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
> -
> -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
>
> static u32 kvm_pmu_event_mask(struct kvm *kvm)
> {
> @@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
> }
>
> +static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
> +{
> + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
> +}
> +
> static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> {
> struct kvm_pmu *pmu;
> @@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> }
>
> /**
> - * kvm_pmu_pmc_is_chained - determine if the pmc is chained
> - * @pmc: The PMU counter pointer
> - */
> -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
> -{
> - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
> -
> - return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> -}
> -
> -/**
> - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
> - * @select_idx: The counter index
> - */
> -static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
> -{
> - return select_idx & 0x1;
> -}
> -
> -/**
> - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
> - * @pmc: The PMU counter pointer
> - *
> - * When a pair of PMCs are chained together we use the low counter (canonical)
> - * to hold the underlying perf event.
> - */
> -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
> -{
> - if (kvm_pmu_pmc_is_chained(pmc) &&
> - kvm_pmu_idx_is_high_counter(pmc->idx))
> - return pmc - 1;
> -
> - return pmc;
> -}
> -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
> -{
> - if (kvm_pmu_idx_is_high_counter(pmc->idx))
> - return pmc - 1;
> - else
> - return pmc + 1;
> -}
> -
> -/**
> - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
> + * kvm_pmu_get_counter_value - get PMU counter value
> * @vcpu: The vcpu pointer
> * @select_idx: The counter index
> */
> -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
> -{
> - u64 eventsel, reg;
> -
> - select_idx |= 0x1;
> -
> - if (select_idx == ARMV8_PMU_CYCLE_IDX)
> - return false;
> -
> - reg = PMEVTYPER0_EL0 + select_idx;
> - eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
> -
> - return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
> -}
> -
> -/**
> - * kvm_pmu_get_pair_counter_value - get PMU counter value
> - * @vcpu: The vcpu pointer
> - * @pmc: The PMU counter pointer
> - */
> -static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
> - struct kvm_pmc *pmc)
> +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> {
> - u64 counter, counter_high, reg, enabled, running;
> -
> - if (kvm_pmu_pmc_is_chained(pmc)) {
> - pmc = kvm_pmu_get_canonical_pmc(pmc);
> - reg = PMEVCNTR0_EL0 + pmc->idx;
> + u64 counter, reg, enabled, running;
> + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> + struct kvm_pmc *pmc = &pmu->pmc[select_idx];
>
> - counter = __vcpu_sys_reg(vcpu, reg);
> - counter_high = __vcpu_sys_reg(vcpu, reg + 1);
> + if (!kvm_vcpu_has_pmu(vcpu))
> + return 0;
>
> - counter = lower_32_bits(counter) | (counter_high << 32);
> - } else {
> - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
> - counter = __vcpu_sys_reg(vcpu, reg);
> - }
> + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
> + counter = __vcpu_sys_reg(vcpu, reg);
>
> /*
> * The real counter value is equal to the value of counter register plus
> @@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
> counter += perf_event_read_value(pmc->perf_event, &enabled,
> &running);
>
> - return counter;
> -}
> -
> -/**
> - * kvm_pmu_get_counter_value - get PMU counter value
> - * @vcpu: The vcpu pointer
> - * @select_idx: The counter index
> - */
> -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> -{
> - u64 counter;
> - struct kvm_pmu *pmu = &vcpu->arch.pmu;
> - struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> -
> - if (!kvm_vcpu_has_pmu(vcpu))
> - return 0;
> -
> - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> -
> - if (kvm_pmu_pmc_is_chained(pmc) &&
> - kvm_pmu_idx_is_high_counter(select_idx))
> - counter = upper_32_bits(counter);
> - else if (select_idx != ARMV8_PMU_CYCLE_IDX)
> + if (select_idx != ARMV8_PMU_CYCLE_IDX)
Nit: Using 'pmc->idx' instead of 'select_idx' appears to be more consistent.
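i.e. (untested, just to illustrate the nit):

	if (pmc->idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);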
> counter = lower_32_bits(counter);
>
> return counter;
> @@ -218,7 +130,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
> */
> static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
> {
> - pmc = kvm_pmu_get_canonical_pmc(pmc);
> if (pmc->perf_event) {
> perf_event_disable(pmc->perf_event);
> perf_event_release_kernel(pmc->perf_event);
> @@ -236,11 +147,10 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
> {
> u64 counter, reg, val;
>
> - pmc = kvm_pmu_get_canonical_pmc(pmc);
> if (!pmc->perf_event)
> return;
>
> - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
>
> if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
> reg = PMCCNTR_EL0;
> @@ -252,9 +162,6 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
>
> __vcpu_sys_reg(vcpu, reg) = val;
>
> - if (kvm_pmu_pmc_is_chained(pmc))
> - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
> -
> kvm_pmu_release_perf_event(pmc);
> }
>
> @@ -285,8 +192,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
>
> for_each_set_bit(i, &mask, 32)
> kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
> -
> - bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
> }
>
> /**
> @@ -340,11 +245,8 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
>
> pmc = &pmu->pmc[i];
>
> - /* A change in the enable state may affect the chain state */
> - kvm_pmu_update_pmc_chained(vcpu, i);
> kvm_pmu_create_perf_event(vcpu, i);
>
> - /* At this point, pmc must be the canonical */
> if (pmc->perf_event) {
> perf_event_enable(pmc->perf_event);
> if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> @@ -375,11 +277,8 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
>
> pmc = &pmu->pmc[i];
>
> - /* A change in the enable state may affect the chain state */
> - kvm_pmu_update_pmc_chained(vcpu, i);
> kvm_pmu_create_perf_event(vcpu, i);
Do we still need to call kvm_pmu_create_perf_event() here even
with this patch? (I would think the reason it was called here
before was that a change in the chain state could affect the
attributes of the backing perf event.)
I have the same comment for kvm_pmu_enable_counter_mask().
>
> - /* At this point, pmc must be the canonical */
> if (pmc->perf_event)
> perf_event_disable(pmc->perf_event);
> }
> @@ -484,6 +383,48 @@ static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
> kvm_vcpu_kick(vcpu);
> }
>
> +/*
> + * Perform an increment on any of the counters described in @mask,
> + * generating the overflow if required, and propagate it as a chained
> + * event if possible.
> + */
> +static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
> + unsigned long mask, u32 event)
> +{
> + int i;
> +
> + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
> + return;
> +
> + /* Weed out disabled counters */
> + mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> +
> + for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
> + u64 type, reg;
> +
> + /* Filter on event type */
> + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
> + type &= kvm_pmu_event_mask(vcpu->kvm);
> + if (type != event)
> + continue;
> +
> + /* Increment this counter */
> + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
> + reg = lower_32_bits(reg);
> + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> +
> + if (reg) /* No overflow? move on */
> + continue;
> +
> + /* Mark overflow */
> + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> +
> + if (kvm_pmu_counter_can_chain(vcpu, i))
> + kvm_pmu_counter_increment(vcpu, BIT(i + 1),
> + ARMV8_PMUV3_PERFCTR_CHAIN);
> + }
> +}
> +
> /**
> * When the perf event overflows, set the overflow status and inform the vcpu.
> */
> @@ -514,6 +455,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
>
> __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
>
> + if (kvm_pmu_counter_can_chain(vcpu, idx))
> + kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
> + ARMV8_PMUV3_PERFCTR_CHAIN);
> +
> if (kvm_pmu_overflow_status(vcpu)) {
> kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>
> @@ -533,50 +478,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
> */
> void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
> {
> - struct kvm_pmu *pmu = &vcpu->arch.pmu;
> - int i;
> -
> - if (!kvm_vcpu_has_pmu(vcpu))
> - return;
> -
> - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
> - return;
> -
> - /* Weed out disabled counters */
> - val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> -
> - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
> - u64 type, reg;
> -
> - if (!(val & BIT(i)))
> - continue;
> -
> - /* PMSWINC only applies to ... SW_INC! */
> - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
> - type &= kvm_pmu_event_mask(vcpu->kvm);
> - if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
> - continue;
> -
> - /* increment this even SW_INC counter */
> - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
> - reg = lower_32_bits(reg);
> - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> -
> - if (reg) /* no overflow on the low part */
> - continue;
> -
> - if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
> - /* increment the high counter */
> - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
> - reg = lower_32_bits(reg);
> - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
> - if (!reg) /* mark overflow on the high counter */
> - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
> - } else {
> - /* mark overflow on low counter */
> - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> - }
> - }
> + kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
> }
>
> /**
> @@ -625,18 +527,11 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> {
> struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
> struct kvm_pmu *pmu = &vcpu->arch.pmu;
> - struct kvm_pmc *pmc;
> + struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> struct perf_event *event;
> struct perf_event_attr attr;
> u64 eventsel, counter, reg, data;
>
> - /*
> - * For chained counters the event type and filtering attributes are
> - * obtained from the low/even counter. We also use this counter to
> - * determine if the event is enabled/disabled.
> - */
> - pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
> -
> reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
> data = __vcpu_sys_reg(vcpu, reg);
> @@ -647,8 +542,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> else
> eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
>
> - /* Software increment event doesn't need to be backed by a perf event */
> - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
> + /*
> + * Neither SW increment nor chained events need to be backed
> + * by a perf event.
> + */
> + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
> + eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
> return;
>
> /*
> @@ -670,30 +569,21 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> attr.exclude_host = 1; /* Don't count host events */
> attr.config = eventsel;
>
> - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> + counter = kvm_pmu_get_counter_value(vcpu, select_idx);
Nit: Since all the existing code in this function uses pmc->idx,
I think it would be better to use 'pmc->idx' instead of
'select_idx' for consistency.
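i.e. (sketch only, same value either way since pmc was taken from
pmu->pmc[select_idx]):

	counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);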
> - if (kvm_pmu_pmc_is_chained(pmc)) {
> - /**
> - * The initial sample period (overflow count) of an event. For
> - * chained counters we only support overflow interrupts on the
> - * high counter.
> - */
> + /*
> + * If counting with a 64bit counter, advertise it to the perf
> + * code, carefully dealing with the initial sample period.
> + */
> + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
I have the same comments on this as above.
Thank you,
Reiji
> + attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
> attr.sample_period = (-counter) & GENMASK(63, 0);
> - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
> -
> - event = perf_event_create_kernel_counter(&attr, -1, current,
> - kvm_pmu_perf_overflow,
> - pmc + 1);
> } else {
> - /* The initial sample period (overflow count) of an event. */
> - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
> - attr.sample_period = (-counter) & GENMASK(63, 0);
> - else
> - attr.sample_period = (-counter) & GENMASK(31, 0);
> + attr.sample_period = (-counter) & GENMASK(31, 0);
> + }
>
> - event = perf_event_create_kernel_counter(&attr, -1, current,
> + event = perf_event_create_kernel_counter(&attr, -1, current,
> kvm_pmu_perf_overflow, pmc);
> - }
>
> if (IS_ERR(event)) {
> pr_err_once("kvm: pmu event creation failed %ld\n",
> @@ -704,41 +594,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> pmc->perf_event = event;
> }
>
> -/**
> - * kvm_pmu_update_pmc_chained - update chained bitmap
> - * @vcpu: The vcpu pointer
> - * @select_idx: The number of selected counter
> - *
> - * Update the chained bitmap based on the event type written in the
> - * typer register and the enable state of the odd register.
> - */
> -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
> -{
> - struct kvm_pmu *pmu = &vcpu->arch.pmu;
> - struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
> - bool new_state, old_state;
> -
> - old_state = kvm_pmu_pmc_is_chained(pmc);
> - new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> - kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
> -
> - if (old_state == new_state)
> - return;
> -
> - canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
> - kvm_pmu_stop_counter(vcpu, canonical_pmc);
> - if (new_state) {
> - /*
> - * During promotion from !chained to chained we must ensure
> - * the adjacent counter is stopped and its event destroyed
> - */
> - kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
> - set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> - return;
> - }
> - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> -}
> -
> /**
> * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
> * @vcpu: The vcpu pointer
> @@ -766,7 +621,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
>
> __vcpu_sys_reg(vcpu, reg) = data & mask;
>
> - kvm_pmu_update_pmc_chained(vcpu, select_idx);
> kvm_pmu_create_perf_event(vcpu, select_idx);
> }
>
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index c0b868ce6a8f..96b192139a23 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -11,7 +11,6 @@
> #include <asm/perf_event.h>
>
> #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
> -#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
>
> #ifdef CONFIG_HW_PERF_EVENTS
>
> @@ -29,7 +28,6 @@ struct kvm_pmu {
> struct irq_work overflow_work;
> struct kvm_pmu_events events;
> struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
> - DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
> int irq_num;
> bool created;
> bool irq_level;
> --
> 2.34.1
>