[PATCH 10/43] KVM: arm64: gic-v5: Implement VPE IRS MMIO Ops
Marc Zyngier
maz at kernel.org
Thu Apr 30 01:46:17 PDT 2026
On Mon, 27 Apr 2026 17:09:27 +0100,
Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
>
> Introduce interfaces to make VPEs valid, and to configure them, via
> the host's IRS. As with the other valid bits in the GICv5 VM tables,
> VPEs cannot be made valid directly, and instead are made valid via an
> IRS MMIO Op.
>
> Additionally, some of the VPE configuration takes place via the IRS
> MMIO interface too (via the IRS_VPE_CR0, IRS_VPE_DBR). VPE doorbells
> are, for example, configured via this interface.
>
> The existing VPE-doorbell-based commands are extended with:
>
> VPE_MAKE_VALID - Make the VPE valid in the VPET
> VPE_CR0_READ - Handle a guest read from IRS_PE_CR0
> VPE_CR0_WRITE - Handle a guest write to IRS_PE_CR0
>
> Note: There is no VPE_MAKE_INVALID as VPEs are only made invalid on
> teardown, at which point the whole VMTE is marked as invalid. Hence,
> it is not required.
>
> Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> ---
> arch/arm64/kvm/vgic/vgic-v5.c | 164 +++++++++++++++++++++++++++++
> include/linux/irqchip/arm-gic-v5.h | 27 +++++
> 2 files changed, 191 insertions(+)
>
> diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
> index 49eb01ca07961..0649729f6b834 100644
> --- a/arch/arm64/kvm/vgic/vgic-v5.c
> +++ b/arch/arm64/kvm/vgic/vgic-v5.c
> @@ -253,6 +253,25 @@ static int vgic_v5_irs_wait_for_vm_op(void)
> return 0;
> }
>
> +/* Wait for completion of an VPE_STATUSR change */
> +static int vgic_v5_irs_wait_for_vpe_op(void)
> +{
> + int ret;
> + u32 statusr;
> +
> + ret = readl_relaxed_poll_timeout_atomic(
> + irs_base + GICV5_IRS_VPE_STATUSR, statusr,
> + FIELD_GET(GICV5_IRS_VPE_STATUSR_IDLE, statusr), 1,
> + USEC_PER_SEC);
Formatting.
> +
> + if (ret == -ETIMEDOUT) {
> + pr_err_ratelimited("Time out waiting for IRS VPE Op\n");
> + return ret;
> + }
> +
> + return 0;
You seem to have a number of these primitives. Consider having a
generic helper that takes the required parameters, including a partial
string in case of error.
> +}
> +
> static int vgic_v5_irs_assign_vmt(bool two_level, u8 vm_id_bits, phys_addr_t vmt_base)
> {
> u64 vmt_baser;
> @@ -369,10 +388,142 @@ static int vgic_v5_irs_set_vist_invalid(int vm_id, bool spi_ist)
> return __vgic_v5_irs_update_vist_validity(vm_id, spi_ist, true);
> }
>
> +static int vgic_v5_irs_set_up_vpe(int vm_id, int vpe_id, irq_hw_number_t db_hwirq)
> +{
> + u64 vmap_vper, dbr, selr;
> + u32 statusr, cr0;
> + int ret;
> +
> + guard(raw_spinlock)(&vm_config_lock);
> +
> + /* Make sure that we are idle to begin with */
> + ret = vgic_v5_irs_wait_for_vm_op();
> + if (ret)
> + return ret;
> +
> + /* Mark the VPE as valid */
> + vmap_vper = FIELD_PREP(GICV5_IRS_VMAP_VPER_VPE_ID, vpe_id) |
> + FIELD_PREP(GICV5_IRS_VMAP_VPER_VM_ID, vm_id) |
> + FIELD_PREP(GICV5_IRS_VMAP_VPER_M, true);
That's another of these single bit mask used with FIELD_PREP. Consider
rewriting it as:
vmap_vper = FIELD_PREP(GICV5_IRS_VMAP_VPER_VPE_ID, vpe_id) |
FIELD_PREP(GICV5_IRS_VMAP_VPER_VM_ID, vm_id) |
GICV5_IRS_VMAP_VPER_M;
> + irs_writeq_relaxed(vmap_vper, GICV5_IRS_VMAP_VPER);
> +
> + /* Wait for the VPE to be marked valid in the VPET */
> + ret = vgic_v5_irs_wait_for_vm_op();
> + if (ret)
> + return ret;
> +
> + selr = FIELD_PREP(GICV5_IRS_VPE_SELR_VPE_ID, vpe_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_VM_ID, vm_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_S, true);
> + irs_writeq_relaxed(selr, GICV5_IRS_VPE_SELR);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (!FIELD_GET(GICV5_IRS_VPE_STATUSR_V, statusr))
> + return -EINVAL;
> +
> + /* Set targeted only routing (disable 1ofN vPE selection) */
> + cr0 = FIELD_PREP(GICV5_IRS_VPE_CR0_DPS, true);
> + irs_writel_relaxed(cr0, GICV5_IRS_VPE_CR0);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (FIELD_GET(GICV5_IRS_VPE_STATUSR_F, statusr))
> + ret = -EINVAL;
> +
> + /*
> + * The VPE has not yet run. Therefore, make sure that all interrupts
> + * will generate a doorbell.
> + */
> + dbr = FIELD_PREP(GICV5_IRS_VPE_DBR_LPI_ID, db_hwirq) |
> + FIELD_PREP(GICV5_IRS_VPE_DBR_DBPM, 0b11111) |
> + FIELD_PREP(GICV5_IRS_VPE_DBR_REQ_DB, false) |
And anything that set to false can be removed altogether.
> + FIELD_PREP(GICV5_IRS_VPE_DBR_DBV, true);
> + irs_writeq_relaxed(dbr, GICV5_IRS_VPE_DBR);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (FIELD_GET(GICV5_IRS_VPE_STATUSR_F, statusr))
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static int vgic_v5_irs_vpe_cr0_read(int vm_id, int vpe_id, u64 *cr0)
> +{
> + u32 statusr;
> + u64 selr;
> + int ret;
> +
> + guard(raw_spinlock)(&vm_config_lock);
> +
> + selr = FIELD_PREP(GICV5_IRS_VPE_SELR_VPE_ID, vpe_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_VM_ID, vm_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_S, true);
> + irs_writeq_relaxed(selr, GICV5_IRS_VPE_SELR);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (!FIELD_GET(GICV5_IRS_VPE_STATUSR_V, statusr))
> + return -EINVAL;
> +
> + *cr0 = irs_readl_relaxed(GICV5_IRS_VPE_CR0);
> +
> + return 0;
I'd rather this function returned the CR0 value directly, even if the
IDLE bit isn't set. You can have a WARN_ONCE() if you want.
> +}
> +
> +static int vgic_v5_irs_vpe_cr0_update(int vm_id, int vpe_id, u32 cr0)
*_write() would be better than *_update() when you already have
*_read(). Specially as a consequence of VPE_CR0_WRITE.
> +{
> + u32 statusr;
> + u64 selr;
> + int ret;
> +
> + guard(raw_spinlock)(&vm_config_lock);
> +
> + selr = FIELD_PREP(GICV5_IRS_VPE_SELR_VPE_ID, vpe_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_VM_ID, vm_id) |
> + FIELD_PREP(GICV5_IRS_VPE_SELR_S, true);
> + irs_writeq_relaxed(selr, GICV5_IRS_VPE_SELR);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (!FIELD_GET(GICV5_IRS_VPE_STATUSR_V, statusr))
> + return ret;
return 0? But you have set SELR to something. Surely reading V==0
here is an indication of a bug. So should you report the error? Warn?
> +
> + irs_writel_relaxed(cr0, GICV5_IRS_VPE_CR0);
> +
> + ret = vgic_v5_irs_wait_for_vpe_op();
> + if (ret)
> + return ret;
> +
> + statusr = irs_readl_relaxed(GICV5_IRS_VPE_STATUSR);
> + if (FIELD_GET(GICV5_IRS_VPE_STATUSR_F, statusr))
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> static int vgic_v5_db_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
> {
> struct vgic_v5_vm *vm = data->domain->host_data;
> struct gicv5_cmd_info *cmd_info = vcpu_info;
> + /* Our VPE ID is the index within the doorbell domain */
> + u16 vpe_id = data->hwirq;
>
> switch (cmd_info->cmd_type) {
> case VMT_L2_MAP:
> @@ -381,6 +532,19 @@ static int vgic_v5_db_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
> return vgic_v5_irs_set_vm_valid(vm->vm_id);
> case VMTE_MAKE_INVALID:
> return vgic_v5_irs_set_vm_invalid(vm->vm_id);
> + case VPE_MAKE_VALID:
> + /*
> + * We need the actual LPI ID which lives in the top-most parent
> + * domain. This hwirq won't include the type (LPI) but that's
> + * not required for the IRS_VPE_DBR.
> + */
> + while (data->parent_data != NULL)
> + data = data->parent_data;
> + return vgic_v5_irs_set_up_vpe(vm->vm_id, vpe_id, data->hwirq);
> + case VPE_CR0_READ:
> + return vgic_v5_irs_vpe_cr0_read(vm->vm_id, vpe_id, &cmd_info->data);
> + case VPE_CR0_WRITE:
> + return vgic_v5_irs_vpe_cr0_update(vm->vm_id, vpe_id, cmd_info->data);
> case SPI_VIST_MAKE_VALID:
> return vgic_v5_irs_set_vist_valid(vm->vm_id, true);
> case LPI_VIST_MAKE_VALID:
> diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
> index ff5ad653252d2..54b573783cd75 100644
> --- a/include/linux/irqchip/arm-gic-v5.h
> +++ b/include/linux/irqchip/arm-gic-v5.h
> @@ -90,9 +90,14 @@
> #define GICV5_IRS_VMT_BASER 0x0200
> #define GICV5_IRS_VMT_CFGR 0x0210
> #define GICV5_IRS_VMT_STATUSR 0x0214
> +#define GICV5_IRS_VPE_SELR 0x0240
> +#define GICV5_IRS_VPE_DBR 0x0248
> +#define GICV5_IRS_VPE_CR0 0x0258
> +#define GICV5_IRS_VPE_STATUSR 0x025c
> #define GICV5_IRS_VMAP_L2_VMTR 0x02c0
> #define GICV5_IRS_VMAP_VMR 0x02c8
> #define GICV5_IRS_VMAP_VISTR 0x02d0
> +#define GICV5_IRS_VMAP_VPER 0x02e0
>
> #define GICV5_IRS_IDR0_VIRT BIT(6)
>
> @@ -199,6 +204,21 @@
>
> #define GICV5_IRS_VMT_STATUSR_IDLE BIT(0)
>
> +#define GICV5_IRS_VPE_SELR_S BIT_ULL(63)
> +#define GICV5_IRS_VPE_SELR_VPE_ID GENMASK_ULL(47, 32)
> +#define GICV5_IRS_VPE_SELR_VM_ID GENMASK_ULL(15, 0)
> +
> +#define GICV5_IRS_VPE_DBR_DBV BIT_ULL(63)
> +#define GICV5_IRS_VPE_DBR_REQ_DB BIT_ULL(62)
> +#define GICV5_IRS_VPE_DBR_DBPM GENMASK_ULL(36, 32)
> +#define GICV5_IRS_VPE_DBR_LPI_ID GENMASK_ULL(23, 0)
> +
> +#define GICV5_IRS_VPE_CR0_DPS BIT(0)
> +
> +#define GICV5_IRS_VPE_STATUSR_F BIT(2)
> +#define GICV5_IRS_VPE_STATUSR_V BIT(1)
> +#define GICV5_IRS_VPE_STATUSR_IDLE BIT(0)
> +
> #define GICV5_IRS_VMAP_L2_VMTR_M BIT_ULL(63)
> #define GICV5_IRS_VMAP_L2_VMTR_VM_ID GENMASK_ULL(15, 0)
>
> @@ -211,6 +231,10 @@
> #define GICV5_IRS_VMAP_VISTR_VM_ID GENMASK_ULL(47, 32)
> #define GICV5_IRS_VMAP_VISTR_TYPE GENMASK_ULL(31, 29)
>
> +#define GICV5_IRS_VMAP_VPER_M BIT_ULL(63)
> +#define GICV5_IRS_VMAP_VPER_VM_ID GENMASK_ULL(47, 32)
> +#define GICV5_IRS_VMAP_VPER_VPE_ID GENMASK_ULL(15, 0)
> +
> #define GICV5_ISTL1E_VALID BIT_ULL(0)
> #define GICV5_IRS_ISTL1E_SIZE 8UL
>
> @@ -480,6 +504,9 @@ enum gicv5_vcpu_info_cmd_type {
> VMT_L2_MAP, /* Map in a L2 VMT - *may* happen on VM init */
> VMTE_MAKE_VALID, /* Make the VMTE valid */
> VMTE_MAKE_INVALID, /* Make the VMTE (et al.) invalid */
> + VPE_MAKE_VALID, /* No corresponding invalid */
> + VPE_CR0_READ, /* Read of VPE_CR0 (guest read from PE_CR0) */
> + VPE_CR0_WRITE, /* Write to VPE_CR0 (guest write to PE_CR0) */
> SPI_VIST_MAKE_VALID, /* No corresponding invalid */
> LPI_VIST_MAKE_VALID, /* Triggered by a guest */
> LPI_VIST_MAKE_INVALID, /* Triggered by a guest */
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list