[PATCH 22/23] KVM: arm64: Add a rVIC/rVID in-kernel implementation
Marc Zyngier
maz at kernel.org
Thu Sep 3 11:26:09 EDT 2020
The rVIC (reduced Virtual Interrupt Controller) and its companion
rVID (reduced Virtual Interrupt Distributor) are the two parts of a
PV interrupt controller architecture aimed at supporting VMs with
minimal interrupt requirements.
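From userspace, the rVIC is instantiated through the generic KVM
device API: create a KVM_DEV_TYPE_ARM_RVIC device, program the number
of trusted and total interrupts, and finalize the irqchip with the
INIT group. The following VMM-side sketch is illustrative only:
vm_fd and setup_rvic() are hypothetical, error handling is minimal,
and the 32 trusted / 96 total split is an arbitrary choice that
satisfies the multiple-of-32 constraints enforced by rvic_set_attr():

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int setup_rvic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_ARM_RVIC,
		};
		/* bits [15:0]: trusted, bits [31:16]: total */
		__u32 nr_irqs = 32 | (96 << 16);
		struct kvm_device_attr nr_attr = {
			.group = KVM_DEV_ARM_RVIC_GRP_NR_IRQS,
			.addr  = (__u64)(unsigned long)&nr_irqs,
		};
		struct kvm_device_attr init_attr = {
			.group = KVM_DEV_ARM_RVIC_GRP_INIT,
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;
		if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &nr_attr) < 0 ||
		    ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &init_attr) < 0)
			return -1;

		return cd.fd;
	}

Untrusted interrupts are then injected by rVID input line using the
usual KVM_IRQ_LINE encoding with the SPI type (assuming the
KVM_IRQ_LINE plumbing from earlier patches in this series); the vcpu
index in the encoding is ignored, as the target is resolved through
the rVID mapping established by the guest. Again a sketch, signalling
a level change on input line 3:

	struct kvm_irq_level line = {
		.irq   = (KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT) | 3,
		.level = 1,
	};

	ioctl(vm_fd, KVM_IRQ_LINE, &line);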
Signed-off-by: Marc Zyngier <maz at kernel.org>
---
arch/arm64/include/asm/kvm_host.h | 7 +-
arch/arm64/include/asm/kvm_irq.h | 2 +
arch/arm64/include/uapi/asm/kvm.h | 9 +
arch/arm64/kvm/Makefile | 2 +-
arch/arm64/kvm/arm.c | 3 +
arch/arm64/kvm/hypercalls.c | 7 +
arch/arm64/kvm/rvic-cpu.c | 1073 +++++++++++++++++++++++++++++
include/kvm/arm_rvic.h | 41 ++
include/linux/irqchip/irq-rvic.h | 4 +
include/uapi/linux/kvm.h | 2 +
10 files changed, 1148 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/kvm/rvic-cpu.c
create mode 100644 include/kvm/arm_rvic.h
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5dd92873d40f..381d3ff6e0b7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -35,6 +35,7 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
#include <kvm/arm_pmu.h>
+#include <kvm/arm_rvic.h>
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
@@ -102,6 +103,7 @@ struct kvm_arch {
enum kvm_irqchip_type irqchip_type;
bool irqchip_finalized;
struct kvm_irqchip_flow irqchip_flow;
+ void *irqchip_data;
struct vgic_dist vgic;
/* Mandated version of PSCI */
@@ -324,7 +326,10 @@ struct kvm_vcpu_arch {
} host_debug_state;
/* VGIC state */
- struct vgic_cpu vgic_cpu;
+ union {
+ struct vgic_cpu vgic_cpu;
+ struct rvic rvic;
+ };
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
diff --git a/arch/arm64/include/asm/kvm_irq.h b/arch/arm64/include/asm/kvm_irq.h
index 05fbe5241642..bb1666093f80 100644
--- a/arch/arm64/include/asm/kvm_irq.h
+++ b/arch/arm64/include/asm/kvm_irq.h
@@ -11,11 +11,13 @@ enum kvm_irqchip_type {
IRQCHIP_USER, /* Implemented in userspace */
IRQCHIP_GICv2, /* v2 on v2, or v2 on v3 */
IRQCHIP_GICv3, /* v3 on v3 */
+ IRQCHIP_RVIC, /* PV irqchip */
};
#define irqchip_in_kernel(k) ((k)->arch.irqchip_type != IRQCHIP_USER)
#define irqchip_is_gic_v2(k) ((k)->arch.irqchip_type == IRQCHIP_GICv2)
#define irqchip_is_gic_v3(k) ((k)->arch.irqchip_type == IRQCHIP_GICv3)
+#define irqchip_is_rvic(k) ((k)->arch.irqchip_type == IRQCHIP_RVIC)
#define irqchip_finalized(k) ((k)->arch.irqchip_finalized)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..9fc26c84903f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -335,6 +335,15 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
+/*
+ * Device Control API: ARM RVIC. We only use the group, not the group
+ * attributes. They must be set to 0 for now.
+ */
+#define KVM_DEV_ARM_RVIC_GRP_NR_IRQS 0
+#define KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK 0xffff
+#define KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK (0xffff << 16)
+#define KVM_DEV_ARM_RVIC_GRP_INIT 1
+
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
#define KVM_ARM_IRQ_VCPU2_MASK 0xf
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 99977c1972cc..e378293ce99b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
- aarch32.o arch_timer.o \
+ aarch32.o arch_timer.o rvic-cpu.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 0d4c8de27d1e..bf0b11bdce84 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -41,6 +41,7 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
@@ -1402,6 +1403,8 @@ static int init_subsystems(void)
switch (err) {
case 0:
vgic_present = true;
+ if (kvm_register_rvic_device())
+ kvm_err("Failed to register rvic device type\n");
break;
case -ENODEV:
case -ENXIO:
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 550dfa3e53cd..f6620be74ce5 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -8,6 +8,9 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
@@ -62,6 +65,10 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
if (gpa != GPA_INVALID)
val = gpa;
break;
+ case SMC64_RVIC_BASE ... SMC64_RVIC_LAST:
+ return kvm_rvic_handle_hcall(vcpu);
+ case SMC64_RVID_BASE ... SMC64_RVID_LAST:
+ return kvm_rvid_handle_hcall(vcpu);
default:
return kvm_psci_call(vcpu);
}
diff --git a/arch/arm64/kvm/rvic-cpu.c b/arch/arm64/kvm/rvic-cpu.c
new file mode 100644
index 000000000000..5fb200c637d9
--- /dev/null
+++ b/arch/arm64/kvm/rvic-cpu.c
@@ -0,0 +1,1073 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <maz at kernel.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
+
+/* FIXME: lock/unlock_all_vcpus */
+#include "vgic/vgic.h"
+
+#define kvm_vcpu_to_rvic(v) (&(v)->arch.rvic)
+#define kvm_rvic_to_vcpu(r) (container_of((r), struct kvm_vcpu, arch.rvic))
+
+#define rvic_nr_untrusted(r) ((r)->nr_total - (r)->nr_trusted)
+
+struct rvic_vm_data {
+ u16 nr_trusted;
+ u16 nr_total;
+ spinlock_t lock;
+ /* Map is a dynamically allocated array of (total-trusted) elements */
+ struct {
+ u16 target_vcpu;
+ u16 intid;
+ } rvid_map[];
+};
+
+/*
+ * rvic_irq state machine:
+ *
+ *       idle <- S/C -> pending
+ *         ^          /    ^
+ *         |         /     |
+ *        U/M       A     U/M
+ *         |     /         |
+ *         v    v          v
+ *      masked <- S/C -> masked+pending
+ *
+ * [S]: Set Pending, [C]: Clear Pending
+ * [U]: Unmask, [M]: Mask
+ * [A]: Ack
+ */
+
+static struct rvic_irq *rvic_get_irq(struct rvic *rvic, unsigned int intid)
+{
+ if (intid >= rvic->nr_total)
+ return NULL;
+ return &rvic->irqs[intid];
+}
+
+static bool rvic_irq_queued(struct rvic_irq *irq)
+{
+ return !list_empty(&irq->delivery_entry);
+}
+
+/* RVIC primitives. They all assume that the RVIC lock is held */
+static void __rvic_enable(struct rvic *rvic)
+{
+ rvic->enabled = true;
+}
+
+static void __rvic_disable(struct rvic *rvic)
+{
+ rvic->enabled = false;
+}
+
+static bool __rvic_is_enabled(struct rvic *rvic)
+{
+ return rvic->enabled;
+}
+
+static void __rvic_set_pending(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ if (!__rvic_is_enabled(rvic)) {
+ pr_debug("dropping intid %u\n", intid);
+ return;
+ }
+
+ spin_lock_irqsave(&irq->lock, flags);
+
+ irq->pending = true;
+ if (!irq->masked && !rvic_irq_queued(irq))
+ list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+ spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_pending(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq->lock, flags);
+
+ irq->pending = false;
+ list_del_init(&irq->delivery_entry);
+
+ spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static bool __rvic_is_pending(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+ bool pend;
+
+ spin_lock_irqsave(&irq->lock, flags);
+ pend = irq->pending;
+ spin_unlock_irqrestore(&irq->lock, flags);
+
+ return pend;
+}
+
+static void __rvic_set_masked(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq->lock, flags);
+
+ irq->masked = true;
+ if (irq->pending)
+ list_del_init(&irq->delivery_entry);
+
+ spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_masked(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq->lock, flags);
+
+ irq->masked = false;
+ if (__rvic_is_enabled(rvic) && irq->pending && !rvic_irq_queued(irq))
+ list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+ spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static unsigned int __rvic_ack(struct rvic *rvic)
+{
+ unsigned int intid = ~0U;
+ struct rvic_irq *irq;
+
+ if (!__rvic_is_enabled(rvic))
+ return intid;
+
+ irq = list_first_entry_or_null(&rvic->delivery, struct rvic_irq,
+ delivery_entry);
+ if (irq) {
+ intid = irq->intid;
+ __rvic_set_masked(rvic, intid);
+ __rvic_clear_pending(rvic, intid);
+ }
+
+ return intid;
+}
+
+static bool __rvic_can_signal(struct rvic *rvic)
+{
+ return __rvic_is_enabled(rvic) && !list_empty(&rvic->delivery);
+}
+
+static void __rvic_resample(struct rvic *rvic, unsigned int intid)
+{
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+ bool pending;
+
+ spin_lock_irqsave(&irq->lock, flags);
+ if (irq->get_line_level) {
+ pending = irq->get_line_level(irq->intid);
+
+ /*
+ * As part of the resampling, tickle the GIC so that
+ * new interrupts can trickle in.
+ */
+ if (!pending && irq->host_irq)
+ irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_ACTIVE, false);
+ } else {
+ pending = irq->line_level;
+ }
+
+ spin_unlock_irqrestore(&irq->lock, flags);
+
+ if (pending)
+ __rvic_set_pending(rvic, intid);
+}
+
+/*
+ * rVIC hypercall handling. All functions assume they are being called
+ * from the vcpu thread that triggers the hypercall.
+ */
+static void __rvic_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
+ bool was_signaling)
+{
+ struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
+ bool signal = __rvic_can_signal(rvic);
+
+ /* We're hitting our own rVIC: update HCR_VI locally */
+ if (vcpu == target) {
+ if (signal)
+ *vcpu_hcr(vcpu) |= HCR_VI;
+ else
+ *vcpu_hcr(vcpu) &= ~HCR_VI;
+
+ return;
+ }
+
+ /*
+ * Remote rVIC case:
+ *
+ * We kick even if the interrupt disappears, as ISR_EL1.I must
+ * always reflect the state of the rVIC. This forces a reload
+ * of the vcpu state, making it consistent.
+ *
+ * This avoids modifying the target's own copy of HCR_EL2, as
+ * we are in a cross-vcpu call, and changing it from under its
+ * feet is dodgy.
+ */
+ if (was_signaling != signal)
+ __rvic_kick_vcpu(target);
+}
+
+static void rvic_version(struct kvm_vcpu *vcpu)
+{
+ /* ALP0.3 is the name of the game */
+ smccc_set_retval(vcpu, RVIC_STATUS_SUCCESS, RVIC_VERSION(0, 3), 0, 0);
+}
+
+static void rvic_info(struct kvm_vcpu *vcpu)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long what = smccc_get_arg1(vcpu);
+ unsigned long a0, a1;
+
+ switch (what) {
+ case RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS:
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+ a1 = rvic->nr_trusted;
+ break;
+ case RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS:
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+ a1 = rvic_nr_untrusted(rvic);
+ break;
+ default:
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+ a1 = 0;
+ break;
+ }
+
+ smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_enable(struct kvm_vcpu *vcpu)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long flags;
+ bool was_signaling;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ was_signaling = __rvic_can_signal(rvic);
+ __rvic_enable(rvic);
+ __rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+ 0, 0, 0);
+}
+
+static void rvic_disable(struct kvm_vcpu *vcpu)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long flags;
+ bool was_signaling;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ was_signaling = __rvic_can_signal(rvic);
+ __rvic_disable(rvic);
+ __rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+ 0, 0, 0);
+}
+
+typedef void (*rvic_action_fn_t)(struct rvic *, unsigned int);
+
+static int validate_rvic_call(struct kvm_vcpu *vcpu, struct rvic **rvicp,
+ unsigned int *intidp)
+{
+ unsigned long mpidr = smccc_get_arg1(vcpu);
+ unsigned int intid = smccc_get_arg2(vcpu);
+ struct kvm_vcpu *target;
+ struct rvic *rvic;
+
+ /* FIXME: The spec distinguishes between invalid MPIDR and invalid CPU */
+
+ target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+ if (!target) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_INVALID_CPU, 0),
+ 0, 0, 0);
+ return -1;
+ }
+
+ rvic = kvm_vcpu_to_rvic(target);
+ if (intid >= rvic->nr_total) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 1),
+ 0, 0, 0);
+ return -1;
+ }
+
+ *rvicp = rvic;
+ *intidp = intid;
+
+ return 0;
+}
+
+static void __rvic_action(struct kvm_vcpu *vcpu, rvic_action_fn_t action,
+ bool check_enabled)
+{
+ struct rvic *rvic;
+ unsigned long a0;
+ unsigned long flags;
+ unsigned int intid;
+
+ if (validate_rvic_call(vcpu, &rvic, &intid))
+ return;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ if (unlikely(check_enabled && !__rvic_is_enabled(rvic))) {
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+ } else {
+ bool was_signaling = __rvic_can_signal(rvic);
+ action(rvic, intid);
+ __rvic_sync_hcr(vcpu, rvic, was_signaling);
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+ }
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+static void rvic_set_masked(struct kvm_vcpu *vcpu)
+{
+ __rvic_action(vcpu, __rvic_set_masked, false);
+}
+
+static void rvic_clear_masked(struct kvm_vcpu *vcpu)
+{
+ __rvic_action(vcpu, __rvic_clear_masked, false);
+}
+
+static void rvic_clear_pending(struct kvm_vcpu *vcpu)
+{
+ __rvic_action(vcpu, __rvic_clear_pending, false);
+}
+
+static void rvic_signal(struct kvm_vcpu *vcpu)
+{
+ __rvic_action(vcpu, __rvic_set_pending, true);
+}
+
+static void rvic_is_pending(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+ struct rvic *rvic;
+ unsigned int intid;
+ bool res;
+
+ if (validate_rvic_call(vcpu, &rvic, &intid))
+ return;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ res = __rvic_is_pending(rvic, intid);
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+ res, 0, 0);
+}
+
+/*
+ * Ack and Resample are the only "interesting" operations that are
+ * strictly per-CPU.
+ */
+static void rvic_acknowledge(struct kvm_vcpu *vcpu)
+{
+ unsigned long a0, a1;
+ unsigned long flags;
+ unsigned int intid;
+ struct rvic *rvic;
+
+ rvic = kvm_vcpu_to_rvic(vcpu);
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ if (unlikely(!__rvic_is_enabled(rvic))) {
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+ a1 = 0;
+ } else {
+ intid = __rvic_ack(rvic);
+ __rvic_sync_hcr(vcpu, rvic, true);
+ if (unlikely(intid >= rvic->nr_total)) {
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_NO_INTERRUPTS, 0);
+ a1 = 0;
+ } else {
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+ a1 = intid;
+ }
+ }
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_resample(struct kvm_vcpu *vcpu)
+{
+ unsigned int intid = smccc_get_arg1(vcpu);
+ unsigned long flags;
+ unsigned long a0;
+ struct rvic *rvic;
+
+ rvic = kvm_vcpu_to_rvic(vcpu);
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ if (unlikely(intid >= rvic->nr_trusted)) {
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+ } else {
+ __rvic_resample(rvic, intid);
+
+ /*
+ * Don't bother finding out if we were signalling, we
+ * will update HCR_EL2 anyway as we are guaranteed not
+ * to be in a cross-call.
+ */
+ __rvic_sync_hcr(vcpu, rvic, true);
+ a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+ }
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu)
+{
+ pr_debug("RVIC: HC %08x", (unsigned int)smccc_get_function(vcpu));
+ switch (smccc_get_function(vcpu)) {
+ case SMC64_RVIC_VERSION:
+ rvic_version(vcpu);
+ break;
+ case SMC64_RVIC_INFO:
+ rvic_info(vcpu);
+ break;
+ case SMC64_RVIC_ENABLE:
+ rvic_enable(vcpu);
+ break;
+ case SMC64_RVIC_DISABLE:
+ rvic_disable(vcpu);
+ break;
+ case SMC64_RVIC_SET_MASKED:
+ rvic_set_masked(vcpu);
+ break;
+ case SMC64_RVIC_CLEAR_MASKED:
+ rvic_clear_masked(vcpu);
+ break;
+ case SMC64_RVIC_IS_PENDING:
+ rvic_is_pending(vcpu);
+ break;
+ case SMC64_RVIC_SIGNAL:
+ rvic_signal(vcpu);
+ break;
+ case SMC64_RVIC_CLEAR_PENDING:
+ rvic_clear_pending(vcpu);
+ break;
+ case SMC64_RVIC_ACKNOWLEDGE:
+ rvic_acknowledge(vcpu);
+ break;
+ case SMC64_RVIC_RESAMPLE:
+ rvic_resample(vcpu);
+ break;
+ default:
+ smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+ break;
+ }
+
+ return 1;
+}
+
+static void rvid_version(struct kvm_vcpu *vcpu)
+{
+ /* ALP0.3 is the name of the game */
+ smccc_set_retval(vcpu, RVID_STATUS_SUCCESS, RVID_VERSION(0, 3), 0, 0);
+}
+
+static void rvid_map(struct kvm_vcpu *vcpu)
+{
+ unsigned long input = smccc_get_arg1(vcpu);
+ unsigned long mpidr = smccc_get_arg2(vcpu);
+ unsigned int intid = smccc_get_arg3(vcpu);
+ unsigned long flags;
+ struct rvic_vm_data *data;
+ struct kvm_vcpu *target;
+
+ data = vcpu->kvm->arch.irqchip_data;
+
+ if (input >= rvic_nr_untrusted(data)) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+ 0, 0, 0);
+ return;
+ }
+
+ /* FIXME: different error from RVIC. Why? */
+ target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+ if (!target) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 1),
+ 0, 0, 0);
+ return;
+ }
+
+ if (intid < data->nr_trusted || intid >= data->nr_total) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 2),
+ 0, 0, 0);
+ return;
+ }
+
+ spin_lock_irqsave(&data->lock, flags);
+ data->rvid_map[input].target_vcpu = target->vcpu_id;
+ data->rvid_map[input].intid = intid;
+ spin_unlock_irqrestore(&data->lock, flags);
+
+ smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+static void rvid_unmap(struct kvm_vcpu *vcpu)
+{
+ unsigned long input = smccc_get_arg1(vcpu);
+ unsigned long flags;
+ struct rvic_vm_data *data;
+
+ data = vcpu->kvm->arch.irqchip_data;
+
+ if (input >= rvic_nr_untrusted(data)) {
+ smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+ 0, 0, 0);
+ return;
+ }
+
+ spin_lock_irqsave(&data->lock, flags);
+ data->rvid_map[input].target_vcpu = 0;
+ data->rvid_map[input].intid = 0;
+ spin_unlock_irqrestore(&data->lock, flags);
+
+ smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu)
+{
+ pr_debug("RVID: HC %08x", (unsigned int)smccc_get_function(vcpu));
+ switch (smccc_get_function(vcpu)) {
+ case SMC64_RVID_VERSION:
+ rvid_version(vcpu);
+ break;
+ case SMC64_RVID_MAP:
+ rvid_map(vcpu);
+ break;
+ case SMC64_RVID_UNMAP:
+ rvid_unmap(vcpu);
+ break;
+ default:
+ smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+ break;
+ }
+
+ return 1;
+}
+
+/*
+ * KVM internal interface to the rVIC
+ */
+
+/* This *must* be called from the vcpu thread */
+static void rvic_flush_signaling_state(struct kvm_vcpu *vcpu)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ __rvic_sync_hcr(vcpu, rvic, true);
+
+ spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
+/* This can be called from any context */
+static void rvic_vcpu_inject_irq(struct kvm_vcpu *vcpu, unsigned int intid,
+ bool level)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long flags;
+ bool prev;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+
+ if (WARN_ON(intid >= rvic->nr_total))
+ goto out;
+
+ /*
+ * Although really ugly, this should be safe as we hold the
+ * rvic lock, and the only path that uses this information is
+ * resample, which takes this lock too.
+ */
+ if (!rvic->irqs[intid].get_line_level)
+ rvic->irqs[intid].line_level = level;
+
+ if (level) {
+ prev = __rvic_can_signal(rvic);
+ __rvic_set_pending(rvic, intid);
+ if (prev != __rvic_can_signal(rvic))
+ __rvic_kick_vcpu(vcpu);
+ }
+out:
+ spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
+static int rvic_inject_irq(struct kvm *kvm, unsigned int cpu,
+ unsigned int intid, bool level, void *owner)
+{
+ struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
+ struct rvic *rvic;
+
+ if (unlikely(!vcpu))
+ return -EINVAL;
+
+ rvic = kvm_vcpu_to_rvic(vcpu);
+ if (unlikely(intid >= rvic->nr_total))
+ return -EINVAL;
+
+ /* Ignore interrupt owner for now */
+ rvic_vcpu_inject_irq(vcpu, intid, level);
+ return 0;
+}
+
+static int rvic_inject_userspace_irq(struct kvm *kvm, unsigned int type,
+ unsigned int cpu,
+ unsigned int intid, bool level)
+{
+ struct rvic_vm_data *data = kvm->arch.irqchip_data;
+ unsigned long flags;
+ u16 output;
+
+ switch (type) {
+ case KVM_ARM_IRQ_TYPE_SPI:
+ /*
+ * Userspace can only inject interrupts that are
+ * translated by the rvid, so the cpu parameter is
+ * irrelevant and we override it when resolving the
+ * translation.
+ */
+ if (intid >= rvic_nr_untrusted(data))
+ return -EINVAL;
+
+ spin_lock_irqsave(&data->lock, flags);
+ output = data->rvid_map[intid].intid;
+ cpu = data->rvid_map[intid].target_vcpu;
+ spin_unlock_irqrestore(&data->lock, flags);
+
+ /* Silently ignore unmapped interrupts */
+ if (output < data->nr_trusted)
+ return 0;
+
+ return rvic_inject_irq(kvm, cpu, output, level, NULL);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int rvic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct rvic_vm_data *data = vcpu->kvm->arch.irqchip_data;
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ int i;
+
+ /* irqchip not ready yet, we will come back later */
+ if (!data)
+ return 0;
+
+ if (WARN_ON(rvic->irqs))
+ return -EINVAL;
+
+ spin_lock_init(&rvic->lock);
+ INIT_LIST_HEAD(&rvic->delivery);
+ rvic->nr_trusted = data->nr_trusted;
+ rvic->nr_total = data->nr_total;
+ rvic->enabled = false;
+
+ rvic->irqs = kcalloc(rvic->nr_total, sizeof(*rvic->irqs), GFP_ATOMIC);
+ if (!rvic->irqs)
+ return -ENOMEM;
+
+ for (i = 0; i < rvic->nr_total; i++) {
+ struct rvic_irq *irq = &rvic->irqs[i];
+
+ spin_lock_init(&irq->lock);
+ INIT_LIST_HEAD(&irq->delivery_entry);
+ irq->get_line_level = NULL;
+ irq->intid = i;
+ irq->host_irq = 0;
+ irq->pending = false;
+ irq->masked = true;
+ irq->line_level = false;
+ }
+
+ return 0;
+}
+
+static void rvic_destroy(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ mutex_lock(&kvm->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+
+ INIT_LIST_HEAD(&rvic->delivery);
+ kfree(rvic->irqs);
+ rvic->irqs = NULL;
+ }
+
+ mutex_unlock(&kvm->lock);
+}
+
+static int rvic_pending_irq(struct kvm_vcpu *vcpu)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&rvic->lock, flags);
+ res = __rvic_can_signal(rvic);
+ spin_unlock_irqrestore(&rvic->lock, flags);
+
+ return res;
+}
+
+static int rvic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+ u32 intid, bool (*get_line_level)(int))
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq->lock, flags);
+ irq->host_irq = host_irq;
+ irq->get_line_level = get_line_level;
+ spin_unlock_irqrestore(&irq->lock, flags);
+
+ return 0;
+}
+
+static int rvic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int intid)
+{
+ struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+ struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq->lock, flags);
+ irq->host_irq = 0;
+ irq->get_line_level = NULL;
+ spin_unlock_irqrestore(&irq->lock, flags);
+
+ return 0;
+}
+
+static int rvic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ /* Abuse the userspace interface to perform the routing */
+ return rvic_inject_userspace_irq(kvm, KVM_ARM_IRQ_TYPE_SPI, 0,
+ e->irqchip.pin, level);
+}
+
+static int rvic_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ return -ENODEV;
+}
+
+static int rvic_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ if (e->type != KVM_IRQ_ROUTING_IRQCHIP)
+ return -EWOULDBLOCK;
+
+ return rvic_irqfd_set_irq(e, kvm, irq_source_id, level, line_status);
+}
+
+static const struct kvm_irqchip_flow rvic_irqchip_flow = {
+ .irqchip_destroy = rvic_destroy,
+ .irqchip_vcpu_init = rvic_vcpu_init,
+ /* Nothing to do on block/unblock */
+ /* Nothing to do on load/put */
+ .irqchip_vcpu_pending_irq = rvic_pending_irq,
+ .irqchip_vcpu_flush_hwstate = rvic_flush_signaling_state,
+ /* Nothing to do on sync_hwstate */
+ .irqchip_inject_irq = rvic_inject_irq,
+ .irqchip_inject_userspace_irq = rvic_inject_userspace_irq,
+ /* No reset_mapped_irq as we allow spurious interrupts */
+ .irqchip_map_phys_irq = rvic_map_phys_irq,
+ .irqchip_unmap_phys_irq = rvic_unmap_phys_irq,
+ .irqchip_irqfd_set_irq = rvic_irqfd_set_irq,
+ .irqchip_set_msi = rvic_set_msi,
+ .irqchip_set_irq_inatomic = rvic_set_irq_inatomic,
+};
+
+static int rvic_setup_default_irq_routing(struct kvm *kvm)
+{
+ struct rvic_vm_data *data = kvm->arch.irqchip_data;
+ unsigned int nr = rvic_nr_untrusted(data);
+ struct kvm_irq_routing_entry *entries;
+ int i, ret;
+
+ entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ entries[i].gsi = i;
+ entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ entries[i].u.irqchip.irqchip = 0;
+ entries[i].u.irqchip.pin = i;
+ }
+ ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+ kfree(entries);
+ return ret;
+}
+
+/* Device management */
+static int rvic_device_create(struct kvm_device *dev, u32 type)
+{
+ struct kvm *kvm = dev->kvm;
+ struct kvm_vcpu *vcpu;
+ int i, ret;
+
+ if (irqchip_in_kernel(kvm))
+ return -EEXIST;
+
+ ret = -EBUSY;
+ if (!lock_all_vcpus(kvm))
+ return ret;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.has_run_once)
+ goto out_unlock;
+ }
+
+ ret = 0;
+
+ /*
+ * The good thing about not having any HW is that you don't
+ * get the limitations of the HW...
+ */
+ kvm->arch.max_vcpus = KVM_MAX_VCPUS;
+ kvm->arch.irqchip_type = IRQCHIP_RVIC;
+ kvm->arch.irqchip_flow = rvic_irqchip_flow;
+ kvm->arch.irqchip_data = NULL;
+
+out_unlock:
+ unlock_all_vcpus(kvm);
+ return ret;
+}
+
+static void rvic_device_destroy(struct kvm_device *dev)
+{
+ kfree(dev->kvm->arch.irqchip_data);
+ kfree(dev);
+}
+
+static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct rvic_vm_data *data;
+ struct kvm_vcpu *vcpu;
+ u32 __user *uaddr, val;
+ u16 trusted, total;
+ int i, ret = -ENXIO;
+
+ mutex_lock(&dev->kvm->lock);
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+ if (attr->attr)
+ break;
+
+ if (dev->kvm->arch.irqchip_data) {
+ ret = -EBUSY;
+ break;
+ }
+
+ uaddr = (u32 __user *)(uintptr_t)attr->addr;
+ if (get_user(val, uaddr)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
+ total = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
+ if (total < trusted || trusted < 32 || total < 64 ||
+ trusted % 32 || total % 32 || total > 2048) {
+ ret = -EINVAL;
+ break;
+ }
+
+ data = kzalloc(struct_size(data, rvid_map, (total - trusted)),
+ GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ data->nr_trusted = trusted;
+ data->nr_total = total;
+ spin_lock_init(&data->lock);
+ /* Default to no mapping */
+ for (i = 0; i < (total - trusted); i++) {
+ /*
+ * an intid < nr_trusted is invalid as the
+ * result of a translation through the rvid,
+ * hence the input is unmapped.
+ */
+ data->rvid_map[i].target_vcpu = 0;
+ data->rvid_map[i].intid = 0;
+ }
+
+ dev->kvm->arch.irqchip_data = data;
+
+ ret = 0;
+ break;
+
+ case KVM_DEV_ARM_RVIC_GRP_INIT:
+ if (attr->attr)
+ break;
+
+ if (!dev->kvm->arch.irqchip_data)
+ break;
+
+ ret = 0;
+
+ /* Init the rvic on any already created vcpu */
+ kvm_for_each_vcpu(i, vcpu, dev->kvm) {
+ ret = rvic_vcpu_init(vcpu);
+ if (ret)
+ break;
+ }
+
+ if (!ret)
+ ret = rvic_setup_default_irq_routing(dev->kvm);
+ if (!ret)
+ dev->kvm->arch.irqchip_finalized = true;
+ break;
+
+ default:
+ break;
+ }
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+}
+
+static int rvic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct rvic_vm_data *data;
+ u32 __user *uaddr, val;
+ int ret = -ENXIO;
+
+ mutex_lock(&dev->kvm->lock);
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+ if (attr->attr)
+ break;
+
+ data = dev->kvm->arch.irqchip_data;
+ if (!data)
+ break;
+
+ val = FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK,
+ data->nr_trusted);
+ val |= FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK,
+ data->nr_total);
+
+ uaddr = (u32 __user *)(uintptr_t)attr->addr;
+ ret = put_user(val, uaddr);
+ break;
+
+ default:
+ break;
+ }
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+}
+
+static int rvic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+ case KVM_DEV_ARM_RVIC_GRP_INIT:
+ if (attr->attr)
+ break;
+ ret = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static const struct kvm_device_ops rvic_dev_ops = {
+ .name = "kvm-arm-rvic",
+ .create = rvic_device_create,
+ .destroy = rvic_device_destroy,
+ .set_attr = rvic_set_attr,
+ .get_attr = rvic_get_attr,
+ .has_attr = rvic_has_attr,
+};
+
+int kvm_register_rvic_device(void)
+{
+ return kvm_register_device_ops(&rvic_dev_ops, KVM_DEV_TYPE_ARM_RVIC);
+}
diff --git a/include/kvm/arm_rvic.h b/include/kvm/arm_rvic.h
new file mode 100644
index 000000000000..9e67a83fa384
--- /dev/null
+++ b/include/kvm/arm_rvic.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <maz at kernel.org>
+ */
+
+#ifndef __KVM_ARM_RVIC_H__
+#define __KVM_ARM_RVIC_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct kvm_vcpu;
+
+struct rvic_irq {
+ spinlock_t lock;
+ struct list_head delivery_entry;
+ bool (*get_line_level)(int intid);
+ unsigned int intid;
+ unsigned int host_irq;
+ bool pending;
+ bool masked;
+ bool line_level; /* If get_line_level == NULL */
+};
+
+struct rvic {
+ spinlock_t lock;
+ struct list_head delivery;
+ struct rvic_irq *irqs;
+ unsigned int nr_trusted;
+ unsigned int nr_total;
+ bool enabled;
+};
+
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_register_rvic_device(void);
+
+#endif
diff --git a/include/linux/irqchip/irq-rvic.h b/include/linux/irqchip/irq-rvic.h
index 4545c1e89741..b188773729fb 100644
--- a/include/linux/irqchip/irq-rvic.h
+++ b/include/linux/irqchip/irq-rvic.h
@@ -57,6 +57,8 @@
#define SMC64_RVIC_ACKNOWLEDGE SMC64_RVIC_FN(9)
#define SMC64_RVIC_RESAMPLE SMC64_RVIC_FN(10)
+#define SMC64_RVIC_LAST SMC64_RVIC_RESAMPLE
+
#define RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS 0
#define RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS 1
@@ -82,6 +84,8 @@
#define SMC64_RVID_MAP SMC64_RVID_FN(1)
#define SMC64_RVID_UNMAP SMC64_RVID_FN(2)
+#define SMC64_RVID_LAST SMC64_RVID_UNMAP
+
#define RVID_VERSION(M, m) RVIx_VERSION((M), (m))
#define RVID_VERSION_MAJOR(v) RVIx_VERSION_MAJOR((v))
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f6d86033c4fa..6d245d2dc9e6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1264,6 +1264,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
+ KVM_DEV_TYPE_ARM_RVIC,
+#define KVM_DEV_TYPE_ARM_RVIC KVM_DEV_TYPE_ARM_RVIC
KVM_DEV_TYPE_MAX,
};
--
2.27.0