[RFC PATCH v2 16/58] KVM: arm64: iommu: Add domains
Mostafa Saleh
smostafa at google.com
Thu Dec 12 10:03:40 PST 2024
The IOMMU domain abstraction allows sharing the same page tables
between multiple devices. That may be necessary due to hardware
constraints, when multiple devices cannot be isolated by the IOMMU
(on a conventional PCI bus, for example). It may also help with
optimizing resource or TLB use. For pKVM in particular, it may be
useful to reduce the amount of memory required for page tables. All
devices owned by the host kernel could be attached to the same domain
(though that requires host changes).
All IOMMUs share a single domain space, holding up to 2^16 domains.
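For example, with 4kB pages a leaf table holds 256 16-byte domain
structs, so a 16-bit domain ID splits into an 8-bit index into the
root table and an 8-bit index into a leaf table; roughly (mirroring
handle_to_domain() below, casts omitted):

	domains = kvm_hyp_iommu_domains[domain_id / KVM_IOMMU_DOMAINS_PER_PAGE];
	domain  = &domains[domain_id % KVM_IOMMU_DOMAINS_PER_PAGE];

Leaf tables are allocated lazily, from pages donated by the host.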
Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
Signed-off-by: Mostafa Saleh <smostafa at google.com>
---
arch/arm64/kvm/hyp/hyp-constants.c | 1 +
arch/arm64/kvm/hyp/include/nvhe/iommu.h | 4 +
arch/arm64/kvm/hyp/nvhe/iommu/iommu.c | 102 +++++++++++++++++++++++-
arch/arm64/kvm/iommu.c | 10 +++
include/kvm/iommu.h | 48 +++++++++++
5 files changed, 161 insertions(+), 4 deletions(-)
create mode 100644 include/kvm/iommu.h
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
index 5fb26cabd606..96a6b45b424a 100644
--- a/arch/arm64/kvm/hyp/hyp-constants.c
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -8,5 +8,6 @@
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
+ DEFINE(HYP_SPINLOCK_SIZE, sizeof(hyp_spinlock_t));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
index 5f91605cd48a..8f619f415d1f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/iommu.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
@@ -4,6 +4,8 @@
#include <asm/kvm_host.h>
+#include <kvm/iommu.h>
+
#include <nvhe/alloc_mgt.h>
/* Hypercall handlers */
@@ -31,6 +33,8 @@ void kvm_iommu_reclaim_pages(void *p, u8 order);
struct kvm_iommu_ops {
int (*init)(void);
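+	/* Allocate and free driver-specific resources for a domain */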
+ int (*alloc_domain)(struct kvm_hyp_iommu_domain *domain, int type);
+ void (*free_domain)(struct kvm_hyp_iommu_domain *domain);
};
int kvm_iommu_init(void);
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index af6ae9b4dc51..ba2aed52a74f 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -4,12 +4,15 @@
*
* Copyright (C) 2022 Linaro Ltd.
*/
+#include <kvm/iommu.h>
+
#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
/* Only one set of ops supported, similarly to the kernel */
struct kvm_iommu_ops *kvm_iommu_ops;
+void **kvm_hyp_iommu_domains;
/*
* Common pool that can be used by IOMMU driver to allocate pages.
@@ -18,6 +21,9 @@ static struct hyp_pool iommu_host_pool;
DECLARE_PER_CPU(struct kvm_hyp_req, host_hyp_reqs);
+/* Protects domains in kvm_hyp_iommu_domains */
+static DEFINE_HYP_SPINLOCK(kvm_iommu_domain_lock);
+
static int kvm_iommu_refill(struct kvm_hyp_memcache *host_mc)
{
if (!kvm_iommu_ops)
@@ -89,28 +95,116 @@ void kvm_iommu_reclaim_pages(void *p, u8 order)
hyp_put_page(&iommu_host_pool, p);
}
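+/*
+ * Return the domain matching @domain_id, or NULL if the ID is out of
+ * range or a leaf table could not be allocated. Leaf tables are
+ * allocated lazily, from pages donated by the host.
+ */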
+static struct kvm_hyp_iommu_domain *
+handle_to_domain(pkvm_handle_t domain_id)
+{
+ int idx;
+ struct kvm_hyp_iommu_domain *domains;
+
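+ /* domain_id comes from the host; clamp it to avoid speculative out-of-bounds access */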
+ if (domain_id >= KVM_IOMMU_MAX_DOMAINS)
+ return NULL;
+ domain_id = array_index_nospec(domain_id, KVM_IOMMU_MAX_DOMAINS);
+
+ idx = domain_id / KVM_IOMMU_DOMAINS_PER_PAGE;
+ domains = (struct kvm_hyp_iommu_domain *)READ_ONCE(kvm_hyp_iommu_domains[idx]);
+ if (!domains) {
+ domains = kvm_iommu_donate_page();
+ if (!domains)
+ return NULL;
+ /*
+ * handle_to_domain() does not have to be called under a lock.
+ * Although this path allocates a leaf table on any lookup, doing
+ * so is only valid from alloc_domain(), which does hold a lock.
+ * Racing allocations are therefore a host bug, and we don't need
+ * to be delicate about them.
+ */
+ if (WARN_ON(cmpxchg64_relaxed(&kvm_hyp_iommu_domains[idx], 0,
+ (void *)domains) != 0)) {
+ kvm_iommu_reclaim_page(domains);
+ return NULL;
+ }
+ }
+ return &domains[domain_id % KVM_IOMMU_DOMAINS_PER_PAGE];
+}
+
int kvm_iommu_init(void)
{
int ret;
+ u64 domain_root_pfn = __hyp_pa(kvm_hyp_iommu_domains) >> PAGE_SHIFT;
- if (!kvm_iommu_ops || !kvm_iommu_ops->init)
+ if (!kvm_iommu_ops ||
+ !kvm_iommu_ops->init ||
+ !kvm_iommu_ops->alloc_domain ||
+ !kvm_iommu_ops->free_domain)
return -ENODEV;
ret = hyp_pool_init_empty(&iommu_host_pool, 64);
if (ret)
return ret;
- return kvm_iommu_ops->init();
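+ /* Donate the root domain table, allocated by the host, to the hypervisor */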
+ ret = __pkvm_host_donate_hyp(domain_root_pfn,
+ KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
+ if (ret)
+ return ret;
+
+ ret = kvm_iommu_ops->init();
+ if (ret)
+ goto out_reclaim_domain;
+
+ return ret;
+
+out_reclaim_domain:
+ __pkvm_hyp_donate_host(domain_root_pfn, KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
+ return ret;
}
int kvm_iommu_alloc_domain(pkvm_handle_t domain_id, int type)
{
- return -ENODEV;
+ int ret = -EINVAL;
+ struct kvm_hyp_iommu_domain *domain;
+
+ domain = handle_to_domain(domain_id);
+ if (!domain)
+ return -ENOMEM;
+
+ hyp_spin_lock(&kvm_iommu_domain_lock);
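+ /* A non-zero refcount means this domain ID is already allocated */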
+ if (atomic_read(&domain->refs))
+ goto out_unlock;
+
+ domain->domain_id = domain_id;
+ ret = kvm_iommu_ops->alloc_domain(domain, type);
+ if (ret)
+ goto out_unlock;
+
+ atomic_set_release(&domain->refs, 1);
+out_unlock:
+ hyp_spin_unlock(&kvm_iommu_domain_lock);
+ return ret;
}
int kvm_iommu_free_domain(pkvm_handle_t domain_id)
{
- return -ENODEV;
+ int ret = 0;
+ struct kvm_hyp_iommu_domain *domain;
+
+ domain = handle_to_domain(domain_id);
+ if (!domain)
+ return -EINVAL;
+
+ hyp_spin_lock(&kvm_iommu_domain_lock);
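+ /* Only allow freeing a domain that is allocated and has no other users (refs == 1) */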
+ if (WARN_ON(atomic_cmpxchg_acquire(&domain->refs, 1, 0) != 1)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ kvm_iommu_ops->free_domain(domain);
+
+ memset(domain, 0, sizeof(*domain));
+
+out_unlock:
+ hyp_spin_unlock(&kvm_iommu_domain_lock);
+
+ return ret;
}
int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
diff --git a/arch/arm64/kvm/iommu.c b/arch/arm64/kvm/iommu.c
index ed77ea0d12bb..af3417e6259d 100644
--- a/arch/arm64/kvm/iommu.c
+++ b/arch/arm64/kvm/iommu.c
@@ -5,6 +5,9 @@
*/
#include <asm/kvm_mmu.h>
+
+#include <kvm/iommu.h>
+
#include <linux/kvm_host.h>
struct kvm_iommu_driver *iommu_driver;
@@ -37,6 +40,13 @@ int kvm_iommu_init_driver(void)
return -ENODEV;
}
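+ /* Allocate the root domain table; it is donated to the hypervisor by kvm_iommu_init() */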
+ kvm_hyp_iommu_domains = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(KVM_IOMMU_DOMAINS_ROOT_SIZE));
+ if (!kvm_hyp_iommu_domains)
+ return -ENOMEM;
+
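+ /* The table is only dereferenced at EL2, so store its hyp VA */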
+ kvm_hyp_iommu_domains = kern_hyp_va(kvm_hyp_iommu_domains);
+
return iommu_driver->init_driver();
}
diff --git a/include/kvm/iommu.h b/include/kvm/iommu.h
new file mode 100644
index 000000000000..10ecaae0f6a3
--- /dev/null
+++ b/include/kvm/iommu.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_IOMMU_H
+#define __KVM_IOMMU_H
+
+#include <asm/kvm_host.h>
+#include <linux/io-pgtable.h>
+#ifdef __KVM_NVHE_HYPERVISOR__
+#include <nvhe/spinlock.h>
+#else
+#include "hyp_constants.h"
+#endif
+
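+/*
+ * struct kvm_hyp_iommu_domain - IOMMU domain state tracked by the hypervisor
+ * @refs: reference count; non-zero while the domain is allocated
+ * @domain_id: identifier of this domain, chosen by the host
+ * @priv: private pointer for the IOMMU driver
+ */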
+struct kvm_hyp_iommu_domain {
+ atomic_t refs;
+ pkvm_handle_t domain_id;
+ void *priv;
+};
+
+extern void **kvm_nvhe_sym(kvm_hyp_iommu_domains);
+#define kvm_hyp_iommu_domains kvm_nvhe_sym(kvm_hyp_iommu_domains)
+
+/*
+ * At the moment the number of domains is limited to 2^16.
+ * In practice we're rarely going to need a lot of domains. To avoid allocating
+ * a large domain table, we use a two-level table, indexed by domain ID. With
+ * 4kB pages and 16-byte domains, the leaf table contains 256 domains, and the
+ * root table 256 pointers. With 64kB pages, the leaf table contains 4096
+ * domains and the root table 16 pointers. In this case, or when using 8-bit
+ * VMIDs, it may be more advantageous to use a single level. But using two
+ * levels makes it easy to extend the maximum number of domains.
+ */
+#define KVM_IOMMU_MAX_DOMAINS (1 << 16)
+
+/* Number of entries in the level-2 domain table */
+#define KVM_IOMMU_DOMAINS_PER_PAGE \
+ (PAGE_SIZE / sizeof(struct kvm_hyp_iommu_domain))
+
+/* Number of entries in the root domain table */
+#define KVM_IOMMU_DOMAINS_ROOT_ENTRIES \
+ (KVM_IOMMU_MAX_DOMAINS / KVM_IOMMU_DOMAINS_PER_PAGE)
+
+#define KVM_IOMMU_DOMAINS_ROOT_SIZE \
+ (KVM_IOMMU_DOMAINS_ROOT_ENTRIES * sizeof(void *))
+
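+/* Number of pages in the root domain table */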
+#define KVM_IOMMU_DOMAINS_ROOT_ORDER_NR \
+ (1 << get_order(KVM_IOMMU_DOMAINS_ROOT_SIZE))
+
+#endif /* __KVM_IOMMU_H */
--
2.47.0.338.g60cca15819-goog