[RFC PATCH 40/45] iommu/arm-smmu-v3-kvm: Add IOMMU ops

Mostafa Saleh smostafa at google.com
Wed Sep 20 09:27:41 PDT 2023


On Wed, Feb 01, 2023 at 12:53:24PM +0000, Jean-Philippe Brucker wrote:
> Forward alloc_domain(), attach_dev(), map_pages(), etc to the
> hypervisor.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   | 330 +++++++++++++++++-
>  1 file changed, 328 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> index 55489d56fb5b..930d78f6e29f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
> @@ -22,10 +22,28 @@ struct host_arm_smmu_device {
>  #define smmu_to_host(_smmu) \
>  	container_of(_smmu, struct host_arm_smmu_device, smmu);
>  
> +struct kvm_arm_smmu_master {
> +	struct arm_smmu_device		*smmu;
> +	struct device			*dev;
> +	struct kvm_arm_smmu_domain	*domain;
> +};
> +
> +struct kvm_arm_smmu_domain {
> +	struct iommu_domain		domain;
> +	struct arm_smmu_device		*smmu;
> +	struct mutex			init_mutex;
> +	unsigned long			pgd;
> +	pkvm_handle_t			id;
> +};
> +
> +#define to_kvm_smmu_domain(_domain) \
> +	container_of(_domain, struct kvm_arm_smmu_domain, domain)
> +
>  static size_t				kvm_arm_smmu_cur;
>  static size_t				kvm_arm_smmu_count;
>  static struct hyp_arm_smmu_v3_device	*kvm_arm_smmu_array;
>  static struct kvm_hyp_iommu_memcache	*kvm_arm_smmu_memcache;
> +static DEFINE_IDA(kvm_arm_smmu_domain_ida);
>  
>  static DEFINE_PER_CPU(local_lock_t, memcache_lock) =
>  				INIT_LOCAL_LOCK(memcache_lock);
> @@ -57,7 +75,6 @@ static void *kvm_arm_smmu_host_va(phys_addr_t pa)
>  	return __va(pa);
>  }
>  
> -__maybe_unused
>  static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -74,7 +91,6 @@ static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu)
>  				     kvm_arm_smmu_host_pa, smmu);
>  }
>  
> -__maybe_unused
>  static void kvm_arm_smmu_reclaim_memcache(void)
>  {
>  	struct kvm_hyp_memcache *mc;
> @@ -101,6 +117,299 @@ static void kvm_arm_smmu_reclaim_memcache(void)
>  	__ret;							\
>  })
>  
> +static struct platform_driver kvm_arm_smmu_driver;
> +
> +static struct arm_smmu_device *
> +kvm_arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
> +{
> +	struct device *dev;
> +
> +	dev = driver_find_device_by_fwnode(&kvm_arm_smmu_driver.driver, fwnode);
> +	put_device(dev);
> +	return dev ? dev_get_drvdata(dev) : NULL;
> +}
> +
> +static struct iommu_ops kvm_arm_smmu_ops;
> +
> +static struct iommu_device *kvm_arm_smmu_probe_device(struct device *dev)
> +{
> +	struct arm_smmu_device *smmu;
> +	struct kvm_arm_smmu_master *master;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +
> +	if (!fwspec || fwspec->ops != &kvm_arm_smmu_ops)
> +		return ERR_PTR(-ENODEV);
> +
> +	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
> +		return ERR_PTR(-EBUSY);
> +
> +	smmu = kvm_arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return ERR_PTR(-ENODEV);
> +
> +	master = kzalloc(sizeof(*master), GFP_KERNEL);
> +	if (!master)
> +		return ERR_PTR(-ENOMEM);
> +
> +	master->dev = dev;
> +	master->smmu = smmu;
> +	dev_iommu_priv_set(dev, master);
> +
> +	return &smmu->iommu;
> +}
> +
> +static void kvm_arm_smmu_release_device(struct device *dev)
> +{
> +	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
> +
> +	kfree(master);
> +	iommu_fwspec_free(dev);
> +}
> +
> +static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
> +{
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain;
> +
> +	/*
> +	 * We don't support
> +	 * - IOMMU_DOMAIN_IDENTITY because we rely on the host telling the
> +	 *   hypervisor which pages are used for DMA.
> +	 * - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
> +	 *   donation to guests.
> +	 */
> +	if (type != IOMMU_DOMAIN_DMA &&
> +	    type != IOMMU_DOMAIN_UNMANAGED)
> +		return NULL;
> +
> +	kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
> +	if (!kvm_smmu_domain)
> +		return NULL;
> +
> +	mutex_init(&kvm_smmu_domain->init_mutex);
> +
> +	return &kvm_smmu_domain->domain;
> +}
> +
> +static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
> +					struct kvm_arm_smmu_master *master)
> +{
> +	int ret = 0;
> +	struct page *p;
> +	unsigned long pgd;
> +	struct arm_smmu_device *smmu = master->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	if (kvm_smmu_domain->smmu) {
> +		if (kvm_smmu_domain->smmu != smmu)
> +			return -EINVAL;
> +		return 0;
> +	}
> +
> +	ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, 0, 1 << smmu->vmid_bits,
> +			      GFP_KERNEL);
> +	if (ret < 0)
> +		return ret;
> +	kvm_smmu_domain->id = ret;
> +
> +	/*
> +	 * PGD allocation does not use the memcache because it may be of higher
> +	 * order when concatenated.
> +	 */
> +	p = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL | __GFP_ZERO,
> +			     host_smmu->pgd_order);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	pgd = (unsigned long)page_to_virt(p);
> +
> +	local_lock_irq(&memcache_lock);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
> +				   host_smmu->id, kvm_smmu_domain->id, pgd);
> +	local_unlock_irq(&memcache_lock);
> +	if (ret)
> +		goto err_free;
> +
> +	kvm_smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap;
> +	kvm_smmu_domain->domain.geometry.aperture_end = (1UL << smmu->ias) - 1;
> +	kvm_smmu_domain->domain.geometry.force_aperture = true;
> +	kvm_smmu_domain->smmu = smmu;
> +	kvm_smmu_domain->pgd = pgd;
> +
> +	return 0;
> +
> +err_free:
> +	free_pages(pgd, host_smmu->pgd_order);
> +	ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
> +	return ret;
> +}
> +
> +static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
> +{
> +	int ret;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +
> +	if (smmu) {
> +		struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain,
> +					host_smmu->id, kvm_smmu_domain->id);
> +		/*
> +		 * On failure, leak the pgd because it probably hasn't been
> +		 * reclaimed by the host.
> +		 */
> +		if (!WARN_ON(ret))
> +			free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
I believe this double-frees the pgd in case attach_dev fails, as
kvm_arm_smmu_domain_finalize() would also try to free it there.

I think this (kvm_arm_smmu_domain_free()) is the right place to free the pgd.
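
One way to make that ownership explicit could be a small helper along these
lines -- untested sketch only, kvm_arm_smmu_put_pgd() is just a name I made
up, and it assumes finalize records the pgd in kvm_smmu_domain->pgd before
the hypercall -- so the error path in kvm_arm_smmu_domain_finalize() and
kvm_arm_smmu_domain_free() can't both end up releasing the same pages:

static void kvm_arm_smmu_put_pgd(struct kvm_arm_smmu_domain *kvm_smmu_domain,
				 struct host_arm_smmu_device *host_smmu)
{
	/* Already released (or never recorded): nothing to do. */
	if (!kvm_smmu_domain->pgd)
		return;

	free_pages(kvm_smmu_domain->pgd, host_smmu->pgd_order);
	kvm_smmu_domain->pgd = 0;
}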

> +		ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
> +	}
> +	kfree(kvm_smmu_domain);
> +}
> +
> +static int kvm_arm_smmu_detach_dev(struct host_arm_smmu_device *host_smmu,
> +				   struct kvm_arm_smmu_master *master)
> +{
> +	int i, ret;
> +	struct arm_smmu_device *smmu = &host_smmu->smmu;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
> +
> +	if (!master->domain)
> +		return 0;
> +
> +	for (i = 0; i < fwspec->num_ids; i++) {
> +		int sid = fwspec->ids[i];
> +
> +		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_detach_dev,
> +					host_smmu->id, master->domain->id, sid);
> +		if (ret) {
> +			dev_err(smmu->dev, "cannot detach device %s (0x%x): %d\n",
> +				dev_name(master->dev), sid, ret);
> +			break;
> +		}
> +	}
> +
> +	master->domain = NULL;
> +
> +	return ret;
> +}
> +
> +static int kvm_arm_smmu_attach_dev(struct iommu_domain *domain,
> +				   struct device *dev)
> +{
> +	int i, ret;
> +	struct arm_smmu_device *smmu;
> +	struct host_arm_smmu_device *host_smmu;
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +	struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +
> +	if (!master)
> +		return -ENODEV;
> +
> +	smmu = master->smmu;
> +	host_smmu = smmu_to_host(smmu);
> +
> +	ret = kvm_arm_smmu_detach_dev(host_smmu, master);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&kvm_smmu_domain->init_mutex);
> +	ret = kvm_arm_smmu_domain_finalize(kvm_smmu_domain, master);
> +	mutex_unlock(&kvm_smmu_domain->init_mutex);
> +	if (ret)
> +		return ret;
> +
> +	local_lock_irq(&memcache_lock);
> +	for (i = 0; i < fwspec->num_ids; i++) {
> +		int sid = fwspec->ids[i];
> +
> +		ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_attach_dev,
> +					   host_smmu->id, kvm_smmu_domain->id,
> +					   sid);
> +		if (ret) {
> +			dev_err(smmu->dev, "cannot attach device %s (0x%x): %d\n",
> +				dev_name(dev), sid, ret);
> +			goto out_unlock;
> +		}
> +	}
> +	master->domain = kvm_smmu_domain;
> +
> +out_unlock:
> +	if (ret)
> +		kvm_arm_smmu_detach_dev(host_smmu, master);
> +	local_unlock_irq(&memcache_lock);
> +	return ret;
> +}
> +
> +static int kvm_arm_smmu_map_pages(struct iommu_domain *domain,
> +				  unsigned long iova, phys_addr_t paddr,
> +				  size_t pgsize, size_t pgcount, int prot,
> +				  gfp_t gfp, size_t *mapped)
> +{
> +	int ret;
> +	unsigned long irqflags;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	local_lock_irqsave(&memcache_lock, irqflags);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_map_pages,
> +				   host_smmu->id, kvm_smmu_domain->id, iova,
> +				   paddr, pgsize, pgcount, prot);
> +	local_unlock_irqrestore(&memcache_lock, irqflags);
> +	if (ret)
> +		return ret;
> +
> +	*mapped = pgsize * pgcount;
> +	return 0;
> +}
> +
> +static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
> +				       unsigned long iova, size_t pgsize,
> +				       size_t pgcount,
> +				       struct iommu_iotlb_gather *iotlb_gather)
> +{
> +	int ret;
> +	unsigned long irqflags;
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
> +
> +	local_lock_irqsave(&memcache_lock, irqflags);
> +	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_unmap_pages,
> +				   host_smmu->id, kvm_smmu_domain->id, iova,
> +				   pgsize, pgcount);
> +	local_unlock_irqrestore(&memcache_lock, irqflags);
> +
> +	return ret ? 0 : pgsize * pgcount;
> +}
> +
> +static phys_addr_t kvm_arm_smmu_iova_to_phys(struct iommu_domain *domain,
> +					     dma_addr_t iova)
> +{
> +	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
> +	struct host_arm_smmu_device *host_smmu = smmu_to_host(kvm_smmu_domain->smmu);
> +
> +	return kvm_call_hyp_nvhe(__pkvm_host_iommu_iova_to_phys, host_smmu->id,
> +				 kvm_smmu_domain->id, iova);
> +}
> +
> +static struct iommu_ops kvm_arm_smmu_ops = {
> +	.capable		= arm_smmu_capable,
> +	.device_group		= arm_smmu_device_group,
> +	.of_xlate		= arm_smmu_of_xlate,
> +	.probe_device		= kvm_arm_smmu_probe_device,
> +	.release_device		= kvm_arm_smmu_release_device,
> +	.domain_alloc		= kvm_arm_smmu_domain_alloc,
> +	.owner			= THIS_MODULE,
> +	.default_domain_ops = &(const struct iommu_domain_ops) {
> +		.attach_dev	= kvm_arm_smmu_attach_dev,
> +		.free		= kvm_arm_smmu_domain_free,
> +		.map_pages	= kvm_arm_smmu_map_pages,
> +		.unmap_pages	= kvm_arm_smmu_unmap_pages,
> +		.iova_to_phys	= kvm_arm_smmu_iova_to_phys,
> +	}
> +};
> +
>  static bool kvm_arm_smmu_validate_features(struct arm_smmu_device *smmu)
>  {
>  	unsigned long oas;
> @@ -186,6 +495,12 @@ static int kvm_arm_smmu_device_reset(struct host_arm_smmu_device *host_smmu)
>  	return 0;
>  }
>  
> +static void *kvm_arm_smmu_alloc_domains(struct arm_smmu_device *smmu)
> +{
> +	return (void *)devm_get_free_pages(smmu->dev, GFP_KERNEL | __GFP_ZERO,
> +					   get_order(KVM_IOMMU_DOMAINS_ROOT_SIZE));
> +}
> +
>  static int kvm_arm_smmu_probe(struct platform_device *pdev)
>  {
>  	int ret;
> @@ -274,6 +589,16 @@ static int kvm_arm_smmu_probe(struct platform_device *pdev)
>  	if (ret)
>  		return ret;
>  
> +	hyp_smmu->iommu.domains = kvm_arm_smmu_alloc_domains(smmu);
> +	if (!hyp_smmu->iommu.domains)
> +		return -ENOMEM;
> +
> +	hyp_smmu->iommu.nr_domains = 1 << smmu->vmid_bits;
> +
> +	ret = arm_smmu_register_iommu(smmu, &kvm_arm_smmu_ops, ioaddr);
> +	if (ret)
> +		return ret;
> +
>  	platform_set_drvdata(pdev, host_smmu);
>  
>  	/* Hypervisor parameters */
> @@ -296,6 +621,7 @@ static int kvm_arm_smmu_remove(struct platform_device *pdev)
>  	 * There was an error during hypervisor setup. The hyp driver may
>  	 * have already enabled the device, so disable it.
>  	 */
> +	arm_smmu_unregister_iommu(smmu);
>  	arm_smmu_device_disable(smmu);
>  	arm_smmu_update_gbpa(smmu, host_smmu->boot_gbpa, GBPA_ABORT);
>  	return 0;
> -- 
> 2.39.0
> 
>
Thanks,
Mostafa


