[PATCH v2 5/7] iommu/riscv: Device directory management.

Baolu Lu baolu.lu at linux.intel.com
Sun Apr 21 22:11:52 PDT 2024


On 4/19/24 12:32 AM, Tomasz Jeznach wrote:
> Introduce device context allocation and device directory tree
> management including capabilities discovery sequence, as described
> in Chapter 2.1 of the RISC-V IOMMU Architecture Specification.
> 
> Device directory mode will be auto detected using DDTP WARL property,
> using highest mode supported by the driver and hardware. If none
> supported can be configured, driver will fall back to global pass-through.
> 
> First level DDTP page can be located in I/O (detected using DDTP WARL)
> and system memory.
> 
> Only identity protection domain is supported by this implementation.
> 
> Co-developed-by: Nick Kossifidis <mick at ics.forth.gr>
> Signed-off-by: Nick Kossifidis <mick at ics.forth.gr>
> Signed-off-by: Tomasz Jeznach <tjeznach at rivosinc.com>
> ---
>   drivers/iommu/riscv/iommu.c | 369 +++++++++++++++++++++++++++++++++++-
>   drivers/iommu/riscv/iommu.h |   5 +
>   2 files changed, 365 insertions(+), 9 deletions(-)

[ ... ]

> +
> +/*
> + * Discover supported DDT modes starting from requested value,
> + * configure DDTP register with accepted mode and root DDT address.
> + * Accepted iommu->ddt_mode is updated on success.
> + */
> +static int riscv_iommu_set_ddtp_mode(struct riscv_iommu_device *iommu,
> +				     unsigned int ddtp_mode)
> +{
> +	struct device *dev = iommu->dev;
> +	u64 ddtp, rq_ddtp;
> +	unsigned int mode, rq_mode = ddtp_mode;
> +	int rc;
> +
> +	rc = readq_relaxed_poll_timeout(iommu->reg + RISCV_IOMMU_REG_DDTP,
> +					ddtp, !(ddtp & RISCV_IOMMU_DDTP_BUSY),
> +					10, RISCV_IOMMU_DDTP_TIMEOUT);
> +	if (rc < 0)
> +		return -EBUSY;
> +
> +	/* Disallow state transition from xLVL to xLVL. */
> +	switch (FIELD_GET(RISCV_IOMMU_DDTP_MODE, ddtp)) {
> +	case RISCV_IOMMU_DDTP_MODE_BARE:
> +	case RISCV_IOMMU_DDTP_MODE_OFF:
> +		break;
> +	default:
> +		if (rq_mode != RISCV_IOMMU_DDTP_MODE_BARE &&
> +		    rq_mode != RISCV_IOMMU_DDTP_MODE_OFF)
> +			return -EINVAL;

Is this check redundant? It appears to be always true in the default
branch.

> +		break;
> +	}
> +
> +	do {
> +		rq_ddtp = FIELD_PREP(RISCV_IOMMU_DDTP_MODE, rq_mode);
> +		if (rq_mode > RISCV_IOMMU_DDTP_MODE_BARE)
> +			rq_ddtp |= phys_to_ppn(iommu->ddt_phys);
> +
> +		riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, rq_ddtp);
> +
> +		rc = readq_relaxed_poll_timeout(iommu->reg + RISCV_IOMMU_REG_DDTP,
> +						ddtp, !(ddtp & RISCV_IOMMU_DDTP_BUSY),
> +						10, RISCV_IOMMU_DDTP_TIMEOUT);
> +		if (rc < 0) {
> +			dev_warn(dev, "timeout when setting ddtp (ddt mode: %u, read: %llx)\n",
> +				 rq_mode, ddtp);
> +			return -EBUSY;
> +		}
> +
> +		/* Verify IOMMU hardware accepts new DDTP config. */
> +		mode = FIELD_GET(RISCV_IOMMU_DDTP_MODE, ddtp);
> +
> +		if (rq_mode == mode)
> +			break;
> +
> +		/* Hardware mandatory DDTP mode has not been accepted. */
> +		if (rq_mode < RISCV_IOMMU_DDTP_MODE_1LVL && rq_ddtp != ddtp) {
> +			dev_warn(dev, "DDTP update failed hw: %llx vs %llx\n", ddtp, rq_ddtp);
> +			return -EINVAL;
> +		}
> +
> +		/*
> +		 * Mode field is WARL, an IOMMU may support a subset of
> +		 * directory table levels in which case if we tried to set
> +		 * an unsupported number of levels we'll readback either
> +		 * a valid xLVL or off/bare. If we got off/bare, try again
> +		 * with a smaller xLVL.
> +		 */
> +		if (mode < RISCV_IOMMU_DDTP_MODE_1LVL &&
> +		    rq_mode > RISCV_IOMMU_DDTP_MODE_1LVL) {
> +			dev_dbg(dev, "DDTP hw mode %u vs %u\n", mode, rq_mode);
> +			rq_mode--;
> +			continue;
> +		}
> +
> +		/*
> +		 * We tried all supported modes and IOMMU hardware failed to
> +		 * accept new settings, something went very wrong since off/bare
> +		 * and at least one xLVL must be supported.
> +		 */
> +		dev_warn(dev, "DDTP hw mode %u, failed to set %u\n", mode, ddtp_mode);
> +		return -EINVAL;
> +	} while (1);
> +
> +	iommu->ddt_mode = mode;
> +	if (mode != ddtp_mode)
> +		dev_warn(dev, "DDTP failover to %u mode, requested %u\n",
> +			 mode, ddtp_mode);
> +
> +	return 0;
> +}
> +

[ ... ]

> +
> +static int riscv_iommu_attach_domain(struct riscv_iommu_device *iommu,
> +				     struct device *dev,
> +				     struct iommu_domain *iommu_domain)
> +{
> +	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> +	struct riscv_iommu_dc *dc;
> +	u64 fsc, ta, tc;
> +	int i;
> +
> +	if (!iommu_domain) {
> +		ta = 0;
> +		tc = 0;
> +		fsc = 0;
> +	} else if (iommu_domain->type == IOMMU_DOMAIN_IDENTITY) {
> +		ta = 0;
> +		tc = RISCV_IOMMU_DC_TC_V;
> +		fsc = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE, RISCV_IOMMU_DC_FSC_MODE_BARE);
> +	} else {
> +		/* This should never happen. */
> +		return -ENODEV;
> +	}

Move the domain->type check code to the domain-specific ops.

> +
> +	/* Update existing or allocate new entries in device directory */
> +	for (i = 0; i < fwspec->num_ids; i++) {
> +		dc = riscv_iommu_get_dc(iommu, fwspec->ids[i], !iommu_domain);
> +		if (!dc && !iommu_domain)
> +			continue;
> +		if (!dc)
> +			return -ENODEV;
> +
> +		/* Swap device context, update TC valid bit as the last operation */
> +		xchg64(&dc->fsc, fsc);
> +		xchg64(&dc->ta, ta);
> +		xchg64(&dc->tc, tc);
> +
> +		/* Device context invalidation will be required. Ignoring for now. */
> +	}
> +
>   	return 0;
>   }
>   
> +static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain,
> +					      struct device *dev)
> +{
> +	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> +
> +	/* Global pass-through already enabled, do nothing. */
> +	if (iommu->ddt_mode == RISCV_IOMMU_DDTP_MODE_BARE)
> +		return 0;
> +
> +	return riscv_iommu_attach_domain(iommu, dev, iommu_domain);
> +}
> +
>   static struct iommu_domain riscv_iommu_identity_domain = {
>   	.type = IOMMU_DOMAIN_IDENTITY,
>   	.ops = &(const struct iommu_domain_ops) {
> @@ -82,6 +420,13 @@ static void riscv_iommu_probe_finalize(struct device *dev)
>   	iommu_setup_dma_ops(dev, 0, U64_MAX);
>   }
>   
> +static void riscv_iommu_release_device(struct device *dev)
> +{
> +	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
> +
> +	riscv_iommu_attach_domain(iommu, dev, NULL);

Support for attaching a NULL domain to a device has already been removed.
You can use iommu_ops->release_domain here instead.

> +}
> +
>   static const struct iommu_ops riscv_iommu_ops = {
>   	.owner = THIS_MODULE,
>   	.of_xlate = riscv_iommu_of_xlate,
> @@ -90,6 +435,7 @@ static const struct iommu_ops riscv_iommu_ops = {
>   	.device_group = riscv_iommu_device_group,
>   	.probe_device = riscv_iommu_probe_device,
>   	.probe_finalize = riscv_iommu_probe_finalize,

The probe_finalize op will be removed soon.

https://lore.kernel.org/linux-iommu/bebea331c1d688b34d9862eefd5ede47503961b8.1713523152.git.robin.murphy@arm.com/

> +	.release_device = riscv_iommu_release_device,
>   };
>   
>   static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
> @@ -124,6 +470,7 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
>   {
>   	iommu_device_unregister(&iommu->iommu);
>   	iommu_device_sysfs_remove(&iommu->iommu);
> +	riscv_iommu_set_ddtp_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
>   }
>   
>   int riscv_iommu_init(struct riscv_iommu_device *iommu)
> @@ -133,12 +480,14 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
>   	rc = riscv_iommu_init_check(iommu);
>   	if (rc)
>   		return dev_err_probe(iommu->dev, rc, "unexpected device state\n");
> -	/*
> -	 * Placeholder for a complete IOMMU device initialization.
> -	 * For now, only bare minimum: enable global identity mapping mode and register sysfs.
> -	 */
> -	riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP,
> -			   FIELD_PREP(RISCV_IOMMU_DDTP_MODE, RISCV_IOMMU_DDTP_MODE_BARE));
> +
> +	rc = riscv_iommu_ddt_alloc(iommu);
> +	if (WARN(rc, "cannot allocate device directory\n"))
> +		goto err_init;
> +
> +	rc = riscv_iommu_set_ddtp_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX);
> +	if (WARN(rc, "cannot enable iommu device\n"))
> +		goto err_init;
>   
>   	rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s",
>   				    dev_name(iommu->dev));
> @@ -154,5 +503,7 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
>   err_iommu:
>   	iommu_device_sysfs_remove(&iommu->iommu);
>   err_sysfs:
> +	riscv_iommu_set_ddtp_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> +err_init:
>   	return rc;
>   }
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index 700e33dc2446..f1696926582c 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -34,6 +34,11 @@ struct riscv_iommu_device {
>   	/* available interrupt numbers, MSI or WSI */
>   	unsigned int irqs[RISCV_IOMMU_INTR_COUNT];
>   	unsigned int irqs_count;
> +
> +	/* device directory */
> +	unsigned int ddt_mode;
> +	dma_addr_t ddt_phys;
> +	u64 *ddt_root;
>   };
>   
>   int riscv_iommu_init(struct riscv_iommu_device *iommu);

Best regards,
baolu



More information about the linux-riscv mailing list