[PATCH 06/11] RISC-V: drivers/iommu/riscv: Add command, fault, page-req queues
Robin Murphy
robin.murphy at arm.com
Wed Aug 16 11:49:02 PDT 2023
On 2023-07-19 20:33, Tomasz Jeznach wrote:
> Enables message or wire signal interrupts for PCIe and platform devices.
>
> Co-developed-by: Nick Kossifidis <mick at ics.forth.gr>
> Signed-off-by: Nick Kossifidis <mick at ics.forth.gr>
> Signed-off-by: Tomasz Jeznach <tjeznach at rivosinc.com>
> ---
> drivers/iommu/riscv/iommu-pci.c | 72 ++++
> drivers/iommu/riscv/iommu-platform.c | 66 +++
> drivers/iommu/riscv/iommu.c | 604 ++++++++++++++++++++++++++-
> drivers/iommu/riscv/iommu.h | 28 ++
> 4 files changed, 769 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c
> index c91f963d7a29..9ea0647f7b92 100644
> --- a/drivers/iommu/riscv/iommu-pci.c
> +++ b/drivers/iommu/riscv/iommu-pci.c
> @@ -34,6 +34,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
> {
> struct device *dev = &pdev->dev;
> struct riscv_iommu_device *iommu;
> + u64 icvec;
> int ret;
>
> ret = pci_enable_device_mem(pdev);
> @@ -67,14 +68,84 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
> iommu->dev = dev;
> dev_set_drvdata(dev, iommu);
>
> + /* Check device reported capabilities. */
> + iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
> +
> + /* The PCI driver only uses MSIs, make sure the IOMMU supports this */
> + switch (FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap)) {
> + case RISCV_IOMMU_CAP_IGS_MSI:
> + case RISCV_IOMMU_CAP_IGS_BOTH:
> + break;
> + default:
> + dev_err(dev, "unable to use message-signaled interrupts\n");
> + ret = -ENODEV;
> + goto fail;
> + }
> +
> dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> pci_set_master(pdev);
>
> + /* Allocate and assign IRQ vectors for the various events */
> + ret = pci_alloc_irq_vectors(pdev, 1, RISCV_IOMMU_INTR_COUNT, PCI_IRQ_MSIX);
> + if (ret < 0) {
> + dev_err(dev, "unable to allocate irq vectors\n");
> + goto fail;
> + }
> +
> + ret = -ENODEV;
> +
> + iommu->irq_cmdq = msi_get_virq(dev, RISCV_IOMMU_INTR_CQ);
> + if (!iommu->irq_cmdq) {
> + dev_warn(dev, "no MSI vector %d for the command queue\n",
> + RISCV_IOMMU_INTR_CQ);
> + goto fail;
> + }
> +
> + iommu->irq_fltq = msi_get_virq(dev, RISCV_IOMMU_INTR_FQ);
> + if (!iommu->irq_fltq) {
> + dev_warn(dev, "no MSI vector %d for the fault/event queue\n",
> + RISCV_IOMMU_INTR_FQ);
> + goto fail;
> + }
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
> + iommu->irq_pm = msi_get_virq(dev, RISCV_IOMMU_INTR_PM);
> + if (!iommu->irq_pm) {
> + dev_warn(dev,
> + "no MSI vector %d for performance monitoring\n",
> + RISCV_IOMMU_INTR_PM);
> + goto fail;
> + }
> + }
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
> + iommu->irq_priq = msi_get_virq(dev, RISCV_IOMMU_INTR_PQ);
> + if (!iommu->irq_priq) {
> + dev_warn(dev,
> + "no MSI vector %d for page-request queue\n",
> + RISCV_IOMMU_INTR_PQ);
> + goto fail;
> + }
> + }
> +
> + /* Set simple 1:1 mapping for MSI vectors */
> + icvec = FIELD_PREP(RISCV_IOMMU_IVEC_CIV, RISCV_IOMMU_INTR_CQ) |
> + FIELD_PREP(RISCV_IOMMU_IVEC_FIV, RISCV_IOMMU_INTR_FQ);
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_HPM)
> + icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PMIV, RISCV_IOMMU_INTR_PM);
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_ATS)
> + icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PIV, RISCV_IOMMU_INTR_PQ);
> +
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IVEC, icvec);
> +
> ret = riscv_iommu_init(iommu);
> if (!ret)
> return ret;
>
> fail:
> + pci_free_irq_vectors(pdev);
> pci_clear_master(pdev);
> pci_release_regions(pdev);
> pci_disable_device(pdev);
> @@ -85,6 +156,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
> static void riscv_iommu_pci_remove(struct pci_dev *pdev)
> {
> riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
> + pci_free_irq_vectors(pdev);
> pci_clear_master(pdev);
> pci_release_regions(pdev);
> pci_disable_device(pdev);
> diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
> index e4e8ca6711e7..35935d3c7ef4 100644
> --- a/drivers/iommu/riscv/iommu-platform.c
> +++ b/drivers/iommu/riscv/iommu-platform.c
> @@ -20,6 +20,8 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
> struct device *dev = &pdev->dev;
> struct riscv_iommu_device *iommu = NULL;
> struct resource *res = NULL;
> + u32 fctl = 0;
> + int irq = 0;
> int ret = 0;
>
> iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
> @@ -53,6 +55,70 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
> goto fail;
> }
>
> + iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
> +
> + /* For now we only support WSIs until we have AIA support */
> + ret = FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap);
> + if (ret == RISCV_IOMMU_CAP_IGS_MSI) {
> + dev_err(dev, "IOMMU only supports MSIs\n");
> + goto fail;
> + }
> +
> + /* Parse IRQ assignment */
> + irq = platform_get_irq_byname_optional(pdev, "cmdq");
> + if (irq > 0)
> + iommu->irq_cmdq = irq;
> + else {
> + dev_err(dev, "no IRQ provided for the command queue\n");
> + goto fail;
> + }
> +
> + irq = platform_get_irq_byname_optional(pdev, "fltq");
> + if (irq > 0)
> + iommu->irq_fltq = irq;
> + else {
> + dev_err(dev, "no IRQ provided for the fault/event queue\n");
> + goto fail;
> + }
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
> + irq = platform_get_irq_byname_optional(pdev, "pm");
> + if (irq > 0)
> + iommu->irq_pm = irq;
> + else {
> + dev_err(dev, "no IRQ provided for performance monitoring\n");
> + goto fail;
> + }
> + }
> +
> + if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
> + irq = platform_get_irq_byname_optional(pdev, "priq");
> + if (irq > 0)
> + iommu->irq_priq = irq;
> + else {
> + dev_err(dev, "no IRQ provided for the page-request queue\n");
> + goto fail;
> + }
> + }
> +
> + /* Make sure fctl.WSI is set */
> + fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
> + fctl |= RISCV_IOMMU_FCTL_WSI;
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, fctl);
> +
> + /* Parse Queue lengths */
> + ret = of_property_read_u32(pdev->dev.of_node, "cmdq_len", &iommu->cmdq_len);
> + if (!ret)
> + dev_info(dev, "command queue length set to %i\n", iommu->cmdq_len);
> +
> + ret = of_property_read_u32(pdev->dev.of_node, "fltq_len", &iommu->fltq_len);
> + if (!ret)
> + dev_info(dev, "fault/event queue length set to %i\n", iommu->fltq_len);
> +
> + ret = of_property_read_u32(pdev->dev.of_node, "priq_len", &iommu->priq_len);
> + if (!ret)
> + dev_info(dev, "page request queue length set to %i\n", iommu->priq_len);
These properties are not documented in the binding, but are clearly
Linux-specific driver policy which does not belong in DT anyway.
> +
> dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>
> return riscv_iommu_init(iommu);
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index 31dc3c458e13..5c4cf9875302 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -45,6 +45,18 @@ static int ddt_mode = RISCV_IOMMU_DDTP_MODE_BARE;
> module_param(ddt_mode, int, 0644);
> MODULE_PARM_DESC(ddt_mode, "Device Directory Table mode.");
>
> +static int cmdq_length = 1024;
> +module_param(cmdq_length, int, 0644);
> +MODULE_PARM_DESC(cmdq_length, "Command queue length.");
> +
> +static int fltq_length = 1024;
> +module_param(fltq_length, int, 0644);
> +MODULE_PARM_DESC(fltq_length, "Fault queue length.");
> +
> +static int priq_length = 1024;
> +module_param(priq_length, int, 0644);
> +MODULE_PARM_DESC(priq_length, "Page request interface queue length.");
> +
> /* IOMMU PSCID allocation namespace. */
> #define RISCV_IOMMU_MAX_PSCID (1U << 20)
> static DEFINE_IDA(riscv_iommu_pscids);
> @@ -65,6 +77,497 @@ static DEFINE_IDA(riscv_iommu_pscids);
> static const struct iommu_domain_ops riscv_iommu_domain_ops;
> static const struct iommu_ops riscv_iommu_ops;
>
> +/*
> + * Common queue management routines
> + */
> +
> +/* Note: offsets are the same for all queues */
> +#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
> +#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
> +
> +static unsigned riscv_iommu_queue_consume(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_queue *q, unsigned *ready)
> +{
> + u32 tail = riscv_iommu_readl(iommu, Q_TAIL(q));
> + *ready = q->lui;
> +
> + BUG_ON(q->cnt <= tail);
> + if (q->lui <= tail)
> + return tail - q->lui;
> + return q->cnt - q->lui;
> +}
> +
> +static void riscv_iommu_queue_release(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_queue *q, unsigned count)
> +{
> + q->lui = (q->lui + count) & (q->cnt - 1);
> + riscv_iommu_writel(iommu, Q_HEAD(q), q->lui);
> +}
> +
> +static u32 riscv_iommu_queue_ctrl(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_queue *q, u32 val)
> +{
> + cycles_t end_cycles = RISCV_IOMMU_TIMEOUT + get_cycles();
> +
> + riscv_iommu_writel(iommu, q->qcr, val);
> + do {
> + val = riscv_iommu_readl(iommu, q->qcr);
> + if (!(val & RISCV_IOMMU_QUEUE_BUSY))
> + break;
> + cpu_relax();
> + } while (get_cycles() < end_cycles);
> +
> + return val;
> +}
> +
> +static void riscv_iommu_queue_free(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_queue *q)
> +{
> + size_t size = q->len * q->cnt;
> +
> + riscv_iommu_queue_ctrl(iommu, q, 0);
> +
> + if (q->base) {
> + if (q->in_iomem)
> + iounmap(q->base);
> + else
> + dmam_free_coherent(iommu->dev, size, q->base, q->base_dma);
> + }
> + if (q->irq)
> + free_irq(q->irq, q);
> +}
> +
> +static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data);
> +static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data);
> +static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data);
> +static irqreturn_t riscv_iommu_fltq_process(int irq, void *data);
> +static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data);
> +static irqreturn_t riscv_iommu_priq_process(int irq, void *data);
> +
> +static int riscv_iommu_queue_init(struct riscv_iommu_device *iommu, int queue_id)
> +{
> + struct device *dev = iommu->dev;
> + struct riscv_iommu_queue *q = NULL;
> + size_t queue_size = 0;
> + irq_handler_t irq_check;
> + irq_handler_t irq_process;
> + const char *name;
> + int count = 0;
> + int irq = 0;
> + unsigned order = 0;
> + u64 qbr_val = 0;
> + u64 qbr_readback = 0;
> + u64 qbr_paddr = 0;
> + int ret = 0;
> +
> + switch (queue_id) {
> + case RISCV_IOMMU_COMMAND_QUEUE:
> + q = &iommu->cmdq;
> + q->len = sizeof(struct riscv_iommu_command);
> + count = iommu->cmdq_len;
> + irq = iommu->irq_cmdq;
> + irq_check = riscv_iommu_cmdq_irq_check;
> + irq_process = riscv_iommu_cmdq_process;
> + q->qbr = RISCV_IOMMU_REG_CQB;
> + q->qcr = RISCV_IOMMU_REG_CQCSR;
> + name = "cmdq";
> + break;
> + case RISCV_IOMMU_FAULT_QUEUE:
> + q = &iommu->fltq;
> + q->len = sizeof(struct riscv_iommu_fq_record);
> + count = iommu->fltq_len;
> + irq = iommu->irq_fltq;
> + irq_check = riscv_iommu_fltq_irq_check;
> + irq_process = riscv_iommu_fltq_process;
> + q->qbr = RISCV_IOMMU_REG_FQB;
> + q->qcr = RISCV_IOMMU_REG_FQCSR;
> + name = "fltq";
> + break;
> + case RISCV_IOMMU_PAGE_REQUEST_QUEUE:
> + q = &iommu->priq;
> + q->len = sizeof(struct riscv_iommu_pq_record);
> + count = iommu->priq_len;
> + irq = iommu->irq_priq;
> + irq_check = riscv_iommu_priq_irq_check;
> + irq_process = riscv_iommu_priq_process;
> + q->qbr = RISCV_IOMMU_REG_PQB;
> + q->qcr = RISCV_IOMMU_REG_PQCSR;
> + name = "priq";
> + break;
> + default:
> + dev_err(dev, "invalid queue interrupt index in queue_init!\n");
> + return -EINVAL;
> + }
> +
> + /* Polling not implemented */
> + if (!irq)
> + return -ENODEV;
> +
> + /* Allocate queue in memory and set the base register */
> + order = ilog2(count);
> + do {
> + queue_size = q->len * (1ULL << order);
> + q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);
> + if (q->base || queue_size < PAGE_SIZE)
> + break;
> +
> + order--;
> + } while (1);
> +
> + if (!q->base) {
> + dev_err(dev, "failed to allocate %s queue (cnt: %u)\n", name, count);
> + return -ENOMEM;
> + }
> +
> + q->cnt = 1ULL << order;
> +
> + qbr_val = phys_to_ppn(q->base_dma) |
> + FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
> +
> + riscv_iommu_writeq(iommu, q->qbr, qbr_val);
> +
> + /*
> + * Queue base registers are WARL, so it's possible that whatever we wrote
> + * there was illegal/not supported by the hw in which case we need to make
> + * sure we set a supported PPN and/or queue size.
> + */
> + qbr_readback = riscv_iommu_readq(iommu, q->qbr);
> + if (qbr_readback == qbr_val)
> + goto irq;
> +
> + dmam_free_coherent(dev, queue_size, q->base, q->base_dma);
> +
> + /* Get supported queue size */
> + order = FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qbr_readback) + 1;
> + q->cnt = 1ULL << order;
> + queue_size = q->len * q->cnt;
Um... What? We allocate an arbitrarily-sized queue, free it again,
*then* check what the hardware actually supports, and maybe allocate
another queue? I can't help thinking there's a much better way...
> +
> + /*
> + * In case we also failed to set PPN, it means the field is hardcoded and the
> + * queue resides in I/O memory instead, so get its physical address and
> + * ioremap it.
> + */
> + qbr_paddr = ppn_to_phys(qbr_readback);
> + if (qbr_paddr != q->base_dma) {
> + dev_info(dev,
> + "hardcoded ppn in %s base register, using io memory for the queue\n",
> + name);
> + dev_info(dev, "queue length for %s set to %i\n", name, q->cnt);
> + q->in_iomem = true;
> + q->base = ioremap(qbr_paddr, queue_size);
> + if (!q->base) {
> + dev_err(dev, "failed to map %s queue (cnt: %u)\n", name, q->cnt);
> + return -ENOMEM;
> + }
> + q->base_dma = qbr_paddr;
> + } else {
> + /*
> + * We only failed to set the queue size, re-try to allocate memory with
> + * the queue size supported by the hw.
> + */
> + dev_info(dev, "hardcoded queue size in %s base register\n", name);
> + dev_info(dev, "retrying with queue length: %i\n", q->cnt);
> + q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);
Note that dma_alloc_coherent only guarantees natural alignment here, so
if you need a minimum alignment of 4KB as the spec claims you should
really clamp your minimum allocation size to that.
> + if (!q->base) {
> + dev_err(dev, "failed to allocate %s queue (cnt: %u)\n",
> + name, q->cnt);
> + return -ENOMEM;
> + }
> + }
> +
> + qbr_val = phys_to_ppn(q->base_dma) |
> + FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
> + riscv_iommu_writeq(iommu, q->qbr, qbr_val);
> +
> + /* Final check to make sure hw accepted our write */
> + qbr_readback = riscv_iommu_readq(iommu, q->qbr);
> + if (qbr_readback != qbr_val) {
> + dev_err(dev, "failed to set base register for %s\n", name);
> + goto fail;
> + }
> +
> + irq:
> + if (request_threaded_irq(irq, irq_check, irq_process, IRQF_ONESHOT | IRQF_SHARED,
> + dev_name(dev), q)) {
> + dev_err(dev, "fail to request irq %d for %s\n", irq, name);
> + goto fail;
> + }
> +
> + q->irq = irq;
> +
> + /* Note: All RIO_xQ_EN/IE fields are in the same offsets */
> + ret =
> + riscv_iommu_queue_ctrl(iommu, q,
> + RISCV_IOMMU_QUEUE_ENABLE |
> + RISCV_IOMMU_QUEUE_INTR_ENABLE);
> + if (ret & RISCV_IOMMU_QUEUE_BUSY) {
> + dev_err(dev, "%s init timeout\n", name);
> + ret = -EBUSY;
> + goto fail;
> + }
> +
> + return 0;
> +
> + fail:
> + riscv_iommu_queue_free(iommu, q);
> + return 0;
> +}
> +
> +/*
> + * I/O MMU Command queue chapter 3.1
> + */
> +
> +static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
> +{
> + cmd->dword0 =
> + FIELD_PREP(RISCV_IOMMU_CMD_OPCODE,
> + RISCV_IOMMU_CMD_IOTINVAL_OPCODE) | FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> + RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
Interesting indentation... :/
> + cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
> + u64 addr)
> +{
> + cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
> + cmd->dword1 = addr;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
> + unsigned pscid)
> +{
> + cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
> + RISCV_IOMMU_CMD_IOTINVAL_PSCV;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
> + unsigned gscid)
> +{
> + cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
> + RISCV_IOMMU_CMD_IOTINVAL_GV;
> +}
> +
> +static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
> +{
> + cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> + FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C);
> + cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
> + u64 addr, u32 data)
> +{
> + cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> + FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
> + FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) | RISCV_IOMMU_CMD_IOFENCE_AV;
> + cmd->dword1 = (addr >> 2);
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
> +{
> + cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> + FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
> + cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
> +{
> + cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> + FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
> + cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
> + unsigned devid)
> +{
> + cmd->dword0 |=
> + FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) | RISCV_IOMMU_CMD_IODIR_DV;
> +}
> +
> +/* TODO: Convert into lock-less MPSC implementation. */
> +static bool riscv_iommu_post_sync(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_command *cmd, bool sync)
> +{
> + u32 head, tail, next, last;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&iommu->cq_lock, flags);
> + head = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) & (iommu->cmdq.cnt - 1);
> + tail = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
> + last = iommu->cmdq.lui;
> + if (tail != last) {
> + spin_unlock_irqrestore(&iommu->cq_lock, flags);
> + /*
> + * FIXME: This is a workaround for dropped MMIO writes/reads on QEMU platform.
> + * While debugging of the problem is still ongoing, this provides
> + * a simple implementation of try-again policy.
> + * Will be changed to lock-less algorithm in the future.
> + */
> + dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (1st)\n", last, tail);
> + spin_lock_irqsave(&iommu->cq_lock, flags);
> + tail =
> + riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
> + last = iommu->cmdq.lui;
> + if (tail != last) {
> + spin_unlock_irqrestore(&iommu->cq_lock, flags);
> + dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (2nd)\n", last, tail);
> + spin_lock_irqsave(&iommu->cq_lock, flags);
> + }
> + }
> +
> + next = (last + 1) & (iommu->cmdq.cnt - 1);
> + if (next != head) {
> + struct riscv_iommu_command *ptr = iommu->cmdq.base;
> + ptr[last] = *cmd;
> + wmb();
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQT, next);
> + iommu->cmdq.lui = next;
> + }
> +
> + spin_unlock_irqrestore(&iommu->cq_lock, flags);
> +
> + if (sync && head != next) {
> + cycles_t start_time = get_cycles();
> + while (1) {
> + last = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) &
> + (iommu->cmdq.cnt - 1);
> + if (head < next && last >= next)
> + break;
> + if (head > next && last < head && last >= next)
> + break;
> + if (RISCV_IOMMU_TIMEOUT < (get_cycles() - start_time)) {
> + dev_err(iommu->dev, "IOFENCE TIMEOUT\n");
> + return false;
> + }
> + cpu_relax();
> + }
> + }
> +
> + return next != head;
> +}
> +
> +static bool riscv_iommu_post(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_command *cmd)
> +{
> + return riscv_iommu_post_sync(iommu, cmd, false);
> +}
> +
> +static bool riscv_iommu_iofence_sync(struct riscv_iommu_device *iommu)
> +{
> + struct riscv_iommu_command cmd;
> + riscv_iommu_cmd_iofence(&cmd);
> + return riscv_iommu_post_sync(iommu, &cmd, true);
> +}
> +
> +/* Command queue primary interrupt handler */
> +static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu =
> + container_of(q, struct riscv_iommu_device, cmdq);
> + u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
> + if (ipsr & RISCV_IOMMU_IPSR_CIP)
> + return IRQ_WAKE_THREAD;
> + return IRQ_NONE;
> +}
> +
> +/* Command queue interrupt handler thread function */
> +static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu;
> + unsigned ctrl;
> +
> + iommu = container_of(q, struct riscv_iommu_device, cmdq);
> +
> + /* Error reporting, clear error reports if any. */
> + ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQCSR);
> + if (ctrl & (RISCV_IOMMU_CQCSR_CQMF |
> + RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL)) {
> + riscv_iommu_queue_ctrl(iommu, &iommu->cmdq, ctrl);
> + dev_warn_ratelimited(iommu->dev,
> + "Command queue error: fault: %d tout: %d err: %d\n",
> + !!(ctrl & RISCV_IOMMU_CQCSR_CQMF),
> + !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO),
> + !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL));
> + }
> +
> + /* Clear fault interrupt pending. */
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_CIP);
> +
> + return IRQ_HANDLED;
> +}
> +
> +/*
> + * Fault/event queue, chapter 3.2
> + */
> +
> +static void riscv_iommu_fault_report(struct riscv_iommu_device *iommu,
> + struct riscv_iommu_fq_record *event)
> +{
> + unsigned err, devid;
> +
> + err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
> + devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
> +
> + dev_warn_ratelimited(iommu->dev,
> + "Fault %d devid: %d" " iotval: %llx iotval2: %llx\n", err,
> + devid, event->iotval, event->iotval2);
> +}
> +
> +/* Fault/event queue primary interrupt handler */
> +static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu =
> + container_of(q, struct riscv_iommu_device, fltq);
> + u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
> + if (ipsr & RISCV_IOMMU_IPSR_FIP)
> + return IRQ_WAKE_THREAD;
> + return IRQ_NONE;
> +}
> +
> +/* Fault queue interrupt handler thread function */
> +static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu;
> + struct riscv_iommu_fq_record *events;
> + unsigned cnt, len, idx, ctrl;
> +
> + iommu = container_of(q, struct riscv_iommu_device, fltq);
> + events = (struct riscv_iommu_fq_record *)q->base;
> +
> + /* Error reporting, clear error reports if any. */
> + ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FQCSR);
> + if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
> + riscv_iommu_queue_ctrl(iommu, &iommu->fltq, ctrl);
> + dev_warn_ratelimited(iommu->dev,
> + "Fault queue error: fault: %d full: %d\n",
> + !!(ctrl & RISCV_IOMMU_FQCSR_FQMF),
> + !!(ctrl & RISCV_IOMMU_FQCSR_FQOF));
> + }
> +
> + /* Clear fault interrupt pending. */
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_FIP);
> +
> + /* Report fault events. */
> + do {
> + cnt = riscv_iommu_queue_consume(iommu, q, &idx);
> + if (!cnt)
> + break;
> + for (len = 0; len < cnt; idx++, len++)
> + riscv_iommu_fault_report(iommu, &events[idx]);
> + riscv_iommu_queue_release(iommu, q, cnt);
> + } while (1);
> +
> + return IRQ_HANDLED;
> +}
> +
> +/*
> + * Page request queue, chapter 3.3
> + */
> +
> /*
> * Register device for IOMMU tracking.
> */
> @@ -97,6 +600,54 @@ static void riscv_iommu_add_device(struct riscv_iommu_device *iommu, struct devi
> mutex_unlock(&iommu->eps_mutex);
> }
>
> +/* Page request interface queue primary interrupt handler */
> +static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu =
> + container_of(q, struct riscv_iommu_device, priq);
> + u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
> + if (ipsr & RISCV_IOMMU_IPSR_PIP)
> + return IRQ_WAKE_THREAD;
> + return IRQ_NONE;
> +}
> +
> +/* Page request interface queue interrupt handler thread function */
> +static irqreturn_t riscv_iommu_priq_process(int irq, void *data)
> +{
> + struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
> + struct riscv_iommu_device *iommu;
> + struct riscv_iommu_pq_record *requests;
> + unsigned cnt, idx, ctrl;
> +
> + iommu = container_of(q, struct riscv_iommu_device, priq);
> + requests = (struct riscv_iommu_pq_record *)q->base;
> +
> + /* Error reporting, clear error reports if any. */
> + ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_PQCSR);
> + if (ctrl & (RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF)) {
> + riscv_iommu_queue_ctrl(iommu, &iommu->priq, ctrl);
> + dev_warn_ratelimited(iommu->dev,
> + "Page request queue error: fault: %d full: %d\n",
> + !!(ctrl & RISCV_IOMMU_PQCSR_PQMF),
> + !!(ctrl & RISCV_IOMMU_PQCSR_PQOF));
> + }
> +
> + /* Clear page request interrupt pending. */
> + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_PIP);
> +
> + /* Process page requests. */
> + do {
> + cnt = riscv_iommu_queue_consume(iommu, q, &idx);
> + if (!cnt)
> + break;
> + dev_warn(iommu->dev, "unexpected %u page requests\n", cnt);
> + riscv_iommu_queue_release(iommu, q, cnt);
> + } while (1);
> +
> + return IRQ_HANDLED;
> +}
> +
> /*
> * Endpoint management
> */
> @@ -350,7 +901,29 @@ static void riscv_iommu_flush_iotlb_range(struct iommu_domain *iommu_domain,
> unsigned long *start, unsigned long *end,
> size_t *pgsize)
> {
> - /* Command interface not implemented */
> + struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
> + struct riscv_iommu_command cmd;
> + unsigned long iova;
> +
> + if (domain->mode == RISCV_IOMMU_DC_FSC_MODE_BARE)
That should probably not happen - things shouldn't be calling TLB ops on
identity domains (and ideally your identity domains wouldn't even *have*
iotlb callbacks...)
> + return;
> +
> + /* Domain not attached to an IOMMU! */
> + BUG_ON(!domain->iommu);
However I'm not sure how iommu_create_device_direct_mappings() isn't
hitting that?
Thanks,
Robin.
> +
> + riscv_iommu_cmd_inval_vma(&cmd);
> + riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
> +
> + if (start && end && pgsize) {
> + /* Cover only the range that is needed */
> + for (iova = *start; iova <= *end; iova += *pgsize) {
> + riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> + riscv_iommu_post(domain->iommu, &cmd);
> + }
> + } else {
> + riscv_iommu_post(domain->iommu, &cmd);
> + }
> + riscv_iommu_iofence_sync(domain->iommu);
> }
>
> static void riscv_iommu_flush_iotlb_all(struct iommu_domain *iommu_domain)
> @@ -610,6 +1183,9 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
> iommu_device_unregister(&iommu->iommu);
> iommu_device_sysfs_remove(&iommu->iommu);
> riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> + riscv_iommu_queue_free(iommu, &iommu->cmdq);
> + riscv_iommu_queue_free(iommu, &iommu->fltq);
> + riscv_iommu_queue_free(iommu, &iommu->priq);
> }
>
> int riscv_iommu_init(struct riscv_iommu_device *iommu)
> @@ -632,6 +1208,16 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
> }
> #endif
>
> + /*
> + * Assign queue lengths from module parameters if not already
> + * set on the device tree.
> + */
> + if (!iommu->cmdq_len)
> + iommu->cmdq_len = cmdq_length;
> + if (!iommu->fltq_len)
> + iommu->fltq_len = fltq_length;
> + if (!iommu->priq_len)
> + iommu->priq_len = priq_length;
> /* Clear any pending interrupt flag. */
> riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR,
> RISCV_IOMMU_IPSR_CIP |
> @@ -639,7 +1225,20 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
> RISCV_IOMMU_IPSR_PMIP | RISCV_IOMMU_IPSR_PIP);
> spin_lock_init(&iommu->cq_lock);
> mutex_init(&iommu->eps_mutex);
> + ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_COMMAND_QUEUE);
> + if (ret)
> + goto fail;
> + ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_FAULT_QUEUE);
> + if (ret)
> + goto fail;
> + if (!(iommu->cap & RISCV_IOMMU_CAP_ATS))
> + goto no_ats;
> +
> + ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_PAGE_REQUEST_QUEUE);
> + if (ret)
> + goto fail;
>
> + no_ats:
> ret = riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_BARE);
>
> if (ret) {
> @@ -663,5 +1262,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
> return 0;
> fail:
> riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> + riscv_iommu_queue_free(iommu, &iommu->priq);
> + riscv_iommu_queue_free(iommu, &iommu->fltq);
> + riscv_iommu_queue_free(iommu, &iommu->cmdq);
> return ret;
> }
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index 7dc9baa59a50..04148a2a8ffd 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -28,6 +28,24 @@
> #define IOMMU_PAGE_SIZE_1G BIT_ULL(30)
> #define IOMMU_PAGE_SIZE_512G BIT_ULL(39)
>
> +struct riscv_iommu_queue {
> + dma_addr_t base_dma; /* ring buffer bus address */
> + void *base; /* ring buffer pointer */
> + size_t len; /* single item length */
> + u32 cnt; /* items count */
> + u32 lui; /* last used index, consumer/producer share */
> + unsigned qbr; /* queue base register offset */
> + unsigned qcr; /* queue control and status register offset */
> + int irq; /* registered interrupt number */
> + bool in_iomem; /* indicates queue data are in I/O memory */
> +};
> +
> +enum riscv_queue_ids {
> + RISCV_IOMMU_COMMAND_QUEUE = 0,
> + RISCV_IOMMU_FAULT_QUEUE = 1,
> + RISCV_IOMMU_PAGE_REQUEST_QUEUE = 2
> +};
> +
> struct riscv_iommu_device {
> struct iommu_device iommu; /* iommu core interface */
> struct device *dev; /* iommu hardware */
> @@ -42,6 +60,11 @@ struct riscv_iommu_device {
> int irq_pm;
> int irq_priq;
>
> + /* Queue lengths */
> + int cmdq_len;
> + int fltq_len;
> + int priq_len;
> +
> /* supported and enabled hardware capabilities */
> u64 cap;
>
> @@ -53,6 +76,11 @@ struct riscv_iommu_device {
> unsigned ddt_mode;
> bool ddtp_in_iomem;
>
> + /* hardware queues */
> + struct riscv_iommu_queue cmdq;
> + struct riscv_iommu_queue fltq;
> + struct riscv_iommu_queue priq;
> +
> /* Connected end-points */
> struct rb_root eps;
> struct mutex eps_mutex;
More information about the linux-riscv
mailing list