[PATCH 06/11] RISC-V: drivers/iommu/riscv: Add command, fault, page-req queues

Tomasz Jeznach tjeznach at rivosinc.com
Wed Jul 19 12:33:50 PDT 2023


Enables message or wire signal interrupts for PCIe and platforms devices.

Co-developed-by: Nick Kossifidis <mick at ics.forth.gr>
Signed-off-by: Nick Kossifidis <mick at ics.forth.gr>
Signed-off-by: Tomasz Jeznach <tjeznach at rivosinc.com>
---
 drivers/iommu/riscv/iommu-pci.c      |  72 ++++
 drivers/iommu/riscv/iommu-platform.c |  66 +++
 drivers/iommu/riscv/iommu.c          | 604 ++++++++++++++++++++++++++-
 drivers/iommu/riscv/iommu.h          |  28 ++
 4 files changed, 769 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c
index c91f963d7a29..9ea0647f7b92 100644
--- a/drivers/iommu/riscv/iommu-pci.c
+++ b/drivers/iommu/riscv/iommu-pci.c
@@ -34,6 +34,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
 {
 	struct device *dev = &pdev->dev;
 	struct riscv_iommu_device *iommu;
+	u64 icvec;
 	int ret;
 
 	ret = pci_enable_device_mem(pdev);
@@ -67,14 +68,84 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
 	iommu->dev = dev;
 	dev_set_drvdata(dev, iommu);
 
+	/* Check device reported capabilities. */
+	iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
+
+	/* The PCI driver only uses MSIs, make sure the IOMMU supports this */
+	switch (FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap)) {
+	case RISCV_IOMMU_CAP_IGS_MSI:
+	case RISCV_IOMMU_CAP_IGS_BOTH:
+		break;
+	default:
+		dev_err(dev, "unable to use message-signaled interrupts\n");
+		ret = -ENODEV;
+		goto fail;
+	}
+
 	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	pci_set_master(pdev);
 
+	/* Allocate and assign IRQ vectors for the various events */
+	ret = pci_alloc_irq_vectors(pdev, 1, RISCV_IOMMU_INTR_COUNT, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dev_err(dev, "unable to allocate irq vectors\n");
+		goto fail;
+	}
+
+	ret = -ENODEV;
+
+	iommu->irq_cmdq = msi_get_virq(dev, RISCV_IOMMU_INTR_CQ);
+	if (!iommu->irq_cmdq) {
+		dev_warn(dev, "no MSI vector %d for the command queue\n",
+			 RISCV_IOMMU_INTR_CQ);
+		goto fail;
+	}
+
+	iommu->irq_fltq = msi_get_virq(dev, RISCV_IOMMU_INTR_FQ);
+	if (!iommu->irq_fltq) {
+		dev_warn(dev, "no MSI vector %d for the fault/event queue\n",
+			 RISCV_IOMMU_INTR_FQ);
+		goto fail;
+	}
+
+	if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
+		iommu->irq_pm = msi_get_virq(dev, RISCV_IOMMU_INTR_PM);
+		if (!iommu->irq_pm) {
+			dev_warn(dev,
+				 "no MSI vector %d for performance monitoring\n",
+				 RISCV_IOMMU_INTR_PM);
+			goto fail;
+		}
+	}
+
+	if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
+		iommu->irq_priq = msi_get_virq(dev, RISCV_IOMMU_INTR_PQ);
+		if (!iommu->irq_priq) {
+			dev_warn(dev,
+				 "no MSI vector %d for page-request queue\n",
+				 RISCV_IOMMU_INTR_PQ);
+			goto fail;
+		}
+	}
+
+	/* Set simple 1:1 mapping for MSI vectors */
+	icvec = FIELD_PREP(RISCV_IOMMU_IVEC_CIV, RISCV_IOMMU_INTR_CQ) |
+	    FIELD_PREP(RISCV_IOMMU_IVEC_FIV, RISCV_IOMMU_INTR_FQ);
+
+	if (iommu->cap & RISCV_IOMMU_CAP_HPM)
+		icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PMIV, RISCV_IOMMU_INTR_PM);
+
+	if (iommu->cap & RISCV_IOMMU_CAP_ATS)
+		icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PIV, RISCV_IOMMU_INTR_PQ);
+
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IVEC, icvec);
+
 	ret = riscv_iommu_init(iommu);
 	if (!ret)
 		return ret;
 
  fail:
+	pci_free_irq_vectors(pdev);
 	pci_clear_master(pdev);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
@@ -85,6 +156,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
 static void riscv_iommu_pci_remove(struct pci_dev *pdev)
 {
 	riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
+	pci_free_irq_vectors(pdev);
 	pci_clear_master(pdev);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
index e4e8ca6711e7..35935d3c7ef4 100644
--- a/drivers/iommu/riscv/iommu-platform.c
+++ b/drivers/iommu/riscv/iommu-platform.c
@@ -20,6 +20,8 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct riscv_iommu_device *iommu = NULL;
 	struct resource *res = NULL;
+	u32 fctl = 0;
+	int irq = 0;
 	int ret = 0;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
@@ -53,6 +55,70 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
 		goto fail;
 	}
 
+	iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
+
+	/* For now we only support WSIs until we have AIA support */
+	ret = FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap);
+	if (ret == RISCV_IOMMU_CAP_IGS_MSI) {
+		dev_err(dev, "IOMMU only supports MSIs\n");
+		goto fail;
+	}
+
+	/* Parse IRQ assignment */
+	irq = platform_get_irq_byname_optional(pdev, "cmdq");
+	if (irq > 0)
+		iommu->irq_cmdq = irq;
+	else {
+		dev_err(dev, "no IRQ provided for the command queue\n");
+		goto fail;
+	}
+
+	irq = platform_get_irq_byname_optional(pdev, "fltq");
+	if (irq > 0)
+		iommu->irq_fltq = irq;
+	else {
+		dev_err(dev, "no IRQ provided for the fault/event queue\n");
+		goto fail;
+	}
+
+	if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
+		irq = platform_get_irq_byname_optional(pdev, "pm");
+		if (irq > 0)
+			iommu->irq_pm = irq;
+		else {
+			dev_err(dev, "no IRQ provided for performance monitoring\n");
+			goto fail;
+		}
+	}
+
+	if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
+		irq = platform_get_irq_byname_optional(pdev, "priq");
+		if (irq > 0)
+			iommu->irq_priq = irq;
+		else {
+			dev_err(dev, "no IRQ provided for the page-request queue\n");
+			goto fail;
+		}
+	}
+
+	/* Make sure fctl.WSI is set */
+	fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
+	fctl |= RISCV_IOMMU_FCTL_WSI;
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, fctl);
+
+	/* Parse Queue lengts */
+	ret = of_property_read_u32(pdev->dev.of_node, "cmdq_len", &iommu->cmdq_len);
+	if (!ret)
+		dev_info(dev, "command queue length set to %i\n", iommu->cmdq_len);
+
+	ret = of_property_read_u32(pdev->dev.of_node, "fltq_len", &iommu->fltq_len);
+	if (!ret)
+		dev_info(dev, "fault/event queue length set to %i\n", iommu->fltq_len);
+
+	ret = of_property_read_u32(pdev->dev.of_node, "priq_len", &iommu->priq_len);
+	if (!ret)
+		dev_info(dev, "page request queue length set to %i\n", iommu->priq_len);
+
 	dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 
 	return riscv_iommu_init(iommu);
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 31dc3c458e13..5c4cf9875302 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -45,6 +45,18 @@ static int ddt_mode = RISCV_IOMMU_DDTP_MODE_BARE;
 module_param(ddt_mode, int, 0644);
 MODULE_PARM_DESC(ddt_mode, "Device Directory Table mode.");
 
+static int cmdq_length = 1024;
+module_param(cmdq_length, int, 0644);
+MODULE_PARM_DESC(cmdq_length, "Command queue length.");
+
+static int fltq_length = 1024;
+module_param(fltq_length, int, 0644);
+MODULE_PARM_DESC(fltq_length, "Fault queue length.");
+
+static int priq_length = 1024;
+module_param(priq_length, int, 0644);
+MODULE_PARM_DESC(priq_length, "Page request interface queue length.");
+
 /* IOMMU PSCID allocation namespace. */
 #define RISCV_IOMMU_MAX_PSCID	(1U << 20)
 static DEFINE_IDA(riscv_iommu_pscids);
@@ -65,6 +77,497 @@ static DEFINE_IDA(riscv_iommu_pscids);
 static const struct iommu_domain_ops riscv_iommu_domain_ops;
 static const struct iommu_ops riscv_iommu_ops;
 
+/*
+ * Common queue management routines
+ */
+
+/* Note: offsets are the same for all queues */
+#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
+#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
+
+static unsigned riscv_iommu_queue_consume(struct riscv_iommu_device *iommu,
+					  struct riscv_iommu_queue *q, unsigned *ready)
+{
+	u32 tail = riscv_iommu_readl(iommu, Q_TAIL(q));
+	*ready = q->lui;
+
+	BUG_ON(q->cnt <= tail);
+	if (q->lui <= tail)
+		return tail - q->lui;
+	return q->cnt - q->lui;
+}
+
+static void riscv_iommu_queue_release(struct riscv_iommu_device *iommu,
+				      struct riscv_iommu_queue *q, unsigned count)
+{
+	q->lui = (q->lui + count) & (q->cnt - 1);
+	riscv_iommu_writel(iommu, Q_HEAD(q), q->lui);
+}
+
+static u32 riscv_iommu_queue_ctrl(struct riscv_iommu_device *iommu,
+				  struct riscv_iommu_queue *q, u32 val)
+{
+	cycles_t end_cycles = RISCV_IOMMU_TIMEOUT + get_cycles();
+
+	riscv_iommu_writel(iommu, q->qcr, val);
+	do {
+		val = riscv_iommu_readl(iommu, q->qcr);
+		if (!(val & RISCV_IOMMU_QUEUE_BUSY))
+			break;
+		cpu_relax();
+	} while (get_cycles() < end_cycles);
+
+	return val;
+}
+
+static void riscv_iommu_queue_free(struct riscv_iommu_device *iommu,
+				   struct riscv_iommu_queue *q)
+{
+	size_t size = q->len * q->cnt;
+
+	riscv_iommu_queue_ctrl(iommu, q, 0);
+
+	if (q->base) {
+		if (q->in_iomem)
+			iounmap(q->base);
+		else
+			dmam_free_coherent(iommu->dev, size, q->base, q->base_dma);
+	}
+	if (q->irq)
+		free_irq(q->irq, q);
+}
+
+static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data);
+static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_fltq_process(int irq, void *data);
+static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_priq_process(int irq, void *data);
+
+static int riscv_iommu_queue_init(struct riscv_iommu_device *iommu, int queue_id)
+{
+	struct device *dev = iommu->dev;
+	struct riscv_iommu_queue *q = NULL;
+	size_t queue_size = 0;
+	irq_handler_t irq_check;
+	irq_handler_t irq_process;
+	const char *name;
+	int count = 0;
+	int irq = 0;
+	unsigned order = 0;
+	u64 qbr_val = 0;
+	u64 qbr_readback = 0;
+	u64 qbr_paddr = 0;
+	int ret = 0;
+
+	switch (queue_id) {
+	case RISCV_IOMMU_COMMAND_QUEUE:
+		q = &iommu->cmdq;
+		q->len = sizeof(struct riscv_iommu_command);
+		count = iommu->cmdq_len;
+		irq = iommu->irq_cmdq;
+		irq_check = riscv_iommu_cmdq_irq_check;
+		irq_process = riscv_iommu_cmdq_process;
+		q->qbr = RISCV_IOMMU_REG_CQB;
+		q->qcr = RISCV_IOMMU_REG_CQCSR;
+		name = "cmdq";
+		break;
+	case RISCV_IOMMU_FAULT_QUEUE:
+		q = &iommu->fltq;
+		q->len = sizeof(struct riscv_iommu_fq_record);
+		count = iommu->fltq_len;
+		irq = iommu->irq_fltq;
+		irq_check = riscv_iommu_fltq_irq_check;
+		irq_process = riscv_iommu_fltq_process;
+		q->qbr = RISCV_IOMMU_REG_FQB;
+		q->qcr = RISCV_IOMMU_REG_FQCSR;
+		name = "fltq";
+		break;
+	case RISCV_IOMMU_PAGE_REQUEST_QUEUE:
+		q = &iommu->priq;
+		q->len = sizeof(struct riscv_iommu_pq_record);
+		count = iommu->priq_len;
+		irq = iommu->irq_priq;
+		irq_check = riscv_iommu_priq_irq_check;
+		irq_process = riscv_iommu_priq_process;
+		q->qbr = RISCV_IOMMU_REG_PQB;
+		q->qcr = RISCV_IOMMU_REG_PQCSR;
+		name = "priq";
+		break;
+	default:
+		dev_err(dev, "invalid queue interrupt index in queue_init!\n");
+		return -EINVAL;
+	}
+
+	/* Polling not implemented */
+	if (!irq)
+		return -ENODEV;
+
+	/* Allocate queue in memory and set the base register */
+	order = ilog2(count);
+	do {
+		queue_size = q->len * (1ULL << order);
+		q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);
+		if (q->base || queue_size < PAGE_SIZE)
+			break;
+
+		order--;
+	} while (1);
+
+	if (!q->base) {
+		dev_err(dev, "failed to allocate %s queue (cnt: %u)\n", name, count);
+		return -ENOMEM;
+	}
+
+	q->cnt = 1ULL << order;
+
+	qbr_val = phys_to_ppn(q->base_dma) |
+	    FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
+
+	riscv_iommu_writeq(iommu, q->qbr, qbr_val);
+
+	/*
+	 * Queue base registers are WARL, so it's possible that whatever we wrote
+	 * there was illegal/not supported by the hw in which case we need to make
+	 * sure we set a supported PPN and/or queue size.
+	 */
+	qbr_readback = riscv_iommu_readq(iommu, q->qbr);
+	if (qbr_readback == qbr_val)
+		goto irq;
+
+	dmam_free_coherent(dev, queue_size, q->base, q->base_dma);
+
+	/* Get supported queue size */
+	order = FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qbr_readback) + 1;
+	q->cnt = 1ULL << order;
+	queue_size = q->len * q->cnt;
+
+	/*
+	 * In case we also failed to set PPN, it means the field is hardcoded and the
+	 * queue resides in I/O memory instead, so get its physical address and
+	 * ioremap it.
+	 */
+	qbr_paddr = ppn_to_phys(qbr_readback);
+	if (qbr_paddr != q->base_dma) {
+		dev_info(dev,
+			 "hardcoded ppn in %s base register, using io memory for the queue\n",
+			 name);
+		dev_info(dev, "queue length for %s set to %i\n", name, q->cnt);
+		q->in_iomem = true;
+		q->base = ioremap(qbr_paddr, queue_size);
+		if (!q->base) {
+			dev_err(dev, "failed to map %s queue (cnt: %u)\n", name, q->cnt);
+			return -ENOMEM;
+		}
+		q->base_dma = qbr_paddr;
+	} else {
+		/*
+		 * We only failed to set the queue size, re-try to allocate memory with
+		 * the queue size supported by the hw.
+		 */
+		dev_info(dev, "hardcoded queue size in %s base register\n", name);
+		dev_info(dev, "retrying with queue length: %i\n", q->cnt);
+		q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);
+		if (!q->base) {
+			dev_err(dev, "failed to allocate %s queue (cnt: %u)\n",
+				name, q->cnt);
+			return -ENOMEM;
+		}
+	}
+
+	qbr_val = phys_to_ppn(q->base_dma) |
+	    FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
+	riscv_iommu_writeq(iommu, q->qbr, qbr_val);
+
+	/* Final check to make sure hw accepted our write */
+	qbr_readback = riscv_iommu_readq(iommu, q->qbr);
+	if (qbr_readback != qbr_val) {
+		dev_err(dev, "failed to set base register for %s\n", name);
+		goto fail;
+	}
+
+ irq:
+	if (request_threaded_irq(irq, irq_check, irq_process, IRQF_ONESHOT | IRQF_SHARED,
+				 dev_name(dev), q)) {
+		dev_err(dev, "fail to request irq %d for %s\n", irq, name);
+		goto fail;
+	}
+
+	q->irq = irq;
+
+	/* Note: All RIO_xQ_EN/IE fields are in the same offsets */
+	ret =
+	    riscv_iommu_queue_ctrl(iommu, q,
+				   RISCV_IOMMU_QUEUE_ENABLE |
+				   RISCV_IOMMU_QUEUE_INTR_ENABLE);
+	if (ret & RISCV_IOMMU_QUEUE_BUSY) {
+		dev_err(dev, "%s init timeout\n", name);
+		ret = -EBUSY;
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	riscv_iommu_queue_free(iommu, q);
+	return 0;
+}
+
+/*
+ * I/O MMU Command queue chapter 3.1
+ */
+
+static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
+{
+	cmd->dword0 =
+	    FIELD_PREP(RISCV_IOMMU_CMD_OPCODE,
+		       RISCV_IOMMU_CMD_IOTINVAL_OPCODE) | FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
+								     RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
+	cmd->dword1 = 0;
+}
+
+static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
+						  u64 addr)
+{
+	cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
+	cmd->dword1 = addr;
+}
+
+static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
+						   unsigned pscid)
+{
+	cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
+	    RISCV_IOMMU_CMD_IOTINVAL_PSCV;
+}
+
+static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
+						   unsigned gscid)
+{
+	cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
+	    RISCV_IOMMU_CMD_IOTINVAL_GV;
+}
+
+static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
+{
+	cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
+	    FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C);
+	cmd->dword1 = 0;
+}
+
+static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
+						  u64 addr, u32 data)
+{
+	cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
+	    FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
+	    FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) | RISCV_IOMMU_CMD_IOFENCE_AV;
+	cmd->dword1 = (addr >> 2);
+}
+
+static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
+{
+	cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
+	    FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
+	cmd->dword1 = 0;
+}
+
+static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
+{
+	cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
+	    FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
+	cmd->dword1 = 0;
+}
+
+static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
+						 unsigned devid)
+{
+	cmd->dword0 |=
+	    FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) | RISCV_IOMMU_CMD_IODIR_DV;
+}
+
+/* TODO: Convert into lock-less MPSC implementation. */
+static bool riscv_iommu_post_sync(struct riscv_iommu_device *iommu,
+				  struct riscv_iommu_command *cmd, bool sync)
+{
+	u32 head, tail, next, last;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu->cq_lock, flags);
+	head = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) & (iommu->cmdq.cnt - 1);
+	tail = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
+	last = iommu->cmdq.lui;
+	if (tail != last) {
+		spin_unlock_irqrestore(&iommu->cq_lock, flags);
+		/*
+		 * FIXME: This is a workaround for dropped MMIO writes/reads on QEMU platform.
+		 *        While debugging of the problem is still ongoing, this provides
+		 *        a simple impolementation of try-again policy.
+		 *        Will be changed to lock-less algorithm in the feature.
+		 */
+		dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (1st)\n", last, tail);
+		spin_lock_irqsave(&iommu->cq_lock, flags);
+		tail =
+		    riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
+		last = iommu->cmdq.lui;
+		if (tail != last) {
+			spin_unlock_irqrestore(&iommu->cq_lock, flags);
+			dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (2nd)\n", last, tail);
+			spin_lock_irqsave(&iommu->cq_lock, flags);
+		}
+	}
+
+	next = (last + 1) & (iommu->cmdq.cnt - 1);
+	if (next != head) {
+		struct riscv_iommu_command *ptr = iommu->cmdq.base;
+		ptr[last] = *cmd;
+		wmb();
+		riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQT, next);
+		iommu->cmdq.lui = next;
+	}
+
+	spin_unlock_irqrestore(&iommu->cq_lock, flags);
+
+	if (sync && head != next) {
+		cycles_t start_time = get_cycles();
+		while (1) {
+			last = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) &
+			    (iommu->cmdq.cnt - 1);
+			if (head < next && last >= next)
+				break;
+			if (head > next && last < head && last >= next)
+				break;
+			if (RISCV_IOMMU_TIMEOUT < (get_cycles() - start_time)) {
+				dev_err(iommu->dev, "IOFENCE TIMEOUT\n");
+				return false;
+			}
+			cpu_relax();
+		}
+	}
+
+	return next != head;
+}
+
+static bool riscv_iommu_post(struct riscv_iommu_device *iommu,
+			     struct riscv_iommu_command *cmd)
+{
+	return riscv_iommu_post_sync(iommu, cmd, false);
+}
+
+static bool riscv_iommu_iofence_sync(struct riscv_iommu_device *iommu)
+{
+	struct riscv_iommu_command cmd;
+	riscv_iommu_cmd_iofence(&cmd);
+	return riscv_iommu_post_sync(iommu, &cmd, true);
+}
+
+/* Command queue primary interrupt handler */
+static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu =
+	    container_of(q, struct riscv_iommu_device, cmdq);
+	u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
+	if (ipsr & RISCV_IOMMU_IPSR_CIP)
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
+}
+
+/* Command queue interrupt hanlder thread function */
+static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu;
+	unsigned ctrl;
+
+	iommu = container_of(q, struct riscv_iommu_device, cmdq);
+
+	/* Error reporting, clear error reports if any. */
+	ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQCSR);
+	if (ctrl & (RISCV_IOMMU_CQCSR_CQMF |
+		    RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL)) {
+		riscv_iommu_queue_ctrl(iommu, &iommu->cmdq, ctrl);
+		dev_warn_ratelimited(iommu->dev,
+				     "Command queue error: fault: %d tout: %d err: %d\n",
+				     !!(ctrl & RISCV_IOMMU_CQCSR_CQMF),
+				     !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO),
+				     !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL));
+	}
+
+	/* Clear fault interrupt pending. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_CIP);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Fault/event queue, chapter 3.2
+ */
+
+static void riscv_iommu_fault_report(struct riscv_iommu_device *iommu,
+				     struct riscv_iommu_fq_record *event)
+{
+	unsigned err, devid;
+
+	err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
+	devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
+
+	dev_warn_ratelimited(iommu->dev,
+			     "Fault %d devid: %d" " iotval: %llx iotval2: %llx\n", err,
+			     devid, event->iotval, event->iotval2);
+}
+
+/* Fault/event queue primary interrupt handler */
+static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu =
+	    container_of(q, struct riscv_iommu_device, fltq);
+	u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
+	if (ipsr & RISCV_IOMMU_IPSR_FIP)
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
+}
+
+/* Fault queue interrupt hanlder thread function */
+static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu;
+	struct riscv_iommu_fq_record *events;
+	unsigned cnt, len, idx, ctrl;
+
+	iommu = container_of(q, struct riscv_iommu_device, fltq);
+	events = (struct riscv_iommu_fq_record *)q->base;
+
+	/* Error reporting, clear error reports if any. */
+	ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FQCSR);
+	if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
+		riscv_iommu_queue_ctrl(iommu, &iommu->fltq, ctrl);
+		dev_warn_ratelimited(iommu->dev,
+				     "Fault queue error: fault: %d full: %d\n",
+				     !!(ctrl & RISCV_IOMMU_FQCSR_FQMF),
+				     !!(ctrl & RISCV_IOMMU_FQCSR_FQOF));
+	}
+
+	/* Clear fault interrupt pending. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_FIP);
+
+	/* Report fault events. */
+	do {
+		cnt = riscv_iommu_queue_consume(iommu, q, &idx);
+		if (!cnt)
+			break;
+		for (len = 0; len < cnt; idx++, len++)
+			riscv_iommu_fault_report(iommu, &events[idx]);
+		riscv_iommu_queue_release(iommu, q, cnt);
+	} while (1);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Page request queue, chapter 3.3
+ */
+
 /*
  * Register device for IOMMU tracking.
  */
@@ -97,6 +600,54 @@ static void riscv_iommu_add_device(struct riscv_iommu_device *iommu, struct devi
 	mutex_unlock(&iommu->eps_mutex);
 }
 
+/* Page request interface queue primary interrupt handler */
+static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu =
+	    container_of(q, struct riscv_iommu_device, priq);
+	u32 ipsr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR);
+	if (ipsr & RISCV_IOMMU_IPSR_PIP)
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
+}
+
+/* Page request interface queue interrupt hanlder thread function */
+static irqreturn_t riscv_iommu_priq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = (struct riscv_iommu_queue *)data;
+	struct riscv_iommu_device *iommu;
+	struct riscv_iommu_pq_record *requests;
+	unsigned cnt, idx, ctrl;
+
+	iommu = container_of(q, struct riscv_iommu_device, priq);
+	requests = (struct riscv_iommu_pq_record *)q->base;
+
+	/* Error reporting, clear error reports if any. */
+	ctrl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_PQCSR);
+	if (ctrl & (RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF)) {
+		riscv_iommu_queue_ctrl(iommu, &iommu->priq, ctrl);
+		dev_warn_ratelimited(iommu->dev,
+				     "Page request queue error: fault: %d full: %d\n",
+				     !!(ctrl & RISCV_IOMMU_PQCSR_PQMF),
+				     !!(ctrl & RISCV_IOMMU_PQCSR_PQOF));
+	}
+
+	/* Clear page request interrupt pending. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_PIP);
+
+	/* Process page requests. */
+	do {
+		cnt = riscv_iommu_queue_consume(iommu, q, &idx);
+		if (!cnt)
+			break;
+		dev_warn(iommu->dev, "unexpected %u page requests\n", cnt);
+		riscv_iommu_queue_release(iommu, q, cnt);
+	} while (1);
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Endpoint management
  */
@@ -350,7 +901,29 @@ static void riscv_iommu_flush_iotlb_range(struct iommu_domain *iommu_domain,
 					  unsigned long *start, unsigned long *end,
 					  size_t *pgsize)
 {
-	/* Command interface not implemented */
+	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
+	struct riscv_iommu_command cmd;
+	unsigned long iova;
+
+	if (domain->mode == RISCV_IOMMU_DC_FSC_MODE_BARE)
+		return;
+
+	/* Domain not attached to an IOMMU! */
+	BUG_ON(!domain->iommu);
+
+	riscv_iommu_cmd_inval_vma(&cmd);
+	riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
+
+	if (start && end && pgsize) {
+		/* Cover only the range that is needed */
+		for (iova = *start; iova <= *end; iova += *pgsize) {
+			riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+			riscv_iommu_post(domain->iommu, &cmd);
+		}
+	} else {
+		riscv_iommu_post(domain->iommu, &cmd);
+	}
+	riscv_iommu_iofence_sync(domain->iommu);
 }
 
 static void riscv_iommu_flush_iotlb_all(struct iommu_domain *iommu_domain)
@@ -610,6 +1183,9 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
 	iommu_device_unregister(&iommu->iommu);
 	iommu_device_sysfs_remove(&iommu->iommu);
 	riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
+	riscv_iommu_queue_free(iommu, &iommu->cmdq);
+	riscv_iommu_queue_free(iommu, &iommu->fltq);
+	riscv_iommu_queue_free(iommu, &iommu->priq);
 }
 
 int riscv_iommu_init(struct riscv_iommu_device *iommu)
@@ -632,6 +1208,16 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
 	}
 #endif
 
+	/*
+	 * Assign queue lengths from module parameters if not already
+	 * set on the device tree.
+	 */
+	if (!iommu->cmdq_len)
+		iommu->cmdq_len = cmdq_length;
+	if (!iommu->fltq_len)
+		iommu->fltq_len = fltq_length;
+	if (!iommu->priq_len)
+		iommu->priq_len = priq_length;
 	/* Clear any pending interrupt flag. */
 	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR,
 			   RISCV_IOMMU_IPSR_CIP |
@@ -639,7 +1225,20 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
 			   RISCV_IOMMU_IPSR_PMIP | RISCV_IOMMU_IPSR_PIP);
 	spin_lock_init(&iommu->cq_lock);
 	mutex_init(&iommu->eps_mutex);
+	ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_COMMAND_QUEUE);
+	if (ret)
+		goto fail;
+	ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_FAULT_QUEUE);
+	if (ret)
+		goto fail;
+	if (!(iommu->cap & RISCV_IOMMU_CAP_ATS))
+		goto no_ats;
+
+	ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_PAGE_REQUEST_QUEUE);
+	if (ret)
+		goto fail;
 
+ no_ats:
 	ret = riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_BARE);
 
 	if (ret) {
@@ -663,5 +1262,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
 	return 0;
  fail:
 	riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
+	riscv_iommu_queue_free(iommu, &iommu->priq);
+	riscv_iommu_queue_free(iommu, &iommu->fltq);
+	riscv_iommu_queue_free(iommu, &iommu->cmdq);
 	return ret;
 }
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 7dc9baa59a50..04148a2a8ffd 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -28,6 +28,24 @@
 #define IOMMU_PAGE_SIZE_1G	BIT_ULL(30)
 #define IOMMU_PAGE_SIZE_512G	BIT_ULL(39)
 
+struct riscv_iommu_queue {
+	dma_addr_t base_dma;	/* ring buffer bus address */
+	void *base;		/* ring buffer pointer */
+	size_t len;		/* single item length */
+	u32 cnt;		/* items count */
+	u32 lui;		/* last used index, consumer/producer share */
+	unsigned qbr;		/* queue base register offset */
+	unsigned qcr;		/* queue control and status register offset */
+	int irq;		/* registered interrupt number */
+	bool in_iomem;		/* indicates queue data are in I/O memory  */
+};
+
+enum riscv_queue_ids {
+	RISCV_IOMMU_COMMAND_QUEUE	= 0,
+	RISCV_IOMMU_FAULT_QUEUE		= 1,
+	RISCV_IOMMU_PAGE_REQUEST_QUEUE	= 2
+};
+
 struct riscv_iommu_device {
 	struct iommu_device iommu;	/* iommu core interface */
 	struct device *dev;		/* iommu hardware */
@@ -42,6 +60,11 @@ struct riscv_iommu_device {
 	int irq_pm;
 	int irq_priq;
 
+	/* Queue lengths */
+	int cmdq_len;
+	int fltq_len;
+	int priq_len;
+
 	/* supported and enabled hardware capabilities */
 	u64 cap;
 
@@ -53,6 +76,11 @@ struct riscv_iommu_device {
 	unsigned ddt_mode;
 	bool ddtp_in_iomem;
 
+	/* hardware queues */
+	struct riscv_iommu_queue cmdq;
+	struct riscv_iommu_queue fltq;
+	struct riscv_iommu_queue priq;
+
 	/* Connected end-points */
 	struct rb_root eps;
 	struct mutex eps_mutex;
-- 
2.34.1




More information about the linux-riscv mailing list