[RFC PATCH] i3c: dw: add DMA support for data transfers
Haoyu Lu
hechushiguitu666 at gmail.com
Fri May 15 08:56:35 PDT 2026
Add DMA support for the DesignWare I3C master controller to
accelerate data transfers.
The driver now requests tx and rx DMA channels during probe and
uses them for data transfers of 4 bytes or more. Transfers smaller
than 4 bytes, or transfers where DMA setup fails, fall back to PIO
mode. DMA tail bytes (non-4-byte-aligned remainder) are handled
with PIO as well.
DMA transfers may sleep, so the xferqueue spinlock is temporarily
dropped for DMA operations. This is safe because xferqueue.cur is
only changed when a transfer completes, and the calling context
serializes simultaneous transfers.
The DMA enable bit (DEV_CTRL_DMA_EN, BIT(28)) is set when DMA is
used and cleared otherwise, to ensure the hardware uses the correct
data transfer path.
Signed-off-by: Haoyu Lu <hechushiguitu666 at gmail.com>
---
This is an RFC patch to gather feedback on the overall approach before
a formal submission. A few specific questions for reviewers:
1. DMA threshold: The current implementation uses DMA for transfers
>= 4 bytes, with tail bytes handled by PIO. Is 4 bytes a reasonable
threshold, or should we consider a higher value (e.g. 16 or 32) to
avoid DMA setup overhead for very small transfers?
2. Spinlock safety: dw_i3c_master_start_xfer_locked() and
dw_i3c_master_end_xfer_locked() temporarily drop xferqueue.lock
around DMA operations. The analysis is that xferqueue.cur is only
modified on transfer completion, and the caller serializes
simultaneous transfers. Is this safe, or should we restructure the
locking (e.g., convert to mutex)?
3. DMA channel handling: dw_i3c_dma_request() is called during probe
and silently degrades to PIO if DMA channels are unavailable. Is
this the right behavior, or should probe fail on DMA errors other
than -ENODEV/-EPROBE_DEFER?
4. Error handling for DMA timeouts: when a DMA timeout occurs,
dmaengine_terminate_all() is called but the transfer proceeds as if
it were successful, potentially reading stale data. Should we instead
signal an error to the I3C core?
5. IBI path: dw_i3c_master_read_ibi_fifo() still uses PIO. Should
IBI reads also use DMA, or is the IBI payload typically small enough
that PIO is preferred?
Any other feedback on the design or implementation is welcome.
drivers/i3c/master/dw-i3c-master.c | 260 ++++++++++++++++++++++++++++-
drivers/i3c/master/dw-i3c-master.h | 17 ++
2 files changed, 270 insertions(+), 7 deletions(-)
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index d6bdb32397fb..7b6c8681ff0c 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/i3c/master.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/iopoll.h>
#include <linux/list.h>
@@ -24,6 +25,10 @@
#include <linux/reset.h>
#include <linux/slab.h>
+#include <linux/dma-direction.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+
#include "../internals.h"
#include "dw-i3c-master.h"
@@ -32,6 +37,7 @@
#define DEV_CTRL_RESUME BIT(30)
#define DEV_CTRL_HOT_JOIN_NACK BIT(8)
#define DEV_CTRL_I2C_SLAVE_PRESENT BIT(7)
+#define DEV_CTRL_DMA_EN BIT(28)
#define DEVICE_ADDR 0x4
#define DEV_ADDR_DYNAMIC_ADDR_VALID BIT(31)
@@ -359,16 +365,123 @@ static int dw_i3c_master_get_free_pos(struct dw_i3c_master *master)
return ffs(master->free_pos) - 1;
}
+static void dw_i3c_dma_callback(void *arg)
+{
+ struct dw_i3c_dma *dma = arg;
+
+ dma_unmap_single(dma->chan_using->device->dev, dma->dma_buf,
+ dma->dma_len, dma->dma_data_dir);
+ complete(&dma->complete);
+}
+
+static int dw_i3c_master_dma_xfer(struct dw_i3c_dma *dma, const u8 *bytes)
+{
+ struct device *chan_dev = dma->chan_using->device->dev;
+ struct dma_async_tx_descriptor *txdesc;
+
+ dma->dma_buf = dma_map_single(chan_dev, (void *)bytes, dma->dma_len,
+ dma->dma_data_dir);
+ if (dma_mapping_error(chan_dev, dma->dma_buf))
+ return -EINVAL;
+
+ txdesc = dmaengine_prep_slave_single(dma->chan_using, dma->dma_buf,
+ dma->dma_len, dma->dma_transfer_dir,
+ DMA_PREP_INTERRUPT);
+ if (!txdesc) {
+ dma_unmap_single(chan_dev, dma->dma_buf, dma->dma_len,
+ dma->dma_data_dir);
+ return -ENOMEM;
+ }
+
+ reinit_completion(&dma->complete);
+ txdesc->callback = dw_i3c_dma_callback;
+ txdesc->callback_param = dma;
+ if (dma_submit_error(dmaengine_submit(txdesc))) {
+ dma_unmap_single(chan_dev, dma->dma_buf, dma->dma_len,
+ dma->dma_data_dir);
+ return -EIO;
+ }
+
+ dma_async_issue_pending(dma->chan_using);
+ return 0;
+}
+
static void dw_i3c_master_wr_tx_fifo(struct dw_i3c_master *master,
const u8 *bytes, int nbytes)
{
- i3c_writel_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ struct dw_i3c_dma *dma = master->dma;
+ unsigned long time_left;
+ int ret;
+
+ if (!master->use_dma || nbytes < 4) {
+ i3c_writel_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ return;
+ }
+
+ dma->chan_using = dma->chan_tx;
+ dma->dma_transfer_dir = DMA_MEM_TO_DEV;
+ dma->dma_data_dir = DMA_TO_DEVICE;
+ dma->dma_len = ALIGN_DOWN(nbytes, 4);
+
+ ret = dw_i3c_master_dma_xfer(dma, bytes);
+ if (ret) {
+ dev_err(&master->base.dev, "DMA TX setup failed (%d)\n", ret);
+ i3c_writel_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ return;
+ }
+
+ time_left = wait_for_completion_timeout(&dma->complete,
+ XFER_TIMEOUT);
+ if (!time_left) {
+ dmaengine_terminate_all(dma->chan_using);
+ dev_err(&master->base.dev, "DMA TX timeout\n");
+ }
+
+ if (nbytes & 3) {
+ u32 tmp = 0;
+
+ memcpy(&tmp, bytes + (nbytes & ~3), nbytes & 3);
+ writesl(master->regs + RX_TX_DATA_PORT, &tmp, 1);
+ }
}
static void dw_i3c_master_read_rx_fifo(struct dw_i3c_master *master,
u8 *bytes, int nbytes)
{
- i3c_readl_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ struct dw_i3c_dma *dma = master->dma;
+ unsigned long time_left;
+ int ret;
+
+ if (!master->use_dma || nbytes < 4) {
+ i3c_readl_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ return;
+ }
+
+ dma->chan_using = dma->chan_rx;
+ dma->dma_transfer_dir = DMA_DEV_TO_MEM;
+ dma->dma_data_dir = DMA_FROM_DEVICE;
+ dma->dma_len = ALIGN_DOWN(nbytes, 4);
+
+ ret = dw_i3c_master_dma_xfer(dma, bytes);
+ if (ret) {
+ dev_err(&master->base.dev, "DMA RX setup failed (%d)\n", ret);
+ i3c_readl_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes);
+ return;
+ }
+
+ time_left = wait_for_completion_timeout(&dma->complete,
+ XFER_TIMEOUT);
+ if (!time_left) {
+ dmaengine_terminate_all(dma->chan_using);
+ dev_err(&master->base.dev, "DMA RX timeout\n");
+ }
+
+ if (nbytes & 3) {
+ u32 tmp;
+
+ readsl(master->regs + RX_TX_DATA_PORT, &tmp, 1);
+ memcpy(bytes + (nbytes & ~3), &tmp, nbytes & 3);
+ }
}
static void dw_i3c_master_read_ibi_fifo(struct dw_i3c_master *master,
@@ -403,14 +516,29 @@ static void dw_i3c_master_start_xfer_locked(struct dw_i3c_master *master)
struct dw_i3c_xfer *xfer = master->xferqueue.cur;
unsigned int i;
u32 thld_ctrl;
+ u32 dev_ctrl;
if (!xfer)
return;
- for (i = 0; i < xfer->ncmds; i++) {
- struct dw_i3c_cmd *cmd = &xfer->cmds[i];
-
- dw_i3c_master_wr_tx_fifo(master, cmd->tx_buf, cmd->tx_len);
+ /*
+ * DMA transfers may sleep, so we must drop the spinlock while
+ * writing the TX FIFO. The xferqueue lock is held by our caller;
+ * xferqueue.cur is safe because:
+ * - New xfers are only added to the list (not cur) when cur is set.
+ * - Timeout-based dequeue is blocked until the caller calls
+ * wait_for_completion_timeout(), which happens after we return.
+ */
+ if (master->use_dma) {
+ spin_unlock(&master->xferqueue.lock);
+ for (i = 0; i < xfer->ncmds; i++)
+ dw_i3c_master_wr_tx_fifo(master, xfer->cmds[i].tx_buf,
+ xfer->cmds[i].tx_len);
+ spin_lock(&master->xferqueue.lock);
+ } else {
+ for (i = 0; i < xfer->ncmds; i++)
+ dw_i3c_master_wr_tx_fifo(master, xfer->cmds[i].tx_buf,
+ xfer->cmds[i].tx_len);
}
thld_ctrl = readl(master->regs + QUEUE_THLD_CTRL);
@@ -418,6 +546,13 @@ static void dw_i3c_master_start_xfer_locked(struct dw_i3c_master *master)
thld_ctrl |= QUEUE_THLD_CTRL_RESP_BUF(xfer->ncmds);
writel(thld_ctrl, master->regs + QUEUE_THLD_CTRL);
+ dev_ctrl = readl(master->regs + DEVICE_CTRL);
+ if (master->use_dma)
+ dev_ctrl |= DEV_CTRL_DMA_EN;
+ else
+ dev_ctrl &= ~DEV_CTRL_DMA_EN;
+ writel(dev_ctrl, master->regs + DEVICE_CTRL);
+
for (i = 0; i < xfer->ncmds; i++) {
struct dw_i3c_cmd *cmd = &xfer->cmds[i];
@@ -486,9 +621,17 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr)
cmd = &xfer->cmds[RESPONSE_PORT_TID(resp)];
cmd->rx_len = RESPONSE_PORT_DATA_LEN(resp);
cmd->error = RESPONSE_PORT_ERR_STATUS(resp);
- if (cmd->rx_len && !cmd->error)
+ if (cmd->rx_len && !cmd->error) {
+ /*
+ * DMA RX may sleep; drop the lock temporarily.
+ * Safe because xferqueue.cur is not changed
+ * by other paths while a transfer is in flight.
+ */
+ spin_unlock(&master->xferqueue.lock);
dw_i3c_master_read_rx_fifo(master, cmd->rx_buf,
cmd->rx_len);
+ spin_lock(&master->xferqueue.lock);
+ }
}
for (i = 0; i < nresp; i++) {
@@ -1582,6 +1725,107 @@ static void dw_i3c_hj_work(struct work_struct *work)
i3c_master_do_daa(&master->base);
}
+static void dw_i3c_dma_free(struct dw_i3c_master *master)
+{
+ struct dw_i3c_dma *dma = master->dma;
+
+ if (!dma)
+ return;
+
+ if (dma->chan_tx) {
+ dma_release_channel(dma->chan_tx);
+ dma->chan_tx = NULL;
+ }
+ if (dma->chan_rx) {
+ dma_release_channel(dma->chan_rx);
+ dma->chan_rx = NULL;
+ }
+ dma->chan_using = NULL;
+ master->dma = NULL;
+ master->use_dma = false;
+}
+
+static void dw_i3c_dma_request(struct platform_device *pdev)
+{
+ struct dw_i3c_dma *dma;
+ struct dma_slave_config dma_sconfig;
+ struct dw_i3c_master *master;
+ struct resource *res;
+ dma_addr_t phy_addr;
+ int ret;
+
+ master = platform_get_drvdata(pdev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return;
+ phy_addr = res->start;
+
+ dma = devm_kzalloc(&pdev->dev, sizeof(*dma), GFP_KERNEL);
+ if (!dma)
+ return;
+
+ dma->chan_tx = dma_request_chan(&pdev->dev, "tx");
+ if (IS_ERR(dma->chan_tx)) {
+ ret = PTR_ERR(dma->chan_tx);
+ if (ret != -ENODEV && ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "can't request DMA tx channel (%d)\n", ret);
+ goto fail;
+ }
+
+ memset(&dma_sconfig, 0, sizeof(dma_sconfig));
+ dma_sconfig.dst_addr = phy_addr + RX_TX_DATA_PORT;
+ dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_sconfig.dst_maxburst = 1;
+ dma_sconfig.direction = DMA_MEM_TO_DEV;
+ ret = dmaengine_slave_config(dma->chan_tx, &dma_sconfig);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "can't configure DMA tx channel (%d)\n", ret);
+ goto fail_tx;
+ }
+
+ dma->chan_rx = dma_request_chan(&pdev->dev, "rx");
+ if (IS_ERR(dma->chan_rx)) {
+ ret = PTR_ERR(dma->chan_rx);
+ if (ret != -ENODEV && ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "can't request DMA rx channel (%d)\n", ret);
+ goto fail_tx;
+ }
+
+ memset(&dma_sconfig, 0, sizeof(dma_sconfig));
+ dma_sconfig.src_addr = phy_addr + RX_TX_DATA_PORT;
+ dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_sconfig.src_maxburst = 1;
+ dma_sconfig.direction = DMA_DEV_TO_MEM;
+ ret = dmaengine_slave_config(dma->chan_rx, &dma_sconfig);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "can't configure DMA rx channel (%d)\n", ret);
+ goto fail_rx;
+ }
+
+ init_completion(&dma->complete);
+ master->dma = dma;
+ master->use_dma = true;
+
+ dev_info(&pdev->dev, "using %s (tx) and %s (rx) for DMA transfers\n",
+ dma_chan_name(dma->chan_tx), dma_chan_name(dma->chan_rx));
+
+ return;
+
+fail_rx:
+ dma_release_channel(dma->chan_rx);
+ dma->chan_rx = NULL;
+fail_tx:
+ dma_release_channel(dma->chan_tx);
+ dma->chan_tx = NULL;
+fail:
+ devm_kfree(&pdev->dev, dma);
+}
+
int dw_i3c_common_probe(struct dw_i3c_master *master,
struct platform_device *pdev)
{
@@ -1627,6 +1871,7 @@ int dw_i3c_common_probe(struct dw_i3c_master *master,
goto err_assert_rst;
platform_set_drvdata(pdev, master);
+ dw_i3c_dma_request(pdev);
pm_runtime_set_autosuspend_delay(&pdev->dev, RPM_AUTOSUSPEND_TIMEOUT);
pm_runtime_use_autosuspend(&pdev->dev);
@@ -1683,6 +1928,7 @@ EXPORT_SYMBOL_GPL(dw_i3c_common_probe);
void dw_i3c_common_remove(struct dw_i3c_master *master)
{
cancel_work_sync(&master->hj_work);
+ dw_i3c_dma_free(master);
i3c_master_unregister(&master->base);
/* Balance pm_runtime_get_noresume() from probe() */
diff --git a/drivers/i3c/master/dw-i3c-master.h b/drivers/i3c/master/dw-i3c-master.h
index c5cb695c16ab..633b7abdfaae 100644
--- a/drivers/i3c/master/dw-i3c-master.h
+++ b/drivers/i3c/master/dw-i3c-master.h
@@ -6,12 +6,27 @@
*/
#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
#include <linux/i3c/master.h>
#include <linux/reset.h>
#include <linux/types.h>
#define DW_I3C_MAX_DEVS 32
+struct dw_i3c_dma {
+ struct dma_chan *chan_tx;
+ struct dma_chan *chan_rx;
+ struct dma_chan *chan_using;
+ dma_addr_t dma_buf;
+ unsigned int dma_len;
+ enum dma_transfer_direction dma_transfer_dir;
+ enum dma_data_direction dma_data_dir;
+ struct completion complete;
+};
+
struct dw_i3c_master_caps {
u8 cmdfifodepth;
u8 datafifodepth;
@@ -70,6 +85,8 @@ struct dw_i3c_master {
const struct dw_i3c_platform_ops *platform_ops;
struct work_struct hj_work;
+ bool use_dma;
+ struct dw_i3c_dma *dma;
};
struct dw_i3c_platform_ops {
--
2.17.1
More information about the linux-i3c
mailing list