[PATCH 10/12] i3c: mipi-i3c-hci: Fix race in DMA error handling in interrupt context
Adrian Hunter
adrian.hunter at intel.com
Mon Mar 2 00:45:25 PST 2026
On 27/02/2026 18:40, Frank Li wrote:
> On Fri, Feb 27, 2026 at 04:11:47PM +0200, Adrian Hunter wrote:
>> The DMA ring halts whenever a transfer encounters an error. The interrupt
>> handler previously attempted to detect this situation and restart the ring
>> if a transfer completed at the same time. However, this restart logic runs
>> entirely in interrupt context and is inherently racy: it interacts with
>> other paths manipulating the ring state, and fully serializing it within
>> the interrupt handler is not practical.
>>
>> Move this error-recovery logic out of the interrupt handler and into the
>> transfer-processing path (i3c_hci_process_xfer()), where serialization and
>> state management are already controlled. Introduce a new optional I/O-ops
>> callback, handle_error(), invoked when a completed transfer reports an
>> error. For DMA operation, the implementation simply calls the existing
>> dequeue function, which safely aborts and restarts the ring when needed.
>>
>> This removes the fragile ring-restart logic from the interrupt handler and
>> centralizes error handling where proper sequencing can be ensured.
>>
>> Fixes: ccdb2e0e3b00d ("i3c: mipi-i3c-hci: Add Intel specific quirk to ring resuming")
>> Cc: stable at vger.kernel.org
>> Signed-off-by: Adrian Hunter <adrian.hunter at intel.com>
>> ---
>> drivers/i3c/master/mipi-i3c-hci/core.c | 19 ++++++++++++----
>> drivers/i3c/master/mipi-i3c-hci/dma.c | 31 +++++++-------------------
>> drivers/i3c/master/mipi-i3c-hci/hci.h | 1 +
>> 3 files changed, 24 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
>> index 9cfe12fb9a0e..741f543aae68 100644
>> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
>> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
>> @@ -223,10 +223,21 @@ int i3c_hci_process_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n)
>> if (ret)
>> return ret;
>>
>> - if (!wait_for_completion_timeout(done, timeout) &&
>> - hci->io->dequeue_xfer(hci, xfer, n)) {
>> - dev_err(&hci->master.dev, "%s: timeout error\n", __func__);
>> - return -ETIMEDOUT;
>> + if (!wait_for_completion_timeout(done, timeout)) {
>> + if (hci->io->dequeue_xfer(hci, xfer, n)) {
>
> It is also an error, why not call handle_error() here.
It is different between PIO and DMA. DMA has an extra need to
restart the DMA ring after an error. PIO does not need that,
so for PIO ->handle_error() should do nothing, which is not the
same as ->dequeue_xfer().
>
> Frank
>> + dev_err(&hci->master.dev, "%s: timeout error\n", __func__);
>> + return -ETIMEDOUT;
>> + }
>> + return 0;
>> + }
>> +
>> + if (hci->io->handle_error) {
>> + bool error = false;
>> +
>> + for (int i = 0; i < n && !error; i++)
>> + error = RESP_STATUS(xfer[i].response);
>> + if (error)
>> + return hci->io->handle_error(hci, xfer, n);
>> }
>>
>> return 0;
>> diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
>> index 323c5ead82a5..4951fd0a63ac 100644
>> --- a/drivers/i3c/master/mipi-i3c-hci/dma.c
>> +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
>> @@ -613,6 +613,11 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
>> return did_unqueue;
>> }
>>
>> +static int hci_dma_handle_error(struct i3c_hci *hci, struct hci_xfer *xfer_list, int n)
>> +{
>> + return hci_dma_dequeue_xfer(hci, xfer_list, n) ? -EIO : 0;
>> +}
>> +
>> static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh)
>> {
>> u32 op1_val, op2_val, resp, *ring_resp;
>> @@ -880,29 +885,8 @@ static bool hci_dma_irq_handler(struct i3c_hci *hci)
>> hci_dma_xfer_done(hci, rh);
>> if (status & INTR_RING_OP)
>> complete(&rh->op_done);
>> -
>> - if (status & INTR_TRANSFER_ABORT) {
>> - u32 ring_status;
>> -
>> - dev_notice_ratelimited(&hci->master.dev,
>> - "Ring %d: Transfer Aborted\n", i);
>> - mipi_i3c_hci_resume(hci);
>> - ring_status = rh_reg_read(RING_STATUS);
>> - if (!(ring_status & RING_STATUS_RUNNING) &&
>> - status & INTR_TRANSFER_COMPLETION &&
>> - status & INTR_TRANSFER_ERR) {
>> - /*
>> - * Ring stop followed by run is an Intel
>> - * specific required quirk after resuming the
>> - * halted controller. Do it only when the ring
>> - * is not in running state after a transfer
>> - * error.
>> - */
>> - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
>> - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE |
>> - RING_CTRL_RUN_STOP);
>> - }
>> - }
>> + if (status & INTR_TRANSFER_ABORT)
>> + dev_dbg(&hci->master.dev, "Ring %d: Transfer Aborted\n", i);
>> if (status & INTR_IBI_RING_FULL)
>> dev_err_ratelimited(&hci->master.dev,
>> "Ring %d: IBI Ring Full Condition\n", i);
>> @@ -918,6 +902,7 @@ const struct hci_io_ops mipi_i3c_hci_dma = {
>> .cleanup = hci_dma_cleanup,
>> .queue_xfer = hci_dma_queue_xfer,
>> .dequeue_xfer = hci_dma_dequeue_xfer,
>> + .handle_error = hci_dma_handle_error,
>> .irq_handler = hci_dma_irq_handler,
>> .request_ibi = hci_dma_request_ibi,
>> .free_ibi = hci_dma_free_ibi,
>> diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h
>> index 6680b8640c5a..0f5e5bad9fb1 100644
>> --- a/drivers/i3c/master/mipi-i3c-hci/hci.h
>> +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
>> @@ -121,6 +121,7 @@ struct hci_io_ops {
>> bool (*irq_handler)(struct i3c_hci *hci);
>> int (*queue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n);
>> bool (*dequeue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n);
>> + int (*handle_error)(struct i3c_hci *hci, struct hci_xfer *xfer, int n);
>> int (*request_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev,
>> const struct i3c_ibi_setup *req);
>> void (*free_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev);
>> --
>> 2.51.0
>>
>>
>> --
>> linux-i3c mailing list
>> linux-i3c at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-i3c
More information about the linux-i3c
mailing list