[PATCH V4 13/17] i3c: mipi-i3c-hci: Add DMA-mode recovery for internal controller errors

Frank Li Frank.li at nxp.com
Tue Jun 2 09:45:10 PDT 2026


On Fri, May 15, 2026 at 07:26:17PM +0300, Adrian Hunter wrote:
> Handle internal I3C HCI errors when operating in DMA mode by adding a
> simple recovery mechanism.
>
> On detection of an internal controller error, mark recovery as needed and
> attempt to restore operation by performing a software reset followed by
> state restore.  To keep recovery straightforward on this unlikely error
> path, all currently queued transfers are terminated and completed with an
> error.
>
> This allows the controller to resume operation after internal failures
> rather than remaining permanently stuck.
>
> Note, internal errors indicated by INTR_HC_INTERNAL_ERR, cause the
> controller to stop.
>
> Signed-off-by: Adrian Hunter <adrian.hunter at intel.com>
> ---

Reviewed-by: Frank Li <Frank.Li at nxp.com>

>
>
> Changes in V4:
>
> 	None
>
> Changes in V3:
>
> 	When erroring out transfers, ensure the final transfer of a
> 	transfer list is processed last
>
> Changes in V2:
>
> 	Rename completing_xfer to final_xfer
> 	Add hci_dma_xfer_done() before checking for an already complete
> 	transfer
> 	Improve commit message
>
>
>  drivers/i3c/master/mipi-i3c-hci/cmd.h  |  6 ++
>  drivers/i3c/master/mipi-i3c-hci/core.c |  1 +
>  drivers/i3c/master/mipi-i3c-hci/dma.c  | 93 +++++++++++++++++++++++---
>  drivers/i3c/master/mipi-i3c-hci/hci.h  |  1 +
>  4 files changed, 92 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd.h b/drivers/i3c/master/mipi-i3c-hci/cmd.h
> index b1bf87daa651..7bada7b4b2de 100644
> --- a/drivers/i3c/master/mipi-i3c-hci/cmd.h
> +++ b/drivers/i3c/master/mipi-i3c-hci/cmd.h
> @@ -65,4 +65,10 @@ struct hci_cmd_ops {
>  extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v1;
>  extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v2;
>
> +static inline void hci_cmd_set_resp_err(u32 *response, int resp_err)
> +{
> +	*response &= ~RESP_ERR_FIELD;
> +	*response |= FIELD_PREP(RESP_ERR_FIELD, resp_err);
> +}
> +
>  #endif
> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> index 12a0122fb709..69dcf5dad3a5 100644
> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> @@ -668,6 +668,7 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id)
>  	if (val & INTR_HC_INTERNAL_ERR) {
>  		dev_err(&hci->master.dev, "Host Controller Internal Error\n");
>  		val &= ~INTR_HC_INTERNAL_ERR;
> +		hci->recovery_needed = true;
>  	}
>
>  	if (val)
> diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
> index f9023cb3c5a2..f39a6ce2aad5 100644
> --- a/drivers/i3c/master/mipi-i3c-hci/dma.c
> +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
> @@ -9,6 +9,7 @@
>   */
>
>  #include <linux/bitfield.h>
> +#include <linux/delay.h>
>  #include <linux/device.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/errno.h>
> @@ -258,6 +259,10 @@ static void hci_dma_init_rh(struct i3c_hci *hci, struct hci_rh_data *rh, int i)
>  	rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
>  	rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_RUN_STOP);
>
> +	/*
> +	 * Do not clear the entries of rh->src_xfers because the recovery uses
> +	 * them.  In other cases they should be NULL anyway.
> +	 */
>  	rh->done_ptr = 0;
>  	rh->ibi_chunk_ptr = 0;
>  	rh->xfer_space = rh->xfer_entries;
> @@ -362,7 +367,7 @@ static int hci_dma_init(struct i3c_hci *hci)
>  		rh->resp = dma_alloc_coherent(rings->sysdev, resps_sz,
>  					      &rh->resp_dma, GFP_KERNEL);
>  		rh->src_xfers =
> -			kmalloc_objs(*rh->src_xfers, rh->xfer_entries);
> +			kzalloc_objs(*rh->src_xfers, rh->xfer_entries);
>  		ret = -ENOMEM;
>  		if (!rh->xfer || !rh->resp || !rh->src_xfers)
>  			goto err_out;
> @@ -572,13 +577,15 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh)
>  			hci_dma_unmap_xfer(hci, xfer, 1);
>  			rh->src_xfers[done_ptr] = NULL;
>  			xfer->ring_entry = -1;
> -			xfer->response = resp;
>  			if (tid != xfer->cmd_tid) {
>  				dev_err(&hci->master.dev,
>  					"response tid=%d when expecting %d\n",
>  					tid, xfer->cmd_tid);
> -				/* TODO: do something about it? */
> +				hci->recovery_needed = true;
> +				if (!RESP_STATUS(resp))
> +					hci_cmd_set_resp_err(&resp, RESP_ERR_HC_TERMINATED);
>  			}
> +			xfer->response = resp;
>  			if (xfer == xfer->final_xfer || RESP_STATUS(resp))
>  				complete(xfer->final_xfer->completion);
>  			if (RESP_STATUS(resp))
> @@ -625,6 +632,60 @@ static void hci_dma_unblock_enqueue(struct i3c_hci *hci)
>  	}
>  }
>
> +static void hci_dma_error_out_rh(struct i3c_hci *hci, struct hci_rh_data *rh)
> +{
> +	/*
> +	 * The entries of rh->src_xfers are not cleared by
> +	 * i3c_hci_reset_and_restore(), so can be used here.  Do 2 passes so
> +	 * that the final_xfer of an xfer list is always processed last.
> +	 */
> +	for (int pass = 0; pass < 2; pass++)
> +		for (int i = 0; i < rh->xfer_entries; i++) {
> +			struct hci_xfer *xfer = rh->src_xfers[i];
> +
> +			if (!xfer || (!pass && xfer == xfer->final_xfer))
> +				continue;
> +			hci_dma_unmap_xfer(hci, xfer, 1);
> +			rh->src_xfers[i] = NULL;
> +			xfer->ring_entry = -1;
> +			hci_cmd_set_resp_err(&xfer->response, RESP_ERR_HC_TERMINATED);
> +			if (xfer == xfer->final_xfer)
> +				complete(xfer->final_xfer->completion);
> +		}
> +}
> +
> +static void hci_dma_error_out_all(struct i3c_hci *hci)
> +{
> +	struct hci_rings_data *rings = hci->io_data;
> +
> +	for (int i = 0; i < rings->total; i++)
> +		hci_dma_error_out_rh(hci, &rings->headers[i]);
> +}
> +
> +static void hci_dma_recovery(struct i3c_hci *hci)
> +{
> +	int ret;
> +
> +	dev_err(&hci->master.dev, "Attempting to recover from internal errors\n");
> +
> +	for (int i = 0; i < 3; i++) {
> +		ret = i3c_hci_reset_and_restore(hci);
> +		if (!ret)
> +			break;
> +		dev_err(&hci->master.dev, "Reset and restore failed, error %d\n", ret);
> +		/* Just in case the controller is busy, give it some time */
> +		msleep(1000);
> +	}
> +
> +	spin_lock_irq(&hci->lock);
> +	hci_dma_error_out_all(hci);
> +	hci_dma_unblock_enqueue(hci);
> +	hci->recovery_needed = false;
> +	spin_unlock_irq(&hci->lock);
> +
> +	dev_err(&hci->master.dev, "Recovery %s\n", ret ? "failed!" : "done");
> +}
> +
>  static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
>  				 struct hci_xfer *xfer_list, int n)
>  {
> @@ -640,6 +701,17 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
>
>  	ring_status = rh_reg_read(RING_STATUS);
>  	if (ring_status & RING_STATUS_RUNNING) {
> +		/*
> +		 * The transfer may have already completed, especially
> +		 * if recovery has just run.  Do nothing in that case.
> +		 */
> +		hci_dma_xfer_done(hci, rh);
> +		if (xfer_list->final_xfer->ring_entry < 0 &&
> +		    !hci->recovery_needed && !hci->enqueue_blocked &&
> +		    ring_status == (RING_STATUS_ENABLED | RING_STATUS_RUNNING)) {
> +			spin_unlock_irq(&hci->lock);
> +			return false;
> +		}
>  		hci->enqueue_blocked = true;
>  		spin_unlock_irq(&hci->lock);
>  		/* stop the ring */
> @@ -647,12 +719,8 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
>  		spin_lock_irq(&hci->lock);
>  		ring_status = rh_reg_read(RING_STATUS);
>  		if (ring_status & RING_STATUS_RUNNING) {
> -			/*
> -			 * We're deep in it if ever this condition is ever met.
> -			 * Hardware might still be writing to memory, etc.
> -			 */
> -			dev_crit(&hci->master.dev, "unable to abort the ring\n");
> -			WARN_ON(1);
> +			dev_err(&hci->master.dev, "Unable to abort the DMA ring\n");
> +			hci->recovery_needed = true;
>  		}
>  	}
>
> @@ -662,6 +730,13 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
>
>  	hci_dma_xfer_done(hci, rh);
>
> +	if (hci->recovery_needed) {
> +		hci->enqueue_blocked = true;
> +		spin_unlock_irq(&hci->lock);
> +		hci_dma_recovery(hci);
> +		return true;
> +	}
> +
>  	for (i = 0; i < n; i++) {
>  		struct hci_xfer *xfer = xfer_list + i;
>  		int idx = xfer->ring_entry;
> diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h
> index a3151c26827e..4bf2c66c97b4 100644
> --- a/drivers/i3c/master/mipi-i3c-hci/hci.h
> +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
> @@ -55,6 +55,7 @@ struct i3c_hci {
>  	atomic_t next_cmd_tid;
>  	bool irq_inactive;
>  	bool enqueue_blocked;
> +	bool recovery_needed;
>  	wait_queue_head_t enqueue_wait_queue;
>  	u32 caps;
>  	unsigned int quirks;
> --
> 2.51.0
>



More information about the linux-i3c mailing list