[PATCHv2 11/17] nvme: add Clang context annotations for nvme_queue::sq_lock

Sun Jun 14 06:15:26 PDT 2026

nvme_queue::sq_tail, nvme_queue::last_sq_tail and nvme_queue::sq_cmds
are protected by nvme_queue::sq_lock. Annotate each field with
__guarded_by(&sq_lock) and annotate helpers that access them with
__must_hold(&sq_lock) so that Clang's context analysis can validate
the locking requirements.

Accesses to sq_tail used solely for tracing are wrapped in
context_unsafe(), as they only require a lockless snapshot of the
value. nvme_init_queue() and nvme_free_queue() operate on queues
that have not yet been published or are no longer reachable,
respectively, and therefore do not require sq_lock protection.
Similarly, nvme_alloc_sq_cmds() allocates memory for
nvme_queue::sq_cmds of a queue that is not yet published or in use.
Hence it is safe to annotate all three helpers with
__context_unsafe().

Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
 drivers/nvme/host/pci.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 894da63127a7..86cbc88a05b5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -366,7 +366,7 @@ struct nvme_queue {
 	struct nvme_dev *dev;
 	struct nvme_descriptor_pools descriptor_pools;
 	spinlock_t sq_lock;
-	void *sq_cmds;
+	void *sq_cmds __guarded_by(&sq_lock);
 	 /* only used for poll queues: */
 	spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
 	struct nvme_completion *cqes;
@@ -375,9 +375,9 @@ struct nvme_queue {
 	u32 __iomem *q_db;
 	u32 q_depth;
 	u16 cq_vector;
-	u16 sq_tail;
-	u16 last_sq_tail;
 	u16 cq_head;
+	u16 sq_tail __guarded_by(&sq_lock);
+	u16 last_sq_tail __guarded_by(&sq_lock);
 	u16 qid;
 	u8 cq_phase;
 	u8 sqes;
@@ -711,6 +711,7 @@ static void nvme_pci_map_queues(struct blk_mq_tag_set *set)
  * Write sq tail if we are asked to, or if the next command would wrap.
  */
 static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
+	__must_hold(&nvmeq->sq_lock)
 {
 	if (!write_sq) {
 		u16 next_tail = nvmeq->sq_tail + 1;
@@ -729,6 +730,7 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
 
 static inline void nvme_sq_copy_cmd(struct nvme_queue *nvmeq,
 				    struct nvme_command *cmd)
+	__must_hold(&nvmeq->sq_lock)
 {
 	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
 		absolute_pointer(cmd), sizeof(*cmd));
@@ -1581,7 +1583,10 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		return;
 	}
 
-	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
+	/*
+	 * Tracing only; a lockless snapshot of nvmeq->sq_tail is sufficient.
+	 */
+	context_unsafe(trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail));
 	if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
 	    !blk_mq_add_to_batch(req, iob,
 				 nvme_req(req)->status != NVME_SC_SUCCESS,
@@ -2008,6 +2013,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
+	__context_unsafe(/* frees queue which is no longer in use */)
 {
 	dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
@@ -2102,6 +2108,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				int qid)
+	__context_unsafe(/* safe to allocate sq_cmds without any protection */)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -2176,6 +2183,7 @@ static int queue_request_irq(struct nvme_queue *nvmeq)
 }
 
 static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
+	__context_unsafe(/* safe to init queue without any protection */)
 {
 	struct nvme_dev *dev = nvmeq->dev;
 
-- 
2.53.0