[PATCH] nvme: avoid cqe corruption when update at the same time as read

Johannes Thumshirn jthumshirn at suse.de
Thu Mar 10 00:31:42 PST 2016


On Wed, Mar 09, 2016 at 09:08:46AM -0800, Christoph Hellwig wrote:
> On Sat, Mar 05, 2016 at 08:23:15AM +0100, Marta Rybczynska wrote:
> > The cqe structure read normally happens from lower to upper addresses
> > and the validity bit (status) is at the highest address. If the PCI
> > updates the memory when the cqe is read by multiple non-atomic loads,
> > the structure may be corrupted. Avoid this by reading the status
> > first and then the whole structure.
> 
> Doing the phase check separately sounds sensible to me, but how about
> doing something like the version below, which ensures we only read the
> whole cqe after the check, and cleans things up a bit by using a common
> helper:
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 1d1c6d1..f6ea5d3 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -704,6 +704,11 @@ static void nvme_complete_rq(struct request *req)
>  	blk_mq_end_request(req, error);
>  }
>  
> +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head)
> +{
> +	return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == head;
> +}
> +
>  static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
>  {
>  	u16 head, phase;
> @@ -711,13 +716,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
>  	head = nvmeq->cq_head;
>  	phase = nvmeq->cq_phase;
>  
> -	for (;;) {
> +	while (nvme_cqe_valid(nvmeq, head)) {
>  		struct nvme_completion cqe = nvmeq->cqes[head];
> -		u16 status = le16_to_cpu(cqe.status);
>  		struct request *req;
>  
> -		if ((status & 1) != phase)
> -			break;
>  		if (++head == nvmeq->q_depth) {
>  			head = 0;
>  			phase = !phase;
> @@ -748,7 +750,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
>  		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
>  		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
>  			memcpy(req->special, &cqe, sizeof(cqe));
> -		blk_mq_complete_request(req, status >> 1);
> +		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
>  
>  	}
>  
> @@ -789,18 +791,17 @@ static irqreturn_t nvme_irq(int irq, void *data)
>  static irqreturn_t nvme_irq_check(int irq, void *data)
>  {
>  	struct nvme_queue *nvmeq = data;
> -	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
> -	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
> -		return IRQ_NONE;
> -	return IRQ_WAKE_THREAD;
> +
> +	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head))
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_NONE;
>  }
>  
>  static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
>  {
>  	struct nvme_queue *nvmeq = hctx->driver_data;
>  
> -	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
> -	    nvmeq->cq_phase) {
> +	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head)) {
>  		spin_lock_irq(&nvmeq->q_lock);
>  		__nvme_process_cq(nvmeq, &tag);
>  		spin_unlock_irq(&nvmeq->q_lock);
> 
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme

This looks sensible,
Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn at suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850



More information about the Linux-nvme mailing list