[PATCH 2/2] nvme: fix atomic write size validation

Uday Shankar ushankar at purestorage.com
Thu Jun 26 17:05:09 PDT 2025


On Wed, Jun 25, 2025 at 08:39:56AM +0200, Christoph Hellwig wrote:
> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller.  This avoid spurious
> failures for controllers with namespaces that have different namespaces

nit: having namespaces with different logical block sizes

> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
> 
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.
> 
> Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> Reported-by: Yi Zhang <yi.zhang at redhat.com>

I couldn't find the report on linux-nvme; if it is public, can you
include a "Closes:" link here?

> Signed-off-by: Christoph Hellwig <hch at lst.de>
> Reviewed-by: Luis Chamberlain <mcgrof at kernel.org>
> Reviewed-by: John Garry <john.g.garry at oracle.com>
> Tested-by: Yi Zhang <yi.zhang at redhat.com>

I also saw a problem on a system running 6.16-rc3 with several NVMe
subsystems, each containing one controller with AWUPF=0 and two
namespaces with NSABP=0. One namespace has a 512-byte LBA size while
the other has a 4096-byte LBA size, and some namespaces failed to add:

# dmesg | grep Inconsistent
[    5.537494] nvme nvme4: nvme4n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.539038] nvme nvme6: nvme6n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.560079] nvme nvme7: nvme7n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.595093] nvme nvme3: nvme3n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.597627] nvme nvme8: nvme8n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.600007] nvme nvme0: nvme0n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.605748] nvme nvme5: nvme5n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.608961] nvme nvme11: nvme11n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.618011] nvme nvme12: nvme12n1: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.618251] nvme nvme10: nvme10n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes

Note that despite what the messages say, only the controllers whose log
line contains "Subsystem=512 bytes, Controller/Namespace=4096 bytes"
actually failed to add a namespace - the others had both namespaces
added just fine. I guess the order in which the namespaces get added
isn't deterministic.

Anyway, this patch set fixed the problem.

Tested-by: Uday Shankar <ushankar at purestorage.com>

> ---
>  drivers/nvme/host/core.c | 33 +++++++++++----------------------
>  drivers/nvme/host/nvme.h |  3 +--
>  2 files changed, 12 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 520fb5f1e214..e533d791955d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>  		 * no clear language in the specification prohibiting different
>  		 * values for different controllers in the subsystem.
>  		 */
> -		atomic_bs = (1 + ns->ctrl->awupf) * bs;
> -	}
> -
> -	if (!ns->ctrl->subsys->atomic_bs) {
> -		ns->ctrl->subsys->atomic_bs = atomic_bs;
> -	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> -		dev_err_ratelimited(ns->ctrl->device,
> -			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> -			ns->disk ? ns->disk->disk_name : "?",
> -			ns->ctrl->subsys->atomic_bs,
> -			atomic_bs);
> +		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
>  	}
>  
>  	lim->atomic_write_hw_max = atomic_bs;
> @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>  	if (!nvme_update_disk_info(ns, id, &lim))
>  		capacity = 0;
>  
> -	/*
> -	 * Validate the max atomic write size fits within the subsystem's
> -	 * atomic write capabilities.
> -	 */
> -	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> -		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> -		ret = -ENXIO;
> -		goto out;
> -	}
> -
>  	nvme_config_discard(ns, &lim);
>  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>  	    ns->head->ids.csi == NVME_CSI_ZNS)
> @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
>  	memcpy(subsys->model, id->mn, sizeof(subsys->model));
>  	subsys->vendor_id = le16_to_cpu(id->vid);
>  	subsys->cmic = id->cmic;
> +	subsys->awupf = le16_to_cpu(id->awupf);
>  
>  	/* Versions prior to 1.4 don't necessarily report a valid type */
>  	if (id->cntrltype == NVME_CTRL_DISC ||
> @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  		if (ret)
>  			goto out_free;
>  	}
> +
> +	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> +		dev_err_ratelimited(ctrl->device,
> +			"inconsistent AWUPF, controller not added (%u/%u).\n",
> +			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> +		ret = -EINVAL;
> +		goto out_free;
> +	}
> +

Could you explain (and perhaps add a comment here) why all controllers
in a subsystem must report the same awupf? Is it because namespaces may
inherit awupf from the controller, and may be reachable through multiple
controllers for multipath?

>  	memcpy(ctrl->subsys->firmware_rev, id->fr,
>  	       sizeof(ctrl->subsys->firmware_rev));
>  
> @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  		dev_pm_qos_expose_latency_tolerance(ctrl->device);
>  	else if (!ctrl->apst_enabled && prev_apst_enabled)
>  		dev_pm_qos_hide_latency_tolerance(ctrl->device);
> -	ctrl->awupf = le16_to_cpu(id->awupf);
>  out_free:
>  	kfree(id);
>  	return ret;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a468cdc5b5cb..7df2ea21851f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -410,7 +410,6 @@ struct nvme_ctrl {
>  
>  	enum nvme_ctrl_type cntrltype;
>  	enum nvme_dctype dctype;
> -	u16 awupf; /* 0's based value. */
>  };
>  
>  static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> @@ -443,11 +442,11 @@ struct nvme_subsystem {
>  	u8			cmic;
>  	enum nvme_subsys_type	subtype;
>  	u16			vendor_id;
> +	u16			awupf; /* 0's based value. */
>  	struct ida		ns_ida;
>  #ifdef CONFIG_NVME_MULTIPATH
>  	enum nvme_iopolicy	iopolicy;
>  #endif
> -	u32			atomic_bs;
>  };
>  
>  /*
> -- 
> 2.47.2
> 
> 



More information about the Linux-nvme mailing list