[Bug Report] nvme-cli fails re-formatting NVMe namespace

Nilay Shroff nilay at linux.ibm.com
Tue Mar 19 22:53:27 PDT 2024



On 3/20/24 07:49, Christoph Hellwig wrote:
> Can you try this patch instead?
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 00864a63447099..4bac54d4e0015b 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2204,6 +2204,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
>  	}
>  
>  	if (!ret && nvme_ns_head_multipath(ns->head)) {
> +		struct queue_limits *ns_lim = &ns->disk->queue->limits;
>  		struct queue_limits lim;
>  
>  		blk_mq_freeze_queue(ns->head->disk->queue);
> @@ -2215,7 +2216,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
>  		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
>  		nvme_mpath_revalidate_paths(ns);
>  
> +		/*
> +		 * queue_limits mixes values that are the hardware limitations
> +		 * for bio splitting with what is the device configuration.
> +		 *
> +		 * For NVMe the device configuration can change after e.g. a
> +		 * Format command, and we really want to pick up the new format
> +		 * value here.  But we must still stack the queue limits to the
> +		 * least common denominator for multipathing to split the bios
> +		 * properly.
> +		 *
> +		 * To work around this, we explicitly set the device
> +		 * configuration to those that we just queried, but only stack
> +		 * the splitting limits in to make sure we still obey possibly
> +		 * lower limitations of other controllers.
> +		 */
>  		lim = queue_limits_start_update(ns->head->disk->queue);
> +		lim.logical_block_size = ns_lim->logical_block_size;
> +		lim.physical_block_size = ns_lim->physical_block_size;
> +		lim.io_min = ns_lim->io_min;
> +		lim.io_opt = ns_lim->io_opt;
>  		queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
>  					ns->head->disk->disk_name);
>  		ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
> 

I have just tested the above patch and it's working as expected. With the above patch,
I don't see any issue formatting the NVMe disk with block-size of 512. Looks good to me.

Thanks,
--Nilay

PS: For reference, please find below test result obtained using the above patch.
--------------------------------------------------------------------------------

# lspci 
0018:01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe SSD Controller PM173X

# nvme list 
Node                  Generic               SN                   Model                                    Namespace  Usage                      Format           FW Rev  
--------------------- --------------------- -------------------- ---------------------------------------- ---------- -------------------------- ---------------- --------
/dev/nvme0n1          /dev/ng0n1            S6EUNA0R500358       1.6TB NVMe Gen4 U.2 SSD                  0x1          1.60  TB /   1.60  TB      4 KiB +  0 B   REV.SN49

# nvme id-ns /dev/nvme0n1 -H 
NVME Identify Namespace 1:
nsze    : 0xba4d4ab0
ncap    : 0xba4d4ab0
nuse    : 0xba4d4ab0
nsfeat  : 0
  [4:4] : 0	NPWG, NPWA, NPDG, NPDA, and NOWS are Not Supported
  [3:3] : 0	NGUID and EUI64 fields if non-zero, Reused
  [2:2] : 0	Deallocated or Unwritten Logical Block error Not Supported
  [1:1] : 0	Namespace uses AWUN, AWUPF, and ACWU
  [0:0] : 0	Thin Provisioning Not Supported

<snip>
<snip>

nlbaf   : 4
flbas   : 0
  [6:5] : 0	Most significant 2 bits of Current LBA Format Selected
  [4:4] : 0	Metadata Transferred in Separate Contiguous Buffer
  [3:0] : 0	Least significant 4 bits of Current LBA Format Selected
  
<snip>
<snip>  

LBA Format  0 : Metadata Size: 0   bytes - Data Size: 4096 bytes - Relative Performance: 0 Best (in use)
LBA Format  1 : Metadata Size: 8   bytes - Data Size: 4096 bytes - Relative Performance: 0x2 Good 
LBA Format  2 : Metadata Size: 0   bytes - Data Size: 512 bytes - Relative Performance: 0x1 Better 
LBA Format  3 : Metadata Size: 8   bytes - Data Size: 512 bytes - Relative Performance: 0x3 Degraded 
LBA Format  4 : Metadata Size: 64  bytes - Data Size: 4096 bytes - Relative Performance: 0x3 Degraded 

# lsblk -t /dev/nvme0n1 
NAME    ALIGNMENT MIN-IO OPT-IO PHY-SEC LOG-SEC ROTA SCHED RQ-SIZE  RA WSAME
nvme0n1         0   4096      0    4096    4096    0               128    0B
                                   ^^^     ^^^ 	

<< The nvme disk has block size of 4096; now format it with block size of 512

# nvme format /dev/nvme0n1 --lbaf=2 --pil=0 --ms=0 --pi=0 -f 
Success formatting namespace:1

>> Success formatting; no error seen

# lsblk -t /dev/nvme0n1 
NAME    ALIGNMENT MIN-IO OPT-IO PHY-SEC LOG-SEC ROTA SCHED RQ-SIZE  RA WSAME
nvme0n1         0    512      0     512     512    0               128    0B
                                    ^^^     ^^^
# cat /sys/block/nvme0n1/queue/logical_block_size:512
# cat /sys/block/nvme0n1/queue/physical_block_size:512
# cat /sys/block/nvme0n1/queue/optimal_io_size:0
# cat /sys/block/nvme0n1/queue/minimum_io_size:512

# cat /sys/block/nvme0c0n1/queue/logical_block_size:512
# cat /sys/block/nvme0c0n1/queue/physical_block_size:512
# cat /sys/block/nvme0c0n1/queue/optimal_io_size:0
# cat /sys/block/nvme0c0n1/queue/minimum_io_size:512

# nvme id-ns /dev/nvme0n1 -H 
NVME Identify Namespace 1:
nsze    : 0xba4d4ab0
ncap    : 0xba4d4ab0
nuse    : 0xba4d4ab0
nsfeat  : 0
  [4:4] : 0	NPWG, NPWA, NPDG, NPDA, and NOWS are Not Supported
  [3:3] : 0	NGUID and EUI64 fields if non-zero, Reused
  [2:2] : 0	Deallocated or Unwritten Logical Block error Not Supported
  [1:1] : 0	Namespace uses AWUN, AWUPF, and ACWU
  [0:0] : 0	Thin Provisioning Not Supported

<snip>
<snip>

nlbaf   : 4
flbas   : 0x2
  [6:5] : 0	Most significant 2 bits of Current LBA Format Selected
  [4:4] : 0	Metadata Transferred in Separate Contiguous Buffer
  [3:0] : 0x2	Least significant 4 bits of Current LBA Format Selected

<snip>
<snip>

LBA Format  0 : Metadata Size: 0   bytes - Data Size: 4096 bytes - Relative Performance: 0 Best 
LBA Format  1 : Metadata Size: 8   bytes - Data Size: 4096 bytes - Relative Performance: 0x2 Good 
LBA Format  2 : Metadata Size: 0   bytes - Data Size: 512 bytes - Relative Performance: 0x1 Better (in use)
LBA Format  3 : Metadata Size: 8   bytes - Data Size: 512 bytes - Relative Performance: 0x3 Degraded 
LBA Format  4 : Metadata Size: 64  bytes - Data Size: 4096 bytes - Relative Performance: 0x3 Degraded 




More information about the Linux-nvme mailing list