[PATCHv3] nvme: implement non-mdts command limits

Christoph Hellwig hch at lst.de
Thu Mar 25 06:50:28 GMT 2021


On Wed, Mar 24, 2021 at 04:18:05PM -0700, Keith Busch wrote:
> Commands that access LBA contents without a data transfer between the
> host and controller have historically not had a spec-defined upper
> limit. The driver
> set the queue constraints for such commands to the max data transfer
> size just to be safe, but this artificial constraint frequently limits
> devices below their capabilities.
> 
> TP4040, ratified by the NVMe Workgroup, defines how a controller may
> advertise its non-MDTS limits. Use these if provided, and default to
> the current constraints if not. Since the Dataset Management command
> limits are defined in logical blocks, but there is no namespace at this
> point to tell us the logical block size, the code defaults to the safe
> 512b size.
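
Not a hunk from the patch, just to make the "default to the current
constraints" part concrete.  The fields are the ones visible in the hunks
below, but the exact code is a guess:

	/*
	 * Sketch only: start from the constraints the driver applies today,
	 * gated on ONCS as before, so a controller that reports nothing via
	 * TP4040 behaves exactly as it does now.  A zero limit is what the
	 * reworked nvme_config_discard() below keys off to disable discard.
	 */
	if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
		ctrl->max_discard_sectors = UINT_MAX;
		ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
	} else {
		ctrl->max_discard_sectors = 0;
		ctrl->max_discard_segments = 0;
	}

As for the 512b default: the per-range limit (DMRSL in the TP) is reported
in logical blocks, and no namespace is attached at this point, so treating
the blocks as 512 bytes lets the raw value be used directly as a 512-byte
sector count; a namespace formatted with 4KiB blocks merely ends up with a
limit 8x more conservative than strictly necessary.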
> 
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
> v2->v3:
> 
>   Remove the nvm_config_write_zeroes helper (hch)
> 
>   For clarity, don't use inverted oncs logic (hch)
> 
>   Replace nvme revision check with nvme_ctrl_limited_cns (hch)
> 
>   Rebased patch for nvme-5.13: the previous version was based on a local
>   conflict-resolved 5.12+5.13 branch, so that version wouldn't have
>   successfully applied to either upstream branch.
> 
>  drivers/nvme/host/core.c | 107 ++++++++++++++++++++++++++-------------
>  drivers/nvme/host/nvme.h |   3 ++
>  include/linux/nvme.h     |  10 ++++
>  3 files changed, 86 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 40215a0246e4e..15ee470c1b8c6 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1936,7 +1936,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  	struct request_queue *queue = disk->queue;
>  	u32 size = queue_logical_block_size(queue);
>  
> -	if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> +	if (ctrl->max_discard_sectors == 0) {
>  		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
>  		return;
>  	}
> @@ -1954,39 +1954,13 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  	if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
>  		return;
>  
> -	blk_queue_max_discard_sectors(queue, UINT_MAX);
> -	blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
> +	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
> +	blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
>  
>  	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
>  		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
>  }
>  
> -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
> -{
> -	u64 max_blocks;
> -
> -	if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
> -	    (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> -		return;
> -	/*
> -	 * Even though NVMe spec explicitly states that MDTS is not
> -	 * applicable to the write-zeroes:- "The restriction does not apply to
> -	 * commands that do not transfer data between the host and the
> -	 * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
> -	 * In order to be more cautious use controller's max_hw_sectors value
> -	 * to configure the maximum sectors for the write-zeroes which is
> -	 * configured based on the controller's MDTS field in the
> -	 * nvme_init_ctrl_finish() if available.
> -	 */
> -	if (ns->ctrl->max_hw_sectors == UINT_MAX)
> -		max_blocks = (u64)USHRT_MAX + 1;
> -	else
> -		max_blocks = ns->ctrl->max_hw_sectors + 1;
> -
> -	blk_queue_max_write_zeroes_sectors(disk->queue,
> -					   nvme_lba_to_sect(ns, max_blocks));
> -}
> -
>  static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
>  {
>  	return !uuid_is_null(&ids->uuid) ||
> @@ -2156,7 +2130,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
>  	set_capacity_and_notify(disk, capacity);
>  
>  	nvme_config_discard(disk, ns);
> -	nvme_config_write_zeroes(disk, ns);
> +	blk_queue_max_write_zeroes_sectors(disk->queue,
> +					   ns->ctrl->max_zeroes_sectors);
>  
>  	set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
>  		test_bit(NVME_NS_FORCE_RO, &ns->flags));
> @@ -3060,14 +3035,73 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
>  	return 0;
>  }
>  
> +static inline u32 nvme_mps_size_to_sectors(struct nvme_ctrl *ctrl, u8 size)
> +{
> +	int page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
> +
> +	return 1 << (size + page_shift - 9);

A little nitpick we can fix up when applying if needed: MPS already is
the memory page size, so the "size" in the helper's name seems redundant.
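
For anyone doing the arithmetic on that helper: the TP reports these
limits as a power of two in units of the minimum memory page size, so with
MPSMIN = 0 (4KiB pages) page_shift is 12, and e.g. a reported value of 8
comes out as 1 << (8 + 12 - 9) = 2048 512-byte sectors, i.e. 1MiB, which
matches 2^8 * 4KiB.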

> +	/*
> +	 * Even though NVMe spec explicitly states that MDTS is not applicable
> +	 * to the write-zeroes, we are cautious and limit the size to the
> +	 * controllers max_hw_sectors value, which is based on the MDTS field
> +	 * and possibly other limiting factors.
> +	 */
> +	if (!(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES) &&
> +	    (ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES))

I would place the oncs check before the quirks check, as that flows a lot
more logically.

But all the actual logic looks fine to me.
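
For anyone reading along without the rest of the patch in front of them,
the rough shape of what is being reviewed looks something like the sketch
below.  This is pieced together from the commit message and TP4040 rather
than quoted from the patch, and the helper, structure and field names
(nvme_init_non_mdts_limits, nvme_id_ctrl_nvm, wzsl, dmrl, dmrsl) are
assumptions:

static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
{
	struct nvme_command c = { };
	struct nvme_id_ctrl_nvm *id;
	int ret;

	/* Controllers limited to basic CNS values cannot report this data. */
	if (nvme_ctrl_limited_cns(ctrl))
		return 0;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return 0;	/* not fatal, just keep the defaults */

	/* I/O Command Set specific Identify Controller: CNS 06h, CSI = NVM */
	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = NVME_ID_CNS_CS_CTRL;
	c.identify.csi = NVME_CSI_NVM;

	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
	if (ret)
		goto free_data;

	/* Non-zero fields override the conservative ONCS/MDTS defaults. */
	if (id->wzsl)
		ctrl->max_zeroes_sectors = nvme_mps_size_to_sectors(ctrl,
								    id->wzsl);
	if (id->dmrl)
		ctrl->max_discard_segments = id->dmrl;
	if (id->dmrsl)	/* logical blocks, 512b assumed as discussed above */
		ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
free_data:
	kfree(id);
	return ret;
}

A limit that stays zero simply keeps the corresponding feature off: the
reworked nvme_config_discard() clears QUEUE_FLAG_DISCARD when
max_discard_sectors is zero, and a zero max_zeroes_sectors passed to
blk_queue_max_write_zeroes_sectors() disables Write Zeroes.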


