[PATCH] nvme: implement non-mdts command limits

Klaus Jensen its at irrelevant.dk
Thu Mar 11 05:51:26 GMT 2021


On Mar 10 11:57, Keith Busch wrote:
> Commands that access LBA contents without a data transfer between the
> host historically have not had a spec defined upper limit. The driver
> set the queue constraints for such commands to the max data transfer
> size just to be safe, but this artificial constraint frequently limits
> devices below their capabilities.
> 
> The NVMe Workgroup ratified TP4040 defines how a controller may
> advertise their non-MDTS limits. Use these if provided, and default
> to the current constraints if not.
> 
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
>  drivers/nvme/host/core.c | 89 +++++++++++++++++++++++++++++++---------
>  drivers/nvme/host/nvme.h |  3 ++
>  include/linux/nvme.h     | 10 +++++
>  3 files changed, 82 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f7bc808da3d0..af299487f7a8 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1939,7 +1939,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  	struct request_queue *queue = disk->queue;
>  	u32 size = queue_logical_block_size(queue);
>  
> -	if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> +	if (!(ctrl->max_discard_sectors)) {
>  		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
>  		return;
>  	}
> @@ -1957,8 +1957,8 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  	if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
>  		return;
>  
> -	blk_queue_max_discard_sectors(queue, UINT_MAX);
> -	blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
> +	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
> +	blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
>  
>  	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
>  		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
> @@ -1966,25 +1966,10 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  
>  static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
>  {
> -	u64 max_blocks;
> +	u64 max_blocks = ns->ctrl->max_zeroes_sectors;
>  
> -	if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
> -	    (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> +	if (max_blocks == 0)
>  		return;
> -	/*
> -	 * Even though NVMe spec explicitly states that MDTS is not
> -	 * applicable to the write-zeroes:- "The restriction does not apply to
> -	 * commands that do not transfer data between the host and the
> -	 * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
> -	 * In order to be more cautious use controller's max_hw_sectors value
> -	 * to configure the maximum sectors for the write-zeroes which is
> -	 * configured based on the controller's MDTS field in the
> -	 * nvme_init_ctrl_finish() if available.
> -	 */
> -	if (ns->ctrl->max_hw_sectors == UINT_MAX)
> -		max_blocks = (u64)USHRT_MAX + 1;
> -	else
> -		max_blocks = ns->ctrl->max_hw_sectors + 1;
>  
>  	blk_queue_max_write_zeroes_sectors(disk->queue,
>  					   nvme_lba_to_sect(ns, max_blocks));
> @@ -3063,6 +3048,65 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
>  	return 0;
>  }
>  
> +static inline u32 nvme_mps_size_to_bytes(u8 size)
> +{
> +	/* XXX: modify if NVME_CTRL_PAGE_SHIFT ever changes */
> +	return 1 << (size + 3);
> +}
> +
> +static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_command c = { };
> +	struct nvme_id_ctrl_nvm *id;
> +	int ret;
> +
> +	if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> +		ctrl->max_discard_sectors = 0;
> +		ctrl->max_discard_segments = 0;
> +	} else {
> +		ctrl->max_discard_sectors = UINT_MAX;
> +		ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
> +	}
> +
> +	/*
> +	 * Even though NVMe spec explicitly states that MDTS is not applicable
> +	 * to the write-zeroes, we are cautious and limit the default size to
> +	 * the controller's max_hw_sectors value, which is based on the MDTS
> +	 * field and possibly other limiting factors.
> +	 */
> +	if (!(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES) &&
> +	    (ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES))
> +		ctrl->max_zeroes_sectors = ctrl->max_hw_sectors;
> +	else
> +		ctrl->max_zeroes_sectors = 0;
> +
> +	if (ctrl->vs < NVME_VS(1, 2, 0))
> +		return 0;
> +
> +	id = kzalloc(sizeof(*id), GFP_KERNEL);
> +	if (!id)
> +		return 0;
> +
> +	c.identify.opcode = nvme_admin_identify;
> +	c.identify.cns = NVME_ID_CNS_CS_CTRL;
> +	c.identify.csi = NVME_CSI_NVM;
> +
> +	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
> +	if (ret)
> +		goto free_data;
> +
> +	if (id->dmrl)
> +		ctrl->max_discard_segments = id->dmrl;
> +	if (id->dmrsl)
> +		ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);

Since DMRSL is in terms of LBAs, should this use nvme_lba_to_sect?

> +	if (id->wzsl)
> +		ctrl->max_zeroes_sectors = nvme_mps_size_to_bytes(id->wzsl);
> +
> +free_data:
> +	kfree(id);
> +	return ret;
> +}
> +
>  static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  {
>  	struct nvme_id_ctrl *id;
> @@ -3238,6 +3282,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
>  	if (ret)
>  		return ret;
>  
> +	ret = nvme_init_non_mdts_limits(ctrl);
> +	if (ret < 0)
> +		return ret;
> +
>  	ret = nvme_configure_apst(ctrl);
>  	if (ret < 0)
>  		return ret;
> @@ -4768,6 +4816,7 @@ static inline void _nvme_check_size(void)
>  	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
>  	BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
>  	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
> +	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
>  	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
>  	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
>  	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 76de7ed55d90..cf63ab07be4c 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -276,6 +276,9 @@ struct nvme_ctrl {
>  	u32 max_hw_sectors;
>  	u32 max_segments;
>  	u32 max_integrity_segments;
> +	u32 max_discard_sectors;
> +	u32 max_discard_segments;
> +	u32 max_zeroes_sectors;
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	u32 max_zone_append;
>  #endif
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index b08787cd0881..edcbd60b88b9 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns {
>  	__u8	rsvd1[4095];
>  };
>  
> +struct nvme_id_ctrl_nvm {
> +	__u8	vsl;
> +	__u8	wzsl;
> +	__u8	wusl;
> +	__u8	dmrl;
> +	__le32	dmrsl;
> +	__le64	dmsl;
> +	__u8	rsvd16[4080];
> +};
> +
>  enum {
>  	NVME_ID_CNS_NS			= 0x00,
>  	NVME_ID_CNS_CTRL		= 0x01,
> -- 
> 2.25.4
> 
> 
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme

-- 
One of us - No more doubt, silence or taboo about mental illness.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20210311/85499536/attachment-0001.sig>


More information about the Linux-nvme mailing list