[PATCH] nvme: implement non-mdts command limits
Klaus Jensen
its at irrelevant.dk
Thu Mar 11 05:51:26 GMT 2021
On Mar 10 11:57, Keith Busch wrote:
> Commands that access LBA contents without a data transfer between the
> host historically have not had a spec defined upper limit. The driver
> set the queue constraints for such commands to the max data transfer
> size just to be safe, but this artificial constraint frequently limits
> devices below their capabilities.
>
> The NVMe Workgroup ratified TP4040 defines how a controller may
> advertise their non-MDTS limits. Use these if provided, and default
> to the current constraints if not.
>
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
> drivers/nvme/host/core.c | 89 +++++++++++++++++++++++++++++++---------
> drivers/nvme/host/nvme.h | 3 ++
> include/linux/nvme.h | 10 +++++
> 3 files changed, 82 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f7bc808da3d0..af299487f7a8 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1939,7 +1939,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
> struct request_queue *queue = disk->queue;
> u32 size = queue_logical_block_size(queue);
>
> - if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> + if (!(ctrl->max_discard_sectors)) {
> blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
> return;
> }
> @@ -1957,8 +1957,8 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
> if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
> return;
>
> - blk_queue_max_discard_sectors(queue, UINT_MAX);
> - blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
> + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
> + blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
>
> if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
> blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
> @@ -1966,25 +1966,10 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>
> static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
> {
> - u64 max_blocks;
> + u64 max_blocks = ns->ctrl->max_zeroes_sectors;
>
> - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
> - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> + if (max_blocks == 0)
> return;
> - /*
> - * Even though NVMe spec explicitly states that MDTS is not
> - * applicable to the write-zeroes:- "The restriction does not apply to
> - * commands that do not transfer data between the host and the
> - * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
> - * In order to be more cautious use controller's max_hw_sectors value
> - * to configure the maximum sectors for the write-zeroes which is
> - * configured based on the controller's MDTS field in the
> - * nvme_init_ctrl_finish() if available.
> - */
> - if (ns->ctrl->max_hw_sectors == UINT_MAX)
> - max_blocks = (u64)USHRT_MAX + 1;
> - else
> - max_blocks = ns->ctrl->max_hw_sectors + 1;
>
> blk_queue_max_write_zeroes_sectors(disk->queue,
> nvme_lba_to_sect(ns, max_blocks));
> @@ -3063,6 +3048,65 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
> return 0;
> }
>
> +static inline u32 nvme_mps_size_to_bytes(u8 size)
> +{
> + /* XXX: modify if NVME_CTRL_PAGE_SHIFT ever changes */
> + return 1 << (size + 3);
> +}
> +
> +static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
> +{
> + struct nvme_command c = { };
> + struct nvme_id_ctrl_nvm *id;
> + int ret;
> +
> + if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> + ctrl->max_discard_sectors = 0;
> + ctrl->max_discard_segments = 0;
> + } else {
> + ctrl->max_discard_sectors = UINT_MAX;
> + ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
> + }
> +
> + /*
> + * Even though NVMe spec explicitly states that MDTS is not applicable
> + * to the write-zeroes, we are cautious and limit the default size to
> + * the controller's max_hw_sectors value, which is based on the MDTS
> + * field and possibly other limiting factors.
> + */
> + if (!(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES) &&
> + (ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES))
> + ctrl->max_zeroes_sectors = ctrl->max_hw_sectors;
> + else
> + ctrl->max_zeroes_sectors = 0;
> +
> + if (ctrl->vs < NVME_VS(1, 2, 0))
> + return 0;
> +
> + id = kzalloc(sizeof(*id), GFP_KERNEL);
> + if (!id)
> + return 0;
> +
> + c.identify.opcode = nvme_admin_identify;
> + c.identify.cns = NVME_ID_CNS_CS_CTRL;
> + c.identify.csi = NVME_CSI_NVM;
> +
> + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
> + if (ret)
> + goto free_data;
> +
> + if (id->dmrl)
> + ctrl->max_discard_segments = id->dmrl;
> + if (id->dmrsl)
> + ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
Since DMRSL is in terms of LBAs, should this use nvme_lba_to_sect?
> + if (id->wzsl)
> + ctrl->max_zeroes_sectors = nvme_mps_size_to_bytes(id->wzsl);
> +
> +free_data:
> + kfree(id);
> + return ret;
> +}
> +
> static int nvme_init_identify(struct nvme_ctrl *ctrl)
> {
> struct nvme_id_ctrl *id;
> @@ -3238,6 +3282,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
> if (ret)
> return ret;
>
> + ret = nvme_init_non_mdts_limits(ctrl);
> + if (ret < 0)
> + return ret;
> +
> ret = nvme_configure_apst(ctrl);
> if (ret < 0)
> return ret;
> @@ -4768,6 +4816,7 @@ static inline void _nvme_check_size(void)
> BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
> + BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
> BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
> BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 76de7ed55d90..cf63ab07be4c 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -276,6 +276,9 @@ struct nvme_ctrl {
> u32 max_hw_sectors;
> u32 max_segments;
> u32 max_integrity_segments;
> + u32 max_discard_sectors;
> + u32 max_discard_segments;
> + u32 max_zeroes_sectors;
> #ifdef CONFIG_BLK_DEV_ZONED
> u32 max_zone_append;
> #endif
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index b08787cd0881..edcbd60b88b9 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns {
> __u8 rsvd1[4095];
> };
>
> +struct nvme_id_ctrl_nvm {
> + __u8 vsl;
> + __u8 wzsl;
> + __u8 wusl;
> + __u8 dmrl;
> + __le32 dmrsl;
> + __le64 dmsl;
> + __u8 rsvd16[4080];
> +};
> +
> enum {
> NVME_ID_CNS_NS = 0x00,
> NVME_ID_CNS_CTRL = 0x01,
> --
> 2.25.4
>
>
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme
--
One of us - No more doubt, silence or taboo about mental illness.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20210311/85499536/attachment-0001.sig>
More information about the Linux-nvme
mailing list