[PATCH 1/1] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a Samsung PM1725a

Dmitry Monakhov dmtrmonakhov at yandex-team.ru
Thu Mar 11 10:28:40 GMT 2021



10.03.2021, 16:41, "Christoph Hellwig" <hch at lst.de>:
> On Wed, Mar 10, 2021 at 02:21:56PM +0100, Christoph Hellwig wrote:
>>  Can you try this patch instead?
>>
>>  http://lists.infradead.org/pipermail/linux-nvme/2021-February/023183.html
>
> Actually, please try the patch below instead, it looks like our existing
> logic messes up the units:
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e68a8c4ac5a6ea..1867fdf2205bd7 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1963,30 +1963,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>                  blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
>  }
>
> -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
> +/*
> + * Even though NVMe spec explicitly states that MDTS is not applicable to the
> + * write-zeroes, we are cautious and limit the size to the controllers
> + * max_hw_sectors value, which is based on the MDTS field and possibly other
> + * limiting factors.
> + */
> +static void nvme_config_write_zeroes(struct request_queue *q,
> + struct nvme_ctrl *ctrl)
>  {
> - u64 max_blocks;
> -
> - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
> - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> - return;
> - /*
> - * Even though NVMe spec explicitly states that MDTS is not
> - * applicable to the write-zeroes:- "The restriction does not apply to
> - * commands that do not transfer data between the host and the
> - * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
> - * In order to be more cautious use controller's max_hw_sectors value
> - * to configure the maximum sectors for the write-zeroes which is
> - * configured based on the controller's MDTS field in the
> - * nvme_init_identify() if available.
> - */
> - if (ns->ctrl->max_hw_sectors == UINT_MAX)
> - max_blocks = (u64)USHRT_MAX + 1;
> - else
> - max_blocks = ns->ctrl->max_hw_sectors + 1;
> -
> - blk_queue_max_write_zeroes_sectors(disk->queue,
> - nvme_lba_to_sect(ns, max_blocks));
> + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
> + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> + blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors);
>  }
>
>  static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
> @@ -2158,7 +2146,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
>          set_capacity_and_notify(disk, capacity);
>
>          nvme_config_discard(disk, ns);
> - nvme_config_write_zeroes(disk, ns);
> + nvme_config_write_zeroes(disk->queue, ns->ctrl);
>
>          set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
>                  test_bit(NVME_NS_FORCE_RO, &ns->flags));
In order to rule out a possible issue with incorrect request sizes, I ran a test that issues write_zeroes
via fio's falloc engine with randtrim, which actually does fallocate punch_hole+keep_size, which in turn is converted to blkdev_issue_zeroout().
Note: fio needs to be patched for this, see: https://github.com/axboe/fio/pull/1203

fio --name t --ioengine=falloc --rw=randtrim --bs=512 --size=100M --filename=/dev/nvme0n1 --numjobs=16
After a couple of minutes it got stuck, and then a timeout occurred.
cat /sys/kernel/debug/block/nvme0n1/hctx*/busy                                                                                                                                                   
00000000cd27b755 {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=205, .internal_tag=-1}
000000009d3f2b8f {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=244, .internal_tag=-1}
00000000eb4166fe {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=709, .internal_tag=-1}
0000000049b49c60 {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=433, .internal_tag=-1}
0000000018b93c40 {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=5, .internal_tag=-1}
00000000ac15ef73 {.op=WRITE_ZEROES, .cmd_flags=SYNC, .rq_flags=DONTPREP|IO_STAT|STATS, .state=in_flight, .tag=268, .internal_tag=-1}

So, this is definitely a hardware issue, and write_zeroes should be disabled for this particular model.




More information about the Linux-nvme mailing list