[PATCH 4/6] block: introduce IOCTL to report dev properties
Matias Bjørling
mb at lightnvm.io
Thu Jun 25 15:58:48 EDT 2020
On 25/06/2020 21.42, Javier González wrote:
> On 25.06.2020 15:10, Matias Bjørling wrote:
>> On 25/06/2020 14.21, Javier González wrote:
>>> From: Javier González <javier.gonz at samsung.com>
>>>
>>> With the addition of ZNS, a new set of properties have been added to
>>> the
>>> zoned block device. This patch introduces a new IOCTL to expose these
>>> rroperties to user space.
>>>
>>> Signed-off-by: Javier González <javier.gonz at samsung.com>
>>> Signed-off-by: SelvaKumar S <selvakuma.s1 at samsung.com>
>>> Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
>>> Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
>>> ---
>>> block/blk-zoned.c | 46 ++++++++++++++++++++++++++
>>> block/ioctl.c | 2 ++
>>> drivers/nvme/host/core.c | 2 ++
>>> drivers/nvme/host/nvme.h | 11 +++++++
>>> drivers/nvme/host/zns.c | 61 +++++++++++++++++++++++++++++++++++
>>> include/linux/blkdev.h | 9 ++++++
>>> include/uapi/linux/blkzoned.h | 13 ++++++++
>>> 7 files changed, 144 insertions(+)
>>>
>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>> index 704fc15813d1..39ec72af9537 100644
>>> --- a/block/blk-zoned.c
>>> +++ b/block/blk-zoned.c
>>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device
>>> *bdev, sector_t sector,
>>> }
>>> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>> + struct blk_zone_dev *zprop)
>>> +{
>>> + struct gendisk *disk = bdev->bd_disk;
>>> +
>>> + if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>> + return -EOPNOTSUPP;
>>> +
>>> + return disk->fops->report_zone_p(disk, zprop);
>>> +}
>>> +
>>> static inline bool blkdev_allow_reset_all_zones(struct block_device
>>> *bdev,
>>> sector_t sector,
>>> sector_t nr_sectors)
>>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device
>>> *bdev, fmode_t mode,
>>> GFP_KERNEL);
>>> }
>>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>> + unsigned int cmd, unsigned long arg)
>>> +{
>>> + void __user *argp = (void __user *)arg;
>>> + struct request_queue *q;
>>> + struct blk_zone_dev zprop;
>>> + int ret;
>>> +
>>> + if (!argp)
>>> + return -EINVAL;
>>> +
>>> + q = bdev_get_queue(bdev);
>>> + if (!q)
>>> + return -ENXIO;
>>> +
>>> + if (!blk_queue_is_zoned(q))
>>> + return -ENOTTY;
>>> +
>>> + if (!capable(CAP_SYS_ADMIN))
>>> + return -EACCES;
>>> +
>>> + if (!(mode & FMODE_WRITE))
>>> + return -EBADF;
>>> +
>>> + ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>> + if (ret)
>>> + goto out;
>>> +
>>> + if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>> + return -EFAULT;
>>> +
>>> +out:
>>> + return ret;
>>> +}
>>> +
>>> static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>> unsigned int nr_zones)
>>> {
>>> diff --git a/block/ioctl.c b/block/ioctl.c
>>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>> --- a/block/ioctl.c
>>> +++ b/block/ioctl.c
>>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct
>>> block_device *bdev, fmode_t mode,
>>> return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>> case BLKMGMTZONE:
>>> return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>> + case BLKZONEDEVPROP:
>>> + return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>> case BLKGETZONESZ:
>>> return put_uint(argp, bdev_zone_sectors(bdev));
>>> case BLKGETNRZONES:
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 5b95c81d2a2d..a32c909a915f 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations
>>> nvme_fops = {
>>> .getgeo = nvme_getgeo,
>>> .revalidate_disk= nvme_revalidate_disk,
>>> .report_zones = nvme_report_zones,
>>> + .report_zone_p = nvme_report_zone_prop,
>>> .pr_ops = &nvme_pr_ops,
>>> };
>>> @@ -2280,6 +2281,7 @@ const struct block_device_operations
>>> nvme_ns_head_ops = {
>>> .compat_ioctl = nvme_compat_ioctl,
>>> .getgeo = nvme_getgeo,
>>> .report_zones = nvme_report_zones,
>>> + .report_zone_p = nvme_report_zone_prop,
>>> .pr_ops = &nvme_pr_ops,
>>> };
>>> #endif /* CONFIG_NVME_MULTIPATH */
>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>> index ecf443efdf91..172e0531f37f 100644
>>> --- a/drivers/nvme/host/nvme.h
>>> +++ b/drivers/nvme/host/nvme.h
>>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>> u8 pi_type;
>>> #ifdef CONFIG_BLK_DEV_ZONED
>>> u64 zsze;
>>> +
>>> + u32 nr_zones;
>>> + u32 mar;
>>> + u32 mor;
>>> + u32 rrl;
>>> + u32 frl;
>>> + u16 zoc;
>>> + u16 ozcs;
>>> #endif
>>> unsigned long features;
>>> unsigned long flags;
>>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk
>>> *disk, struct nvme_ns *ns,
>>> int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>> unsigned int nr_zones, report_zones_cb cb, void *data);
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev
>>> *zprop);
>>> +
>>> blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct
>>> request *req,
>>> struct nvme_command *cmnd,
>>> enum nvme_zone_mgmt_action action);
>>> #else
>>> #define nvme_report_zones NULL
>>> +#define nvme_report_zone_prop NULL
>>> static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns
>>> *ns,
>>> struct request *req, struct nvme_command *cmnd,
>>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>> index 2e6512ac6f01..258d03610cc0 100644
>>> --- a/drivers/nvme/host/zns.c
>>> +++ b/drivers/nvme/host/zns.c
>>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl
>>> *ctrl)
>>> return 0;
>>> }
>>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>> +{
>>> + struct nvme_command c = { };
>>> + struct nvme_zone_report report;
>>> + int buflen = sizeof(struct nvme_zone_report);
>>> + int ret;
>>> +
>>> + c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>> + c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>> + c.zmr.slba = cpu_to_le64(0);
>>> + c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>> + c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>> + c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>> + c.zmr.pr = 0;
>>> +
>>> + ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + return le64_to_cpu(report.nr_zones);
>>> +}
>>> +
>>> int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>> unsigned lbaf)
>>> {
>>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk,
>>> struct nvme_ns *ns,
>>> goto free_data;
>>> }
>>> + ns->nr_zones = nvme_zns_nr_zones(ns);
>>> + ns->mar = le32_to_cpu(id->mar);
>>> + ns->mor = le32_to_cpu(id->mor);
>>> + ns->rrl = le32_to_cpu(id->rrl);
>>> + ns->frl = le32_to_cpu(id->frl);
>>> + ns->zoc = le16_to_cpu(id->zoc);
>>> +
>>> q->limits.zoned = BLK_ZONED_HM;
>>> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>> free_data:
>>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk,
>>> sector_t sector,
>>> return ret;
>>> }
>>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct
>>> blk_zone_dev *zprop)
>>> +{
>>> + zprop->nr_zones = ns->nr_zones;
>>> + zprop->zoc = ns->zoc;
>>> + zprop->ozcs = ns->ozcs;
>>> + zprop->mar = ns->mar;
>>> + zprop->mor = ns->mor;
>>> + zprop->rrl = ns->rrl;
>>> + zprop->frl = ns->frl;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev
>>> *zprop)
>>> +{
>>> + struct nvme_ns_head *head = NULL;
>>> + struct nvme_ns *ns;
>>> + int srcu_idx, ret;
>>> +
>>> + ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>> + if (unlikely(!ns))
>>> + return -EWOULDBLOCK;
>>> +
>>> + if (ns->head->ids.csi == NVME_CSI_ZNS)
>>> + ret = nvme_ns_report_zone_prop(ns, zprop);
>>> + else
>>> + ret = -EINVAL;
>>> + nvme_put_ns_from_disk(head, srcu_idx);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct
>>> request *req,
>>> struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>> {
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8308d8a3720b..0c0faa58b7f4 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct
>>> block_device *bdev, fmode_t mode,
>>> unsigned int cmd, unsigned long arg);
>>> extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>> fmode_t mode,
>>> unsigned int cmd, unsigned long arg);
>>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t
>>> mode,
>>> + unsigned int cmd, unsigned long arg);
>>> #else /* CONFIG_BLK_DEV_ZONED */
>>> static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct
>>> block_device *bdev,
>>> return -ENOTTY;
>>> }
>>> +static inline int blkdev_zonedev_prop(struct block_device *bdev,
>>> fmode_t mode,
>>> + unsigned int cmd, unsigned long arg)
>>> +{
>>> + return -ENOTTY;
>>> +}
>>> +
>>> #endif /* CONFIG_BLK_DEV_ZONED */
>>> struct request_queue {
>>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>> int (*report_zones)(struct gendisk *, sector_t sector,
>>> unsigned int nr_zones, report_zones_cb cb, void *data);
>>> char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>> + int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev
>>> *zprop);
>>> struct module *owner;
>>> const struct pr_ops *pr_ops;
>>> };
>>> diff --git a/include/uapi/linux/blkzoned.h
>>> b/include/uapi/linux/blkzoned.h
>>> index d0978ee10fc7..0c49a4b2ce5d 100644
>>> --- a/include/uapi/linux/blkzoned.h
>>> +++ b/include/uapi/linux/blkzoned.h
>>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>> __u64 nr_sectors;
>>> };
>>> +struct blk_zone_dev {
>>> + __u32 nr_zones;
>>> + __u32 mar;
>>> + __u32 mor;
>>> + __u32 rrl;
>>> + __u32 frl;
>>> + __u16 zoc;
>>> + __u16 ozcs;
>>> + __u32 rsv31[2];
>>> + __u64 rsv63[4];
>>> +};
>>> +
>>> /**
>>> * enum blk_zone_action - Zone state transitions managed from
>>> user-space
>>> *
>>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>> #define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
>>> #define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
>>> #define BLKMGMTZONE _IOR(0x12, 137, struct blk_zone_mgmt)
>>> +#define BLKZONEDEVPROP _IOR(0x12, 138, struct blk_zone_dev)
>>> #endif /* _UAPI_BLKZONED_H */
>>
>> Nak. These properties can already be retrieved using the nvme ioctl
>> passthru command and support have also been added to nvme-cli.
>>
>
> These properties are intended to be consumed by an application, so
> nvme-cli is of not much use. I would also like to avoid sysfs variables.
>
I can recommend libnvme https://github.com/linux-nvme/libnvme
It provides an easy way to retrieve the options.
> We can use nvme passthru, but this bypasses the zoned block abstraction.
> Why not representing ZNS features in the standard zoned block API? I am
> happy to iterate on the actual implementation if you have feedback.
>
> Javier
>
More information about the Linux-nvme
mailing list