[PATCH v3] nvme: enable FDP support
Kanchan Joshi
joshi.k at samsung.com
Tue Jul 2 03:26:19 PDT 2024
Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
to control the placement of logical blocks so as to reduce the SSD WAF.
Userspace can send the data lifetime information using the write hints.
The SCSI driver (sd) can already pass this information to the SCSI
devices. This patch does the same for NVMe.
Fetch the placement-identifiers if the device supports FDP.
The incoming write-hint is mapped to a placement-identifier, which in
turn is set in the DSPEC field of the write command.
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
Signed-off-by: Hui Qi <hui81.qi at samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
---
Changes since v2:
- Base it on nvme-6.11 and resolve a merge conflict
Changes since v1:
- Reduce the fetched plids from 128 to 6 (Keith)
- Use struct_size for a calculation (Keith)
- Handle robot/sparse warning
drivers/nvme/host/core.c | 67 ++++++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 4 +++
include/linux/nvme.h | 19 ++++++++++++
3 files changed, 90 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 20e7505852ce..c7455e917e3b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -42,6 +42,20 @@ struct nvme_ns_info {
bool is_removed;
};
+struct nvme_fdp_ruh_status_desc {
+ u16 pid;
+ u16 ruhid;
+ u32 earutr;
+ u64 ruamw;
+ u8 rsvd16[16];
+};
+
+struct nvme_fdp_ruh_status {
+ u8 rsvd0[14];
+ __le16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
unsigned int admin_timeout = 60;
module_param(admin_timeout, uint, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
@@ -957,6 +971,16 @@ static bool nvme_valid_atomic_write(struct request *req)
return true;
}
+static inline void nvme_assign_placement_id(struct nvme_ns *ns,
+ struct request *req,
+ struct nvme_command *cmd)
+{
+ enum rw_hint h = umin(ns->head->nr_plids - 1, req->write_hint);
+
+ cmd->rw.control |= cpu_to_le16(NVME_RW_DTYPE_DPLCMT);
+ cmd->rw.dsmgmt |= cpu_to_le32(ns->head->plids[h] << 16);
+}
+
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
@@ -1075,6 +1099,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
break;
case REQ_OP_WRITE:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+ if (!ret && ns->head->nr_plids)
+ nvme_assign_placement_id(ns, req, cmd);
break;
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
@@ -2105,6 +2131,40 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
return ret;
}
+static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
+{
+ struct nvme_command c = {};
+ struct nvme_fdp_ruh_status *ruhs;
+ struct nvme_fdp_ruh_status_desc *ruhsd;
+ int size, ret, i;
+
+ size = struct_size(ruhs, ruhsd, NVME_MAX_PLIDS);
+ ruhs = kzalloc(size, GFP_KERNEL);
+ if (!ruhs)
+ return -ENOMEM;
+
+ c.imr.opcode = nvme_cmd_io_mgmt_recv;
+ c.imr.nsid = cpu_to_le32(nsid);
+ c.imr.mo = 0x1;
+ c.imr.numd = cpu_to_le32((size >> 2) - 1);
+
+ ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+ if (ret)
+ goto out;
+
+ ns->head->nr_plids = le16_to_cpu(ruhs->nruhsd);
+ ns->head->nr_plids =
+ min_t(u16, ns->head->nr_plids, NVME_MAX_PLIDS);
+
+ for (i = 0; i < ns->head->nr_plids; i++) {
+ ruhsd = &ruhs->ruhsd[i];
+ ns->head->plids[i] = le16_to_cpu(ruhsd->pid);
+ }
+out:
+ kfree(ruhs);
+ return ret;
+}
+
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
@@ -2196,6 +2256,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (ret && !nvme_first_scan(ns->disk))
goto out;
}
+ if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+ ret = nvme_fetch_fdp_plids(ns, info->nsid);
+ if (ret)
+ dev_warn(ns->ctrl->device,
+ "FDP failure status:0x%x\n", ret);
+ }
+
ret = 0;
out:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b512012b7044..7b88b8ae502e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -450,6 +450,8 @@ struct nvme_ns_ids {
u8 csi;
};
+#define NVME_MAX_PLIDS (WRITE_LIFE_EXTREME + 1)
+
/*
* Anchor structure for namespaces. There is one for each namespace in a
* NVMe subsystem that any of our controllers can see, and the namespace
@@ -466,6 +468,8 @@ struct nvme_ns_head {
struct kref ref;
bool shared;
bool passthru_err_log_enabled;
+ u16 nr_plids;
+ u16 plids[NVME_MAX_PLIDS];
int instance;
struct nvme_effects_log *effects;
u64 nuse;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 57e27e48c913..9effc5902901 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -273,6 +273,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
+ NVME_CTRL_ATTR_FDPS = (1 << 19),
};
struct nvme_id_ctrl {
@@ -832,6 +833,7 @@ enum nvme_opcode {
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
@@ -853,6 +855,7 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_register), \
nvme_opcode_name(nvme_cmd_resv_report), \
nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
@@ -1004,6 +1007,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_RW_DTYPE_DPLCMT = 2 << 4,
NVME_WZ_DEAC = 1 << 9,
};
@@ -1091,6 +1095,20 @@ struct nvme_zone_mgmt_recv_cmd {
__le32 cdw14[2];
};
+struct nvme_io_mgmt_recv_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 mo;
+ __u8 rsvd11;
+ __u16 mos;
+ __le32 numd;
+ __le32 cdw12[4];
+};
+
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
@@ -1811,6 +1829,7 @@ struct nvme_command {
struct nvmf_auth_receive_command auth_receive;
struct nvme_dbbuf dbbuf;
struct nvme_directive_cmd directive;
+ struct nvme_io_mgmt_recv_cmd imr;
};
};
--
2.25.1
More information about the Linux-nvme
mailing list