[PATCH v3 12/19] block,nvme,scsi,dm: Add blk_status to pr_ops callouts
Mike Christie
michael.christie at oracle.com
Sat Nov 5 11:36:18 PDT 2022
On 11/1/22 5:15 AM, Christoph Hellwig wrote:
> On Sun, Oct 30, 2022 at 06:05:35PM -0500, Mike Christie wrote:
>> The problem I hit is that in the ioctl code I then have to do:
>>
>> @@ -269,7 +270,14 @@ static int blkdev_pr_register(struct block_device *bdev,
>>
>> if (reg.flags & ~PR_FL_IGNORE_KEY)
>> return -EOPNOTSUPP;
>> - return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
>> + ret = ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
>> + if (ret == -EBADE) {
>> + if (bdev_is_nvme(bdev))
>> + ret = NVME_SC_RESERVATION_CONFLICT;
>> + else if (bdev_is_scsi(bdev)
>> + ret = SAM_STAT_RESERVATION_CONFLICT;
>> + }
>> + return ret;
>
> Eww. We should have never leaked protocol specific values out to
> userspace. This is an original bug I introduceѕ, so all blame is on me.
>
> I suspect the right way to fix is is to keep the numeric value of
> SAM_STAT_RESERVATION_CONFLICT and give it a new constant exposed in
> the uapi header, assuming that SCSI is the thing people actually
> used the PR API for, and nvme was just an nice little add-on.
>
Do you mean just doing this:
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 727123c611e6..6ac70514170d 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -72,12 +72,17 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
static int nvme_send_pr_command(struct block_device *bdev,
struct nvme_command *c, void *data, unsigned int data_len)
{
+ int ret;
+
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
bdev->bd_disk->fops == &nvme_ns_head_ops)
- return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
+ ret = nvme_send_ns_head_pr_command(bdev, c, data, data_len);
else
- return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c,
- data, data_len);
+ ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, c,
+ data, data_len);
+ if (ret == NVME_SC_RESERVATION_CONFLICT)
+ ret = PR_STS_RESERVATION_CONFLICT;
+ return ret;
}
static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c0a614efcfce..c7621d8ffa51 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1840,7 +1840,8 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa,
scsi_sense_valid(&sshdr)) {
sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result);
scsi_print_sense_hdr(sdev, NULL, &sshdr);
- }
+ } else if (__get_status_byte(result) == SAM_STAT_RESERVATION_CONFLICT)
+ result = PR_STS_RESERVATION_CONFLICT;
return result;
}
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
index ccc78cbf1221..7a637f9e5b49 100644
--- a/include/uapi/linux/pr.h
+++ b/include/uapi/linux/pr.h
@@ -13,6 +13,15 @@ enum pr_type {
PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
};
+enum pr_status {
+ PR_STS_SUCCESS = 0x0,
+ /*
+ * The error codes are based on SCSI, because it was the primary user
+ * and had userspace users.
+ */
+ PR_STS_RESERVATION_CONFLICT = 0x18,
+};
+
struct pr_reservation {
__u64 key;
__u32 type;
---------------------------------------------------------------------------
Or we could add a new flag and make it nicer for the user in the future,
but also uglier for the drivers but a little less uglier than adding the
blk_sts field to the calls:
diff --git a/block/ioctl.c b/block/ioctl.c
index 60121e89052b..cae58f57ea13 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -269,7 +269,8 @@ static int blkdev_pr_register(struct block_device *bdev,
if (reg.flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
- return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
+ return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags,
+ reg.flags & PR_FL_PR_STAT);
}
static int blkdev_pr_reserve(struct block_device *bdev,
@@ -287,7 +288,8 @@ static int blkdev_pr_reserve(struct block_device *bdev,
if (rsv.flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
- return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
+ return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags,
+ rsv.flags & PR_FL_PR_STAT)
}
static int blkdev_pr_release(struct block_device *bdev,
@@ -305,7 +307,8 @@ static int blkdev_pr_release(struct block_device *bdev,
if (rsv.flags)
return -EOPNOTSUPP;
- return ops->pr_release(bdev, rsv.key, rsv.type);
+ return ops->pr_release(bdev, rsv.key, rsv.type,
+ rsv.flags & PR_FL_PR_STAT);
}
static int blkdev_pr_preempt(struct block_device *bdev,
@@ -323,7 +326,8 @@ static int blkdev_pr_preempt(struct block_device *bdev,
if (p.flags)
return -EOPNOTSUPP;
- return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
+ return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort,
+ p.flags & PR_FL_PR_STAT)
}
static int blkdev_pr_clear(struct block_device *bdev,
@@ -341,7 +345,7 @@ static int blkdev_pr_clear(struct block_device *bdev,
if (c.flags)
return -EOPNOTSUPP;
- return ops->pr_clear(bdev, c.key);
+ return ops->pr_clear(bdev, c.key, c.flags & PR_FL_PR_STAT);
}
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 727123c611e6..7c317b646cde 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -70,18 +70,44 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
}
static int nvme_send_pr_command(struct block_device *bdev,
- struct nvme_command *c, void *data, unsigned int data_len)
+ struct nvme_command *c, void *data, unsigned int data_len,
+ bool use_pr_sts)
{
+ int ret;
+
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
bdev->bd_disk->fops == &nvme_ns_head_ops)
- return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
+ ret = nvme_send_ns_head_pr_command(bdev, c, data, data_len);
else
- return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c,
- data, data_len);
+ ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, c,
+ data, data_len);
+ if (!ret || ret < 0 || (ret > 0 && !use_pr_sts))
+ return ret;
+
+ switch (ret) {
+ case NVME_SC_RESERVATION_CONFLICT:
+ ret = PR_STS_RESERVATION_CONFLICT;
+ break;
+ case NVME_SC_HOST_PATH_ERROR:
+ ret = PR_STS_PATH_FAILURE;
+ break
+ case NVME_SC_BAD_ATTRIBUTES:
+ case NVME_SC_ONCS_NOT_SUPPORTED:
+ case NVME_SC_INVALID_OPCODE:
+ case NVME_SC_INVALID_FIELD:
+ case NVME_SC_INVALID_NS:
+ ret = PR_STS_INVALID_OP;
+ break;
+ default:
+ ret = PR_STS_IOERR;
+ break;
+ }
+
+ return ret;
}
static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
- u64 key, u64 sa_key, u8 op)
+ u64 key, u64 sa_key, u8 op, bool use_pr_sts)
{
struct nvme_command c = { };
u8 data[16] = { 0, };
@@ -92,11 +118,11 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
c.common.opcode = op;
c.common.cdw10 = cpu_to_le32(cdw10);
- return nvme_send_pr_command(bdev, &c, data, sizeof(data));
+ return nvme_send_pr_command(bdev, &c, data, sizeof(data), use_pr_sts);
}
static int nvme_pr_register(struct block_device *bdev, u64 old,
- u64 new, unsigned flags)
+ u64 new, unsigned flags, bool use_pr_sts)
{
u32 cdw10;
@@ -106,11 +132,12 @@ static int nvme_pr_register(struct block_device *bdev, u64 old,
cdw10 = old ? 2 : 0;
cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register,
+ use_pr_sts);
}
static int nvme_pr_reserve(struct block_device *bdev, u64 key,
- enum pr_type type, unsigned flags)
+ enum pr_type type, unsigned flags, bool use_pr_sts)
{
u32 cdw10;
@@ -119,29 +146,34 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key,
cdw10 = nvme_pr_type_from_blk(type) << 8;
cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire,
+ use_pr_sts);
}
static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
- enum pr_type type, bool abort)
+ enum pr_type type, bool abort, bool use_pr_sts)
{
u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1);
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire,
+ use_pr_sts);
}
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
+static int nvme_pr_clear(struct block_device *bdev, u64 key, bool use_pr_sts)
{
u32 cdw10 = 1 | (key ? 0 : 1 << 3);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release,
+ use_pr_sts);
}
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+static int nvme_pr_release(struct block_device *bdev, u64 key,
+ enum pr_type type, bool use_pr_sts)
{
u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release,
+ use_pr_sts);
}
static int nvme_pr_resv_report(struct block_device *bdev, void *data,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c0a614efcfce..46393bdd7427 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1797,7 +1797,8 @@ static int sd_pr_read_reservation(struct block_device *bdev,
}
static int sd_pr_out_command(struct block_device *bdev, u8 sa,
- u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags)
+ u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags,
+ bool use_pr_sts)
{
struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
struct scsi_device *sdev = sdkp->device;
@@ -1842,44 +1843,74 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa,
scsi_print_sense_hdr(sdev, NULL, &sshdr);
}
+ if (!result || result < 0 || (result > 0 && !use_pr_sts))
+ return result;
+
+ result = PR_STS_IOERR;
+
+ switch host_byte(result) {
+ case DID_TRANSPORT_FAILFAST:
+ case DID_TRANSPORT_MARGINAL:
+ case DID_TRANSPORT_DISRUPTED:
+ result = PR_STS_PATH_FAILURE;
+ goto done;
+ }
+
+ switch (__get_status_byte(result)) {
+ case SAM_STAT_RESERVATION_CONFLICT:
+ result = PR_STS_RESERVATION_CONFLICT;
+ goto done;
+ case SAM_STAT_CHECK_CONDITION:
+ if (!scsi_sense_valid(&sshdr))
+ goto done;
+
+ if (sshdr.sense_key == ILLEGAL_REQUEST &&
+ (sshdr.asc == 0x26 || sshdr.asc == 0x24)) {
+ result = PR_STS_INVALID_OP;
+ goto done;
+ }
+ }
+
+done:
return result;
}
static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
- u32 flags)
+ u32 flags, bool use_pr_sts)
{
if (flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00,
- old_key, new_key, 0,
- (1 << 0) /* APTPL */);
+ old_key, new_key, 0, (1 << 0) /* APTPL */,
+ use_pr_sts);
}
static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
- u32 flags)
+ u32 flags, bool use_pr_sts)
{
if (flags)
return -EOPNOTSUPP;
return sd_pr_out_command(bdev, 0x01, key, 0,
- block_pr_type_to_scsi(type), 0);
+ block_pr_type_to_scsi(type), 0, use_pr_sts)
}
-static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type,
+ bool use_pr_sts)
{
return sd_pr_out_command(bdev, 0x02, key, 0,
- block_pr_type_to_scsi(type), 0);
+ block_pr_type_to_scsi(type), 0, use_pr_sts);
}
static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
- enum pr_type type, bool abort)
+ enum pr_type type, bool abort, bool use_pr_sts)
{
return sd_pr_out_command(bdev, abort ? 0x05 : 0x04, old_key, new_key,
- block_pr_type_to_scsi(type), 0);
+ block_pr_type_to_scsi(type), 0, use_pr_sts);
}
-static int sd_pr_clear(struct block_device *bdev, u64 key)
+static int sd_pr_clear(struct block_device *bdev, u64 key, bool use_pr_sts)
{
- return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0);
+ return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0, use_pr_sts);
}
static const struct pr_ops sd_pr_ops = {
diff --git a/include/linux/pr.h b/include/linux/pr.h
index 3003daec28a5..51e03e73a87f 100644
--- a/include/linux/pr.h
+++ b/include/linux/pr.h
@@ -18,14 +18,14 @@ struct pr_held_reservation {
struct pr_ops {
int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
- u32 flags);
+ u32 flags, bool use_pr_sts);
int (*pr_reserve)(struct block_device *bdev, u64 key,
- enum pr_type type, u32 flags);
+ enum pr_type type, u32 flags, bool use_pr_sts);
int (*pr_release)(struct block_device *bdev, u64 key,
- enum pr_type type);
+ enum pr_type type, bool use_pr_sts);
int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
- enum pr_type type, bool abort);
- int (*pr_clear)(struct block_device *bdev, u64 key);
+ enum pr_type type, bool abort, bool use_pr_sts);
+ int (*pr_clear)(struct block_device *bdev, u64 key, bool use_pr_sts);
/*
* pr_read_keys - Read the registered keys and return them in the
* pr_keys->keys array. The keys array will have been allocated at the
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
index ccc78cbf1221..ac8f7fc404ff 100644
--- a/include/uapi/linux/pr.h
+++ b/include/uapi/linux/pr.h
@@ -13,6 +13,14 @@ enum pr_type {
PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
};
+enum pr_status {
+ PR_STS_SUCCESS,
+ PR_STS_IOERR,
+ PR_STS_RESERVATION_CONFLICT,
+ PR_STS_PATH_FAILURE,
+ PR_STS_INVALID_OP,
+};
+
struct pr_reservation {
__u64 key;
__u32 type;
@@ -40,6 +48,7 @@ struct pr_clear {
};
#define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */
+#define PR_FL_PR_STAT (1 << 1) /* Convert device errors to pr_status */
#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration)
#define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation)
Bart, if this is what you were suggesting I misunderstood you. I thought you wanted to
only pass the new code in the kernel so that's why I was saying we still have the problem
of converting the error back when passing it back to userspace.
More information about the Linux-nvme
mailing list