[PATCH] nvmet: support reservation feature

Keith Busch kbusch at kernel.org
Tue Jan 9 09:01:11 PST 2024


On Tue, Jan 09, 2024 at 08:10:08PM +0800, Guixin Liu wrote:
> This patch implements the reservation feature, includes:
> 1. reservation register(register, unregister and replace).
> 2. reservation acquire(acquire, preempt, preempt and abort).
> 3. reservation release(release and clear).
> 4. reservation report.
> 
> And also make reservation configurable, one can set ns to support
> reservation before enable ns. The default of resv_enable is false.
> 
> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
> ---
> Hi guys,
>     I've implemented the NVMe reservation feature. Please review it, all 
> comments are welcome.

Why? If you have a real use case, let's add a blktest example added to
that project.

>     In addtion, I didn't implement event reporting because I didn't see
> any handling of these events on the host side. If these events are mandatory
> to report, please let me know so that I can implement them.

User space could be listening for them. The driver doesn't have any
particular action to take on a PR event just send a uevent.
 
>  	 */
>  	id->nmic = NVME_NS_NMIC_SHARED;
>  	id->anagrpid = cpu_to_le32(req->ns->anagrpid);
> -
> +	id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
> +		     NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
> +		     NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
> +		     NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
> +		     NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
> +		     NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
> +		     NVME_PR_SUPPORT_IEKEY_DEF_LATER_VER_1_3;
>  	memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
>  
>  	id->lbaf[0].ds = req->ns->blksize_shift;
> @@ -1017,6 +1023,12 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
>  	if (nvmet_is_passthru_req(req))
>  		return nvmet_parse_passthru_admin_cmd(req);
>  
> +	ret = nvmet_pr_check_cmd_access(req);
> +	if (unlikely(ret)) {
> +		req->error_loc = offsetof(struct nvme_common_command, opcode);
> +		return ret;
> +	}

You're checking this before validating the opcode. I don't think pr
access violation should have error precedence over an invalid opcode.

> +static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req)
> +{
> +	struct nvme_command *cmd = req->cmd;
> +	u8 opcode = cmd->common.opcode;
> +
> +	if (req->sq->qid) {
> +		switch (opcode) {
> +		case nvme_cmd_flush:
> +		case nvme_cmd_write:
> +		case nvme_cmd_write_zeroes:
> +		case nvme_cmd_dsm:
> +		case nvme_cmd_write_uncor:
> +			return true;
> +		default:
> +			return false;
> +		}
> +	}
> +	switch (opcode) {
> +	case nvme_admin_cap_mgmt:
> +	case nvme_admin_format_nvm:
> +	case nvme_admin_ns_attach:
> +	case nvme_admin_ns_mgmt:
> +	case nvme_admin_sanitize_nvm:
> +	case nvme_admin_security_send:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req)
> +{
> +	struct nvme_command *cmd = req->cmd;
> +	u8 opcode = cmd->common.opcode;
> +
> +	if (req->sq->qid) {
> +		switch (opcode) {
> +		case nvme_cmd_read:
> +		case nvme_cmd_compare:
> +		case nvme_cmd_verify:
> +			return true;
> +		default:
> +			return false;
> +		}
> +	}
> +	switch (opcode) {
> +	case nvme_admin_security_recv:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}

You're checking opcodes that we don't even support. I suggest we stash
the Command Effects Log in our target and trust what that reports to
deterimine if a command violates a reservation so that this function
doesn't need to be kept in sync as new opcodes are created.

Speaking of effects, you also need to update nvmet_get_cmd_effects_nvm()
so that it reports support for NVMe reservation opcodes.

> +u16 nvmet_pr_check_cmd_access(struct nvmet_req *req)
> +{
> +	struct nvmet_ctrl *ctrl = req->sq->ctrl;
> +	struct nvmet_ns *ns = req->ns;
> +	struct nvmet_pr *pr;
> +	u16 ret = 0;
> +
> +	if (!ns) {
> +		ns = xa_load(&nvmet_req_subsys(req)->namespaces,
> +			     le32_to_cpu(req->cmd->common.nsid));

This could be the broadcast NSID, 0xffffffff, in which case you have to
check all the namespaces for reservations.

> +	/*
> +	 * The Reservation command group is checked in executing,
> +	 * allow it here.
> +	 */
> +	switch ((u8)pr->rtype) {
> +	case NVME_PR_WRITE_EXCLUSIVE:
> +		if (nvmet_is_req_write_cmd_group(req) ||
> +		    nvmet_is_req_copy_cmd_group(req))
> +			ret = NVME_SC_RESERVATION_CONFLICT | NVME_SC_DNR;
> +		break;
> +	case NVME_PR_EXCLUSIVE_ACCESS:
> +		if (nvmet_is_req_copy_cmd_group(req) ||
> +		    nvmet_is_req_read_cmd_group(req) ||
> +		    nvmet_is_req_write_cmd_group(req))
> +			ret = NVME_SC_RESERVATION_CONFLICT | NVME_SC_DNR;
> +		break;
> +	case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY:
> +	case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS:
> +		if ((nvmet_is_req_copy_cmd_group(req) ||
> +		    nvmet_is_req_write_cmd_group(req)) &&
> +		    !nvmet_pr_find_registrant_by_hostid(pr, &ctrl->hostid))
> +			ret = NVME_SC_RESERVATION_CONFLICT | NVME_SC_DNR;
> +		break;
> +	case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY:
> +	case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS:
> +		if ((nvmet_is_req_copy_cmd_group(req) ||
> +		    nvmet_is_req_read_cmd_group(req) ||
> +		    nvmet_is_req_write_cmd_group(req)) &&
> +		    !nvmet_pr_find_registrant_by_hostid(pr, &ctrl->hostid))
> +			ret = NVME_SC_RESERVATION_CONFLICT | NVME_SC_DNR;
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		break;
> +	}
> +
> +unlock:
> +	read_unlock(&pr->pr_lock);
> +out:
> +	if (!req->ns)
> +		nvmet_put_namespace(ns);
> +	return ret;
> +}



More information about the Linux-nvme mailing list