[PATCH 3/3] nvme: add parameter command_retry to enable retry

Chao Leng lengchao at huawei.com
Tue Jan 12 04:34:48 EST 2021



On 2021/1/11 20:27, Minwoo Im wrote:
> Hello,
> 
> On 21-01-11 11:47:56, Chao Leng wrote:
>>
>>
>> On 2021/1/8 22:46, Minwoo Im wrote:
>>> nvme_init_request() has set REQ_FAILFAST_DRIVER to make requests
>>> non-retryable.  This command flag value is checked in
>>> nvme_decide_disposition() to decide whether to RETRY or other
>>> operations.  In that point, blk_noretry_request() macro will be used to
>>> check if command flags have one of REQ_FAILFAST_*.  If so, it just
>>> decides to complete the request without retrying.
>>>
>>> This patch added a module parameter named command_retry to turn on the
>>> command retry feature in this driver.  If turning it on,
>>> REQ_FAILFAST_DRIVER will not be set to requests so that retry can be
>>> reached out to nvme_retry_req() based on the module parameter.
>>>
>>> Signed-off-by: Minwoo Im <minwoo.im.dev at gmail.com>
>>> ---
>>>    drivers/nvme/host/core.c | 7 ++++++-
>>>    1 file changed, 6 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 6e428fdc25a8..e1836ca9956f 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -44,6 +44,10 @@ static unsigned char shutdown_timeout = 5;
>>>    module_param(shutdown_timeout, byte, 0644);
>>>    MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
>>> +static bool command_retry;
>>> +module_param(command_retry, bool, 0644);
>>> +MODULE_PARM_DESC(command_retry, "retry commands up to nvme_max_retries");
>>> +
>>>    static u8 nvme_max_retries = 5;
>>>    module_param_named(max_retries, nvme_max_retries, byte, 0644);
>>>    MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
>>> @@ -560,7 +564,8 @@ static inline void nvme_init_request(struct request *req,
>>>    	else /* no queuedata implies admin queue */
>>>    		req->timeout = NVME_ADMIN_TIMEOUT;
>>> -	req->cmd_flags |= REQ_FAILFAST_DRIVER;
>>> +	if (!command_retry)
>>> +		req->cmd_flags |= REQ_FAILFAST_DRIVER;
>> In abnormal scenarios, such as a request timeout, the connection process may take a long time or the admin command may wait for a long time.
>> Retry only for non-host errors may be a better choice. Maybe we can make some optimizations in nvme_decide_disposition.
> 
> Thanks for your review!
> 
> Oh, I agreed that it might wait for so long time in connecting process.
> Restricting some of commands that should be retried would be better as
> you mentioned.
> 
> Do you mean that maybe we can check this module parameter in
> nvme_decide_disposition()?  Like, even if blk_noretry_request(req) says
> that it's non-retriable, if this module parameter is enabled, then we
> can retry rather than failfast?
No, I mean adding the local preferential retry which is defined in the NVMe protocol.
---
  drivers/nvme/host/core.c | 18 +++++++++++++-----
  drivers/nvme/host/nvme.h |  1 +
  2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 99f91efe3824..a25e9b4956b9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -316,15 +316,20 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
  	if (likely(nvme_req(req)->status == 0))
  		return COMPLETE;

-	if (blk_noretry_request(req) ||
-	    (nvme_req(req)->status & NVME_SC_DNR) ||
+	if ((nvme_req(req)->status & NVME_SC_DNR) ||
  	    nvme_req(req)->retries >= nvme_max_retries)
  		return COMPLETE;

+	if (nvme_req(req)->ctrl->acre &&
+	    !nvme_is_path_error(nvme_req(req)->status) &&
+	    !blk_queue_dying(req->q))
+		return RETRY;
+
+	if (blk_noretry_request(req))
+		return COMPLETE;
+
  	if (req->cmd_flags & REQ_NVME_MPATH) {
-		if (nvme_is_path_error(nvme_req(req)->status) ||
-		    blk_queue_dying(req->q))
-			return FAILOVER;
+		return FAILOVER;
  	} else {
  		if (blk_queue_dying(req->q))
  			return COMPLETE;
@@ -2513,6 +2518,7 @@ static int nvme_configure_acre(struct nvme_ctrl *ctrl)
  	struct nvme_feat_host_behavior *host;
  	int ret;

+	ctrl->acre = false;
  	/* Don't bother enabling the feature if retry delay is not reported */
  	if (!ctrl->crdt[0])
  		return 0;
@@ -2524,6 +2530,8 @@ static int nvme_configure_acre(struct nvme_ctrl *ctrl)
  	host->acre = NVME_ENABLE_ACRE;
  	ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
  				host, sizeof(*host), NULL);
+	if (!ret)
+		ctrl->acre = true;
  	kfree(host);
  	return ret;
  }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bfcedfa4b057..fd914b0dec88 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -276,6 +276,7 @@ struct nvme_ctrl {
  #ifdef CONFIG_BLK_DEV_ZONED
  	u32 max_zone_append;
  #endif
+	bool acre;
  	u16 crdt[3];
  	u16 oncs;
  	u16 oacs;
-- 
> 
> Thanks,
> .
> 



More information about the Linux-nvme mailing list