[PATCH v2 3/3] nvme-fabrics: prevent overriding of existing host

Max Gurtovoy mgurtovoy at nvidia.com
Thu May 11 16:20:24 PDT 2023



On 12/05/2023 2:13, Hannes Reinecke wrote:
> On 5/11/23 21:35, Max Gurtovoy wrote:
>>
>>
>> On 11/05/2023 21:50, Hannes Reinecke wrote:
>>> On 5/11/23 18:54, Max Gurtovoy wrote:
>>>> When first connecting a target using the "default" host parameters,
>>>> setting the hostid from the command line during a subsequent connection
>>>> establishment would override the "default" hostid parameter. This would
>>>> cause an existing connection that is already using the host definitions
>>>> to lose its hostid.
>>>>
>>>> To address this issue, the code has been modified to allow only 1:1
>>>> mapping between hostnqn and hostid. This will maintain unambiguous host
>>>> identification. Any non 1:1 mapping will be rejected during connection
>>>> establishment.
>>>>
>>>> Tested-by: Noam Gottlieb <ngottlieb at nvidia.com>
>>>> Reviewed-by: Israel Rukshin <israelr at nvidia.com>
>>>> Signed-off-by: Max Gurtovoy <mgurtovoy at nvidia.com>
>>>> ---
>>>>
>>>> changes from v1:
>>>> - address comments from Christoph
>>>> - allow only 1:1 mapping between hostnqn and hostid
>>>> - add error prints in case we fail a connection for user to understand
>>>>    the reason of the failure
>>>>
>>>> ---
>>>>   drivers/nvme/host/fabrics.c | 98 
>>>> ++++++++++++++++++++++++++-----------
>>>>   1 file changed, 70 insertions(+), 28 deletions(-)
>>>>
>>>> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
>>>> index 0c0172bdc4dc..f8e47c7d6188 100644
>>>> --- a/drivers/nvme/host/fabrics.c
>>>> +++ b/drivers/nvme/host/fabrics.c
>>>> @@ -21,35 +21,77 @@ static DEFINE_MUTEX(nvmf_hosts_mutex);
>>>>   static struct nvmf_host *nvmf_default_host;
>>>> -static struct nvmf_host *__nvmf_host_find(const char *hostnqn)
>>>> +/**
>>>> + * __nvmf_host_find() - Find a matching to a previously created host
>>>> + * @hostnqn: Host NQN to match
>>>> + * @id: Host ID to match
>>>> + *
>>>> + * We have defined a host as how it is perceived by the target.
>>>> + * Therefore, we don't allow different Host NQNs with the same Host 
>>>> ID.
>>>> + * Similarly, we do not allow the usage of the same Host NQN with 
>>>> different
>>>> + * Host IDs. This will maintain unambiguous host identification.
>>>> + *
>>>> + * Return: Returns host pointer on success, NULL in case of no 
>>>> match or
>>>> + *         ERR_PTR(-EINVAL) in case of error match.
>>>> + */
>>>> +static struct nvmf_host *__nvmf_host_find(const char *hostnqn, 
>>>> uuid_t *id)
>>>>   {
>>>>       struct nvmf_host *host;
>>>> +    lockdep_assert_held(&nvmf_hosts_mutex);
>>>> +
>>>>       list_for_each_entry(host, &nvmf_hosts, list) {
>>>> -        if (!strcmp(host->nqn, hostnqn))
>>>> -            return host;
>>>> +        if (!strcmp(host->nqn, hostnqn)) {
>>>> +            if (uuid_equal(&host->id, id)) {
>>>> +                // same hostnqn and hostid
>>>> +                return host;
>>>> +            } else {
>>>> +                pr_err("found same hostnqn %s but different hostid 
>>>> %pUb\n",
>>>> +                       hostnqn, id);
>>>> +                return ERR_PTR(-EINVAL);
>>>> +            }
>>>> +        } else if (uuid_equal(&host->id, id)) {
>>>> +            // same hostid but different hostnqn
>>>> +            pr_err("found same hostid %pUb but different hostnqn 
>>>> %s\n",
>>>> +                    id, hostnqn);
>>>> +            return ERR_PTR(-EINVAL);
>>>> +        }
>>>>       }
>>>>       return NULL;
>>>>   }
>>>> -static struct nvmf_host *nvmf_host_add(const char *hostnqn)
>>>> +static struct nvmf_host *nvmf_host_alloc(const char *hostnqn, 
>>>> uuid_t *id)
>>>> +{
>>>> +    struct nvmf_host *host;
>>>> +
>>>> +    host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> +    if (!host)
>>>> +        return NULL;
>>>> +
>>>> +    kref_init(&host->ref);
>>>> +    uuid_copy(&host->id, id);
>>>> +    strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
>>>> +
>>>> +    return host;
>>>> +}
>>>> +
>>>> +static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t 
>>>> *id)
>>>>   {
>>>>       struct nvmf_host *host;
>>>>       mutex_lock(&nvmf_hosts_mutex);
>>>> -    host = __nvmf_host_find(hostnqn);
>>>> -    if (host) {
>>>> +    host = __nvmf_host_find(hostnqn, id);
>>>> +    if (IS_ERR(host)) {
>>>> +        goto out_unlock;
>>>> +    } else if (host) {
>>>>           kref_get(&host->ref);
>>>>           goto out_unlock;
>>>>       }
>>>> -    host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> +    host = nvmf_host_alloc(hostnqn, id);
>>>>       if (!host)
>>>> -        goto out_unlock;
>>>> -
>>>> -    kref_init(&host->ref);
>>>> -    strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
>>>> +        return ERR_PTR(-ENOMEM);
>>>>       list_add_tail(&host->list, &nvmf_hosts);
>>>>   out_unlock:
>>>> @@ -60,16 +102,17 @@ static struct nvmf_host *nvmf_host_add(const 
>>>> char *hostnqn)
>>>>   static struct nvmf_host *nvmf_host_default(void)
>>>>   {
>>>>       struct nvmf_host *host;
>>>> +    char nqn[NVMF_NQN_SIZE];
>>>> +    uuid_t id;
>>>> -    host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> +    uuid_gen(&id);
>>>> +    snprintf(nqn, NVMF_NQN_SIZE,
>>>> +        "nqn.2014-08.org.nvmexpress:uuid:%pUb", &id);
>>>> +
>>>> +    host = nvmf_host_alloc(nqn, &id);
>>>>       if (!host)
>>>>           return NULL;
>>>> -    kref_init(&host->ref);
>>>> -    uuid_gen(&host->id);
>>>> -    snprintf(host->nqn, NVMF_NQN_SIZE,
>>>> -        "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
>>>> -
>>>>       mutex_lock(&nvmf_hosts_mutex);
>>>>       list_add_tail(&host->list, &nvmf_hosts);
>>>>       mutex_unlock(&nvmf_hosts_mutex);
>>>> @@ -633,6 +676,7 @@ static int nvmf_parse_options(struct 
>>>> nvmf_ctrl_options *opts,
>>>>       size_t nqnlen  = 0;
>>>>       int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
>>>>       uuid_t hostid;
>>>> +    char hostnqn[NVMF_NQN_SIZE];
>>>>       /* Set defaults */
>>>>       opts->queue_size = NVMF_DEF_QUEUE_SIZE;
>>>> @@ -649,7 +693,9 @@ static int nvmf_parse_options(struct 
>>>> nvmf_ctrl_options *opts,
>>>>       if (!options)
>>>>           return -ENOMEM;
>>>> -    uuid_gen(&hostid);
>>>> +    /* use default host if not given by user space */
>>>> +    uuid_copy(&hostid, &nvmf_default_host->id);
>>>> +    strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE);
>>>>       while ((p = strsep(&o, ",\n")) != NULL) {
>>>>           if (!*p)
>>>> @@ -795,12 +841,8 @@ static int nvmf_parse_options(struct 
>>>> nvmf_ctrl_options *opts,
>>>>                   ret = -EINVAL;
>>>>                   goto out;
>>>>               }
>>>> -            opts->host = nvmf_host_add(p);
>>>> +            strscpy(hostnqn, p, NVMF_NQN_SIZE);
>>>>               kfree(p);
>>>> -            if (!opts->host) {
>>>> -                ret = -ENOMEM;
>>>> -                goto out;
>>>> -            }
>>>>               break;
>>>>           case NVMF_OPT_RECONNECT_DELAY:
>>>>               if (match_int(args, &token)) {
>>>> @@ -957,13 +999,13 @@ static int nvmf_parse_options(struct 
>>>> nvmf_ctrl_options *opts,
>>>>                   opts->fast_io_fail_tmo, ctrl_loss_tmo);
>>>>       }
>>>> -    if (!opts->host) {
>>>> -        kref_get(&nvmf_default_host->ref);
>>>> -        opts->host = nvmf_default_host;
>>>> +    opts->host = nvmf_host_add(hostnqn, &hostid);
>>>> +    if (IS_ERR(opts->host)) {
>>>> +        ret = PTR_ERR(opts->host);
>>>> +        opts->host = NULL;
>>>> +        goto out;
>>>>       }
>>>> -    uuid_copy(&opts->host->id, &hostid);
>>>> -
>>>>   out:
>>>>       kfree(options);
>>>>       return ret;
>>>
>>> Hmm. Remind me again: what's the supposed behaviour if I do _not_ set 
>>> the hostid from userspace and try another connection with the same 
>>> hostnqn?
>>> If I read the code correctly I will be getting a new 'host' structure 
>>> (as the hostid is not identical).
>>> Is this the behaviour we're aiming at?
>>> I would have thought that I would be getting the same host structure ..
>>
>> Unfortunately, the nvme-cli is not handling the hostnqn and hostid 
>> well IMO.
>> It should have enforce getting these 2 together or non of them. The 
>> current code in cli allows the user to provide one of them in cmdline.
>>
>> We also might have hostid and hostnqn files under /etc/nvme.
>>
>> So what we have is:
>> 1. if files under /etc/nvme exist:
>>   - use values from files if the user didn't set explicitly from 
>> command line during the connect command (for each missing argument).
>> 2. if files under /etc/nvme don't exist:
>>   - use in kernel default hostnqn/hostid (that is created during 
>> module loading) values as it uses for the /etc/nvme files.
>>
>> The code today will override the hostid for the host that it matches 
>> hostnqn. In case we'll have a reconnection, we'll reconnect with 
>> hostid that was mistakenly overridden.
>>
>> In v1, I've fixed that and allowed having hostNQN with multiple ids 
>> but Christoph suggested blocking it in linux and maintain unambiguous 
>> host identification (1:1 mapping between hostnqn and hostid).
>>
>> In v2, I've enforced the 1:1 mapping and fail connection establishment 
>> in any other case.
>>
>> I've added error prints to dmesg to help the users to understand the 
>> reason of the connection failure.
>> It is better to fail the connection establishment than overriding the 
>> hostid under the hood.
>>
>>
> Oh, I fully get that an I'm on board with that.
> Issue is that you are generating a random ID if none is specified on the 
> commandline, so the second call (with the same parameters) will fail as 
> we're just checking if the hostid is identical.
> Guess we'll need to skip the check of hostid if none is specified.

I'm not generating random IDs.

if none is specified on cmdline then:
if /etc/nvme/hostid exist take it from there;
else use the default_host->hostid


> 
> Cheers,
> 
> Hannes



More information about the Linux-nvme mailing list