[PATCH v2 3/3] nvme-fabrics: prevent overriding of existing host
Max Gurtovoy
mgurtovoy at nvidia.com
Thu May 11 16:20:24 PDT 2023
On 12/05/2023 2:13, Hannes Reinecke wrote:
> On 5/11/23 21:35, Max Gurtovoy wrote:
>>
>>
>> On 11/05/2023 21:50, Hannes Reinecke wrote:
>>> On 5/11/23 18:54, Max Gurtovoy wrote:
>>>> When first connecting a target using the "default" host parameters,
>>>> setting the hostid from the command line during a subsequent connection
>>>> establishment would override the "default" hostid parameter. This would
>>>> cause an existing connection that is already using the host definitions
>>>> to lose its hostid.
>>>>
>>>> To address this issue, the code has been modified to allow only 1:1
>>>> mapping between hostnqn and hostid. This will maintain unambiguous host
>>>> identification. Any non 1:1 mapping will be rejected during connection
>>>> establishment.
>>>>
>>>> Tested-by: Noam Gottlieb <ngottlieb at nvidia.com>
>>>> Reviewed-by: Israel Rukshin <israelr at nvidia.com>
>>>> Signed-off-by: Max Gurtovoy <mgurtovoy at nvidia.com>
>>>> ---
>>>>
>>>> changes from v1:
>>>> - address comments from Christoph
>>>> - allow only 1:1 mapping between hostnqn and hostid
>>>> - add error prints in case we fail a connection for user to understand
>>>> the reason of the failure
>>>>
>>>> ---
>>>> drivers/nvme/host/fabrics.c | 98
>>>> ++++++++++++++++++++++++++-----------
>>>> 1 file changed, 70 insertions(+), 28 deletions(-)
>>>>
>>>> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
>>>> index 0c0172bdc4dc..f8e47c7d6188 100644
>>>> --- a/drivers/nvme/host/fabrics.c
>>>> +++ b/drivers/nvme/host/fabrics.c
>>>> @@ -21,35 +21,77 @@ static DEFINE_MUTEX(nvmf_hosts_mutex);
>>>> static struct nvmf_host *nvmf_default_host;
>>>> -static struct nvmf_host *__nvmf_host_find(const char *hostnqn)
>>>> +/**
>>>> + * __nvmf_host_find() - Find a matching to a previously created host
>>>> + * @hostnqn: Host NQN to match
>>>> + * @id: Host ID to match
>>>> + *
>>>> + * We have defined a host as how it is perceived by the target.
>>>> + * Therefore, we don't allow different Host NQNs with the same Host
>>>> ID.
>>>> + * Similarly, we do not allow the usage of the same Host NQN with
>>>> different
>>>> + * Host IDs. This will maintain unambiguous host identification.
>>>> + *
>>>> + * Return: Returns host pointer on success, NULL in case of no
>>>> match or
>>>> + * ERR_PTR(-EINVAL) in case of error match.
>>>> + */
>>>> +static struct nvmf_host *__nvmf_host_find(const char *hostnqn,
>>>> uuid_t *id)
>>>> {
>>>> struct nvmf_host *host;
>>>> + lockdep_assert_held(&nvmf_hosts_mutex);
>>>> +
>>>> list_for_each_entry(host, &nvmf_hosts, list) {
>>>> - if (!strcmp(host->nqn, hostnqn))
>>>> - return host;
>>>> + if (!strcmp(host->nqn, hostnqn)) {
>>>> + if (uuid_equal(&host->id, id)) {
>>>> + // same hostnqn and hostid
>>>> + return host;
>>>> + } else {
>>>> + pr_err("found same hostnqn %s but different hostid
>>>> %pUb\n",
>>>> + hostnqn, id);
>>>> + return ERR_PTR(-EINVAL);
>>>> + }
>>>> + } else if (uuid_equal(&host->id, id)) {
>>>> + // same hostid but different hostnqn
>>>> + pr_err("found same hostid %pUb but different hostnqn
>>>> %s\n",
>>>> + id, hostnqn);
>>>> + return ERR_PTR(-EINVAL);
>>>> + }
>>>> }
>>>> return NULL;
>>>> }
>>>> -static struct nvmf_host *nvmf_host_add(const char *hostnqn)
>>>> +static struct nvmf_host *nvmf_host_alloc(const char *hostnqn,
>>>> uuid_t *id)
>>>> +{
>>>> + struct nvmf_host *host;
>>>> +
>>>> + host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> + if (!host)
>>>> + return NULL;
>>>> +
>>>> + kref_init(&host->ref);
>>>> + uuid_copy(&host->id, id);
>>>> + strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
>>>> +
>>>> + return host;
>>>> +}
>>>> +
>>>> +static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t
>>>> *id)
>>>> {
>>>> struct nvmf_host *host;
>>>> mutex_lock(&nvmf_hosts_mutex);
>>>> - host = __nvmf_host_find(hostnqn);
>>>> - if (host) {
>>>> + host = __nvmf_host_find(hostnqn, id);
>>>> + if (IS_ERR(host)) {
>>>> + goto out_unlock;
>>>> + } else if (host) {
>>>> kref_get(&host->ref);
>>>> goto out_unlock;
>>>> }
>>>> - host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> + host = nvmf_host_alloc(hostnqn, id);
>>>> if (!host)
>>>> - goto out_unlock;
>>>> -
>>>> - kref_init(&host->ref);
>>>> - strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
>>>> + return ERR_PTR(-ENOMEM);
>>>> list_add_tail(&host->list, &nvmf_hosts);
>>>> out_unlock:
>>>> @@ -60,16 +102,17 @@ static struct nvmf_host *nvmf_host_add(const
>>>> char *hostnqn)
>>>> static struct nvmf_host *nvmf_host_default(void)
>>>> {
>>>> struct nvmf_host *host;
>>>> + char nqn[NVMF_NQN_SIZE];
>>>> + uuid_t id;
>>>> - host = kmalloc(sizeof(*host), GFP_KERNEL);
>>>> + uuid_gen(&id);
>>>> + snprintf(nqn, NVMF_NQN_SIZE,
>>>> + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &id);
>>>> +
>>>> + host = nvmf_host_alloc(nqn, &id);
>>>> if (!host)
>>>> return NULL;
>>>> - kref_init(&host->ref);
>>>> - uuid_gen(&host->id);
>>>> - snprintf(host->nqn, NVMF_NQN_SIZE,
>>>> - "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
>>>> -
>>>> mutex_lock(&nvmf_hosts_mutex);
>>>> list_add_tail(&host->list, &nvmf_hosts);
>>>> mutex_unlock(&nvmf_hosts_mutex);
>>>> @@ -633,6 +676,7 @@ static int nvmf_parse_options(struct
>>>> nvmf_ctrl_options *opts,
>>>> size_t nqnlen = 0;
>>>> int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
>>>> uuid_t hostid;
>>>> + char hostnqn[NVMF_NQN_SIZE];
>>>> /* Set defaults */
>>>> opts->queue_size = NVMF_DEF_QUEUE_SIZE;
>>>> @@ -649,7 +693,9 @@ static int nvmf_parse_options(struct
>>>> nvmf_ctrl_options *opts,
>>>> if (!options)
>>>> return -ENOMEM;
>>>> - uuid_gen(&hostid);
>>>> + /* use default host if not given by user space */
>>>> + uuid_copy(&hostid, &nvmf_default_host->id);
>>>> + strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE);
>>>> while ((p = strsep(&o, ",\n")) != NULL) {
>>>> if (!*p)
>>>> @@ -795,12 +841,8 @@ static int nvmf_parse_options(struct
>>>> nvmf_ctrl_options *opts,
>>>> ret = -EINVAL;
>>>> goto out;
>>>> }
>>>> - opts->host = nvmf_host_add(p);
>>>> + strscpy(hostnqn, p, NVMF_NQN_SIZE);
>>>> kfree(p);
>>>> - if (!opts->host) {
>>>> - ret = -ENOMEM;
>>>> - goto out;
>>>> - }
>>>> break;
>>>> case NVMF_OPT_RECONNECT_DELAY:
>>>> if (match_int(args, &token)) {
>>>> @@ -957,13 +999,13 @@ static int nvmf_parse_options(struct
>>>> nvmf_ctrl_options *opts,
>>>> opts->fast_io_fail_tmo, ctrl_loss_tmo);
>>>> }
>>>> - if (!opts->host) {
>>>> - kref_get(&nvmf_default_host->ref);
>>>> - opts->host = nvmf_default_host;
>>>> + opts->host = nvmf_host_add(hostnqn, &hostid);
>>>> + if (IS_ERR(opts->host)) {
>>>> + ret = PTR_ERR(opts->host);
>>>> + opts->host = NULL;
>>>> + goto out;
>>>> }
>>>> - uuid_copy(&opts->host->id, &hostid);
>>>> -
>>>> out:
>>>> kfree(options);
>>>> return ret;
>>>
>>> Hmm. Remind me again: what's the supposed behaviour if I do _not_ set
>>> the hostid from userspace and try another connection with the same
>>> hostnqn?
>>> If I read the code correctly I will be getting a new 'host' structure
>>> (as the hostid is not identical).
>>> Is this the behaviour we're aiming at?
>>> I would have thought that I would be getting the same host structure ..
>>
>> Unfortunately, the nvme-cli is not handling the hostnqn and hostid
>> well IMO.
>> It should have enforce getting these 2 together or non of them. The
>> current code in cli allows the user to provide one of them in cmdline.
>>
>> We also might have hostid and hostnqn files under /etc/nvme.
>>
>> So what we have is:
>> 1. if files under /etc/nvme exist:
>> - use values from files if the user didn't set explicitly from
>> command line during the connect command (for each missing argument).
>> 2. if files under /etc/nvme don't exist:
>> - use in kernel default hostnqn/hostid (that is created during
>> module loading) values as it uses for the /etc/nvme files.
>>
>> The code today will override the hostid for the host that it matches
>> hostnqn. In case we'll have a reconnection, we'll reconnect with
>> hostid that was mistakenly overridden.
>>
>> In v1, I've fixed that and allowed having hostNQN with multiple ids
>> but Christoph suggested blocking it in linux and maintain unambiguous
>> host identification (1:1 mapping between hostnqn and hostid).
>>
>> In v2, I've enforced the 1:1 mapping and fail connection establishment
>> in any other case.
>>
>> I've added error prints to dmesg to help the users to understand the
>> reason of the connection failure.
>> It is better to fail the connection establishment than overriding the
>> hostid under the hood.
>>
>>
> Oh, I fully get that an I'm on board with that.
> Issue is that you are generating a random ID if none is specified on the
> commandline, so the second call (with the same parameters) will fail as
> we're just checking if the hostid is identical.
> Guess we'll need to skip the check of hostid if none is specified.
I'm not generating random IDs.
if none is specified on cmdline then:
if /etc/nvme/hostid exist take it from there;
else use the default_host->hostid
>
> Cheers,
>
> Hannes
More information about the Linux-nvme
mailing list