[PATCH 13/18] nvmet-tcp: allocate socket file

Hannes Reinecke hare at suse.de
Wed Mar 22 05:07:30 PDT 2023


On 3/22/23 12:46, Sagi Grimberg wrote:
> 
> 
> On 3/21/23 14:43, Hannes Reinecke wrote:
>> When using the TLS upcall we need to allocate a socket file such
>> that the userspace daemon is able to use the socket.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
>>   drivers/nvme/target/tcp.c | 49 ++++++++++++++++++++++++++++-----------
>>   1 file changed, 36 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>> index 66e8f9fd0ca7..5c43767c5ecd 100644
>> --- a/drivers/nvme/target/tcp.c
>> +++ b/drivers/nvme/target/tcp.c
>> @@ -96,12 +96,14 @@ struct nvmet_tcp_cmd {
>>   enum nvmet_tcp_queue_state {
>>       NVMET_TCP_Q_CONNECTING,
>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>       NVMET_TCP_Q_LIVE,
>>       NVMET_TCP_Q_DISCONNECTING,
>>   };
>>   struct nvmet_tcp_queue {
>>       struct socket        *sock;
>> +    struct file        *sock_file;
>>       struct nvmet_tcp_port    *port;
>>       struct work_struct    io_work;
>>       struct nvmet_cq        nvme_cq;
>> @@ -1455,12 +1457,19 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
>>       nvmet_sq_destroy(&queue->nvme_sq);
>>       cancel_work_sync(&queue->io_work);
>>       nvmet_tcp_free_cmd_data_in_buffers(queue);
>> -    sock_release(queue->sock);
>> +    if (queue->sock_file) {
>> +        fput(queue->sock_file);
> 
> I don't remember, but does the fput call sock_release
> on the final put? I'd move this into a helper nvmet_tcp_close_sock()
> or something.
> 
Yes, it does. (Took me some weeks to figure that out...)
But yeah, we can do a helper.
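Something like this, perhaps (just a sketch, using the helper name
you suggested; the point is that the final fput() releases the socket
via the file release path, so we must not call sock_release() again):

static void nvmet_tcp_close_sock(struct nvmet_tcp_queue *queue)
{
	if (queue->sock_file) {
		/* final fput() ends up in sock_close(), releasing the socket */
		fput(queue->sock_file);
		queue->sock_file = NULL;
	} else {
		/* no file wrapper was allocated; release the socket directly */
		sock_release(queue->sock);
	}
	queue->sock = NULL;
}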

>> +        queue->sock_file = NULL;
>> +        queue->sock = NULL;
> 
> I always get a bit wary when I see that deallocations are setting
> pointers to NULL.
> 
And curiously, that's a pattern I commonly use to track invalid accesses.
But that's just personal preference.
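To illustrate (a made-up fragment, not from this patch): clearing the
pointer turns a silent use-after-free into an immediate NULL
dereference that oopses right at the stale access:

#include <linux/slab.h>

struct demo {
	void *buf;
};

static void demo_free_buf(struct demo *d)
{
	kfree(d->buf);
	d->buf = NULL;	/* any stale d->buf access now faults predictably */
}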

>> +    } else {
>> +        WARN_ON(!queue->sock->ops);
>> +        sock_release(queue->sock);
>> +        queue->sock = NULL;
>> +    }
>>       nvmet_tcp_free_cmds(queue);
>>       if (queue->hdr_digest || queue->data_digest)
>>           nvmet_tcp_free_crypto(queue);
>>       ida_free(&nvmet_tcp_queue_ida, queue->idx);
>> -
>>       page = virt_to_head_page(queue->pf_cache.va);
>>       __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
>>       kfree(queue);
>> @@ -1583,7 +1592,7 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
>>       return ret;
>>   }
>> -static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>> +static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>           struct socket *newsock)
> 
> Why is this becoming a void function? This absolutely can fail.
> 
Oh, it can fail.
But it's the last call in a 'void' function, so there is nothing the
caller can do with the return value.
And the caller only uses the return value to print a log message,
so I moved that logging into nvmet_tcp_alloc_queue() and made it a
'void' function.
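For reference, the call-site looks roughly like this (quoting the
existing accept loop from memory, so details may differ):

static void nvmet_tcp_accept_work(struct work_struct *w)
{
	struct nvmet_tcp_port *port =
		container_of(w, struct nvmet_tcp_port, accept_work);
	struct socket *newsock;
	int ret;

	while (true) {
		ret = kernel_accept(port->sock, &newsock, O_NONBLOCK);
		if (ret < 0) {
			if (ret != -EAGAIN)
				pr_warn("failed to accept err=%d\n", ret);
			return;
		}
		/* last call in a void function; the return value was
		 * only ever used for the log message */
		nvmet_tcp_alloc_queue(port, newsock);
	}
}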

>>   {
>>       struct nvmet_tcp_queue *queue;
>> @@ -1591,7 +1600,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>       queue = kzalloc(sizeof(*queue), GFP_KERNEL);
>>       if (!queue)
>> -        return -ENOMEM;
>> +        return;
>>       INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
>>       INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
>> @@ -1599,15 +1608,28 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>       queue->port = port;
>>       queue->nr_cmds = 0;
>>       spin_lock_init(&queue->state_lock);
>> -    queue->state = NVMET_TCP_Q_CONNECTING;
>> +    if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
>> +        NVMF_TCP_SECTYPE_TLS13)
>> +        queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
>> +    else
>> +        queue->state = NVMET_TCP_Q_CONNECTING;
>>       INIT_LIST_HEAD(&queue->free_list);
>>       init_llist_head(&queue->resp_list);
>>       INIT_LIST_HEAD(&queue->resp_send_list);
>> +    if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
>> +        queue->sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL);
>> +        if (IS_ERR(queue->sock_file)) {
>> +            ret = PTR_ERR(queue->sock_file);
>> +            queue->sock_file = NULL;
>> +            goto out_free_queue;
>> +        }
>> +    }
>> +
>>       queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL);
>>       if (queue->idx < 0) {
>>           ret = queue->idx;
>> -        goto out_free_queue;
>> +        goto out_sock;
>>       }
>>       ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
>> @@ -1628,7 +1650,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>       if (ret)
>>           goto out_destroy_sq;
>> -    return 0;
>> +    return;
>>   out_destroy_sq:
>>       mutex_lock(&nvmet_tcp_queue_mutex);
>>       list_del_init(&queue->queue_list);
>> @@ -1638,9 +1660,14 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>       nvmet_tcp_free_cmd(&queue->connect);
>>   out_ida_remove:
>>       ida_free(&nvmet_tcp_queue_ida, queue->idx);
>> +out_sock:
>> +    if (queue->sock_file)
>> +        fput(queue->sock_file);
>> +    else
>> +        sock_release(queue->sock);
>>   out_free_queue:
>>       kfree(queue);
>> -    return ret;
>> +    pr_err("failed to allocate queue");
> 
> Can we design this better?
> It looks backwards that this routine deallocates an argument
> coming from the call-site.
> 
> I know that this is similar to what happens with kernel_accept
> to some extent. But would prefer to avoid this pattern if possible.
> 
Sure; I just followed precedent here.
But it's no problem to change.
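At the call-site that would become something like this (a sketch; the
one wrinkle is that once sock_alloc_file() has succeeded the file
owns the socket, so on later error paths the callee must still fput()
itself rather than have the caller sock_release() a socket that is
already gone):

	ret = nvmet_tcp_alloc_queue(port, newsock);
	if (ret) {
		pr_err("failed to allocate queue, err=%d\n", ret);
		/* callee guarantees newsock is untouched on failure */
		sock_release(newsock);
	}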

Cheers,

Hannes
