[PATCH 15/17] nvmet-tcp: enable TLS handshake upcall
Sagi Grimberg
sagi at grimberg.me
Tue Aug 15 00:01:21 PDT 2023
>>>>> @@ -1864,6 +1877,14 @@ static struct config_group
>>>>> *nvmet_ports_make(struct config_group *group,
>>>>> return ERR_PTR(-ENOMEM);
>>>>> }
>>>>> + if (nvme_keyring_id()) {
>>>>> + port->keyring = key_lookup(nvme_keyring_id());
>>>>> + if (IS_ERR(port->keyring)) {
>>>>> + pr_warn("NVMe keyring not available, disabling TLS\n");
>>>>> + port->keyring = NULL;
>>>>
>>>> why setting this to NULL?
>>>>
>>> It's check when changing TSAS; we can only enable TLS if the nvme
>>> keyring is available.
>>
>> ok
>>
>>>
>>>>> + }
>>>>> + }
>>>>> +
>>>>> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>>>> if (i == NVMET_DEFAULT_ANA_GRPID)
>>>>> port->ana_state[1] = NVME_ANA_OPTIMIZED;
>>>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>>>>> index 8cfd60f3b564..7f9ae53c1df5 100644
>>>>> --- a/drivers/nvme/target/nvmet.h
>>>>> +++ b/drivers/nvme/target/nvmet.h
>>>>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>>>> struct config_group ana_groups_group;
>>>>> struct nvmet_ana_group ana_default_group;
>>>>> enum nvme_ana_state *ana_state;
>>>>> + struct key *keyring;
>>>>> void *priv;
>>>>> bool enabled;
>>>>> int inline_data_size;
>>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>>>> index f19ea9d923fd..77fa339008e1 100644
>>>>> --- a/drivers/nvme/target/tcp.c
>>>>> +++ b/drivers/nvme/target/tcp.c
>>>>> @@ -8,9 +8,13 @@
>>>>> #include <linux/init.h>
>>>>> #include <linux/slab.h>
>>>>> #include <linux/err.h>
>>>>> +#include <linux/key.h>
>>>>> #include <linux/nvme-tcp.h>
>>>>> +#include <linux/nvme-keyring.h>
>>>>> #include <net/sock.h>
>>>>> #include <net/tcp.h>
>>>>> +#include <net/tls.h>
>>>>> +#include <net/handshake.h>
>>>>> #include <linux/inet.h>
>>>>> #include <linux/llist.h>
>>>>> #include <crypto/hash.h>
>>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs,
>>>>> &set_param_ops,
>>>>> MODULE_PARM_DESC(idle_poll_period_usecs,
>>>>> "nvmet tcp io_work poll till idle time period in usecs:
>>>>> Default 0");
>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>> +/*
>>>>> + * TLS handshake timeout
>>>>> + */
>>>>> +static int tls_handshake_timeout = 10;
>>>>> +module_param(tls_handshake_timeout, int, 0644);
>>>>> +MODULE_PARM_DESC(tls_handshake_timeout,
>>>>> + "nvme TLS handshake timeout in seconds (default 10)");
>>>>> +#endif
>>>>> +
>>>>> #define NVMET_TCP_RECV_BUDGET 8
>>>>> #define NVMET_TCP_SEND_BUDGET 8
>>>>> #define NVMET_TCP_IO_WORK_BUDGET 64
>>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>>>> enum nvmet_tcp_queue_state {
>>>>> NVMET_TCP_Q_CONNECTING,
>>>>> + NVMET_TCP_Q_TLS_HANDSHAKE,
>>>>> NVMET_TCP_Q_LIVE,
>>>>> NVMET_TCP_Q_DISCONNECTING,
>>>>> };
>>>>> struct nvmet_tcp_queue {
>>>>> + struct kref kref;
>>>>
>>>> Why is kref the first member of the struct?
>>>>
>>> Habit.
>>> I don't mind where it'll end up.
>>
>> Move it to the back together with the tls section.
>>
>>>
>>>>> struct socket *sock;
>>>>> struct nvmet_tcp_port *port;
>>>>> struct work_struct io_work;
>>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>>>> struct ahash_request *snd_hash;
>>>>> struct ahash_request *rcv_hash;
>>>>> + /* TLS state */
>>>>> + key_serial_t tls_pskid;
>>>>> + struct delayed_work tls_handshake_work;
>>>>> +
>>>>> unsigned long poll_end;
>>>>> spinlock_t state_lock;
>>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct
>>>>> nvmet_tcp_queue *queue,
>>>>> return ret;
>>>>> }
>>>>> +static void nvmet_tcp_release_queue(struct kref *kref)
>>>>> +{
>>>>> + struct nvmet_tcp_queue *queue =
>>>>> + container_of(kref, struct nvmet_tcp_queue, kref);
>>>>> +
>>>>> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>>>>> + queue_work(nvmet_wq, &queue->release_work);
>>>>> +}
>>>>> +
>>>>> static void nvmet_tcp_schedule_release_queue(struct
>>>>> nvmet_tcp_queue *queue)
>>>>> {
>>>>> spin_lock_bh(&queue->state_lock);
>>>>> if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>>>> queue->state = NVMET_TCP_Q_DISCONNECTING;
>>>>> - queue_work(nvmet_wq, &queue->release_work);
>>>>> + kref_put(&queue->kref, nvmet_tcp_release_queue);
>>>>> }
>>>>> spin_unlock_bh(&queue->state_lock);
>>>>> }
>>>>> @@ -1485,6 +1514,8 @@ static void
>>>>> nvmet_tcp_release_queue_work(struct work_struct *w)
>>>>> mutex_unlock(&nvmet_tcp_queue_mutex);
>>>>> nvmet_tcp_restore_socket_callbacks(queue);
>>>>> + tls_handshake_cancel(queue->sock->sk);
>>>>> + cancel_delayed_work_sync(&queue->tls_handshake_work);
>>>>
>>>> We should call it tls_handshake_tmo_work or something to make it
>>>> clear it is a timeout work.
>>>>
>>> Okay.
>>>
>>>>> cancel_work_sync(&queue->io_work);
>>>>> /* stop accepting incoming data */
>>>>> queue->rcv_state = NVMET_TCP_RECV_ERR;
>>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock
>>>>> *sk)
>>>>> read_lock_bh(&sk->sk_callback_lock);
>>>>> queue = sk->sk_user_data;
>>>>> - if (likely(queue))
>>>>> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>>>> &queue->io_work);
>>>>> + if (likely(queue)) {
>>>>> + if (queue->data_ready)
>>>>> + queue->data_ready(sk);
>>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>>>>> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>>>> + &queue->io_work);
>>>>> + }
>>>>> read_unlock_bh(&sk->sk_callback_lock);
>>>>> }
>>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct
>>>>> nvmet_tcp_queue *queue)
>>>>> return ret;
>>>>> }
>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>>>>> + key_serial_t peerid)
>>>>> +{
>>>>> + struct nvmet_tcp_queue *queue = data;
>>>>> +
>>>>> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>>>>> + queue->idx, peerid, status);
>>>>> + spin_lock_bh(&queue->state_lock);
>>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>>>>
>>>> Is this even possible?
>>>>
>>> I guess it can happen when the socket closes during handshake; the
>>> daemon might still be sending a 'done' event but
>>> nvmet_tcp_schedule_release_queue() has been called.
>>
>> Umm, if the socket closes during the handshake then the state
>> is NVMET_TCP_Q_TLS_HANDSHAKE.
>>
> But there's a race window between setting it to
> NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel().
>
>> p.s. you call handshake cancel in the release flow so you should be
>> fenced properly no?
> Not really. But I'll check if I can fix it up.
The teardown handling feels complicated to me.
How are you testing it btw?
More information about the Linux-nvme
mailing list