[PATCH 13/14] nvmet-tcp: enable TLS handshake upcall

Mon Aug 7 01:51:38 PDT 2023

On 8/3/23 13:51, Hannes Reinecke wrote:
> Add functions to start the TLS handshake upcall when
> the TCP TSAS sectype is set to 'tls1.3' and add a config
> option NVME_TARGET_TCP_TLS.
> 
> Signed-off-by: Hannes Reincke <hare at suse.de>
> ---
>   drivers/nvme/target/Kconfig    |  14 ++++
>   drivers/nvme/target/configfs.c |  63 +++++++++++++++-
>   drivers/nvme/target/nvmet.h    |   1 +
>   drivers/nvme/target/tcp.c      | 133 +++++++++++++++++++++++++++++++--
>   4 files changed, 203 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
> index 79fc64035ee3..15af2b4341f6 100644
> --- a/drivers/nvme/target/Kconfig
> +++ b/drivers/nvme/target/Kconfig
> @@ -84,6 +84,20 @@ config NVME_TARGET_TCP
>   
>   	  If unsure, say N.
>   
> +config NVME_TARGET_TCP_TLS
> +	bool "NVMe over Fabrics TCP target TLS encryption support"
> +	depends on NVME_TARGET_TCP
> +	select NVME_COMMON
> +	select NVME_KEYRING
> +	select NET_HANDSHAKE
> +	help
> +	  Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
> +
> +	  The TLS handshake daemon is availble at
> +	  https://github.com/oracle/ktls-utils.
> +
> +	  If unsure, say N.
> +
>   config NVME_TARGET_AUTH
>   	bool "NVMe over Fabrics In-band Authentication support"
>   	depends on NVME_TARGET
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index 7f826ac8b75c..49b407702ad5 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -15,6 +15,7 @@
>   #ifdef CONFIG_NVME_TARGET_AUTH
>   #include <linux/nvme-auth.h>
>   #endif
> +#include <linux/nvme-keyring.h>
>   #include <crypto/hash.h>
>   #include <crypto/kpp.h>
>   
> @@ -159,10 +160,15 @@ static const struct nvmet_type_name_map nvmet_addr_treq[] = {
>   	{ NVMF_TREQ_NOT_REQUIRED,	"not required" },
>   };
>   
> +static inline u8 nvmet_port_treq(struct nvmet_port *port)
> +{
> +	return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
> +}
> +
>   static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
>   {
> -	u8 treq = to_nvmet_port(item)->disc_addr.treq &
> -		NVME_TREQ_SECURE_CHANNEL_MASK;
> +	struct nvmet_port *port = to_nvmet_port(item);
> +	u8 treq = nvmet_port_treq(port);
>   	int i;
>   
>   	for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
> @@ -174,11 +180,16 @@ static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
>   	return snprintf(page, PAGE_SIZE, "\n");
>   }
>   
> +static inline u8 nvmet_port_treq_mask(struct nvmet_port *port)
> +{
> +	return (port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK);
> +}
> +
>   static ssize_t nvmet_addr_treq_store(struct config_item *item,
>   		const char *page, size_t count)
>   {
>   	struct nvmet_port *port = to_nvmet_port(item);
> -	u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
> +	u8 treq = nvmet_port_treq_mask(port);
>   	int i;
>   
>   	if (nvmet_is_port_enabled(port, __func__))
> @@ -193,6 +204,23 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
>   	return -EINVAL;
>   
>   found:
> +	if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) {
> +		if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
> +			pr_err("TLS is not supported\n");
> +			return -EINVAL;
> +		}
> +		if (!port->keyring) {
> +			pr_err("TLS keyring not configured\n");
> +			return -EINVAL;
> +		}
> +		if (port->disc_addr.tsas.tcp.sectype != NVMF_TCP_SECTYPE_TLS13) {
> +			pr_warn("cannot change TREQ when TLS is not enabled\n");
> +			return -EINVAL;
> +		} else if (nvmet_addr_treq[i].type == NVMF_TREQ_NOT_SPECIFIED) {
> +			pr_warn("cannot set TREQ to 'not specified' when TLS is enabled\n");
> +			return -EINVAL;
> +		}
> +	}
>   	treq |= nvmet_addr_treq[i].type;
>   	port->disc_addr.treq = treq;
>   	return count;
> @@ -371,6 +399,7 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
>   		const char *page, size_t count)
>   {
>   	struct nvmet_port *port = to_nvmet_port(item);
> +	u8 treq = nvmet_port_treq_mask(port);
>   	int i;
>   
>   	if (nvmet_is_port_enabled(port, __func__))
> @@ -379,6 +408,15 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
>   	if (port->disc_addr.trtype != NVMF_TRTYPE_TCP)
>   		return -EINVAL;
>   
> +	if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
> +		pr_err("TLS is not supported\n");
> +		return -EINVAL;
> +	}
> +	if (!port->keyring) {
> +		pr_err("TLS keyring not configured\n");
> +		return -EINVAL;
> +	}
> +
>   	for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
>   		if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name))
>   			goto found;
> @@ -389,6 +427,16 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
>   
>   found:
>   	nvmet_port_init_tsas_tcp(port, nvmet_addr_tsas_tcp[i].type);
> +	if (nvmet_addr_tsas_tcp[i].type == NVMF_TCP_SECTYPE_TLS13) {
> +		if (nvmet_port_treq(port) == NVMF_TREQ_NOT_SPECIFIED)
> +			treq |= NVMF_TREQ_REQUIRED;
> +		else
> +			treq |= nvmet_port_treq(port);
> +	} else {
> +		/* Set to 'not specified' if TLS is not enabled */
> +		treq |= NVMF_TREQ_NOT_SPECIFIED;
> +	}
> +	port->disc_addr.treq = treq;
>   	return count;
>   }
>   

Can the treq/tsas be split from the actual nvmet-tcp upcall addition?

> @@ -1795,6 +1843,7 @@ static void nvmet_port_release(struct config_item *item)
>   	flush_workqueue(nvmet_wq);
>   	list_del(&port->global_entry);
>   
> +	key_put(port->keyring);
>   	kfree(port->ana_state);
>   	kfree(port);
>   }
> @@ -1844,6 +1893,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
>   		return ERR_PTR(-ENOMEM);
>   	}
>   
> +	if (nvme_keyring_id()) {
> +		port->keyring = key_lookup(nvme_keyring_id());
> +		if (IS_ERR(port->keyring)) {
> +			pr_warn("NVMe keyring not available, disabling TLS\n");
> +			port->keyring = NULL;
> +		}
> +	}
> +

Nice.

>   	for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>   		if (i == NVMET_DEFAULT_ANA_GRPID)
>   			port->ana_state[1] = NVME_ANA_OPTIMIZED;
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index 8cfd60f3b564..7f9ae53c1df5 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -158,6 +158,7 @@ struct nvmet_port {
>   	struct config_group		ana_groups_group;
>   	struct nvmet_ana_group		ana_default_group;
>   	enum nvme_ana_state		*ana_state;
> +	struct key			*keyring;
>   	void				*priv;
>   	bool				enabled;
>   	int				inline_data_size;
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index fdc351f591a4..7279c994abd6 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -8,9 +8,13 @@
>   #include <linux/init.h>
>   #include <linux/slab.h>
>   #include <linux/err.h>
> +#include <linux/key.h>
>   #include <linux/nvme-tcp.h>
> +#include <linux/nvme-keyring.h>
>   #include <net/sock.h>
>   #include <net/tcp.h>
> +#include <net/tls.h>
> +#include <net/handshake.h>
>   #include <linux/inet.h>
>   #include <linux/llist.h>
>   #include <crypto/hash.h>
> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops,
>   MODULE_PARM_DESC(idle_poll_period_usecs,
>   		"nvmet tcp io_work poll till idle time period in usecs: Default 0");
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
> +/*
> + * TLS handshake timeout
> + */
> +static int tls_handshake_timeout = 30;
> +module_param(tls_handshake_timeout, int, 0644);
> +MODULE_PARM_DESC(tls_handshake_timeout,
> +		 "nvme TLS handshake timeout in seconds (default 30)");
> +#endif

On the host it is 10 and here it is 30? What is the source of the asymmetry?

> +
>   #define NVMET_TCP_RECV_BUDGET		8
>   #define NVMET_TCP_SEND_BUDGET		8
>   #define NVMET_TCP_IO_WORK_BUDGET	64
> @@ -122,6 +136,7 @@ struct nvmet_tcp_cmd {
>   
>   enum nvmet_tcp_queue_state {
>   	NVMET_TCP_Q_CONNECTING,
> +	NVMET_TCP_Q_TLS_HANDSHAKE,
>   	NVMET_TCP_Q_LIVE,
>   	NVMET_TCP_Q_DISCONNECTING,
>   };
> @@ -154,6 +169,8 @@ struct nvmet_tcp_queue {
>   	bool			data_digest;
>   	struct ahash_request	*snd_hash;
>   	struct ahash_request	*rcv_hash;
> +	key_serial_t		tls_pskid;
> +	struct delayed_work	tls_handshake_work;
>   
>   	unsigned long           poll_end;
>   
> @@ -1285,12 +1302,12 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
>   
>   static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
>   {
> -	spin_lock(&queue->state_lock);
> +	spin_lock_irq(&queue->state_lock);

Where is this lock taken in irq context, such that irqs need to be disabled here?

>   	if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>   		queue->state = NVMET_TCP_Q_DISCONNECTING;
>   		queue_work(nvmet_wq, &queue->release_work);
>   	}
> -	spin_unlock(&queue->state_lock);
> +	spin_unlock_irq(&queue->state_lock);
>   }
>   
>   static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue)
> @@ -1512,8 +1529,12 @@ static void nvmet_tcp_data_ready(struct sock *sk)
>   
>   	read_lock_bh(&sk->sk_callback_lock);
>   	queue = sk->sk_user_data;
> -	if (likely(queue))
> -		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
> +	if (queue->data_ready)
> +		queue->data_ready(sk);
> +	if (likely(queue) &&
> +	    queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
> +		queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
> +			      &queue->io_work);
>   	read_unlock_bh(&sk->sk_callback_lock);
>   }
>   
> @@ -1621,6 +1642,85 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
>   	return ret;
>   }
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
> +static void nvmet_tcp_tls_queue_reset(struct nvmet_tcp_queue *queue)
> +{
> +	spin_lock_irq(&queue->state_lock);
> +	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
> +		pr_warn("queue %d: TLS handshake already completed\n",
> +			queue->idx);
> +		spin_unlock_irq(&queue->state_lock);
> +		return;

Trigger a fatal error here?

> +	}
> +	queue->state = NVMET_TCP_Q_CONNECTING;
> +	spin_unlock_irq(&queue->state_lock);
> +
> +	pr_debug("queue %d: resetting queue callbacks after TLS handshake\n",
> +		 queue->idx);
> +	/*
> +	 * Set callbacks after handshake; TLS implementation
> +	 * might have changed the socket callbacks.
> +	 */
> +	nvmet_tcp_set_queue_sock(queue);
> +}
> +
> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
> +					 key_serial_t peerid)

Let's call peerid psk_id throughout.

> +{
> +	struct nvmet_tcp_queue *queue = data;
> +
> +	pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
> +		 queue->idx, peerid, status);
> +	if (!status) {
> +		spin_lock_irq(&queue->state_lock);
> +		queue->tls_pskid = peerid;
> +		spin_unlock_irq(&queue->state_lock);
> +	}
> +	cancel_delayed_work_sync(&queue->tls_handshake_work);

Hmm, the cancel_delayed_work_sync is scary.

What happens if the timeout work already ran and scheduled a release
(which itself already ran and completed)?

> +	if (status)
> +		nvmet_tcp_schedule_release_queue(queue);
> +	else
> +		nvmet_tcp_tls_queue_reset(queue);

What I think you need is an atomic state that one or the
other accesses, and then you are fine with a normal async
cancel of the delayed_work.

> +}
> +
> +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w)
> +{
> +	struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w),
> +			struct nvmet_tcp_queue, tls_handshake_work);
> +
> +	pr_debug("queue %d: TLS handshake timeout\n", queue->idx);
> +	nvmet_tcp_schedule_release_queue(queue);
> +}
> +
> +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
> +{
> +	int ret = -EOPNOTSUPP;
> +	struct tls_handshake_args args;
> +
> +	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
> +		pr_warn("cannot start TLS in state %d\n", queue->state);
> +		return -EINVAL;
> +	}
> +
> +	pr_debug("queue %d: TLS ServerHello\n", queue->idx);
> +	memset(&args, 0, sizeof(args));
> +	args.ta_sock = queue->sock;
> +	args.ta_done = nvmet_tcp_tls_handshake_done;
> +	args.ta_data = queue;
> +	args.ta_keyring = key_serial(queue->port->nport->keyring);
> +	args.ta_timeout_ms = tls_handshake_timeout * 2 * 1024;

* 2 * 1024 ? I didn't know we have 2048 ms in a second...

> +
> +	ret = tls_server_hello_psk(&args, GFP_KERNEL);
> +	if (ret) {
> +		pr_err("failed to start TLS, err=%d\n", ret);
> +	} else {
> +		queue_delayed_work(nvmet_wq, &queue->tls_handshake_work,
> +				   tls_handshake_timeout * HZ);
> +	}
> +	return ret;
> +}
> +#endif
> +
>   static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   		struct socket *newsock)
>   {
> @@ -1638,7 +1738,11 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   	queue->port = port;
>   	queue->nr_cmds = 0;
>   	spin_lock_init(&queue->state_lock);
> -	queue->state = NVMET_TCP_Q_CONNECTING;
> +	if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
> +	    NVMF_TCP_SECTYPE_TLS13)
> +		queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
> +	else
> +		queue->state = NVMET_TCP_Q_CONNECTING;
>   	INIT_LIST_HEAD(&queue->free_list);
>   	init_llist_head(&queue->resp_list);
>   	INIT_LIST_HEAD(&queue->resp_send_list);
> @@ -1669,6 +1773,25 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   	list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
>   	mutex_unlock(&nvmet_tcp_queue_mutex);
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS

Maybe the ifdef can be avoided with stubs?

> +	INIT_DELAYED_WORK(&queue->tls_handshake_work,
> +			  nvmet_tcp_tls_handshake_timeout_work);
> +	if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
> +		struct sock *sk = queue->sock->sk;
> +
> +		/* Restore the default callbacks before starting upcall */
> +		read_lock_bh(&sk->sk_callback_lock);
> +		sk->sk_user_data = NULL;
> +		sk->sk_data_ready = port->data_ready;
> +		read_unlock_bh(&sk->sk_callback_lock);
> +		if (!nvmet_tcp_tls_handshake(queue))
> +			return;
> +
> +		/* TLS handshake failed, terminate the connection */
> +		goto out_destroy_sq;
> +	}
> +#endif
> +
>   	ret = nvmet_tcp_set_queue_sock(queue);
>   	if (ret)
>   		goto out_destroy_sq;