[PATCH 6/6] net/tls: implement ->read_sock()

Jakub Kicinski kuba at kernel.org
Thu Jul 20 20:02:16 PDT 2023


On Wed, 19 Jul 2023 13:38:36 +0200 Hannes Reinecke wrote:
> Implement ->read_sock() function for use with nvme-tcp.

> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
> +		     sk_read_actor_t read_actor)
> +{
> +	struct tls_context *tls_ctx = tls_get_ctx(sk);
> +	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
> +	struct strp_msg *rxm = NULL;
> +	struct tls_msg *tlm;
> +	struct sk_buff *skb;
> +	struct sk_psock *psock;
> +	ssize_t copied = 0;
> +	bool bpf_strp_enabled;

bubble up the longer lines (reverse xmas tree), like this:

+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct strp_msg *rxm = NULL;
+	struct sk_psock *psock;
+	bool bpf_strp_enabled;
+	struct tls_msg *tlm;
+	struct sk_buff *skb;
+	ssize_t copied = 0;
+	int err, used;

> +	int err, used;
> +
> +	psock = sk_psock_get(sk);
> +	err = tls_rx_reader_acquire(sk, ctx, true);
> +	if (err < 0)
> +		goto psock_put;
> +	bpf_strp_enabled = sk_psock_strp_enabled(psock);

You're not servicing the BPF out-of-band queue; just error out if
the BPF psock is enabled. It's barely used and endlessly buggy anyway.
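
E.g. (untested; this just refuses whenever a psock is attached at
all, and mind the ref that has to be dropped on the way out):

	psock = sk_psock_get(sk);
	if (psock) {
		/* read_sock does not service the BPF OOB queue,
		 * refuse to run with a psock attached
		 */
		sk_psock_put(sk, psock);
		return -EINVAL;
	}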

> +	/* If crypto failed the connection is broken */
> +	err = ctx->async_wait.err;
> +	if (err)
> +		goto read_sock_end;
> +
> +	do {
> +		if (!skb_queue_empty(&ctx->rx_list)) {
> +			skb = __skb_dequeue(&ctx->rx_list);
> +			rxm = strp_msg(skb);
> +		} else {
> +			struct tls_decrypt_arg darg;
> +
> +			err = tls_rx_rec_wait(sk, psock, true, true);
> +			if (err <= 0)
> +				goto read_sock_end;
> +
> +			memset(&darg.inargs, 0, sizeof(darg.inargs));
> +			darg.zc = !bpf_strp_enabled && ctx->zc_capable;

And what are you zero-copying into, my friend? zc == zero copy.
Leave zc at 0, like splice does; otherwise passing msg=NULL to
tls_rx_one_record() may explode. Testing with TLS 1.2 should
show that.
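
IOW let the memset() do its job and drop the assignment:

			memset(&darg.inargs, 0, sizeof(darg.inargs));
			/* darg.zc stays 0: msg is NULL, so there is no
			 * user buffer to decrypt into and the record has
			 * to land in a freshly allocated skb, same as
			 * the splice path
			 */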

> +			rxm = strp_msg(tls_strp_msg(ctx));
> +			tlm = tls_msg(tls_strp_msg(ctx));
> +
> +			/* read_sock does not support reading control messages */
> +			if (tlm->control != TLS_RECORD_TYPE_DATA) {
> +				err = -EINVAL;
> +				goto read_sock_requeue;
> +			}
> +
> +			if (!bpf_strp_enabled)
> +				darg.async = ctx->async_capable;
> +			else
> +				darg.async = false;

Also, don't bother with async. TLS 1.3 can't do async anyway,
and I don't think you wait for the completion :S
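
Same idea - the memset() already zeroed darg.async, so the whole
if/else can simply be deleted:

			/* darg.async stays 0: nothing in read_sock waits
			 * for an async completion, and TLS 1.3 has no
			 * async decrypt path anyway
			 */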

> +			err = tls_rx_one_record(sk, NULL, &darg);
> +			if (err < 0) {
> +				tls_err_abort(sk, -EBADMSG);
> +				goto read_sock_end;
> +			}
> +
> +			sk_flush_backlog(sk);

Hm, flushing after every record could be a bit often, but okay.
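
If it ever shows up in profiles this could be rate-limited the way
recvmsg does it via tls_read_flush_backlog(), i.e. roughly once per
128K of decrypted data. Untested sketch, flushed_at would be a new
local:

		if (copied - flushed_at >= SZ_128K) {
			/* flush at most every ~128K, not every record */
			flushed_at = copied;
			sk_flush_backlog(sk);
		}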

> +			skb = darg.skb;
> +			rxm = strp_msg(skb);
> +
> +			tls_rx_rec_done(ctx);
> +		}
> +
> +		used = read_actor(desc, skb, rxm->offset, rxm->full_len);
> +		if (used <= 0) {
> +			if (!copied)
> +				err = used;
> +			goto read_sock_end;

You have to requeue on error.
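
IOW something like:

		used = read_actor(desc, skb, rxm->offset, rxm->full_len);
		if (used <= 0) {
			if (!copied)
				err = used;
			/* put the record back instead of dropping it */
			goto read_sock_requeue;
		}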

> +		}
> +		copied += used;
> +		if (used < rxm->full_len) {
> +			rxm->offset += used;
> +			rxm->full_len -= used;
> +			if (!desc->count)
> +				goto read_sock_requeue;

And requeue here too, like splice_read does; otherwise you leak
the skb.
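
For reference, the shape of tls_sw_splice_read() (roughly, from
memory) - every exit that doesn't fully consume the record puts it
back on rx_list:

	chunk = min_t(unsigned int, rxm->full_len, len);
	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
	if (copied < 0)
		goto splice_requeue;

	if (chunk < rxm->full_len) {
		rxm->offset += copied;
		rxm->full_len -= copied;
		goto splice_requeue;	/* leftover stays queued */
	}

	consume_skb(skb);
	...
splice_requeue:
	__skb_queue_head(&ctx->rx_list, skb);
	goto splice_read_end;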

> +		} else {
> +			consume_skb(skb);
> +			if (!desc->count)
> +				skb = NULL;
> +		}
> +	} while (skb);
> +
> +read_sock_end:
> +	tls_rx_reader_release(sk, ctx);
> +psock_put:
> +	if (psock)
> +		sk_psock_put(sk, psock);
> +	return copied ? : err;
> +
> +read_sock_requeue:
> +	__skb_queue_head(&ctx->rx_list, skb);
> +	goto read_sock_end;
> +}
> +
>  bool tls_sw_sock_is_readable(struct sock *sk)
>  {
>  	struct tls_context *tls_ctx = tls_get_ctx(sk);



