[PATCH v2] net/socket: Check cgroup_bpf_enabled() only once in do_sock_getsockopt

Kuniyuki Iwashima kuniyu at amazon.com
Mon Aug 19 11:18:25 PDT 2024


From: Tze-nan Wu <Tze-nan.Wu at mediatek.com>
Date: Mon, 19 Aug 2024 23:56:27 +0800
> The return value from `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` can change
> between the invocations of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` and
> `BPF_CGROUP_RUN_PROG_GETSOCKOPT`.
> 
> If `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` changes from "false" to
> "true" between the invocations of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` and
> `BPF_CGROUP_RUN_PROG_GETSOCKOPT`, `BPF_CGROUP_RUN_PROG_GETSOCKOPT` will
> receive an -EFAULT from `__cgroup_bpf_run_filter_getsockopt(max_optlen=0)`
> because `get_user()` was not reached in `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN`.
> 
> Scenario shown as below:
> 
>            `process A`                      `process B`
>            -----------                      ------------
>   BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN
>                                             enable CGROUP_GETSOCKOPT
>   BPF_CGROUP_RUN_PROG_GETSOCKOPT (-EFAULT)
> 
> To prevent this, invoke `cgroup_bpf_enabled()` only once and cache the
> result in a newly added local variable `enabled`.
> Both `BPF_CGROUP_*` macros in `do_sock_getsockopt` will then check their
> condition using the same `enabled` variable as the condition variable,
> instead of using the return values from `cgroup_bpf_enabled` called by
> themselves as the condition variable (which could yield different results).
> This ensures that either both `BPF_CGROUP_*` macros pass the condition
> or neither does.
> 
> Co-developed-by: Yanghui Li <yanghui.li at mediatek.com>
> Signed-off-by: Yanghui Li <yanghui.li at mediatek.com>
> Co-developed-by: Cheng-Jui Wang <cheng-jui.wang at mediatek.com>
> Signed-off-by: Cheng-Jui Wang <cheng-jui.wang at mediatek.com>
> Signed-off-by: Tze-nan Wu <Tze-nan.Wu at mediatek.com>
> ---
> 
> Changes from v1 to v2: https://lore.kernel.org/all/20240819082513.27176-1-Tze-nan.Wu@mediatek.com/
>   Instead of using cgroup_lock in the fastpath, invoke cgroup_bpf_enabled
>   only once and cache the value in the variable `enabled`. `BPF_CGROUP_*`
>   macros in do_sock_getsockopt can then both check their condition with
>   the same variable, ensuring that either both pass the condition
>   or neither does.
> 
> Thank you for reviewing this!
> This patch makes do_sock_getsockopt call cgroup_bpf_enabled() only once,
> but I am not sure whether the "BPF_CGROUP_*" macros may be modified
> (I am not familiar with the code here).
> 
> If they may not, then I can come up with another patch like the one below:
> 	+++ b/net/socket.c
> 	  	int max_optlen __maybe_unused;
> 	 	const struct proto_ops *ops;
> 	 	int err;
> 	+	bool enabled;
> 	
> 	 	err = security_socket_getsockopt(sock, level, optname);
> 	 	if (err)
> 	 		return err;
> 	
> 	-	if (!compat)
> 	+	enabled = cgroup_bpf_enabled(CGROUP_GETSOCKOPT);
> 	+   if (!compat && enabled)
> 			max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
> 
> But this will cause do_sock_getsockopt to call cgroup_bpf_enabled up to
> three times. Which approach would be more acceptable?
> 
> ---
>  include/linux/bpf-cgroup.h | 13 ++++++-------
>  net/socket.c               |  9 ++++++---
>  2 files changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index fb3c3e7181e6..251632d52fa9 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -390,20 +390,19 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
>  	__ret;								       \
>  })
>  
> -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen)			       \
> +#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen, enabled)			       \

Please keep \ aligned.  Same for other places.


>  ({									       \
>  	int __ret = 0;							       \
> -	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT))			       \

Can you assign 'enabled' here to hide its usage in the macro?


> +	if (enabled)			       \
>  		copy_from_sockptr(&__ret, optlen, sizeof(int));		       \
>  	__ret;								       \
>  })
>  
>  #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen,   \
> -				       max_optlen, retval)		       \
> +				       max_optlen, retval, enabled)		       \
>  ({									       \
>  	int __ret = retval;						       \
> -	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) &&			       \
> -	    cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT))		       \
> +	if (enabled && cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT))		    \
>  		if (!(sock)->sk_prot->bpf_bypass_getsockopt ||		       \
>  		    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
>  					tcp_bpf_bypass_getsockopt,	       \
> @@ -518,9 +517,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>  #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
> -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
> +#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen, enabled) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
> -				       optlen, max_optlen, retval) ({ retval; })
> +				       optlen, max_optlen, retval, enabled) ({ retval; })
>  #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
>  					    optlen, retval) ({ retval; })
>  #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
> diff --git a/net/socket.c b/net/socket.c
> index fcbdd5bc47ac..5336a2755bb4 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -2365,13 +2365,16 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>  	int max_optlen __maybe_unused;
>  	const struct proto_ops *ops;
>  	int err;
> +	bool enabled;

Please keep reverse xmas tree order.
https://docs.kernel.org/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs


>  
>  	err = security_socket_getsockopt(sock, level, optname);
>  	if (err)
>  		return err;
>  
> -	if (!compat)
> -		max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
> +	if (!compat) {
> +		enabled = cgroup_bpf_enabled(CGROUP_GETSOCKOPT);
> +		max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen, enabled);
> +	}
>  
>  	ops = READ_ONCE(sock->ops);
>  	if (level == SOL_SOCKET) {
> @@ -2390,7 +2393,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>  	if (!compat)
>  		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
>  						     optval, optlen, max_optlen,
> -						     err);
> +						     err, enabled);
>  
>  	return err;
>  }
> -- 
> 2.45.2



More information about the Linux-mediatek mailing list