
[bpf-next,v2,1/3] bpf, sockmap: avoid using sk_socket after free

Message ID 20250228055106.58071-2-jiayuan.chen@linux.dev
State New
Series: [bpf-next,v2,1/3] bpf, sockmap: avoid using sk_socket after free

Commit Message

Jiayuan Chen Feb. 28, 2025, 5:51 a.m. UTC
Use the RCU read lock to protect sk->sk_socket, preventing a concurrent
close and release by another thread.
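
For context, a minimal sketch of the generic RCU publish/retire pattern this
fix relies on (made-up names; "struct foo"/"obj" stand in for struct socket /
sk->sk_socket, this is not the actual socket teardown code):
'''
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/printk.h>

struct foo {
	int val;
};

static struct foo __rcu *obj;

static void reader(void)
{
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(obj);	/* may race with the writer below */
	if (p)
		pr_info("val=%d\n", p->val);	/* safe: p cannot be freed while
						 * the read-side section is held */
	rcu_read_unlock();
}

static void writer(void)
{
	struct foo *old;

	old = rcu_dereference_protected(obj, 1);	/* update side */
	rcu_assign_pointer(obj, NULL);			/* unpublish the pointer */
	synchronize_rcu();				/* wait for in-flight readers */
	kfree(old);					/* only now safe to free */
}
'''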

Because the TCP/UDP receive paths already run within a relatively large RCU
read-side critical section:
'''
ip_local_deliver_finish
  rcu_read_lock
  ip_protocol_deliver_rcu
      tcp_rcv/udp_rcv
  rcu_read_unlock
'''

adding rcu_read_{un}lock() at the entry and exit of sk_data_ready() does not
introduce any extra performance overhead.
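
As a sketch of why the nesting is essentially free (made-up function names
standing in for the real call chain):
'''
#include <linux/rcupdate.h>

/* Illustrative only: inner_data_ready()/outer_rx_path() stand in for
 * sk_psock_verdict_data_ready() and ip_local_deliver_finish(). RCU
 * read-side sections nest, so the inner lock/unlock only adjusts a
 * nesting/preempt count and starts no new grace-period work.
 */
static void inner_data_ready(void)
{
	rcu_read_lock();		/* nested: cheap */
	/* ... safely dereference sk->sk_socket here ... */
	rcu_read_unlock();
}

static void outer_rx_path(void)
{
	rcu_read_lock();		/* what the softirq receive path already holds */
	inner_data_ready();
	rcu_read_unlock();
}
'''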

Fixes: c63829182c37 ("af_unix: Implement ->psock_update_sk_prot()")
Reported-by: syzbot+dd90a702f518e0eac072@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/bpf/6734c033.050a0220.2a2fcc.0015.GAE@google.com/
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
 net/core/skmsg.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Comments

Michal Luczaj March 7, 2025, 9:45 a.m. UTC | #1
On 2/28/25 06:51, Jiayuan Chen wrote:
> ...
>  static void sk_psock_verdict_data_ready(struct sock *sk)
>  {
> -	struct socket *sock = sk->sk_socket;
> +	struct socket *sock;
>  	const struct proto_ops *ops;
>  	int copied;
>  
>  	trace_sk_data_ready(sk);
>  
> +	/* We need RCU to prevent the sk_socket from being released.
> +	 * Especially for Unix sockets, we are currently in the process
> +	 * context and do not have RCU protection.
> +	 */
> +	rcu_read_lock();
> +	sock = sk->sk_socket;
>  	if (unlikely(!sock))
> -		return;
> +		goto unlock;
> +
>  	ops = READ_ONCE(sock->ops);
>  	if (!ops || !ops->read_skb)
> -		return;
> +		goto unlock;
> +
>  	copied = ops->read_skb(sk, sk_psock_verdict_recv);
>  	if (copied >= 0) {
>  		struct sk_psock *psock;
>  
> -		rcu_read_lock();
>  		psock = sk_psock(sk);
>  		if (psock)
>  			sk_psock_data_ready(sk, psock);
> -		rcu_read_unlock();
>  	}
> +unlock:
> +	rcu_read_unlock();
>  }

Hi,

Doesn't sk_psock_handle_skb() (!ingress path) have the same `struct socket`
release race issue? Any plans on fixing that one, too?

BTW, lockdep (CONFIG_LOCKDEP=y) complains about calling AF_UNIX's
read_skb() under RCU read lock.
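
Roughly this shape, I think (placeholder mutex, just to illustrate what trips
the check; not the actual AF_UNIX lock):
'''
#include <linux/mutex.h>
#include <linux/rcupdate.h>

static DEFINE_MUTEX(example_mutex);

static void data_ready_like_path(void)
{
	rcu_read_lock();
	mutex_lock(&example_mutex);	/* may sleep -> lockdep / might_sleep splat */
	mutex_unlock(&example_mutex);
	rcu_read_unlock();
}
'''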

Thanks,
Michal

Patch

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 0ddc4c718833..1b71ae1d1bf5 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1222,27 +1222,35 @@  static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
-	struct socket *sock = sk->sk_socket;
+	struct socket *sock;
 	const struct proto_ops *ops;
 	int copied;
 
 	trace_sk_data_ready(sk);
 
+	/* We need RCU to prevent the sk_socket from being released.
+	 * Especially for Unix sockets, we are currently in the process
+	 * context and do not have RCU protection.
+	 */
+	rcu_read_lock();
+	sock = sk->sk_socket;
 	if (unlikely(!sock))
-		return;
+		goto unlock;
+
 	ops = READ_ONCE(sock->ops);
 	if (!ops || !ops->read_skb)
-		return;
+		goto unlock;
+
 	copied = ops->read_skb(sk, sk_psock_verdict_recv);
 	if (copied >= 0) {
 		struct sk_psock *psock;
 
-		rcu_read_lock();
 		psock = sk_psock(sk);
 		if (psock)
 			sk_psock_data_ready(sk, psock);
-		rcu_read_unlock();
 	}
+unlock:
+	rcu_read_unlock();
 }
 
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)