mbox series

[bpf,v3,0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings.

Message ID 20230426085122.376768-1-gilad9366@gmail.com
Headers show
Series Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings. | expand

Message

Gilad Sever April 26, 2023, 8:51 a.m. UTC
When calling socket lookup from L2 (tc, xdp), VRF boundaries aren't
respected. This patchset fixes this by regarding the incoming device's
VRF attachment when performing the socket lookups from tc/xdp.

The first two patches are coding changes which factor out the tc helper's
logic which was shared with cg/sk_skb (which operate correctly).

This refactoring is needed in order to avoid affecting the cgroup/sk_skb
flows as there does not seem to be a strict criterion for discerning which
flow the helper is called from based on the net device or packet
information.

The third patch contains the actual bugfix.

The fourth patch adds bpf tests for these lookup functions.
---
v3: - Rename bpf_l2_sdif() to dev_sdif() as suggested by Stanislav Fomichev
    - Added xdp tests as suggested by Daniel Borkmann
    - Use start_server() to avoid duplicate code as suggested by Stanislav Fomichev

v2: Fixed uninitialized var in test patch (4).

Gilad Sever (4):
  bpf: factor out socket lookup functions for the TC hookpoint.
  bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC
    hookpoint
  bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings
  selftests/bpf: Add vrf_socket_lookup tests

 net/core/filter.c                             | 132 +++++--
 .../bpf/prog_tests/vrf_socket_lookup.c        | 327 ++++++++++++++++++
 .../selftests/bpf/progs/vrf_socket_lookup.c   |  88 +++++
 3 files changed, 526 insertions(+), 21 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/vrf_socket_lookup.c

Comments

Stanislav Fomichev April 27, 2023, 6:03 p.m. UTC | #1
On 04/26, Gilad Sever wrote:
> When calling bpf_sk_lookup_tcp(), bpf_sk_lookup_udp() or
> bpf_skc_lookup_tcp() from tc/xdp ingress, VRF socket bindings aren't
> respected, i.e. unbound sockets are returned, and bound sockets aren't
> found.
> 
> VRF binding is determined by the sdif argument to sk_lookup(), however
> when called from tc the IP SKB control block isn't initialized and thus
> inet{,6}_sdif() always returns 0.
> 
> Fix by calculating sdif for the tc/xdp flows by observing the device's
> l3 enslaved state.
> 
> The cg/sk_skb hooking points which are expected to support
> inet{,6}_sdif() pass sdif=-1 which makes __bpf_skc_lookup() use the
> existing logic.
> 
> Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
> Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
> Signed-off-by: Gilad Sever <gilad9366@gmail.com>

Acked-by: Stanislav Fomichev <sdf@google.com>

with one nit below

> ---
> v3: Rename bpf_l2_sdif() to dev_sdif() as suggested by Stanislav Fomichev
> ---
>  net/core/filter.c | 63 +++++++++++++++++++++++++++++++----------------
>  1 file changed, 42 insertions(+), 21 deletions(-)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index f43f86fc1235..894913aaa29f 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6529,12 +6529,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
>  static struct sock *
>  __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
>  		 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
> -		 u64 flags)
> +		 u64 flags, int sdif)
>  {
>  	struct sock *sk = NULL;
>  	struct net *net;
>  	u8 family;
> -	int sdif;
>  
>  	if (len == sizeof(tuple->ipv4))
>  		family = AF_INET;
> @@ -6546,10 +6545,12 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
>  	if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
>  		goto out;
>  
> -	if (family == AF_INET)
> -		sdif = inet_sdif(skb);
> -	else
> -		sdif = inet6_sdif(skb);
> +	if (sdif < 0) {
> +		if (family == AF_INET)
> +			sdif = inet_sdif(skb);
> +		else
> +			sdif = inet6_sdif(skb);
> +	}
>  
>  	if ((s32)netns_id < 0) {
>  		net = caller_net;
> @@ -6569,10 +6570,11 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
>  static struct sock *
>  __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
>  		struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
> -		u64 flags)
> +		u64 flags, int sdif)
>  {
>  	struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
> -					   ifindex, proto, netns_id, flags);
> +					   ifindex, proto, netns_id, flags,
> +					   sdif);
>  
>  	if (sk) {
>  		struct sock *sk2 = sk_to_full_sk(sk);
> @@ -6612,7 +6614,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
>  	}
>  
>  	return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
> -				netns_id, flags);
> +				netns_id, flags, -1);
>  }
>  
>  static struct sock *
> @@ -6701,15 +6703,25 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
>  	.arg5_type	= ARG_ANYTHING,
>  };
 

[..]

> +static int dev_sdif(const struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
> +	if (netif_is_l3_slave(dev))
> +		return dev->ifindex;
> +#endif
> +	return 0;
> +}


nit: should this go into include/linux/netdevice.h?

> +
>  BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(skb->dev);
> +	int sdif = dev_sdif(skb->dev);
>  	int ifindex = skb->dev->ifindex;
>  
>  	return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
>  					       ifindex, IPPROTO_TCP, netns_id,
> -					       flags);
> +					       flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
> @@ -6728,11 +6740,12 @@ BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(skb->dev);
> +	int sdif = dev_sdif(skb->dev);
>  	int ifindex = skb->dev->ifindex;
>  
>  	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
>  					      ifindex, IPPROTO_TCP, netns_id,
> -					      flags);
> +					      flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
> @@ -6751,11 +6764,12 @@ BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(skb->dev);
> +	int sdif = dev_sdif(skb->dev);
>  	int ifindex = skb->dev->ifindex;
>  
>  	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
>  					      ifindex, IPPROTO_UDP, netns_id,
> -					      flags);
> +					      flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
> @@ -6788,11 +6802,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(ctx->rxq->dev);
> -	int ifindex = ctx->rxq->dev->ifindex;
> +	struct net_device *dev = ctx->rxq->dev;
> +	int sdif = dev_sdif(dev);
> +	int ifindex = dev->ifindex;
>  
>  	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
>  					      ifindex, IPPROTO_UDP, netns_id,
> -					      flags);
> +					      flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
> @@ -6811,11 +6827,13 @@ BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(ctx->rxq->dev);
> -	int ifindex = ctx->rxq->dev->ifindex;
> +	struct net_device *dev = ctx->rxq->dev;
> +	int sdif = dev_sdif(dev);
> +	int ifindex = dev->ifindex;
>  
>  	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
>  					       ifindex, IPPROTO_TCP, netns_id,
> -					       flags);
> +					       flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
> @@ -6834,11 +6852,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
>  	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
>  {
>  	struct net *caller_net = dev_net(ctx->rxq->dev);
> -	int ifindex = ctx->rxq->dev->ifindex;
> +	struct net_device *dev = ctx->rxq->dev;
> +	int sdif = dev_sdif(dev);
> +	int ifindex = dev->ifindex;
>  
>  	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
>  					      ifindex, IPPROTO_TCP, netns_id,
> -					      flags);
> +					      flags, sdif);
>  }
>  
>  static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
> @@ -6858,7 +6878,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
>  {
>  	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
>  					       sock_net(ctx->sk), 0,
> -					       IPPROTO_TCP, netns_id, flags);
> +					       IPPROTO_TCP, netns_id, flags,
> +					       -1);
>  }
>  
>  static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
> @@ -6877,7 +6898,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
>  {
>  	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
>  					      sock_net(ctx->sk), 0, IPPROTO_TCP,
> -					      netns_id, flags);
> +					      netns_id, flags, -1);
>  }
>  
>  static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
> @@ -6896,7 +6917,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
>  {
>  	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
>  					      sock_net(ctx->sk), 0, IPPROTO_UDP,
> -					      netns_id, flags);
> +					      netns_id, flags, -1);
>  }
>  
>  static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
> -- 
> 2.34.1
>
Stanislav Fomichev April 27, 2023, 6:03 p.m. UTC | #2
On 04/26, Gilad Sever wrote:
> Change BPF helper socket lookup functions to use TC specific variants:
> bpf_tc_sk_lookup_tcp() / bpf_tc_sk_lookup_udp() / bpf_tc_skc_lookup_tcp()
> instead of sharing implementation with the cg / sk_skb hooking points.
> This allows introducing a separate logic for the TC flow.
> 
> The tc functions are identical to the original code.
> 
> Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
> Signed-off-by: Gilad Sever <gilad9366@gmail.com>

Acked-by: Stanislav Fomichev <sdf@google.com>

> ---
>  net/core/filter.c | 63 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 60 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1d6f165923bf..5910956f4e0d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6701,6 +6701,63 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
>  	.arg5_type	= ARG_ANYTHING,
>  };
>  
> +BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
> +	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
> +{
> +	return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
> +					     netns_id, flags);
> +}
> +
> +static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
> +	.func		= bpf_tc_skc_lookup_tcp,
> +	.gpl_only	= false,
> +	.pkt_access	= true,
> +	.ret_type	= RET_PTR_TO_SOCK_COMMON_OR_NULL,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
> +	.arg3_type	= ARG_CONST_SIZE,
> +	.arg4_type	= ARG_ANYTHING,
> +	.arg5_type	= ARG_ANYTHING,
> +};
> +
> +BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
> +	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
> +{
> +	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
> +					    netns_id, flags);
> +}
> +
> +static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
> +	.func		= bpf_tc_sk_lookup_tcp,
> +	.gpl_only	= false,
> +	.pkt_access	= true,
> +	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
> +	.arg3_type	= ARG_CONST_SIZE,
> +	.arg4_type	= ARG_ANYTHING,
> +	.arg5_type	= ARG_ANYTHING,
> +};
> +
> +BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
> +	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
> +{
> +	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
> +					    netns_id, flags);
> +}
> +
> +static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
> +	.func		= bpf_tc_sk_lookup_udp,
> +	.gpl_only	= false,
> +	.pkt_access	= true,
> +	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
> +	.arg3_type	= ARG_CONST_SIZE,
> +	.arg4_type	= ARG_ANYTHING,
> +	.arg5_type	= ARG_ANYTHING,
> +};
> +
>  BPF_CALL_1(bpf_sk_release, struct sock *, sk)
>  {
>  	if (sk && sk_is_refcounted(sk))
> @@ -7954,9 +8011,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  #endif
>  #ifdef CONFIG_INET
>  	case BPF_FUNC_sk_lookup_tcp:
> -		return &bpf_sk_lookup_tcp_proto;
> +		return &bpf_tc_sk_lookup_tcp_proto;
>  	case BPF_FUNC_sk_lookup_udp:
> -		return &bpf_sk_lookup_udp_proto;
> +		return &bpf_tc_sk_lookup_udp_proto;
>  	case BPF_FUNC_sk_release:
>  		return &bpf_sk_release_proto;
>  	case BPF_FUNC_tcp_sock:
> @@ -7964,7 +8021,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  	case BPF_FUNC_get_listener_sock:
>  		return &bpf_get_listener_sock_proto;
>  	case BPF_FUNC_skc_lookup_tcp:
> -		return &bpf_skc_lookup_tcp_proto;
> +		return &bpf_tc_skc_lookup_tcp_proto;
>  	case BPF_FUNC_tcp_check_syncookie:
>  		return &bpf_tcp_check_syncookie_proto;
>  	case BPF_FUNC_skb_ecn_set_ce:
> -- 
> 2.34.1
>