Message ID | 160200017655.719143.17344942455389603664.stgit@firesoul |
---|---|
State | New |
Headers | show |
Series | bpf: New approach for BPF MTU handling and enforcement | expand |
On Tue, Oct 6, 2020 at 9:03 AM Jesper Dangaard Brouer <brouer@redhat.com> wrote: > > The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup) > can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED. The BPF-prog > don't know the MTU value that caused this rejection. > > If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191) it > need to know this MTU value for the ICMP packet. > > Patch change lookup and result struct bpf_fib_lookup, to contain this MTU > value as output via a union with 'tot_len' as this is the value used for > the MTU lookup. > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> > --- > include/uapi/linux/bpf.h | 11 +++++++++-- > net/core/filter.c | 17 ++++++++++++----- > 2 files changed, 21 insertions(+), 7 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c446394135be..50ce65e37b16 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2216,6 +2216,9 @@ union bpf_attr { > * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the > * packet is not forwarded or needs assist from full stack > * > + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU > + * was exceeded and result params->mtu contains the MTU. > + * > * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) > * Description > * Add an entry to, or update a sockhash *map* referencing sockets. 
> @@ -4844,9 +4847,13 @@ struct bpf_fib_lookup { > __be16 sport; > __be16 dport; > > - /* total length of packet from network header - used for MTU check */ > - __u16 tot_len; > + union { /* used for MTU check */ > + /* input to lookup */ > + __u16 tot_len; /* total length of packet from network hdr */ > > + /* output: MTU value (if requested check_mtu) */ > + __u16 mtu; > + }; > /* input: L3 device index for lookup > * output: device index from FIB lookup > */ > diff --git a/net/core/filter.c b/net/core/filter.c > index fed239e77bdc..d84723f347c0 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5185,13 +5185,14 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { > #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) > static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, > const struct neighbour *neigh, > - const struct net_device *dev) > + const struct net_device *dev, u32 mtu) > { > memcpy(params->dmac, neigh->ha, ETH_ALEN); > memcpy(params->smac, dev->dev_addr, ETH_ALEN); > params->h_vlan_TCI = 0; > params->h_vlan_proto = 0; > params->ifindex = dev->ifindex; > + params->mtu = mtu; > > return 0; > } > @@ -5275,8 +5276,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > > if (check_mtu) { > mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); > - if (params->tot_len > mtu) > + if (params->tot_len > mtu) { > + params->mtu = mtu; /* union with tot_len */ > return BPF_FIB_LKUP_RET_FRAG_NEEDED; > + } > } > > nhc = res.nhc; > @@ -5309,7 +5312,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > if (!neigh) > return BPF_FIB_LKUP_RET_NO_NEIGH; > > - return bpf_fib_set_fwd_params(params, neigh, dev); > + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); > } > #endif > > @@ -5401,8 +5404,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > > if (check_mtu) { > mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); > - if 
(params->tot_len > mtu) > + if (params->tot_len > mtu) { > + params->mtu = mtu; /* union with tot_len */ > return BPF_FIB_LKUP_RET_FRAG_NEEDED; > + } > } > > if (res.nh->fib_nh_lws) > @@ -5421,7 +5426,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > if (!neigh) > return BPF_FIB_LKUP_RET_NO_NEIGH; > > - return bpf_fib_set_fwd_params(params, neigh, dev); > + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); > } > #endif > > @@ -5490,6 +5495,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, > dev = dev_get_by_index_rcu(net, params->ifindex); > if (!is_skb_forwardable(dev, skb)) > rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; > + > + params->mtu = dev->mtu; /* union with tot_len */ > } > > return rc; > > It would be beneficial to be able to fetch the route advmss, initcwnd, etc as well... But I take it the struct can't be extended?
Hi Jesper, url: https://github.com/0day-ci/linux/commits/Jesper-Dangaard-Brouer/bpf-New-approach-for-BPF-MTU-handling-and-enforcement/20201007-000903 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: s390-randconfig-m031-20201002 (attached as .config) compiler: s390-linux-gcc (GCC) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> smatch warnings: net/core/filter.c:5315 bpf_ipv4_fib_lookup() error: uninitialized symbol 'mtu'. vim +/mtu +5315 net/core/filter.c 87f5fc7e48dd31 David Ahern 2018-05-09 5202 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 4f74fede40df8d David Ahern 2018-05-21 5203 u32 flags, bool check_mtu) 87f5fc7e48dd31 David Ahern 2018-05-09 5204 { eba618abacade7 David Ahern 2019-04-02 5205 struct fib_nh_common *nhc; 87f5fc7e48dd31 David Ahern 2018-05-09 5206 struct in_device *in_dev; 87f5fc7e48dd31 David Ahern 2018-05-09 5207 struct neighbour *neigh; 87f5fc7e48dd31 David Ahern 2018-05-09 5208 struct net_device *dev; 87f5fc7e48dd31 David Ahern 2018-05-09 5209 struct fib_result res; 87f5fc7e48dd31 David Ahern 2018-05-09 5210 struct flowi4 fl4; 87f5fc7e48dd31 David Ahern 2018-05-09 5211 int err; 4f74fede40df8d David Ahern 2018-05-21 5212 u32 mtu; 87f5fc7e48dd31 David Ahern 2018-05-09 5213 87f5fc7e48dd31 David Ahern 2018-05-09 5214 dev = dev_get_by_index_rcu(net, params->ifindex); 87f5fc7e48dd31 David Ahern 2018-05-09 5215 if (unlikely(!dev)) 87f5fc7e48dd31 David Ahern 2018-05-09 5216 return -ENODEV; 87f5fc7e48dd31 David Ahern 2018-05-09 5217 87f5fc7e48dd31 David Ahern 2018-05-09 5218 /* verify forwarding is enabled on this interface */ 87f5fc7e48dd31 David Ahern 2018-05-09 5219 in_dev = __in_dev_get_rcu(dev); 87f5fc7e48dd31 David Ahern 2018-05-09 5220 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) 4c79579b44b187 David Ahern 2018-06-26 5221 return 
BPF_FIB_LKUP_RET_FWD_DISABLED; 87f5fc7e48dd31 David Ahern 2018-05-09 5222 87f5fc7e48dd31 David Ahern 2018-05-09 5223 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 87f5fc7e48dd31 David Ahern 2018-05-09 5224 fl4.flowi4_iif = 1; 87f5fc7e48dd31 David Ahern 2018-05-09 5225 fl4.flowi4_oif = params->ifindex; 87f5fc7e48dd31 David Ahern 2018-05-09 5226 } else { 87f5fc7e48dd31 David Ahern 2018-05-09 5227 fl4.flowi4_iif = params->ifindex; 87f5fc7e48dd31 David Ahern 2018-05-09 5228 fl4.flowi4_oif = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5229 } 87f5fc7e48dd31 David Ahern 2018-05-09 5230 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; 87f5fc7e48dd31 David Ahern 2018-05-09 5231 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 87f5fc7e48dd31 David Ahern 2018-05-09 5232 fl4.flowi4_flags = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5233 87f5fc7e48dd31 David Ahern 2018-05-09 5234 fl4.flowi4_proto = params->l4_protocol; 87f5fc7e48dd31 David Ahern 2018-05-09 5235 fl4.daddr = params->ipv4_dst; 87f5fc7e48dd31 David Ahern 2018-05-09 5236 fl4.saddr = params->ipv4_src; 87f5fc7e48dd31 David Ahern 2018-05-09 5237 fl4.fl4_sport = params->sport; 87f5fc7e48dd31 David Ahern 2018-05-09 5238 fl4.fl4_dport = params->dport; 1869e226a7b3ef David Ahern 2020-09-13 5239 fl4.flowi4_multipath_hash = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5240 87f5fc7e48dd31 David Ahern 2018-05-09 5241 if (flags & BPF_FIB_LOOKUP_DIRECT) { 87f5fc7e48dd31 David Ahern 2018-05-09 5242 u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; 87f5fc7e48dd31 David Ahern 2018-05-09 5243 struct fib_table *tb; 87f5fc7e48dd31 David Ahern 2018-05-09 5244 87f5fc7e48dd31 David Ahern 2018-05-09 5245 tb = fib_get_table(net, tbid); 87f5fc7e48dd31 David Ahern 2018-05-09 5246 if (unlikely(!tb)) 4c79579b44b187 David Ahern 2018-06-26 5247 return BPF_FIB_LKUP_RET_NOT_FWDED; 87f5fc7e48dd31 David Ahern 2018-05-09 5248 87f5fc7e48dd31 David Ahern 2018-05-09 5249 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); 87f5fc7e48dd31 David Ahern 2018-05-09 5250 } else { 87f5fc7e48dd31 David Ahern 2018-05-09 5251 fl4.flowi4_mark = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5252 fl4.flowi4_secid = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5253 fl4.flowi4_tun_key.tun_id = 0; 87f5fc7e48dd31 David Ahern 2018-05-09 5254 fl4.flowi4_uid = sock_net_uid(net, NULL); 87f5fc7e48dd31 David Ahern 2018-05-09 5255 87f5fc7e48dd31 David Ahern 2018-05-09 5256 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); 87f5fc7e48dd31 David Ahern 2018-05-09 5257 } 87f5fc7e48dd31 David Ahern 2018-05-09 5258 4c79579b44b187 David Ahern 2018-06-26 5259 if (err) { 4c79579b44b187 David Ahern 2018-06-26 5260 /* map fib lookup errors to RTN_ type */ 4c79579b44b187 David Ahern 2018-06-26 5261 if (err == -EINVAL) 4c79579b44b187 David Ahern 2018-06-26 5262 return BPF_FIB_LKUP_RET_BLACKHOLE; 4c79579b44b187 David Ahern 2018-06-26 5263 if (err == -EHOSTUNREACH) 4c79579b44b187 David Ahern 2018-06-26 5264 return BPF_FIB_LKUP_RET_UNREACHABLE; 4c79579b44b187 David Ahern 2018-06-26 5265 if (err == -EACCES) 4c79579b44b187 David Ahern 2018-06-26 5266 return BPF_FIB_LKUP_RET_PROHIBIT; 4c79579b44b187 David Ahern 2018-06-26 5267 4c79579b44b187 David Ahern 2018-06-26 5268 return BPF_FIB_LKUP_RET_NOT_FWDED; 4c79579b44b187 David Ahern 2018-06-26 5269 } 4c79579b44b187 David Ahern 2018-06-26 5270 4c79579b44b187 David Ahern 2018-06-26 5271 if (res.type != RTN_UNICAST) 4c79579b44b187 David Ahern 2018-06-26 5272 return BPF_FIB_LKUP_RET_NOT_FWDED; 87f5fc7e48dd31 David 
Ahern 2018-05-09 5273 5481d73f81549e David Ahern 2019-06-03 5274 if (fib_info_num_path(res.fi) > 1) 87f5fc7e48dd31 David Ahern 2018-05-09 5275 fib_select_path(net, &res, &fl4, NULL); 87f5fc7e48dd31 David Ahern 2018-05-09 5276 4f74fede40df8d David Ahern 2018-05-21 5277 if (check_mtu) { 4f74fede40df8d David Ahern 2018-05-21 5278 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); ab61fc7ee5c482 Jesper Dangaard Brouer 2020-10-06 5279 if (params->tot_len > mtu) { ab61fc7ee5c482 Jesper Dangaard Brouer 2020-10-06 5280 params->mtu = mtu; /* union with tot_len */ 4c79579b44b187 David Ahern 2018-06-26 5281 return BPF_FIB_LKUP_RET_FRAG_NEEDED; 4f74fede40df8d David Ahern 2018-05-21 5282 } ab61fc7ee5c482 Jesper Dangaard Brouer 2020-10-06 5283 } "mtu" is not initialized on else path. 4f74fede40df8d David Ahern 2018-05-21 5284 eba618abacade7 David Ahern 2019-04-02 5285 nhc = res.nhc; 87f5fc7e48dd31 David Ahern 2018-05-09 5286 87f5fc7e48dd31 David Ahern 2018-05-09 5287 /* do not handle lwt encaps right now */ eba618abacade7 David Ahern 2019-04-02 5288 if (nhc->nhc_lwtstate) 4c79579b44b187 David Ahern 2018-06-26 5289 return BPF_FIB_LKUP_RET_UNSUPP_LWT; 87f5fc7e48dd31 David Ahern 2018-05-09 5290 eba618abacade7 David Ahern 2019-04-02 5291 dev = nhc->nhc_dev; 87f5fc7e48dd31 David Ahern 2018-05-09 5292 87f5fc7e48dd31 David Ahern 2018-05-09 5293 params->rt_metric = res.fi->fib_priority; 87f5fc7e48dd31 David Ahern 2018-05-09 5294 87f5fc7e48dd31 David Ahern 2018-05-09 5295 /* xdp and cls_bpf programs are run in RCU-bh so 87f5fc7e48dd31 David Ahern 2018-05-09 5296 * rcu_read_lock_bh is not needed here 87f5fc7e48dd31 David Ahern 2018-05-09 5297 */ 6f5f68d05ec0f6 David Ahern 2019-04-05 5298 if (likely(nhc->nhc_gw_family != AF_INET6)) { 6f5f68d05ec0f6 David Ahern 2019-04-05 5299 if (nhc->nhc_gw_family) 6f5f68d05ec0f6 David Ahern 2019-04-05 5300 params->ipv4_dst = nhc->nhc_gw.ipv4; 6f5f68d05ec0f6 David Ahern 2019-04-05 5301 6f5f68d05ec0f6 David Ahern 2019-04-05 5302 neigh = 
__ipv4_neigh_lookup_noref(dev, 6f5f68d05ec0f6 David Ahern 2019-04-05 5303 (__force u32)params->ipv4_dst); 6f5f68d05ec0f6 David Ahern 2019-04-05 5304 } else { 6f5f68d05ec0f6 David Ahern 2019-04-05 5305 struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; 6f5f68d05ec0f6 David Ahern 2019-04-05 5306 6f5f68d05ec0f6 David Ahern 2019-04-05 5307 params->family = AF_INET6; 6f5f68d05ec0f6 David Ahern 2019-04-05 5308 *dst = nhc->nhc_gw.ipv6; 6f5f68d05ec0f6 David Ahern 2019-04-05 5309 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); 6f5f68d05ec0f6 David Ahern 2019-04-05 5310 } 6f5f68d05ec0f6 David Ahern 2019-04-05 5311 4c79579b44b187 David Ahern 2018-06-26 5312 if (!neigh) 4c79579b44b187 David Ahern 2018-06-26 5313 return BPF_FIB_LKUP_RET_NO_NEIGH; 87f5fc7e48dd31 David Ahern 2018-05-09 5314 ab61fc7ee5c482 Jesper Dangaard Brouer 2020-10-06 @5315 return bpf_fib_set_fwd_params(params, neigh, dev, mtu); ^^^ Uninitialized. 87f5fc7e48dd31 David Ahern 2018-05-09 5316 } --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Tue, 6 Oct 2020 18:34:50 -0700 Maciej Żenczykowski <maze@google.com> wrote: > On Tue, Oct 6, 2020 at 9:03 AM Jesper Dangaard Brouer <brouer@redhat.com> wrote: > > > > The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup) > > can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED. The BPF-prog > > don't know the MTU value that caused this rejection. > > > > If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191) it > > need to know this MTU value for the ICMP packet. > > > > Patch change lookup and result struct bpf_fib_lookup, to contain this MTU > > value as output via a union with 'tot_len' as this is the value used for > > the MTU lookup. > > > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> > > --- > > include/uapi/linux/bpf.h | 11 +++++++++-- > > net/core/filter.c | 17 ++++++++++++----- > > 2 files changed, 21 insertions(+), 7 deletions(-) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index c446394135be..50ce65e37b16 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h [...] > > @@ -4844,9 +4847,13 @@ struct bpf_fib_lookup { > > __be16 sport; > > __be16 dport; > > > > - /* total length of packet from network header - used for MTU check */ > > - __u16 tot_len; > > + union { /* used for MTU check */ > > + /* input to lookup */ > > + __u16 tot_len; /* total length of packet from network hdr */ > > > > + /* output: MTU value (if requested check_mtu) */ > > + __u16 mtu; > > + }; > > /* input: L3 device index for lookup > > * output: device index from FIB lookup > > */ [...] > > It would be beneficial to be able to fetch the route advmss, initcwnd, > etc as well... > But I take it the struct can't be extended? The struct bpf_fib_lookup is exactly 1 cache-line (64 bytes) for performance reasons. I do believe that it can be extended, as Ahern designed the BPF-helper API cleverly via a plen (detail below signature). 
For accessing other route metric information like advmss and initcwnd, I would expect Daniel to suggest using BTF to access info from dst_entry, or actually dst->_metrics. But looking at the details, accessing dst->_metrics is complicated by macros, thus I expect BTF would have a hard time.
On 10/7/20 12:42 AM, Jesper Dangaard Brouer wrote: > > The struct bpf_fib_lookup is exactly 1 cache-line (64 bytes) for > performance reasons. I do believe that it can be extended, as Ahern > designed the BPF-helper API cleverly via a plen (detail below signature). Yes, I kept it to 64B for performance reasons which is why most fields have 1 value on input and another on output. Technically it can be extended, but any cost in doing so should be absorbed by the new feature(s). Meaning, users just doing a fib lookup based on current API should not take a hit with the extra size.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c446394135be..50ce65e37b16 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2216,6 +2216,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and result params->mtu contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -4844,9 +4847,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* total length of packet from network hdr */ + /* output: MTU value (if requested check_mtu) */ + __u16 mtu; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index fed239e77bdc..d84723f347c0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5185,13 +5185,14 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, const struct neighbour *neigh, - const struct net_device *dev) + const struct net_device *dev, u32 mtu) { memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; params->ifindex = dev->ifindex; + params->mtu = mtu; return 0; } @@ -5275,8 +5276,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); - if (params->tot_len > mtu) + if (params->tot_len > mtu) { + params->mtu = mtu; /* union 
with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; + } } nhc = res.nhc; @@ -5309,7 +5312,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; - return bpf_fib_set_fwd_params(params, neigh, dev); + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); } #endif @@ -5401,8 +5404,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); - if (params->tot_len > mtu) + if (params->tot_len > mtu) { + params->mtu = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; + } } if (res.nh->fib_nh_lws) @@ -5421,7 +5426,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; - return bpf_fib_set_fwd_params(params, neigh, dev); + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); } #endif @@ -5490,6 +5495,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; + + params->mtu = dev->mtu; /* union with tot_len */ } return rc;
The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup) can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED. The BPF-prog doesn't know the MTU value that caused this rejection. If the BPF-prog wants to implement PMTU (Path MTU Discovery) (RFC 1191) it needs to know this MTU value for the ICMP packet. This patch changes the lookup and result struct bpf_fib_lookup to contain this MTU value as output via a union with 'tot_len', as this is the value used for the MTU lookup. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> --- include/uapi/linux/bpf.h | 11 +++++++++-- net/core/filter.c | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-)