Message ID | 20210509151615.200608-4-idosch@idosch.org |
---|---|
State | New |
Headers | show |
Series | Add support for custom multipath hash | expand |
On 5/9/21 9:16 AM, Ido Schimmel wrote: > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 9d61e969446e..a4c477475f4c 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -1906,6 +1906,121 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, > hash_keys->addrs.v4addrs.dst = key_iph->daddr; > } > > +static u32 fib_multipath_custom_hash_outer(const struct net *net, > + const struct sk_buff *skb, > + bool *p_has_inner) > +{ > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > + struct flow_keys keys, hash_keys; > + > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); > + > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) > + hash_keys.ports.src = keys.ports.src; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) > + hash_keys.ports.dst = keys.ports.dst; > + > + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); > + return flow_hash_from_keys(&hash_keys); > +} > + > +static u32 fib_multipath_custom_hash_inner(const struct net *net, > + const struct sk_buff *skb, > + bool has_inner) > +{ > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > + struct flow_keys keys, hash_keys; > + > + /* We assume the packet carries an encapsulation, but if none was > + * encountered during dissection of the outer flow, then there is no > + * point in calling the flow dissector again. 
> + */ > + if (!has_inner) > + return 0; > + > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + skb_flow_dissect_flow_keys(skb, &keys, 0); > + > + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) > + return 0; > + > + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) > + hash_keys.tags.flow_label = keys.tags.flow_label; > + } > + > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) > + hash_keys.ports.src = keys.ports.src; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) > + hash_keys.ports.dst = keys.ports.dst; > + > + return flow_hash_from_keys(&hash_keys); > +} > + > +static u32 fib_multipath_custom_hash_skb(const struct net *net, > + const struct sk_buff *skb) > +{ > + u32 mhash, mhash_inner; > + bool has_inner = true; > + Is it not possible to do the dissect once here and pass keys to outer and inner functions? 
memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, flag); > + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); > + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); > + > + return jhash_2words(mhash, mhash_inner, 0); > +} > + > +static u32 fib_multipath_custom_hash_fl4(const struct net *net, > + const struct flowi4 *fl4) > +{ > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > + struct flow_keys hash_keys; > + > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) > + hash_keys.addrs.v4addrs.src = fl4->saddr; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) > + hash_keys.addrs.v4addrs.dst = fl4->daddr; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) > + hash_keys.basic.ip_proto = fl4->flowi4_proto; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) > + hash_keys.ports.src = fl4->fl4_sport; > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) > + hash_keys.ports.dst = fl4->fl4_dport; > + > + return flow_hash_from_keys(&hash_keys); > +} > + > /* if skb is set it will be used and fl4 can be NULL */ > int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, > const struct sk_buff *skb, struct flow_keys *flkeys)
On Tue, May 11, 2021 at 09:46:27AM -0600, David Ahern wrote: > On 5/9/21 9:16 AM, Ido Schimmel wrote: > > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > > index 9d61e969446e..a4c477475f4c 100644 > > --- a/net/ipv4/route.c > > +++ b/net/ipv4/route.c > > @@ -1906,6 +1906,121 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, > > hash_keys->addrs.v4addrs.dst = key_iph->daddr; > > } > > > > +static u32 fib_multipath_custom_hash_outer(const struct net *net, > > + const struct sk_buff *skb, > > + bool *p_has_inner) > > +{ > > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > > + struct flow_keys keys, hash_keys; > > + > > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); > > + > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) > > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) > > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) > > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) > > + hash_keys.ports.src = keys.ports.src; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) > > + hash_keys.ports.dst = keys.ports.dst; > > + > > + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > +static u32 fib_multipath_custom_hash_inner(const struct net *net, > > + const struct sk_buff *skb, > > + bool has_inner) > > +{ > > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > > + struct flow_keys keys, hash_keys; > > + > > + /* We assume the packet carries an encapsulation, but if none was > > + * encountered during dissection of the outer 
flow, then there is no > > + * point in calling the flow dissector again. > > + */ > > + if (!has_inner) > > + return 0; > > + > > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + skb_flow_dissect_flow_keys(skb, &keys, 0); > > + > > + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) > > + return 0; > > + > > + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) > > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) > > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > > + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) > > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) > > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) > > + hash_keys.tags.flow_label = keys.tags.flow_label; > > + } > > + > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) > > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) > > + hash_keys.ports.src = keys.ports.src; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) > > + hash_keys.ports.dst = keys.ports.dst; > > + > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > +static u32 fib_multipath_custom_hash_skb(const struct net *net, > > + const struct sk_buff *skb) > > +{ > > + u32 mhash, mhash_inner; > > + bool has_inner = true; > > + > > Is it not possible to do the dissect once here and pass keys to outer > and inner functions? 
> > memset(&hash_keys, 0, sizeof(hash_keys)); > skb_flow_dissect_flow_keys(skb, &keys, flag); Not that I'm aware of. For outer flow we need to pass 'FLOW_DISSECTOR_F_STOP_AT_ENCAP'. For inner flow, we shouldn't pass any flags, but make sure encapsulation was encountered by checking 'keys.control.flags & FLOW_DIS_ENCAPSULATION'. Also, 'struct flow_keys' has keys for a single flow. > > > > + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); > > + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); > > + > > + return jhash_2words(mhash, mhash_inner, 0); > > +} > > + > > +static u32 fib_multipath_custom_hash_fl4(const struct net *net, > > + const struct flowi4 *fl4) > > +{ > > + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; > > + struct flow_keys hash_keys; > > + > > + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) > > + hash_keys.addrs.v4addrs.src = fl4->saddr; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) > > + hash_keys.addrs.v4addrs.dst = fl4->daddr; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) > > + hash_keys.basic.ip_proto = fl4->flowi4_proto; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) > > + hash_keys.ports.src = fl4->fl4_sport; > > + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) > > + hash_keys.ports.dst = fl4->fl4_dport; > > + > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > /* if skb is set it will be used and fl4 can be NULL */ > > int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, > > const struct sk_buff *skb, struct flow_keys *flkeys)
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 15982f830abc..2c3b7677222e 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER - 0 - Layer 3 - 1 - Layer 4 - 2 - Layer 3 or inner Layer 3 if present + - 3 - Custom multipath hash. Fields used for multipath hash calculation + are determined by fib_multipath_hash_fields sysctl fib_multipath_hash_fields - UNSIGNED INTEGER When fib_multipath_hash_policy is set to 3 (custom multipath hash), the diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9d61e969446e..a4c477475f4c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1906,6 +1906,121 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, hash_keys->addrs.v4addrs.dst = key_iph->daddr; } +static u32 fib_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 
fib_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool 
has_inner = true; + + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 fib_multipath_custom_hash_fl4(const struct net *net, + const struct flowi4 *fl4) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = fl4->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = fl4->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl4->flowi4_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl4->fl4_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) @@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } mhash = flow_hash_from_keys(&hash_keys); break; + case 3: + if (skb) + mhash = fib_multipath_custom_hash_skb(net, skb); + else + mhash = fib_multipath_custom_hash_fl4(net, fl4); + break; } if (multipath_hash) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index da627c4d633a..90b3b924b761 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -30,6 +30,7 @@ #include <net/netevent.h> static int two = 2; +static int three __maybe_unused = 3; static int four = 4; static int thousand = 1000; static int tcp_retr1_max = 255; @@ -1053,7 +1054,7 @@ static struct 
ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "fib_multipath_hash_fields",