Message ID | 20210502162257.3472453-8-idosch@idosch.org |
---|---|
State | New |
Headers | show |
Series | Add support for custom multipath hash | expand |
On 5/2/21 10:22 AM, Ido Schimmel wrote: > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > index 9935e18146e5..b4c65c5baf35 100644 > --- a/net/ipv6/route.c > +++ b/net/ipv6/route.c > @@ -2326,6 +2326,125 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, > } > } > > +static u32 rt6_multipath_custom_hash_outer(const struct net *net, > + const struct sk_buff *skb, > + bool *p_has_inner) > +{ > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > + struct flow_keys keys, hash_keys; > + > + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); > + > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) > + hash_keys.tags.flow_label = keys.tags.flow_label; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) > + hash_keys.ports.src = keys.ports.src; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) > + hash_keys.ports.dst = keys.ports.dst; > + > + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); > + return flow_hash_from_keys(&hash_keys); > +} > + > +static u32 rt6_multipath_custom_hash_inner(const struct net *net, > + const struct sk_buff *skb, > + bool has_inner) > +{ > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > + struct flow_keys keys, hash_keys; > + > + /* We assume the packet carries an encapsulation, but if none was > + * encountered during dissection of the outer flow, then there is no > + * point in calling the flow dissector again. > + */ > + if (!has_inner) > + return 0; > + > + if (!net->ipv6.sysctl.multipath_hash_fields_need_inner) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + skb_flow_dissect_flow_keys(skb, &keys, 0); > + > + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) > + return 0; > + > + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_FLOWLABEL, hash_fields)) > + hash_keys.tags.flow_label = keys.tags.flow_label; > + } > + > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_IP_PROTO, hash_fields)) > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_PORT, hash_fields)) > + hash_keys.ports.src = keys.ports.src; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_PORT, hash_fields)) > + hash_keys.ports.dst = keys.ports.dst; > + > + return flow_hash_from_keys(&hash_keys); > +} > + > +static u32 rt6_multipath_custom_hash_skb(const struct net *net, > + const struct sk_buff *skb) > +{ > + u32 mhash, mhash_inner; > + bool has_inner = true; > + > + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); > + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); > + > + return jhash_2words(mhash, mhash_inner, 0); > +} > + > +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, > + const struct flowi6 *fl6) > +{ > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > + struct flow_keys hash_keys; > + > + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) > + return 0; > + > + memset(&hash_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) > + hash_keys.addrs.v6addrs.src = fl6->saddr; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) > + hash_keys.addrs.v6addrs.dst = fl6->daddr; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) > + hash_keys.basic.ip_proto = fl6->flowi6_proto; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) > + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) > + hash_keys.ports.src = fl6->fl6_sport; > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) > + hash_keys.ports.dst = fl6->fl6_dport; > + > + return flow_hash_from_keys(&hash_keys); > +} > + given the amount of duplication with IPv4, should be able to use inline macros and the flowi_uli union to make some common helpers without impacting performance.
On Mon, May 03, 2021 at 08:46:18PM -0600, David Ahern wrote: > On 5/2/21 10:22 AM, Ido Schimmel wrote: > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > > index 9935e18146e5..b4c65c5baf35 100644 > > --- a/net/ipv6/route.c > > +++ b/net/ipv6/route.c > > @@ -2326,6 +2326,125 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, > > } > > } > > > > +static u32 rt6_multipath_custom_hash_outer(const struct net *net, > > + const struct sk_buff *skb, > > + bool *p_has_inner) > > +{ > > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > > + struct flow_keys keys, hash_keys; > > + > > + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); > > + > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) > > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) > > + hash_keys.tags.flow_label = keys.tags.flow_label; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) > > + hash_keys.ports.src = keys.ports.src; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) > > + hash_keys.ports.dst = keys.ports.dst; > > + > > + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > +static u32 rt6_multipath_custom_hash_inner(const struct net *net, > > + const struct sk_buff *skb, > > + bool has_inner) > > +{ > > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > > + struct flow_keys keys, hash_keys; > > + > > + /* We assume the packet carries an encapsulation, but if none was > > + * encountered during dissection of the outer flow, then there is no > > + * point in calling the flow dissector again. > > + */ > > + if (!has_inner) > > + return 0; > > + > > + if (!net->ipv6.sysctl.multipath_hash_fields_need_inner) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + skb_flow_dissect_flow_keys(skb, &keys, 0); > > + > > + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) > > + return 0; > > + > > + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) > > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) > > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > > + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_FLOWLABEL, hash_fields)) > > + hash_keys.tags.flow_label = keys.tags.flow_label; > > + } > > + > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_IP_PROTO, hash_fields)) > > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_PORT, hash_fields)) > > + hash_keys.ports.src = keys.ports.src; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_PORT, hash_fields)) > > + hash_keys.ports.dst = keys.ports.dst; > > + > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > +static u32 rt6_multipath_custom_hash_skb(const struct net *net, > > + const struct sk_buff *skb) > > +{ > > + u32 mhash, mhash_inner; > > + bool has_inner = true; > > + > > + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); > > + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); > > + > > + return jhash_2words(mhash, mhash_inner, 0); > > +} > > + > > +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, > > + const struct flowi6 *fl6) > > +{ > > + unsigned long *hash_fields = ip6_multipath_hash_fields(net); > > + struct flow_keys hash_keys; > > + > > + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) > > + return 0; > > + > > + memset(&hash_keys, 0, sizeof(hash_keys)); > > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.src = fl6->saddr; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) > > + hash_keys.addrs.v6addrs.dst = fl6->daddr; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) > > + hash_keys.basic.ip_proto = fl6->flowi6_proto; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) > > + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) > > + hash_keys.ports.src = fl6->fl6_sport; > > + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) > > + hash_keys.ports.dst = fl6->fl6_dport; > > + > > + return flow_hash_from_keys(&hash_keys); > > +} > > + > > given the amount of duplication with IPv4, should be able to use inline > macros and the flowi_uli union to make some common helpers without > impacting performance. OK, will try to create some common helpers
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 5289336227b3..8b88499fe555 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1774,6 +1774,8 @@ fib_multipath_hash_policy - INTEGER - 0 - Layer 3 (source and destination addresses plus flow label) - 1 - Layer 4 (standard 5-tuple) - 2 - Layer 3 or inner Layer 3 if present + - 3 - Custom multipath hash. Fields used for multipath hash calculation + are determined by fib_multipath_hash_fields sysctl fib_multipath_hash_fields - list of comma separated ranges When fib_multipath_hash_policy is set to 3 (custom multipath hash), the diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9935e18146e5..b4c65c5baf35 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2326,6 +2326,125 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, } } +static u32 rt6_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + unsigned long *hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) + hash_keys.tags.flow_label = keys.tags.flow_label; + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) + hash_keys.ports.src = keys.ports.src; + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + unsigned long *hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!net->ipv6.sysctl.multipath_hash_fields_need_inner) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields)) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields)) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_FLOWLABEL, hash_fields)) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_IP_PROTO, hash_fields)) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_PORT, hash_fields)) + hash_keys.ports.src = keys.ports.src; + if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_PORT, hash_fields)) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, + const struct flowi6 *fl6) +{ + unsigned long *hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys hash_keys; + + if (!net->ipv6.sysctl.multipath_hash_fields_need_outer) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields)) + hash_keys.addrs.v6addrs.src = fl6->saddr; + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields)) + hash_keys.addrs.v6addrs.dst = fl6->daddr; + if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields)) + hash_keys.basic.ip_proto = fl6->flowi6_proto; + if (FIB_MULTIPATH_HASH_TEST_FIELD(FLOWLABEL, hash_fields)) + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); + if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields)) + hash_keys.ports.src = fl6->fl6_sport; + if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields)) + hash_keys.ports.dst = fl6->fl6_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl6 can be NULL */ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) @@ -2416,6 +2535,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, } mhash = flow_hash_from_keys(&hash_keys); break; + case 3: + if (skb) + mhash = rt6_multipath_custom_hash_skb(net, skb); + else + mhash = rt6_multipath_custom_hash_fl6(net, fl6); + break; } return mhash >> 1; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 8d94a1d621d0..38d444b1bb60 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,6 +23,7 @@ #endif static int two = 2; +static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; @@ -174,7 +175,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "fib_multipath_hash_fields",