
[3/3] selftests: forwarding: Add test for load-balancing between multiple servers

Message ID 20230819114825.30867-4-sriram.yagnaraman@est.tech
State New
Series Avoid TCP resets when using ECMP for load-balancing between multiple servers.

Commit Message

Sriram Yagnaraman Aug. 19, 2023, 11:48 a.m. UTC
Create a topology with three hosts and a router, each in its own network
namespace. Test IPv4 and IPv6 multipath routing from h1 to h2/h3 via
router r1, where a multipath route is set up to load-balance between h2
and h3.

See diagram in the test for more information.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
---
 .../testing/selftests/net/forwarding/Makefile |   1 +
 .../net/forwarding/router_multipath_vip.sh    | 403 ++++++++++++++++++
 2 files changed, 404 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_multipath_vip.sh

Comments

Ido Schimmel Aug. 21, 2023, 11:34 a.m. UTC | #1
On Sat, Aug 19, 2023 at 01:48:25PM +0200, Sriram Yagnaraman wrote:
> Create a topology with three hosts and a router, each in its own network
> namespace. Test IPv4 and IPv6 multipath routing from h1 to h2/h3 via
> router r1, where a multipath route is set up to load-balance between h2
> and h3.
> 
> See diagram in the test for more information.

How are you running this test? At least with veth pairs it is passing
both before and after the patches. I didn't look into the veth driver;
it might not even use the listified path.

Also, I'm seeing the following errors during the test:

sysctl: setting key "net.ipv4.fib_multipath_hash_policy": Invalid argument
sysctl: setting key "net.ipv6.fib_multipath_hash_policy": Invalid argument
Sriram Yagnaraman Aug. 21, 2023, 7:36 p.m. UTC | #2
> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: Monday, 21 August 2023 13:35
> To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> Cc: netdev@vger.kernel.org; linux-kselftest@vger.kernel.org; David S. Miller
> <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub
> Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; David Ahern
> <dsahern@kernel.org>; Ido Schimmel <idosch@nvidia.com>; Shuah Khan
> <shuah@kernel.org>
> Subject: Re: [PATCH 3/3] selftests: forwarding: Add test for load-balancing
> between multiple servers
> 
> On Sat, Aug 19, 2023 at 01:48:25PM +0200, Sriram Yagnaraman wrote:
> > Create a topology with three hosts and a router, each in its own network
> > namespace. Test IPv4 and IPv6 multipath routing from h1 to h2/h3 via
> > router r1, where a multipath route is set up to load-balance between h2
> > and h3.
> >
> > See diagram in the test for more information.
> 
> How are you running this test? At least with veth pairs it is passing both before
> and after the patches. I didn't look into the veth driver; it might not even use
> the listified path.

I agree, the test is flaky; it neither definitively fails before the patches nor definitively passes after them. Checking the packet transmit counters is probably not the best way to test this. I will try to rewrite this selftest using ncat.
For my own testing I use mconnect [0], another utility that checks whether a TCP connection succeeds, but I guess using it in a selftest is not an option.
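
For reference, the kind of check I do with it looks roughly like the following (invocation from memory of the mconnect README, so treat the flags as approximate); the client prints how the connections were spread across the servers:

# on h2 and h3 (ns-h2/ns-h3), a server that also answers for the VIP on lo:
mconnect -server -address :5001
# on h1 (ns-h1), open many short connections to the VIP through r1:
mconnect -address 198.18.0.0:5001 -nconn 400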

Do you think it would be OK to drop this patch from the series for now? I can come back with the selftest when I have something working correctly?

> 
> Also, I'm seeing the following errors during the test:
> 
> sysctl: setting key "net.ipv4.fib_multipath_hash_policy": Invalid argument
> sysctl: setting key "net.ipv6.fib_multipath_hash_policy": Invalid argument
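
For reference, the keys can be exercised by hand inside the router namespace to see whether the value itself is rejected (ns-r1 is the namespace the test creates):

ip netns exec ns-r1 sysctl net.ipv4.fib_multipath_hash_policy
ip netns exec ns-r1 sysctl -w net.ipv4.fib_multipath_hash_policy=1
ip netns exec ns-r1 sysctl -w net.ipv6.fib_multipath_hash_policy=1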

[0]: https://github.com/Nordix/mconnect
Ido Schimmel Aug. 22, 2023, 6:57 p.m. UTC | #3
On Mon, Aug 21, 2023 at 07:36:47PM +0000, Sriram Yagnaraman wrote:
> Do you think it would be OK to drop this patch from the series for now? I can come back with the selftest when I have something working correctly?

There's a more direct way of testing it and that's by counting the
number of times the relevant FIB trace point was triggered. This script
[1] does it for IPv4. For IPv6 the equivalent trace point is called
fib6:fib6_table_lookup. The script can obviously be made nicer.

Before the patches:

# ./mp_repo.sh 
10020

After the patches:

# ./mp_repo.sh 
65535

You can see that after the patches the trace point is triggered for
every packet. Sometimes the count is a bit lower, I assume because some
events are lost.

Another approach would be to tweak the current test so that $h1 and $rp1
are configured in a similar fashion to veth0 and veth1.
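
Roughly, something along these lines in the test, after the interfaces have been moved into their namespaces (an untested sketch that mirrors the veth0/veth1 settings in [1] below; $h1/$rp1 and ns-h1/ns-r1 are the names the test already uses):

ip netns exec ns-h1 ethtool -K $h1 tcp-segmentation-offload off
ip netns exec ns-r1 ethtool -K $rp1 generic-receive-offload on
ip netns exec ns-r1 sh -c "echo 20000 > /sys/class/net/$rp1/gro_flush_timeout"
ip netns exec ns-r1 sh -c "echo 1 > /sys/class/net/$rp1/napi_defer_hard_irqs"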

[1]
#!/bin/bash

ip link del dev veth0 &> /dev/null
ip netns del ns1 &> /dev/null

ip link add name veth0 type veth peer name veth1
ethtool -K veth0 tcp-segmentation-offload off
ethtool -K veth1 generic-receive-offload on
echo 20000 > /sys/class/net/veth1/gro_flush_timeout
echo 1 > /sys/class/net/veth1/napi_defer_hard_irqs

ip netns add ns1

ip link set dev veth0 up
ip address add 192.0.2.1/28 dev veth0

ip link set dev veth1 netns ns1
ip -n ns1 link set dev veth1 up
ip -n ns1 address add 192.0.2.2/28 dev veth1

ip -n ns1 link set dev lo up
ip netns exec ns1 sysctl -w -q net.ipv4.conf.all.forwarding=1
ip netns exec ns1 sysctl -w -q net.ipv4.fib_multipath_hash_policy=1

ip -n ns1 link add name dummy1 up type dummy
ip -n ns1 address add 192.0.2.17/28 dev dummy1
ip -n ns1 neigh add 192.0.2.18 lladdr 00:11:22:33:44:55 nud perm dev dummy1
ip -n ns1 neigh add 192.0.2.19 lladdr 00:aa:bb:cc:dd:ee nud perm dev dummy1
ip -n ns1 route add 198.51.100.0/24 nexthop via 192.0.2.18 nexthop via 192.0.2.19

dmac=$(ip -n ns1 -j link show dev veth1 | jq -r '.[]["address"]')
fout=$(mktemp)
perf stat -o $fout -j -e fib:fib_table_lookup -- \
        mausezahn veth0 -a own -b $dmac -A 192.0.2.1 -B 198.51.100.10 \
        -t udp "sp=12345,dp=0-65535" -q
tail -n 1 $fout | jq '.["counter-value"] | tonumber | floor'
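
For IPv6, the measurement end of the script would look roughly like this (untested sketch; it assumes the topology above is re-addressed with 2001:db8: prefixes, e.g. veth0 as 2001:db8:1::1/64 and veth1 as 2001:db8:1::2/64, that net.ipv6.fib_multipath_hash_policy is set to 1 in ns1, and that a 2001:db8:100::/64 multipath route points at two static neighbours):

perf stat -o $fout -j -e fib6:fib6_table_lookup -- \
        mausezahn veth0 -6 -a own -b $dmac -A 2001:db8:1::1 -B 2001:db8:100::10 \
        -t udp "sp=12345,dp=0-65535" -q
tail -n 1 $fout | jq '.["counter-value"] | tonumber | floor'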
Sriram Yagnaraman Aug. 23, 2023, 12:55 p.m. UTC | #4
> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: Tuesday, 22 August 2023 20:57
> To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> Cc: netdev@vger.kernel.org; linux-kselftest@vger.kernel.org; David S. Miller
> <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub
> Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; David Ahern
> <dsahern@kernel.org>; Ido Schimmel <idosch@nvidia.com>; Shuah Khan
> <shuah@kernel.org>
> Subject: Re: [PATCH 3/3] selftests: forwarding: Add test for load-balancing
> between multiple servers
> 
> On Mon, Aug 21, 2023 at 07:36:47PM +0000, Sriram Yagnaraman wrote:
> > Do you think it would be OK to drop this patch from the series for now? I can
> come back with the selftest when I have something working correctly?
> 
> There's a more direct way of testing it and that's by counting the number of
> times the relevant FIB trace point was triggered. This script [1] does it for IPv4.
> For IPv6 the equivalent trace point is called fib6:fib6_table_lookup. The script
> can obviously be made nicer.
> 
> Before the patches:
> 
> # ./mp_repo.sh
> 10020
> 
> After the patches:
> 
> # ./mp_repo.sh
> 65535
> 
> You can see that after the patches the trace point is triggered for every packet.
> Sometimes the count is a bit lower, I assume because some events are lost.
> 
> Another approach would be to tweak the current test so that $h1 and $rp1 are
> configured in a similar fashion to veth0 and veth1.
> 

Nice. Thanks a lot, I will send v2 with this approach.

Patch

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 770efbe24f0d..bf4e5745fd5c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -70,6 +70,7 @@  TEST_PROGS = bridge_igmp.sh \
 	router_mpath_nh.sh \
 	router_multicast.sh \
 	router_multipath.sh \
+	router_multipath_vip.sh \
 	router_nh.sh \
 	router.sh \
 	router_vid_1.sh \
diff --git a/tools/testing/selftests/net/forwarding/router_multipath_vip.sh b/tools/testing/selftests/net/forwarding/router_multipath_vip.sh
new file mode 100755
index 000000000000..15c7598d42df
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath_vip.sh
@@ -0,0 +1,403 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |              $h1 + |                     | + $h2                |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64     + vip                   2001:db8:2::1/64   |
+# |                        198.18.0.0/24                            |
+# |                        2001:db8:18::/64    + $rp3               |
+# |                                            | 203.0.113.1/24     |
+# |                                            | 2001:db8:3::1/64   |
+# |                                            |                    |
+# |                                            |                    |
+# +--------------------------------------------|--------------------+
+#                                              |
+#                                            +-|--------------------+
+#                                            | |                 H3 |
+#                                            | |                    |
+#                                            | | 203.0.113.2/24     |
+#                                            | | 2001:db8:3::2/64   |
+#                                            | + $h3                |
+#                                            |                      |
+#                                            +----------------------+
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=6
+source lib.sh
+
+ns_create()
+{
+	ns=$1
+
+	ip netns add $ns
+	in_ns $ns ip link set dev lo up
+	in_ns $ns sysctl -q -w net.ipv4.ip_forward=1
+	in_ns $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+ns_destroy()
+{
+	ip netns del $1
+}
+
+h1_create()
+{
+	local ns="ns-h1"
+
+	ns_create $ns
+	ip link set dev $h1 netns $ns
+
+	in_ns $ns ip link set dev $h1 up
+
+	in_ns $ns ip address add 192.0.2.2/24 dev $h1
+	in_ns $ns ip address add 2001:db8:1::2/64 dev $h1
+
+	in_ns $ns ip route add default via 192.0.2.1
+	in_ns $ns ip route add default via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	local ns="ns-h1"
+
+	in_ns $ns ip route del default via 2001:db8:1::1
+	in_ns $ns ip route del default via 192.0.2.1
+
+	in_ns $ns ip address del 2001:db8:1::2/64 dev $h1
+	in_ns $ns ip address del 192.0.2.2/24 dev $h1
+
+	in_ns $ns ip link set dev $h1 down
+	in_ns $ns ip link set dev $h1 netns 1
+	ns_destroy $ns
+}
+
+h2_create()
+{
+	local ns="ns-h2"
+
+	ns_create $ns
+	ip link set dev $h2 netns $ns
+
+	in_ns $ns ip link set dev $h2 up
+
+	in_ns $ns ip address add 198.51.100.2/24 dev $h2
+	in_ns $ns ip address add 2001:db8:2::2/64 dev $h2
+
+	in_ns $ns ip address add 198.18.0.0/24 dev lo
+	in_ns $ns ip address add 2001:db8:18::/64 dev lo
+
+	in_ns $ns ip route add 192.0.2.0/24 via 198.51.100.1
+	in_ns $ns ip route add 2001:db8:1::/64 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	local ns="ns-h2"
+
+	in_ns $ns ip route del 2001:db8:1::/64 nexthop via 2001:db8:2::1
+	in_ns $ns ip route del 192.0.2.0/24 via 198.51.100.1
+
+	in_ns $ns ip address del 2001:db8:18::/64 dev lo
+	in_ns $ns ip address del 198.18.0.0/24 dev lo
+
+	in_ns $ns ip address del 2001:db8:2::2/64 dev $h2
+	in_ns $ns ip address del 198.51.100.2/24 dev $h2
+
+	in_ns $ns ip link set dev $h2 down
+	in_ns $ns ip link set dev $h2 netns 1
+	ns_destroy $ns
+}
+
+h3_create()
+{
+	local ns="ns-h3"
+
+	ns_create $ns
+	ip link set dev $h3 netns $ns
+
+	in_ns $ns ip link set dev $h3 up
+
+	in_ns $ns ip address add 203.0.113.2/24 dev $h3
+	in_ns $ns ip address add 2001:db8:3::2/64 dev $h3
+
+	in_ns $ns ip address add 198.18.0.0/24 dev lo
+	in_ns $ns ip address add 2001:db8:18::/64 dev lo
+
+	in_ns $ns ip route add 192.0.2.0/24 via 203.0.113.1
+	in_ns $ns ip route add 2001:db8:1::/64 nexthop via 2001:db8:3::1
+}
+
+h3_destroy()
+{
+	local ns="ns-h3"
+
+	in_ns $ns ip route del 2001:db8:1::/64 nexthop via 2001:db8:3::1
+	in_ns $ns ip route del 192.0.2.0/24 via 203.0.113.1
+
+	in_ns $ns ip address del 198.18.0.0/24 dev lo
+	in_ns $ns ip address del 2001:db8:18::/64 dev lo
+
+	in_ns $ns ip address del 2001:db8:3::2/64 dev $h3
+	in_ns $ns ip address del 203.0.113.2/24 dev $h3
+
+	in_ns $ns ip link set dev $h3 down
+	in_ns $ns ip link set dev $h3 netns 1
+	ns_destroy $ns
+}
+
+router1_create()
+{
+	local ns="ns-r1"
+
+	ns_create $ns
+	ip link set dev $rp1 netns $ns
+	ip link set dev $rp2 netns $ns
+	ip link set dev $rp3 netns $ns
+
+	in_ns $ns ip link set dev $rp1 up
+	in_ns $ns ip link set dev $rp2 up
+	in_ns $ns ip link set dev $rp3 up
+
+	in_ns $ns ip address add 192.0.2.1/24 dev $rp1
+	in_ns $ns ip address add 2001:db8:1::1/64 dev $rp1
+
+	in_ns $ns ip address add 198.51.100.1/24 dev $rp2
+	in_ns $ns ip address add 2001:db8:2::1/64 dev $rp2
+
+	in_ns $ns ip address add 203.0.113.1/24 dev $rp3
+	in_ns $ns ip address add 2001:db8:3::1/64 dev $rp3
+
+	in_ns $ns ip route add 198.18.0.0/24 \
+		nexthop via 198.51.100.2 \
+		nexthop via 203.0.113.2
+	in_ns $ns ip route add 2001:db8:18::/64 \
+		nexthop via 2001:db8:2::2 \
+		nexthop via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+	local ns="ns-r1"
+
+	in_ns $ns ip route del 2001:db8:18::/64
+	in_ns $ns ip route del 198.18.0.0/24
+
+	in_ns $ns ip address del 2001:db8:3::1/64 dev $rp3
+	in_ns $ns ip address del 203.0.113.1/24 dev $rp3
+
+	in_ns $ns ip address del 2001:db8:2::1/64 dev $rp2
+	in_ns $ns ip address del 198.51.100.1/24 dev $rp2
+
+	in_ns $ns ip address del 2001:db8:1::1/64 dev $rp1
+	in_ns $ns ip address del 192.0.2.1/24 dev $rp1
+
+	in_ns $ns ip link set dev $rp3 down
+	in_ns $ns ip link set dev $rp2 down
+	in_ns $ns ip link set dev $rp1 down
+
+	in_ns $ns ip link set dev $rp3 netns 1
+	in_ns $ns ip link set dev $rp2 netns 1
+	in_ns $ns ip link set dev $rp1 netns 1
+	ns_destroy $ns
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp2=$2
+	local weight_rp3=$3
+	local t0_rp2 t0_rp3 t1_rp2 t1_rp3
+	local packets_rp2 packets_rp3
+
+	# Transmit multiple flows from h1 to the VIP (198.18.0.0) and make
+	# sure they are distributed between both multipath links (rp2 and
+	# rp3) according to the configured weights.
+	in_ns ns-r1 sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	in_ns ns-r1 ip route replace 198.18.0.0/24 \
+		nexthop via 198.51.100.2 weight $weight_rp2 \
+		nexthop via 203.0.113.2 weight $weight_rp3
+
+	t0_rp2=$(in_ns ns-r1 link_stats_tx_packets_get $rp2)
+	t0_rp3=$(in_ns ns-r1 link_stats_tx_packets_get $rp3)
+
+	in_ns ns-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.18.0.0 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp2=$(in_ns ns-r1 link_stats_tx_packets_get $rp2)
+	t1_rp3=$(in_ns ns-r1 link_stats_tx_packets_get $rp3)
+
+	let "packets_rp2 = $t1_rp2 - $t0_rp2"
+	let "packets_rp3 = $t1_rp3 - $t0_rp3"
+	in_ns ns-r1 multipath_eval "$desc" $weight_rp2 $weight_rp3 $packets_rp2 $packets_rp3
+
+	in_ns ns-r1 ip route replace 198.18.0.0/24 \
+		nexthop via 198.51.100.2 \
+		nexthop via 203.0.113.2
+
+	in_ns ns-r1 sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp2=$2
+	local weight_rp3=$3
+	local t0_rp2 t0_rp3 t1_rp2 t1_rp3
+	local packets_rp2 packets_rp3
+
+	# Transmit multiple flows from h1 to the VIP (2001:db8:18::) and
+	# make sure they are distributed between both multipath links (rp2
+	# and rp3) according to the configured weights.
+	in_ns ns-r1 sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	in_ns ns-r1 ip route replace 2001:db8:18::/64 \
+		nexthop via 2001:db8:2::2 weight $weight_rp2 \
+		nexthop via 2001:db8:3::2 weight $weight_rp3
+
+	t0_rp2=$(in_ns ns-r1 link_stats_tx_packets_get $rp2)
+	t0_rp3=$(in_ns ns-r1 link_stats_tx_packets_get $rp3)
+
+	in_ns ns-h1 $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:18::0 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp2=$(in_ns ns-r1 link_stats_tx_packets_get $rp2)
+	t1_rp3=$(in_ns ns-r1 link_stats_tx_packets_get $rp3)
+
+	let "packets_rp2 = $t1_rp2 - $t0_rp2"
+	let "packets_rp3 = $t1_rp3 - $t0_rp3"
+	in_ns ns-r1 multipath_eval "$desc" $weight_rp2 $weight_rp3 $packets_rp2 $packets_rp3
+
+	in_ns ns-r1 ip route replace 2001:db8:18::/64 \
+		nexthop via 2001:db8:2::2 \
+		nexthop via 2001:db8:3::2
+
+	in_ns ns-r1 sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h1_create
+	h2_create
+	h3_create
+
+	router1_create
+
+	forwarding_enable
+}
+
+setup_wait()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	in_ns ns-h1 setup_wait_dev $h1
+	in_ns ns-h2 setup_wait_dev $h2
+	in_ns ns-h3 setup_wait_dev $h3
+	in_ns ns-r1 setup_wait_dev $rp1
+	in_ns ns-r1 setup_wait_dev $rp2
+	in_ns ns-r1 setup_wait_dev $rp3
+
+	# Make sure links are ready.
+	sleep $WAIT_TIME
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router1_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+}
+
+ping_test()
+{
+	RET=0
+
+	local ns=$1
+	local dip=$2
+	local args=$3
+
+	in_ns $ns $PING $args $dip -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT &> /dev/null
+	check_err $?
+	log_test "ping$args"
+}
+
+ping6_test()
+{
+	RET=0
+
+	local ns=$1
+	local dip=$2
+	local args=$3
+
+	in_ns $ns $PING6 $args $dip -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT &> /dev/null
+	check_err $?
+	log_test "ping6$args"
+}
+
+ping_ipv4()
+{
+	ping_test ns-h1 198.51.100.2
+	ping_test ns-h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test ns-h1 2001:db8:2::2
+	ping6_test ns-h1 2001:db8:3::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS