From patchwork Mon Apr 20 07:54:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 220978 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.7 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id A31F5C3815B for ; Mon, 20 Apr 2020 07:55:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 824FE214AF for ; Mon, 20 Apr 2020 07:55:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726373AbgDTHzL (ORCPT ); Mon, 20 Apr 2020 03:55:11 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56969 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726161AbgDTHzL (ORCPT ); Mon, 20 Apr 2020 03:55:11 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 20 Apr 2020 10:54:31 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03K7sUfC026672; Mon, 20 Apr 2020 10:54:30 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, jiri@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V2 mlx5-next 03/10] bonding: Add helpers to get xmit slave Date: Mon, 20 Apr 2020 10:54:19 +0300 Message-Id: <20200420075426.31462-4-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200420075426.31462-1-maorg@mellanox.com> References: <20200420075426.31462-1-maorg@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This helpers will be used by both the xmit function and the get xmit slave ndo. Signed-off-by: Maor Gottlieb --- drivers/net/bonding/bond_alb.c | 35 ++++++++---- drivers/net/bonding/bond_main.c | 94 +++++++++++++++++++++------------ include/net/bond_alb.h | 4 ++ 3 files changed, 89 insertions(+), 44 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7bb49b049dcc..e863c694c309 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1334,11 +1334,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, return NETDEV_TX_OK; } -netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; struct slave *tx_slave = NULL; + struct ethhdr *eth_data; u32 hash_index; skb_reset_mac_header(skb); @@ -1369,20 +1369,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } } - return bond_do_alb_xmit(skb, bond, tx_slave); + return tx_slave; } -netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; + struct slave *tx_slave; + + tx_slave = bond_xmit_tlb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); +} + +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *tx_slave = NULL; static const __be32 ip_bcast = htonl(0xffffffff); - int hash_size = 0; + struct slave *tx_slave = NULL; + const u8 *hash_start = NULL; bool do_tx_balance = true; + struct ethhdr *eth_data; u32 hash_index = 0; - const u8 *hash_start = NULL; + int hash_size = 0; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); @@ -1501,7 +1510,15 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) count]; } } + return tx_slave; +} + +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; + tx_slave = bond_xmit_alb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2cb41d480ae2..7e04be86fda8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -82,6 +82,7 @@ #include #include #include +#include #include "bonding_priv.h" @@ -3406,10 +3407,26 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) (__force u32)flow_get_u32_src(&flow); hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash >> 1; } +static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, + struct sk_buff *skb, + struct bond_up_slave *slaves) +{ + struct slave *slave; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash(bond, skb); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + slave = slaves->arr[hash % count]; + return slave; +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -3923,16 +3940,15 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) } /** - * bond_xmit_slave_id - transmit skb through slave with slave_id + * bond_get_slave_by_id - get xmit slave with slave_id * @bond: bonding device that is transmitting - * @skb: buffer to transmit * @slave_id: slave id up to slave_cnt-1 through which to transmit * - * This function tries to transmit through slave with slave_id but in case + * This function tries to get slave with slave_id but in case * it fails, it tries to find the first available slave for transmission. - * The skb is consumed in all cases, thus the function is void. */ -static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static struct slave *bond_get_slave_by_id(struct bonding *bond, + int slave_id) { struct list_head *iter; struct slave *slave; @@ -3941,10 +3957,8 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl /* Here we start from the slave with slave_id */ bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } } @@ -3953,13 +3967,11 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } - /* no slave that can tx has been found */ - bond_tx_drop(bond->dev, skb); + + return NULL; } /** @@ -3995,10 +4007,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) return slave_id; } -static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, - struct net_device *bond_dev) +static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int slave_cnt; u32 slave_id; @@ -4020,24 +4031,40 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, if (iph->protocol == IPPROTO_IGMP) { slave = rcu_dereference(bond->curr_active_slave); if (slave) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_xmit_slave_id(bond, skb, 0); - return NETDEV_TX_OK; + return slave; + return bond_get_slave_by_id(bond, 0); } } non_igmp: slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { - slave_id = bond_rr_gen_slave_id(bond); - bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); - } else { - bond_tx_drop(bond_dev, skb); + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); } + return NULL; +} + +static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, + struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + slave = bond_xmit_roundrobin_slave_get(bond, skb); + if (slave) + bond_dev_queue_xmit(bond, skb, slave->dev); + else + bond_tx_drop(bond_dev, skb); return NETDEV_TX_OK; } +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + return rcu_dereference(bond->curr_active_slave); +} + /* In active-backup mode, we know that bond->curr_active_slave is always valid if * the bond has a usable interface. */ @@ -4047,7 +4074,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = rcu_dereference(bond->curr_active_slave); + slave = bond_xmit_activebackup_slave_get(bond, skb); if (slave) bond_dev_queue_xmit(bond, skb, slave->dev); else @@ -4193,18 +4220,15 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); - struct slave *slave; struct bond_up_slave *slaves; - unsigned int count; + struct slave *slave; slaves = rcu_dereference(bond->usable_slaves); - count = slaves ? READ_ONCE(slaves->count) : 0; - if (likely(count)) { - slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + if (likely(slave)) bond_dev_queue_xmit(bond, skb, slave->dev); - } else { + else bond_tx_drop(dev, skb); - } return NETDEV_TX_OK; } diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index b3504fcd773d..f6af76c87a6c 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb); +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb); void bond_alb_monitor(struct work_struct *); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); From patchwork Mon Apr 20 07:54:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 220980 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.7 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id BA5F9C3A5A0 for ; Mon, 20 Apr 2020 07:54:46 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A385521927 for ; Mon, 20 Apr 2020 07:54:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726343AbgDTHyq (ORCPT ); Mon, 20 Apr 2020 03:54:46 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:51573 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726232AbgDTHyl (ORCPT ); Mon, 20 Apr 2020 03:54:41 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 20 Apr 2020 10:54:31 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03K7sUfD026672; Mon, 20 Apr 2020 10:54:31 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, jiri@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V2 mlx5-next 04/10] bonding: Implement ndo_xmit_slave_get Date: Mon, 20 Apr 2020 10:54:20 +0300 Message-Id: <20200420075426.31462-5-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200420075426.31462-1-maorg@mellanox.com> References: <20200420075426.31462-1-maorg@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Add implementation of ndo_xmit_slave_get. When user sets the LAG_FLAGS_HASH_ALL_SLAVES bit and the xmit slave result is based on the hash, then the slave will be selected from the array of all the slaves. Signed-off-by: Maor Gottlieb --- drivers/net/bonding/bond_main.c | 123 +++++++++++++++++++++++++++----- include/net/bonding.h | 1 + 2 files changed, 105 insertions(+), 19 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7e04be86fda8..320bcb1394fd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4137,6 +4137,40 @@ static void bond_skip_slave(struct bond_up_slave *slaves, } } +static void bond_set_slave_arr(struct bonding *bond, + struct bond_up_slave *usable_slaves, + struct bond_up_slave *all_slaves) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + if (usable) + kfree_rcu(usable, rcu); + + all = rtnl_dereference(bond->all_slaves); + rcu_assign_pointer(bond->all_slaves, all_slaves); + if (all) + kfree_rcu(all, rcu); +} + +static void bond_reset_slave_arr(struct bonding *bond) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); + } +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4147,7 +4181,7 @@ static void bond_skip_slave(struct bond_up_slave *slaves, */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { - struct bond_up_slave *usable_slaves, *old_usable_slaves; + struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL; struct slave *slave; struct list_head *iter; int agg_id = 0; @@ -4159,7 +4193,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves = kzalloc(struct_size(usable_slaves, arr, bond->slave_cnt), GFP_KERNEL); - if (!usable_slaves) { + all_slaves = kzalloc(struct_size(all_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves || !all_slaves) { ret = -ENOMEM; goto out; } @@ -4168,20 +4204,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(usable_slaves, rcu); /* No active aggragator means it's not safe to use * the previous array. */ - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - if (old_usable_slaves) { - RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(old_usable_slaves, rcu); - } + bond_reset_slave_arr(bond); goto out; } agg_id = ad_info.aggregator_id; } bond_for_each_slave(bond, slave, iter) { + if (skipslave == slave) + continue; + + all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg; @@ -4191,8 +4226,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) } if (!bond_slave_can_tx(slave)) continue; - if (skipslave == slave) - continue; slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", usable_slaves->count); @@ -4200,14 +4233,17 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves->arr[usable_slaves->count++] = slave; } - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - rcu_assign_pointer(bond->usable_slaves, usable_slaves); - if (old_usable_slaves) - kfree_rcu(old_usable_slaves, rcu); + bond_set_slave_arr(bond, usable_slaves, all_slaves); + return ret; out: - if (ret != 0 && skipslave) + if (ret != 0 && skipslave) { + bond_skip_slave(rtnl_dereference(bond->all_slaves), + skipslave); bond_skip_slave(rtnl_dereference(bond->usable_slaves), skipslave); + } + kfree_rcu(all_slaves, rcu); + kfree_rcu(usable_slaves, rcu); return ret; } @@ -4313,6 +4349,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, return txq; } +static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, + struct sk_buff *skb, + u16 flags) +{ + struct bonding *bond = netdev_priv(master_dev); + struct bond_up_slave *slaves; + struct slave *slave = NULL; + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xmit_roundrobin_slave_get(bond, skb); + break; + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond, skb); + break; + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + if (flags & LAG_FLAGS_HASH_ALL_SLAVES) + slaves = rcu_dereference(bond->all_slaves); + else + slaves = rcu_dereference(bond->usable_slaves); + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + break; + case BOND_MODE_BROADCAST: + break; + case BOND_MODE_ALB: + slave = bond_xmit_alb_slave_get(bond, skb); + break; + case BOND_MODE_TLB: + slave = bond_xmit_tlb_slave_get(bond, skb); + break; + default: + /* Should never happen, mode already checked */ + WARN_ONCE(true, "Unknown bonding mode"); + break; + } + + if (slave) + return slave->dev; + return NULL; +} + static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); @@ -4434,6 +4512,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, + .ndo_xmit_get_slave = bond_xmit_get_slave, }; static const struct device_type bond_type = { @@ -4501,9 +4580,9 @@ void bond_setup(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct bond_up_slave *usable, *all; struct list_head *iter; struct slave *slave; - struct bond_up_slave *arr; bond_netpoll_cleanup(bond_dev); @@ -4512,10 +4591,16 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->usable_slaves); - if (arr) { + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(arr, rcu); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); } list_del(&bond->bond_list); diff --git a/include/net/bonding.h b/include/net/bonding.h index 33bdb6d5182d..a2a7f461fa63 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -201,6 +201,7 @@ struct bonding { struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; struct bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */ + struct bond_up_slave __rcu *all_slaves; /* Array of all slaves */ bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, From patchwork Mon Apr 20 07:54:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 220982 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.7 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id E4178C54FCC for ; Mon, 20 Apr 2020 07:54:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CD97722250 for ; Mon, 20 Apr 2020 07:54:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726214AbgDTHyi (ORCPT ); Mon, 20 Apr 2020 03:54:38 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56970 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726112AbgDTHyh (ORCPT ); Mon, 20 Apr 2020 03:54:37 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 20 Apr 2020 10:54:31 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03K7sUfE026672; Mon, 20 Apr 2020 10:54:31 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, jiri@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V2 mlx5-next 05/10] RDMA/core: Add LAG functionality Date: Mon, 20 Apr 2020 10:54:21 +0300 Message-Id: <20200420075426.31462-6-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200420075426.31462-1-maorg@mellanox.com> References: <20200420075426.31462-1-maorg@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Add support to get the RoCE LAG xmit slave by building skb of the RoCE packet and call to master_xmit_slave_get. If driver wants to get the slave assume all slaves are available, then need to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags. Signed-off-by: Maor Gottlieb --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/lag.c | 139 +++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/lag.h | 22 +++++ 4 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/lag.c create mode 100644 include/rdma/lag.h diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d1b14887960e..870f0fcd54d5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ - trace.o + trace.o lag.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c new file mode 100644 index 000000000000..007c06cae3d8 --- /dev/null +++ b/drivers/infiniband/core/lag.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#include +#include +#include + +static struct sk_buff *rdma_build_skb(struct ib_device *device, + struct net_device *netdev, + struct rdma_ah_attr *ah_attr) +{ + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *eth; + struct iphdr *iph; + struct udphdr *uh; + u8 smac[ETH_ALEN]; + bool is_ipv4; + int hdr_len; + + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw); + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev); + hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr); + + skb = alloc_skb(hdr_len, GFP_ATOMIC); + if (!skb) + return NULL; + + skb->dev = netdev; + skb_reserve(skb, hdr_len); + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = htons(0xC000); + uh->dest = htons(ROCE_V2_UDP_DPORT); + uh->len = htons(sizeof(struct udphdr)); + + if (is_ipv4) { + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + iph->frag_off = 0; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iph->ihl = 0x5; + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct + iphdr)); + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12, + sizeof(struct in_addr)); + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12, + sizeof(struct in_addr)); + } else { + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_UDP; + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label, + sizeof(*ip6h->flow_lbl)); + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw, + sizeof(struct in6_addr)); + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw, + sizeof(struct in6_addr)); + } + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6); + rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac); + memcpy(eth->h_source, smac, ETH_ALEN); + memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN); + + return skb; +} + +static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, + struct net_device *master, + struct rdma_ah_attr *ah_attr) +{ + struct net_device *slave; + struct sk_buff *skb; + u16 flags; + + skb = rdma_build_skb(device, master, ah_attr); + if (!skb) + return NULL; + + flags = device->lag_flags & RDMA_LAG_FLAGS_HASH_ALL_SLAVES ? + LAG_FLAGS_HASH_ALL_SLAVES : 0; + slave = master_xmit_get_slave(master, skb, flags); + kfree_skb(skb); + return slave; +} + +void rdma_lag_put_ah_roce_slave(struct rdma_ah_attr *ah_attr) +{ + if (ah_attr->roce.xmit_slave) + dev_put(ah_attr->roce.xmit_slave); +} + +int rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr) +{ + struct net_device *master; + struct net_device *slave; + + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) + return 0; + + rcu_read_lock(); + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr); + if (IS_ERR(master)) { + rcu_read_unlock(); + return PTR_ERR(master); + } + dev_hold(master); + rcu_read_unlock(); + + if (!netif_is_bond_master(master)) { + dev_put(master); + return 0; + } + + slave = rdma_get_xmit_slave_udp(device, master, ah_attr); + + dev_put(master); + if (!slave) { + ibdev_warn(device, "Failed to get lag xmit slave\n"); + return -EINVAL; + } + + ah_attr->roce.xmit_slave = slave; + + return 0; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbc5cfb57cd2..60f9969b6d83 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -894,6 +894,7 @@ struct ib_ah_attr { struct roce_ah_attr { u8 dmac[ETH_ALEN]; + struct net_device *xmit_slave; }; struct opa_ah_attr { @@ -2709,6 +2710,7 @@ struct ib_device { /* Used by iWarp CM */ char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; + u32 lag_flags; }; struct ib_client_nl_info; diff --git a/include/rdma/lag.h b/include/rdma/lag.h new file mode 100644 index 000000000000..a71511824207 --- /dev/null +++ b/include/rdma/lag.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_LAG_H_ +#define _RDMA_LAG_H_ + +#include + +struct ib_device; +struct rdma_ah_attr; + +enum rdma_lag_flags { + RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 +}; + +void rdma_lag_put_ah_roce_slave(struct rdma_ah_attr *ah_attr); +int rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr); + +#endif /* _RDMA_LAG_H_ */ From patchwork Mon Apr 20 07:54:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 220981 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.7 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id CE73EC3815B for ; Mon, 20 Apr 2020 07:54:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B34C321927 for ; Mon, 20 Apr 2020 07:54:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726323AbgDTHyl (ORCPT ); Mon, 20 Apr 2020 03:54:41 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:51595 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726240AbgDTHyj (ORCPT ); Mon, 20 Apr 2020 03:54:39 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 20 Apr 2020 10:54:31 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03K7sUfF026672; Mon, 20 Apr 2020 10:54:31 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, jiri@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V2 mlx5-next 06/10] RDMA/core: Get xmit slave for LAG Date: Mon, 20 Apr 2020 10:54:22 +0300 Message-Id: <20200420075426.31462-7-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200420075426.31462-1-maorg@mellanox.com> References: <20200420075426.31462-1-maorg@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Add a call to rdma_lag_get_ah_roce_slave when Address handle is created. Low driver can use it to select the QP's affinity port. Signed-off-by: Maor Gottlieb --- drivers/infiniband/core/verbs.c | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 56a71337112c..a0d60376ba6b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "core_priv.h" #include @@ -554,8 +555,14 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (ret) return ERR_PTR(ret); - ah = _rdma_create_ah(pd, ah_attr, flags, NULL); + ret = rdma_lag_get_ah_roce_slave(pd->device, ah_attr); + if (ret) { + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ERR_PTR(ret); + } + ah = _rdma_create_ah(pd, ah_attr, flags, NULL); + rdma_lag_put_ah_roce_slave(ah_attr); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } @@ -1638,6 +1645,25 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, &old_sgid_attr_av); if (ret) return ret; + + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + /* + * If the user provided the qp_attr then we have to + * resolve it. Kerne users have to provide already + * resolved rdma_ah_attr's. + */ + if (udata) { + ret = ib_resolve_eth_dmac(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } + ret = rdma_lag_get_ah_roce_slave(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } } if (attr_mask & IB_QP_ALT_PATH) { /* @@ -1664,18 +1690,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } - /* - * If the user provided the qp_attr then we have to resolve it. Kernel - * users have to provide already resolved rdma_ah_attr's - */ - if (udata && (attr_mask & IB_QP_AV) && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto out; - } - if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, @@ -1717,8 +1731,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { + rdma_lag_put_ah_roce_slave(&attr->ah_attr); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); + } return ret; }