@@ -25,6 +25,7 @@ enum {
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
};
struct mlx5e_tc_flow_parse_attr {
@@ -59,6 +60,11 @@ struct encap_flow_item {
int index;
};
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
@@ -70,6 +76,11 @@ struct mlx5e_tc_flow {
struct list_head l3_to_l2_reformat;
struct mlx5e_decap_entry *decap_reformat;
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
/* Flow can be associated with multiple encap IDs.
* The number of encaps is bounded by the number of supported
* destinations.
@@ -7,6 +7,23 @@
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ struct rcu_head rcu;
+};
+
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
{
@@ -29,10 +46,13 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
tun_attr->dst_ip.v4 = *(__be32 *)daddr;
tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (ip_version == 6) {
int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+ struct in6_addr zerov6 = {};
daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
@@ -40,8 +60,15 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
+ !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
+ return 0;
}
#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
return 0;
}
@@ -325,6 +352,7 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
return;
+ list_del(&e->route_list);
hash_del_rcu(&e->encap_hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
@@ -343,12 +371,20 @@ static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d
mlx5e_decap_dealloc(priv, d);
}
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
void mlx5e_detach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow, int out_index)
{
struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ if (flow->attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
/* flow wasn't fully initialized */
if (!e)
return;
@@ -360,6 +396,7 @@ void mlx5e_detach_encap(struct mlx5e_priv *priv,
mutex_unlock(&esw->offloads.encap_tbl_lock);
return;
}
+ list_del(&e->route_list);
hash_del_rcu(&e->encap_hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
@@ -531,6 +568,12 @@ static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
return err;
}
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ int out_index);
+
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
@@ -545,6 +588,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
const struct ip_tunnel_info *tun_info;
struct encap_key key;
struct mlx5e_encap_entry *e;
+ bool entry_created = false;
unsigned short family;
uintptr_t hash_key;
int err = 0;
@@ -592,6 +636,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
refcount_set(&e->refcnt, 1);
init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
tun_info = mlx5e_dup_tun_info(tun_info);
if (!tun_info) {
@@ -622,8 +668,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
e->compl_result = 1;
attach_flow:
- err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
- e->route_dev_ifindex, out_index);
+ err = mlx5e_attach_encap_route(priv, flow, e, entry_created, out_index);
if (err)
goto out_err;
@@ -732,3 +777,212 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
mutex_unlock(&esw->offloads.decap_tbl_lock);
return err;
}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_priv *priv, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(esw->offloads.route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ hash_key = hash_route_info(key);
+ r = mlx5e_route_get(priv, key, hash_key);
+ if (r)
+ return r;
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+ hash_add(esw->offloads.route_tbl, &r->hlist, hash_key);
+ return r;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
@@ -22,6 +22,11 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
void mlx5e_detach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
@@ -181,6 +181,7 @@ struct mlx5e_encap_entry {
*/
struct hlist_node encap_hlist;
struct list_head flows;
+ struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
@@ -1251,6 +1251,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
+ if (flow_flag_test(flow, TUN_RX)) {
+ err = mlx5e_attach_decap_route(priv, flow);
+ if (err)
+ return err;
+ }
+
if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
err = mlx5e_attach_decap(priv, flow, extack);
if (err)
@@ -1335,8 +1341,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
int out_index;
+ esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
if (flow_flag_test(flow, NOT_READY))
@@ -1354,11 +1362,15 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+ if (flow->decap_route)
+ mlx5e_detach_decap_route(priv, flow);
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
+ }
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
@@ -1368,7 +1380,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
+ mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
@@ -3692,12 +3704,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
}
- if (decap && esw_attr->rx_tun_attr) {
- err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
- if (err)
- return err;
- }
-
/* always set IP version for indirect table handling */
attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
@@ -1830,6 +1830,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->offloads.decap_tbl_lock);
hash_init(esw->offloads.decap_tbl);
mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
+ hash_init(esw->offloads.route_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
@@ -214,6 +214,7 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
+ DECLARE_HASHTABLE(route_tbl, 8);
struct mutex decap_tbl_lock; /* protects decap_tbl */
DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;