new file mode 100644
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CONNTRACK_PROTO_ESP_H
+#define _CONNTRACK_PROTO_ESP_H
+#include <asm/byteorder.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+/* ESP protocol header: only the leading SPI word is read by conntrack. */
+
+struct esphdr {
+	__be32 spi;	/* Security Parameters Index, big endian as on the wire */
+};
+
+struct nf_ct_esp { /* ESP member of union nf_conntrack_proto */
+ __u32 l_spi, r_spi; /* copied from the tracking entry when the flow becomes assured */
+};
+
+void nf_ct_esp_pernet_flush(struct net *net);
+
+void destroy_esp_conntrack_entry(struct nf_conn *ct);
+
+bool esp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple);
+#endif /* _CONNTRACK_PROTO_ESP_H */
@@ -25,5 +25,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
#ifdef CONFIG_NF_CT_PROTO_GRE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre;
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_esp;
+#endif
#endif /*_NF_CONNTRACK_IPV4_H*/
@@ -21,6 +21,7 @@
#include <linux/netfilter/nf_conntrack_dccp.h>
#include <linux/netfilter/nf_conntrack_sctp.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_proto_esp.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -36,6 +37,7 @@ union nf_conntrack_proto {
struct ip_ct_tcp tcp;
struct nf_ct_udp udp;
struct nf_ct_gre gre;
+ struct nf_ct_esp esp;
unsigned int tmpl_padto;
};
@@ -47,6 +49,10 @@ struct nf_conntrack_net {
unsigned int users4;
unsigned int users6;
unsigned int users_bridge;
+
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ DECLARE_BITMAP(esp_id_map, 1024);
+#endif
};
#include <linux/types.h>
@@ -75,6 +75,8 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig);
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig);
+bool nf_conntrack_invert_esp_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig);
int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
@@ -132,6 +134,11 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state);
+int nf_conntrack_esp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state);
void nf_conntrack_generic_init_net(struct net *net);
void nf_conntrack_tcp_init_net(struct net *net);
@@ -141,6 +148,8 @@ void nf_conntrack_dccp_init_net(struct net *net);
void nf_conntrack_sctp_init_net(struct net *net);
void nf_conntrack_icmp_init_net(struct net *net);
void nf_conntrack_icmpv6_init_net(struct net *net);
+int nf_conntrack_esp_init(void);
+void nf_conntrack_esp_init_net(struct net *net);
/* Existing built-in generic protocol */
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
@@ -240,4 +249,11 @@ static inline struct nf_gre_net *nf_gre_pernet(struct net *net)
}
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+/* Return the ESP conntrack state embedded in @net's conntrack data. */
+static inline struct nf_esp_net *nf_esp_pernet(struct net *net)
+{
+	struct nf_ip_net *ip_net = &net->ct.nf_ct_proto;
+
+	return &ip_net->esp;
+}
+#endif
+
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
@@ -62,6 +62,9 @@ struct nf_conntrack_tuple {
struct {
__be16 key;
} gre;
+ struct {
+ __be16 id;
+ } esp;
} u;
/* The protocol. */
@@ -69,6 +69,20 @@ struct nf_gre_net {
};
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+enum esp_conntrack { /* indexes into the ESP timeout table */
+ ESP_CT_UNREPLIED, /* no reply seen yet */
+ ESP_CT_REPLIED, /* traffic seen in both directions */
+ ESP_CT_MAX /* number of timeout slots */
+};
+
+struct nf_esp_net { /* per network namespace ESP conntrack state */
+ spinlock_t id_list_lock; /* taken around walks and updates of id_list */
+ struct list_head id_list; /* ESP tracking entries allocated in this netns */
+ unsigned int esp_timeouts[ESP_CT_MAX]; /* in jiffies, indexed by enum esp_conntrack */
+};
+#endif
+
struct nf_ip_net {
struct nf_generic_net generic;
struct nf_tcp_net tcp;
@@ -84,6 +98,9 @@ struct nf_ip_net {
#ifdef CONFIG_NF_CT_PROTO_GRE
struct nf_gre_net gre;
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ struct nf_esp_net esp;
+#endif
};
struct ct_pcpu {
@@ -39,6 +39,9 @@ union nf_conntrack_man_proto {
struct {
__be16 key; /* GRE key is 32bit, PPtP only uses 16bit */
} gre;
+ struct {
+ __be16 id;
+ } esp;
};
#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
@@ -90,6 +90,8 @@ enum ctattr_l4proto {
CTA_PROTO_ICMPV6_ID,
CTA_PROTO_ICMPV6_TYPE,
CTA_PROTO_ICMPV6_CODE,
+ CTA_PROTO_SRC_ESP_ID,
+ CTA_PROTO_DST_ESP_ID,
__CTA_PROTO_MAX
};
#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
@@ -199,6 +199,16 @@ config NF_CT_PROTO_UDPLITE
If unsure, say Y.
+config NF_CT_PROTO_ESP
+ bool "ESP protocol support"
+ depends on NETFILTER_ADVANCED
+ help
+ ESP connection tracking helper. Provides connection tracking for IPsec
+ clients behind this device based on SPI, especially useful for
+ distinguishing multiple clients when using NAT.
+
+ If unsure, say N.
+
config NF_CONNTRACK_AMANDA
tristate "Amanda backup protocol support"
depends on NETFILTER_ADVANCED
@@ -14,6 +14,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_ESP) += nf_conntrack_proto_esp.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -295,6 +295,10 @@ nf_ct_get_tuple(const struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
return gre_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ case IPPROTO_ESP:
+ return esp_pkt_to_tuple(skb, dataoff, net, tuple);
#endif
case IPPROTO_TCP:
case IPPROTO_UDP: /* fallthrough */
@@ -439,6 +443,10 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
#if IS_ENABLED(CONFIG_IPV6)
case IPPROTO_ICMPV6:
return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ case IPPROTO_ESP:
+ return nf_conntrack_invert_esp_tuple(inverse, orig);
#endif
}
@@ -593,6 +601,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
#endif
}
+static void destroy_esp_conntrack(struct nf_conn *ct) /* ESP hook called from destroy_conntrack() */
+{
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ destroy_esp_conntrack_entry(ct); /* release the ESP tracking entries ct's tuples refer to */
+#endif /* empty body when ESP tracking is not built */
+}
+
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
@@ -609,6 +624,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
destroy_gre_conntrack(ct);
+ if (unlikely(nf_ct_protonum(ct) == IPPROTO_ESP))
+ destroy_esp_conntrack(ct);
+
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -1783,6 +1801,11 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
case IPPROTO_GRE:
return nf_conntrack_gre_packet(ct, skb, dataoff,
ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ case IPPROTO_ESP:
+ return nf_conntrack_esp_packet(ct, skb, dataoff,
+ ctinfo, state);
#endif
}
@@ -1382,7 +1382,9 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
CTA_FILTER_F_CTA_PROTO_ICMP_ID | \
CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \
CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \
- CTA_FILTER_F_CTA_PROTO_ICMPV6_ID)
+ CTA_FILTER_F_CTA_PROTO_ICMPV6_ID | \
+ CTA_FILTER_F_CTA_PROTO_SRC_ESP_ID | \
+ CTA_FILTER_F_CTA_PROTO_DST_ESP_ID)
static int
ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
@@ -112,6 +112,9 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ case IPPROTO_ESP: return &nf_conntrack_l4proto_esp;
+#endif
#if IS_ENABLED(CONFIG_IPV6)
case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
#endif /* CONFIG_IPV6 */
@@ -656,6 +659,12 @@ int nf_conntrack_proto_init(void)
goto cleanup_sockopt;
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ ret = nf_conntrack_esp_init();
+ if (ret < 0)
+ goto cleanup_sockopt;
+#endif
+
return ret;
#if IS_ENABLED(CONFIG_IPV6)
@@ -691,6 +700,9 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#ifdef CONFIG_NF_CT_PROTO_GRE
nf_conntrack_gre_init_net(net);
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ nf_conntrack_esp_init_net(net);
+#endif
}
void nf_conntrack_proto_pernet_fini(struct net *net)
@@ -698,6 +710,9 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
#ifdef CONFIG_NF_CT_PROTO_GRE
nf_ct_gre_keymap_flush(net);
#endif
+#ifdef CONFIG_NF_CT_PROTO_ESP
+ nf_ct_esp_pernet_flush(net);
+#endif
}
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
new file mode 100644
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * <:copyright-gpl
+ * Copyright 2008 Broadcom Corp. All Rights Reserved.
+ * Copyright (C) 2021 Allied Telesis Labs NZ
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.
+ * :>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/dst.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <linux/netfilter/nf_conntrack_proto_esp.h>
+#include <net/netns/hash.h>
+#include <linux/rhashtable.h>
+#include <net/ipv6.h>
+
+#include "nf_internals.h"
+
+/* esp_id of 0 is left for unassigned values */
+#define TEMP_SPI_START 1
+#define TEMP_SPI_MAX (TEMP_SPI_START + 1024 - 1)
+
+struct _esp_entry { /* one tracked ESP session; its esp_id is what the conntrack tuples carry */
+ /* linked list node for per net lookup via esp_id */
+ struct list_head net_node;
+
+ /* Hash table nodes for each required lookup
+ * lnode: net->hash_mix, l_spi, l_ip, r_ip
+ * rnode: net->hash_mix, r_spi, r_ip
+ * incmpl_rlist: net->hash_mix, r_ip
+ */
+ struct rhash_head lnode;
+ struct rhash_head rnode;
+ struct rhlist_head incmpl_rlist;
+
+ u16 esp_id; /* id bitmap slot + TEMP_SPI_START */
+
+ u16 l3num; /* address family taken from the creating packet's tuple */
+
+ u32 l_spi; /* SPI of the packet that created the entry, host byte order */
+ u32 r_spi; /* SPI first seen from r_ip; set when the entry moves to rtable */
+
+ union nf_inet_addr l_ip; /* source address of the creating packet */
+ union nf_inet_addr r_ip; /* destination address of the creating packet */
+
+ u32 alloc_time_jiffies; /* nfct_time_stamp at allocation */
+ struct net *net; /* owning network namespace */
+};
+
+struct _esp_hkey { /* lookup key shared by ltable, rtable and incmpl_rtable */
+ u8 l3num; /* address family */
+ union nf_inet_addr src_ip; /* l_ip for ltable; the all-zero address for the other two tables */
+ union nf_inet_addr dst_ip; /* remote peer address (r_ip) */
+ u32 net_hmix; /* net_hash_mix() of the owning netns */
+ u32 spi; /* l_spi for ltable, r_spi for rtable, 0 for incmpl_rtable */
+};
+
+extern unsigned int nf_conntrack_net_id;
+
+static struct rhashtable ltable;
+static struct rhashtable rtable;
+static struct rhltable incmpl_rtable;
+static unsigned int esp_timeouts[ESP_CT_MAX] = { /* defaults copied into each netns by nf_conntrack_esp_init_net() */
+ [ESP_CT_UNREPLIED] = 60 * HZ, /* 60 seconds */
+ [ESP_CT_REPLIED] = 3600 * HZ, /* one hour */
+};
+
+/* Copy the address that is meaningful for family @af from @src to @dst:
+ * the whole IPv6 address for AF_INET6, only the IPv4 word otherwise.
+ */
+static void esp_ip_addr_copy(int af, union nf_inet_addr *dst,
+			     const union nf_inet_addr *src)
+{
+	switch (af) {
+	case AF_INET6:
+		dst->in6 = src->in6;
+		break;
+	default:
+		dst->ip = src->ip;
+		break;
+	}
+}
+
+/* Return non-zero when @a and @b hold the same address for family @af. */
+static int esp_ip_addr_equal(int af, const union nf_inet_addr *a,
+			     const union nf_inet_addr *b)
+{
+	if (af != AF_INET6)
+		return a->ip == b->ip;
+
+	return ipv6_addr_equal(&a->in6, &b->in6);
+}
+
+/* Shorthand for the ESP conntrack state of @net. */
+static inline struct nf_esp_net *esp_pernet(struct net *net)
+{
+	return nf_esp_pernet(net);
+}
+
+/* Build the lookup key used by the ESP hash tables.
+ *
+ * @net_hmix: net_hash_mix() of the namespace the key belongs to
+ * @spi:      SPI to match (0 for the incomplete-remote table)
+ * @l3num:    address family; selects how much of each address is copied
+ * @src_ip:   local address, or the all-zero address for remote lookups
+ * @dst_ip:   remote peer address
+ * @key:      output, fully overwritten
+ *
+ * Keys are hashed with jhash() and compared with memcmp() over
+ * sizeof(struct _esp_hkey), so every byte has to be deterministic. That
+ * includes compiler padding and the tail of the address unions that an
+ * IPv4 address does not use. Clear the key here instead of depending on
+ * how the caller initialised it.
+ */
+static inline void calculate_key(const u32 net_hmix, const u32 spi,
+				 const u8 l3num,
+				 const union nf_inet_addr *src_ip,
+				 const union nf_inet_addr *dst_ip,
+				 struct _esp_hkey *key)
+{
+	memset(key, 0, sizeof(*key));
+
+	key->net_hmix = net_hmix;
+	key->spi = spi;
+	key->l3num = l3num;
+	esp_ip_addr_copy(l3num, &key->src_ip, src_ip);
+	esp_ip_addr_copy(l3num, &key->dst_ip, dst_ip);
+}
+
+/* Hash @len bytes at @data with jhash(); installed as .hashfn of every
+ * ESP table and reused by the object hash functions.
+ */
+static inline u32 calculate_hash(const void *data, u32 len, u32 seed)
+{
+	const u32 hash = jhash(data, len, seed);
+
+	return hash;
+}
+
+/* .obj_cmpfn for ltable: rebuild the (netns, l_spi, l_ip, r_ip) key of the
+ * stored entry and compare it byte-wise with the lookup key.
+ * Returns 0 on a match, otherwise the non-zero memcmp() result.
+ */
+static int ltable_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+	const struct _esp_entry *entry = obj;
+	const struct _esp_hkey *wanted = arg->key;
+	struct _esp_hkey stored = {};
+
+	calculate_key(net_hash_mix(entry->net), entry->l_spi, entry->l3num,
+		      &entry->l_ip, &entry->r_ip, &stored);
+
+	return memcmp(wanted, &stored, sizeof(stored));
+}
+
+/* .obj_cmpfn for rtable: rebuild the (netns, r_spi, any, r_ip) key of the
+ * stored entry and compare it byte-wise with the lookup key.
+ * Returns 0 on a match, otherwise the non-zero memcmp() result.
+ */
+static int rtable_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+	const union nf_inet_addr wildcard = { .in6 = IN6ADDR_ANY_INIT };
+	const struct _esp_entry *entry = obj;
+	const struct _esp_hkey *wanted = arg->key;
+	struct _esp_hkey stored = {};
+
+	calculate_key(net_hash_mix(entry->net), entry->r_spi, entry->l3num,
+		      &wildcard, &entry->r_ip, &stored);
+
+	return memcmp(wanted, &stored, sizeof(stored));
+}
+
+/* .obj_cmpfn for incmpl_rtable: rebuild the (netns, 0, any, r_ip) key of
+ * the stored entry and compare it byte-wise with the lookup key.
+ * Returns 0 on a match, otherwise the non-zero memcmp() result.
+ */
+static int incmpl_table_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+	const union nf_inet_addr wildcard = { .in6 = IN6ADDR_ANY_INIT };
+	const struct _esp_entry *entry = obj;
+	const struct _esp_hkey *wanted = arg->key;
+	struct _esp_hkey stored = {};
+
+	calculate_key(net_hash_mix(entry->net), 0, entry->l3num,
+		      &wildcard, &entry->r_ip, &stored);
+
+	return memcmp(wanted, &stored, sizeof(stored));
+}
+
+/* .obj_hashfn for ltable: hash the (netns, l_spi, l_ip, r_ip) key derived
+ * from the entry at @data, using the same hash as key lookups.
+ */
+static u32 ltable_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct _esp_entry *entry = data;
+	struct _esp_hkey derived = {};
+
+	calculate_key(net_hash_mix(entry->net), entry->l_spi, entry->l3num,
+		      &entry->l_ip, &entry->r_ip, &derived);
+
+	return calculate_hash(&derived, len, seed);
+}
+
+/* .obj_hashfn for rtable: hash the (netns, r_spi, any, r_ip) key derived
+ * from the entry at @data, using the same hash as key lookups.
+ */
+static u32 rtable_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const union nf_inet_addr wildcard = { .in6 = IN6ADDR_ANY_INIT };
+	const struct _esp_entry *entry = data;
+	struct _esp_hkey derived = {};
+
+	calculate_key(net_hash_mix(entry->net), entry->r_spi, entry->l3num,
+		      &wildcard, &entry->r_ip, &derived);
+
+	return calculate_hash(&derived, len, seed);
+}
+
+/* .obj_hashfn for incmpl_rtable: hash the (netns, 0, any, r_ip) key derived
+ * from the entry at @data, using the same hash as key lookups.
+ */
+static u32 incmpl_table_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const union nf_inet_addr wildcard = { .in6 = IN6ADDR_ANY_INIT };
+	const struct _esp_entry *entry = data;
+	struct _esp_hkey derived = {};
+
+	calculate_key(net_hash_mix(entry->net), 0, entry->l3num,
+		      &wildcard, &entry->r_ip, &derived);
+
+	return calculate_hash(&derived, len, seed);
+}
+
+static const struct rhashtable_params ltable_params = { /* ltable: entries keyed by netns, l_spi, l_ip, r_ip */
+ .key_len = sizeof(struct _esp_hkey),
+ .head_offset = offsetof(struct _esp_entry, lnode),
+ .hashfn = calculate_hash, /* hashes a struct _esp_hkey lookup key */
+ .obj_hashfn = ltable_obj_hashfn, /* hashes the key rebuilt from a stored entry */
+ .obj_cmpfn = ltable_obj_cmpfn,
+};
+
+static const struct rhashtable_params rtable_params = { /* rtable: entries keyed by netns, r_spi, r_ip */
+ .key_len = sizeof(struct _esp_hkey),
+ .head_offset = offsetof(struct _esp_entry, rnode),
+ .hashfn = calculate_hash, /* hashes a struct _esp_hkey lookup key */
+ .obj_hashfn = rtable_obj_hashfn, /* hashes the key rebuilt from a stored entry */
+ .obj_cmpfn = rtable_obj_cmpfn,
+};
+
+static const struct rhashtable_params incmpl_rtable_params = { /* incmpl_rtable: entries without r_spi, keyed by netns and r_ip */
+ .key_len = sizeof(struct _esp_hkey),
+ .head_offset = offsetof(struct _esp_entry, incmpl_rlist),
+ .hashfn = calculate_hash, /* hashes a struct _esp_hkey lookup key */
+ .obj_hashfn = incmpl_table_obj_hashfn, /* hashes the key rebuilt from a stored entry */
+ .obj_cmpfn = incmpl_table_obj_cmpfn,
+};
+
+/* Initialise the three global ESP lookup tables (ltable, rtable and
+ * incmpl_rtable).
+ *
+ * Returns 0 on success, otherwise the error from the failing init call.
+ * Tables that were set up before the failure are destroyed again.
+ */
+int nf_conntrack_esp_init(void)
+{
+	int ret;
+
+	ret = rhashtable_init(&ltable, &ltable_params);
+	if (ret)
+		return ret;
+
+	ret = rhashtable_init(&rtable, &rtable_params);
+	if (ret)
+		goto err_free_ltable;
+
+	ret = rhltable_init(&incmpl_rtable, &incmpl_rtable_params);
+	if (ret)
+		goto err_free_rtable;
+
+	return ret;
+
+err_free_rtable:
+	rhashtable_destroy(&rtable);
+err_free_ltable:
+	rhashtable_destroy(&ltable);
+
+	return ret;
+}
+
+/* Set up the ESP state of @net: an empty id list with its lock, and the
+ * default timeouts.
+ */
+void nf_conntrack_esp_init_net(struct net *net)
+{
+	struct nf_esp_net *esp_net = esp_pernet(net);
+	int state;
+
+	spin_lock_init(&esp_net->id_list_lock);
+	INIT_LIST_HEAD(&esp_net->id_list);
+
+	for (state = 0; state < ESP_CT_MAX; state++)
+		esp_net->esp_timeouts[state] = esp_timeouts[state];
+}
+
+/* Walk the id list of @esp_net and return the entry whose esp_id equals
+ * @esp_id, or NULL when there is none. No lock is taken here.
+ */
+static struct _esp_entry *find_esp_entry_by_id(struct nf_esp_net *esp_net, int esp_id)
+{
+	struct _esp_entry *cur;
+
+	list_for_each_entry(cur, &esp_net->id_list, net_node) {
+		if (cur->esp_id == esp_id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Unlink @esp_entry from the per-netns id list and from the hash tables,
+ * release its slot in @cnet's id bitmap and free it. NULL is a no-op.
+ *
+ * Removal is attempted on all three tables without checking which of them
+ * the entry is actually in; the return values are ignored.
+ * NOTE(review): every caller in this file holds the per-netns id_list_lock.
+ */
+static void free_esp_entry(struct nf_conntrack_net *cnet, struct _esp_entry *esp_entry)
+{
+	if (!esp_entry)
+		return;
+
+	pr_debug("Removing entry %x from all tables\n", esp_entry->esp_id);
+	list_del(&esp_entry->net_node);
+	rhashtable_remove_fast(&ltable, &esp_entry->lnode, ltable_params);
+	rhashtable_remove_fast(&rtable, &esp_entry->rnode, rtable_params);
+	rhltable_remove(&incmpl_rtable, &esp_entry->incmpl_rlist, incmpl_rtable_params);
+	/* esp_id is the bitmap slot offset by TEMP_SPI_START */
+	clear_bit(esp_entry->esp_id - TEMP_SPI_START, cnet->esp_id_map);
+	kfree(esp_entry);
+}
+
+/* Look up the entry with id @esp_id in @net and free it; an unknown id is
+ * silently ignored.
+ *
+ * NOTE:
+ * The caller must hold the per net id list lock.
+ * The caller must have range checked @esp_id.
+ */
+static void free_esp_entry_by_id(struct net *net, int esp_id)
+{
+	struct _esp_entry *victim = find_esp_entry_by_id(esp_pernet(net), esp_id);
+
+	free_esp_entry(net_generic(net, nf_conntrack_net_id), victim);
+}
+
+/* Allocate an ESP tracking entry for @net using the first available id.
+ *
+ * The id slot is claimed atomically in the per-netns bitmap, retrying when
+ * another CPU grabs the same slot first. The entry is zero-allocated with
+ * GFP_ATOMIC, stamped with the allocation time and linked into the
+ * per-netns id list. Zeroing matters because r_spi is read by the rtable
+ * hash function when an entry is removed before the peer's SPI was learned.
+ *
+ * Only used inside this file, hence static.
+ *
+ * Returns the new entry, or NULL when all ids are in use or the allocation
+ * fails.
+ * NOTE: This function may block on per net list lock.
+ */
+static struct _esp_entry *alloc_esp_entry(struct net *net)
+{
+	/* number of usable ids; matches the size of cnet->esp_id_map */
+	const unsigned int nr_ids = TEMP_SPI_MAX - TEMP_SPI_START + 1;
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_esp_net *esp_net = esp_pernet(net);
+	struct _esp_entry *esp_entry;
+	unsigned int id;
+
+	do {
+		id = find_first_zero_bit(cnet->esp_id_map, nr_ids);
+		if (id >= nr_ids)
+			return NULL;
+	} while (test_and_set_bit(id, cnet->esp_id_map)); /* raced, try again */
+
+	esp_entry = kzalloc(sizeof(*esp_entry), GFP_ATOMIC);
+	if (!esp_entry) {
+		clear_bit(id, cnet->esp_id_map);
+		return NULL;
+	}
+
+	/* esp_id of 0 means unassigned, so ids start at TEMP_SPI_START */
+	esp_entry->esp_id = id + TEMP_SPI_START;
+	esp_entry->alloc_time_jiffies = nfct_time_stamp;
+	esp_entry->net = net;
+
+	spin_lock(&esp_net->id_list_lock);
+	list_add(&esp_entry->net_node, &esp_net->id_list);
+	spin_unlock(&esp_net->id_list_lock);
+
+	return esp_entry;
+}
+
+/* Search for an ESP entry in the initial state (remote SPI not yet known)
+ * based on the IP address of the remote peer.
+ *
+ * Walks the incmpl_rtable list for (@net, @l3num, @src_ip) and returns the
+ * matching entry with the oldest allocation time, or NULL when no entry
+ * matches.
+ *
+ * alloc_time_jiffies is an unsigned 32-bit time stamp, so two stamps are
+ * ordered by looking at the sign of their difference. The unsigned
+ * difference itself can never be negative.
+ */
+static struct _esp_entry *search_esp_entry_init_remote(struct net *net,
+							u16 l3num,
+							const union nf_inet_addr *src_ip)
+{
+	const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT };
+	u32 net_hmix = net_hash_mix(net);
+	struct _esp_entry *first_esp_entry = NULL;
+	struct _esp_entry *esp_entry;
+	struct _esp_hkey key = {};
+	struct rhlist_head *pos, *list;
+
+	calculate_key(net_hmix, 0, l3num, &any, src_ip, &key);
+	list = rhltable_lookup(&incmpl_rtable, (const void *)&key, incmpl_rtable_params);
+	rhl_for_each_entry_rcu(esp_entry, pos, list, incmpl_rlist) {
+		if (!net_eq(net, esp_entry->net) ||
+		    l3num != esp_entry->l3num ||
+		    !esp_ip_addr_equal(l3num, src_ip, &esp_entry->r_ip))
+			continue;
+
+		if (!first_esp_entry) {
+			first_esp_entry = esp_entry;
+		} else if ((s32)(first_esp_entry->alloc_time_jiffies -
+				 esp_entry->alloc_time_jiffies) > 0) {
+			/* This entry is older than the last one found so treat this
+			 * as a better match.
+			 */
+			first_esp_entry = esp_entry;
+		}
+	}
+
+	if (first_esp_entry) {
+		if (first_esp_entry->l3num == AF_INET) {
+			pr_debug("Matches incmpl_rtable entry %x with l_spi %x r_ip %pI4\n",
+				 first_esp_entry->esp_id, first_esp_entry->l_spi,
+				 &first_esp_entry->r_ip.in);
+		} else {
+			pr_debug("Matches incmpl_rtable entry %x with l_spi %x r_ip %pI6\n",
+				 first_esp_entry->esp_id, first_esp_entry->l_spi,
+				 &first_esp_entry->r_ip.in6);
+		}
+	}
+
+	return first_esp_entry;
+}
+
+/* Search for an ESP entry by SPI, source and destination IP addresses.
+ *
+ * Lookups are tried in this order:
+ *   1. ltable: (@spi, @src_ip, @dst_ip) as a packet from the local side;
+ *   2. rtable: (@spi, @src_ip) as a packet from an already known peer;
+ *   3. incmpl_rtable: an entry still waiting for the SPI of peer @src_ip.
+ *      Such an entry gets @spi recorded as r_spi and is moved to rtable.
+ *      If that insertion fails the entry is freed.
+ *
+ * Returns the entry, or NULL when nothing matches or step 3 failed.
+ * NOTE: This function may block on per net list lock.
+ */
+static struct _esp_entry *search_esp_entry_by_spi(struct net *net, const __u32 spi,
+						   u16 l3num,
+						   const union nf_inet_addr *src_ip,
+						   const union nf_inet_addr *dst_ip)
+{
+	const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT };
+	u32 net_hmix = net_hash_mix(net);
+	struct _esp_entry *esp_entry;
+	struct _esp_hkey key = {};
+
+	/* Check for matching established session or repeated initial LAN side */
+	/* LAN side first */
+	calculate_key(net_hmix, spi, l3num, src_ip, dst_ip, &key);
+	esp_entry = rhashtable_lookup_fast(&ltable, (const void *)&key, ltable_params);
+	if (esp_entry) {
+		/* When r_spi is set this is an established session. When not set it's
+		 * a repeated initial packet from LAN side. But both cases are treated
+		 * the same.
+		 */
+		if (esp_entry->l3num == AF_INET) {
+			pr_debug("Matches ltable entry %x with l_spi %x l_ip %pI4 r_ip %pI4\n",
+				 esp_entry->esp_id, esp_entry->l_spi,
+				 &esp_entry->l_ip.in, &esp_entry->r_ip.in);
+		} else {
+			pr_debug("Matches ltable entry %x with l_spi %x l_ip %pI6 r_ip %pI6\n",
+				 esp_entry->esp_id, esp_entry->l_spi,
+				 &esp_entry->l_ip.in6, &esp_entry->r_ip.in6);
+		}
+		return esp_entry;
+	}
+
+	/* Established remote side */
+	calculate_key(net_hmix, spi, l3num, &any, src_ip, &key);
+	esp_entry = rhashtable_lookup_fast(&rtable, (const void *)&key, rtable_params);
+	if (esp_entry) {
+		if (esp_entry->l3num == AF_INET) {
+			pr_debug("Matches rtable entry %x with l_spi %x r_spi %x l_ip %pI4 r_ip %pI4\n",
+				 esp_entry->esp_id, esp_entry->l_spi, esp_entry->r_spi,
+				 &esp_entry->l_ip.in, &esp_entry->r_ip.in);
+		} else {
+			pr_debug("Matches rtable entry %x with l_spi %x r_spi %x l_ip %pI6 r_ip %pI6\n",
+				 esp_entry->esp_id, esp_entry->l_spi, esp_entry->r_spi,
+				 &esp_entry->l_ip.in6, &esp_entry->r_ip.in6);
+		}
+		return esp_entry;
+	}
+
+	/* Incomplete remote side, check if packet has a missing r_spi */
+	esp_entry = search_esp_entry_init_remote(net, l3num, src_ip);
+	if (esp_entry) {
+		int err;
+
+		esp_entry->r_spi = spi;
+		/* Remove entry from incmpl_rtable and add to rtable */
+		rhltable_remove(&incmpl_rtable, &esp_entry->incmpl_rlist, incmpl_rtable_params);
+		/* Error will not be due to duplicate as established remote side lookup
+		 * above would have found it. Delete entry.
+		 */
+		err = rhashtable_insert_fast(&rtable, &esp_entry->rnode, rtable_params);
+		if (err) {
+			struct nf_esp_net *esp_net = esp_pernet(net);
+
+			spin_lock(&esp_net->id_list_lock);
+			free_esp_entry_by_id(net, esp_entry->esp_id);
+			spin_unlock(&esp_net->id_list_lock);
+			return NULL;
+		}
+		return esp_entry;
+	}
+
+	if (l3num == AF_INET) {
+		pr_debug("No entry matches for spi %x src_ip %pI4 dst_ip %pI4\n",
+			 spi, &src_ip->in, &dst_ip->in);
+	} else {
+		pr_debug("No entry matches for spi %x src_ip %pI6 dst_ip %pI6\n",
+			 spi, &src_ip->in6, &dst_ip->in6);
+	}
+	return NULL;
+}
+
+/* Fill the ESP part of the inverse tuple. The ids are not swapped: @tuple
+ * receives the same src and dst id as @orig. Always returns true.
+ */
+bool nf_conntrack_invert_esp_tuple(struct nf_conntrack_tuple *tuple,
+				   const struct nf_conntrack_tuple *orig)
+{
+	tuple->src.u.esp.id = orig->src.u.esp.id;
+	tuple->dst.u.esp.id = orig->dst.u.esp.id;
+
+	return true;
+}
+
+/* esp hdr info to tuple
+ *
+ * Reads the SPI at @dataoff and looks up the tracking entry for it, using
+ * the addresses and l3num already present in @tuple. When no entry exists
+ * a new one is allocated, inserted into ltable and queued in incmpl_rtable
+ * until the SPI of the remote peer is learned. The esp_id of the entry is
+ * then stored as both the src and the dst id of @tuple.
+ *
+ * Returns true when @tuple was filled in (both ids are set to 0 when the
+ * ESP header cannot be read), false when no entry could be allocated or
+ * inserted.
+ *
+ * NOTE(review): esp.id is declared __be16 but is written here, and range
+ * checked elsewhere in this file, as a host order value - confirm which
+ * byte order is intended.
+ */
+bool esp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		      struct net *net, struct nf_conntrack_tuple *tuple)
+{
+	struct esphdr _esphdr, *esphdr;
+	struct _esp_entry *esp_entry;
+	u32 spi;
+
+	esphdr = skb_header_pointer(skb, dataoff, sizeof(_esphdr), &_esphdr);
+	if (!esphdr) {
+		/* try to behave like "nf_conntrack_proto_generic" */
+		tuple->src.u.all = 0;
+		tuple->dst.u.all = 0;
+		return true;
+	}
+	spi = ntohl(esphdr->spi);
+
+	/* Check if esphdr already associated with a pre-existing connection:
+	 *    if no, create a new connection, missing the r_spi;
+	 *    if yes, check if we have seen the source IP:
+	 *             if no, fill in r_spi in the pre-existing connection.
+	 */
+	esp_entry = search_esp_entry_by_spi(net, spi, tuple->src.l3num,
+					    &tuple->src.u3, &tuple->dst.u3);
+	if (!esp_entry) {
+		struct _esp_hkey key = {};
+		const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT };
+		u32 net_hmix = net_hash_mix(net);
+		struct nf_esp_net *esp_net = esp_pernet(net);
+		struct _esp_entry *esp_entry_old;
+		int err;
+
+		esp_entry = alloc_esp_entry(net);
+		if (!esp_entry) {
+			pr_debug("All esp connection slots in use\n");
+			return false;
+		}
+		esp_entry->l_spi = spi;
+		esp_entry->l3num = tuple->src.l3num;
+		esp_ip_addr_copy(esp_entry->l3num, &esp_entry->l_ip, &tuple->src.u3);
+		esp_ip_addr_copy(esp_entry->l3num, &esp_entry->r_ip, &tuple->dst.u3);
+
+		/* Add entries to the hash tables */
+
+		calculate_key(net_hmix, esp_entry->l_spi, esp_entry->l3num, &esp_entry->l_ip,
+			      &esp_entry->r_ip, &key);
+		esp_entry_old = rhashtable_lookup_get_insert_key(&ltable, &key, &esp_entry->lnode,
+								 ltable_params);
+		if (esp_entry_old) {
+			/* Our entry was not inserted (error or lost race), drop it */
+			spin_lock(&esp_net->id_list_lock);
+			free_esp_entry_by_id(net, esp_entry->esp_id);
+			spin_unlock(&esp_net->id_list_lock);
+
+			if (IS_ERR(esp_entry_old))
+				return false;
+
+			/* insertion raced, use existing entry. Whoever inserted it
+			 * also linked it into incmpl_rtable, so it must neither be
+			 * linked a second time nor freed from here.
+			 */
+			esp_entry = esp_entry_old;
+			goto fill_tuple;
+		}
+		/* esp_entry_old == NULL -- insertion successful */
+
+		calculate_key(net_hmix, 0, esp_entry->l3num, &any, &esp_entry->r_ip, &key);
+		err = rhltable_insert_key(&incmpl_rtable, (const void *)&key,
+					  &esp_entry->incmpl_rlist, incmpl_rtable_params);
+		if (err) {
+			spin_lock(&esp_net->id_list_lock);
+			free_esp_entry_by_id(net, esp_entry->esp_id);
+			spin_unlock(&esp_net->id_list_lock);
+			return false;
+		}
+
+		if (esp_entry->l3num == AF_INET) {
+			pr_debug("New entry %x with l_spi %x l_ip %pI4 r_ip %pI4\n",
+				 esp_entry->esp_id, esp_entry->l_spi,
+				 &esp_entry->l_ip.in, &esp_entry->r_ip.in);
+		} else {
+			pr_debug("New entry %x with l_spi %x l_ip %pI6 r_ip %pI6\n",
+				 esp_entry->esp_id, esp_entry->l_spi,
+				 &esp_entry->l_ip.in6, &esp_entry->r_ip.in6);
+		}
+	}
+
+fill_tuple:
+	tuple->dst.u.esp.id = esp_entry->esp_id;
+	tuple->src.u.esp.id = esp_entry->esp_id;
+	return true;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+/* Print the SPIs cached in the private ESP state of @ct to @s. */
+static void esp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+{
+	const struct nf_ct_esp *esp = &ct->proto.esp;
+
+	seq_printf(s, "l_spi=%x, r_spi=%x ", esp->l_spi, esp->r_spi);
+}
+#endif
+
+/* Returns verdict for packet, and may modify conntrack
+ *
+ * Refreshes the timeout of @ct: the unreplied timeout until a reply has
+ * been seen, the replied timeout afterwards. The first time a replied flow
+ * is seen it is also marked assured and the SPIs of its tracking entry are
+ * copied into ct->proto.esp.
+ *
+ * Timeouts come from nf_ct_timeout_lookup() when it returns a table, else
+ * from the per-netns defaults. Always returns NF_ACCEPT.
+ */
+int nf_conntrack_esp_packet(struct nf_conn *ct, struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
+{
+	unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+	struct nf_esp_net *esp_net = esp_pernet(nf_ct_net(ct));
+	struct nf_conntrack_tuple *tuple;
+	struct _esp_entry *esp_entry;
+	int esp_id;
+
+	if (!timeouts)
+		timeouts = esp_net->esp_timeouts;
+
+	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[ESP_CT_UNREPLIED]);
+		return NF_ACCEPT;
+	}
+
+	/* If we've seen traffic both ways, this is some kind of ESP
+	 * stream. Extend timeout.
+	 */
+	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[ESP_CT_REPLIED]);
+
+	/* Also, more likely to be important, and not a probe */
+	if (test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+		return NF_ACCEPT;
+
+	/* Was originally IPCT_STATUS but this is no longer an option.
+	 * GRE uses assured for same purpose
+	 */
+	nf_conntrack_event_cache(IPCT_ASSURED, ct);
+
+	/* Retrieve SPIs of original and reply from esp_entry.
+	 * Both directions should contain the same esp_entry,
+	 * so just check the first one.
+	 */
+	tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
+	esp_id = tuple->src.u.esp.id;
+	if (esp_id < TEMP_SPI_START || esp_id > TEMP_SPI_MAX)
+		return NF_ACCEPT;
+
+	/* Entries are freed with id_list_lock held, so the entry may only be
+	 * dereferenced while the lock is still held.
+	 */
+	spin_lock(&esp_net->id_list_lock);
+	esp_entry = find_esp_entry_by_id(esp_net, esp_id);
+	if (esp_entry) {
+		ct->proto.esp.l_spi = esp_entry->l_spi;
+		ct->proto.esp.r_spi = esp_entry->r_spi;
+	}
+	spin_unlock(&esp_net->id_list_lock);
+
+	return NF_ACCEPT;
+}
+
+/* Free every ESP tracking entry on the id list of @net, holding the id
+ * list lock for the whole walk.
+ */
+void nf_ct_esp_pernet_flush(struct net *net)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_esp_net *esp_net = esp_pernet(net);
+	struct _esp_entry *cur, *next;
+
+	spin_lock(&esp_net->id_list_lock);
+	list_for_each_entry_safe(cur, next, &esp_net->id_list, net_node)
+		free_esp_entry(cnet, cur);
+	spin_unlock(&esp_net->id_list_lock);
+}
+
+/* Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory.
+ *
+ * For both directions of @ct, frees the ESP tracking entry named by the
+ * src id and by the dst id (when the id is in the valid range) and resets
+ * the ids in the tuple to 0.
+ */
+void destroy_esp_conntrack_entry(struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	struct nf_esp_net *esp_net = esp_pernet(net);
+	enum ip_conntrack_dir dir;
+
+	/* Probably all the ESP entries referenced in this connection are the same,
+	 * but the free function handles repeated frees, so best to do them all.
+	 */
+	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
+		struct nf_conntrack_tuple *t = nf_ct_tuple(ct, dir);
+		int id;
+
+		spin_lock(&esp_net->id_list_lock);
+
+		id = t->src.u.esp.id;
+		if (id >= TEMP_SPI_START && id <= TEMP_SPI_MAX)
+			free_esp_entry_by_id(net, id);
+		t->src.u.esp.id = 0;
+
+		id = t->dst.u.esp.id;
+		if (id >= TEMP_SPI_START && id <= TEMP_SPI_MAX)
+			free_esp_entry_by_id(net, id);
+		t->dst.u.esp.id = 0;
+
+		spin_unlock(&esp_net->id_list_lock);
+	}
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Add the src and dst ESP id of @t to @skb as netlink attributes.
+ * Returns 0 on success, -1 when either attribute could not be added.
+ */
+static int esp_tuple_to_nlattr(struct sk_buff *skb,
+			       const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_SRC_ESP_ID, t->src.u.esp.id))
+		return -1;
+
+	if (nla_put_be16(skb, CTA_PROTO_DST_ESP_ID, t->dst.u.esp.id))
+		return -1;
+
+	return 0;
+}
+
+static const struct nla_policy esp_nla_policy[CTA_PROTO_MAX + 1] = { /* both ESP id attributes are 16 bit */
+ [CTA_PROTO_SRC_ESP_ID] = { .type = NLA_U16 },
+ [CTA_PROTO_DST_ESP_ID] = { .type = NLA_U16 },
+};
+
+/* Read the ESP ids selected by @flags from the attribute table @tb into
+ * @t. Returns 0 on success, -EINVAL when a selected attribute is missing.
+ */
+static int esp_nlattr_to_tuple(struct nlattr *tb[],
+			       struct nf_conntrack_tuple *t,
+			       u32 flags)
+{
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_ESP_ID)) {
+		const struct nlattr *src_attr = tb[CTA_PROTO_SRC_ESP_ID];
+
+		if (!src_attr)
+			return -EINVAL;
+		t->src.u.esp.id = nla_get_be16(src_attr);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_ESP_ID)) {
+		const struct nlattr *dst_attr = tb[CTA_PROTO_DST_ESP_ID];
+
+		if (!dst_attr)
+			return -EINVAL;
+		t->dst.u.esp.id = nla_get_be16(dst_attr);
+	}
+
+	return 0;
+}
+
+/* Size reported by nla_policy_len() for the ESP attribute policy. */
+static unsigned int esp_nlattr_tuple_size(void)
+{
+	const int nr_attrs = CTA_PROTO_MAX + 1;
+
+	return nla_policy_len(esp_nla_policy, nr_attrs);
+}
+#endif
+
+/* ESP l4proto descriptor; nf_ct_l4proto_find() returns it for IPPROTO_ESP */
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_esp = {
+ .l4proto = IPPROTO_ESP,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ .print_conntrack = esp_print_conntrack, /* prints l_spi/r_spi */
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = esp_tuple_to_nlattr,
+ .nlattr_tuple_size = esp_nlattr_tuple_size,
+ .nlattr_to_tuple = esp_nlattr_to_tuple,
+ .nla_policy = esp_nla_policy,
+#endif
+};
@@ -88,6 +88,14 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.gre.key),
ntohs(tuple->dst.u.gre.key));
break;
+ case IPPROTO_ESP:
+ /* Both src and dest esp.id should be equal but showing both
+ * will help find errors.
+ */
+ seq_printf(s, "srcid=0x%x dstid=0x%x ",
+ ntohs(tuple->src.u.esp.id),
+ ntohs(tuple->dst.u.esp.id));
+ break;
default:
break;
}
@@ -19,7 +19,9 @@
#define CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE (1 << 9)
#define CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE (1 << 10)
#define CTA_FILTER_F_CTA_PROTO_ICMPV6_ID (1 << 11)
-#define CTA_FILTER_F_MAX (1 << 12)
+#define CTA_FILTER_F_CTA_PROTO_SRC_ESP_ID (1 << 12)
+#define CTA_FILTER_F_CTA_PROTO_DST_ESP_ID (1 << 13)
+#define CTA_FILTER_F_MAX (1 << 14)
#define CTA_FILTER_F_ALL (CTA_FILTER_F_MAX-1)
#define CTA_FILTER_FLAG(ctattr) CTA_FILTER_F_ ## ctattr
new file mode 100755
@@ -0,0 +1,268 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that related ESP connections are tracked via spi.
+#
+# Setup is:
+#
+# nsclient3(veth0) -> (veth2)
+# (br0)nsrouter1(veth1) -> (veth1)nsrouter2 -> (veth0)nsclient2
+# nsclient1(veth0) -> (veth0)
+# Setup xfrm esp connections for IPv4 and IPv6 and check they are tracked.
+#
+# In addition, nsrouter1 performs IP masquerading. Without ESP connection tracking, nsrouter1
+# cannot tell whether a packet coming back from nsclient2 is meant for nsclient1 or for
+# nsclient3.
+#
+# ESP connections (for IPv6) need to use tunnel mode, as ICMPv6 computes checksum over encapsulating
+# IP header addresses.
+
+# Exit status the kselftest framework interprets as "test skipped".
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsclient3 nsrouter1 nsrouter2 nsclient2"
+
+# Each external tool the test drives must be runnable; otherwise skip.
+if ! conntrack -V > /dev/null 2>&1; then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+if ! nft --version > /dev/null 2>&1; then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+if ! ip -Version > /dev/null 2>&1; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+ echo -n 192.168.$1.$2
+}
+
+ipv6 () {
+ echo -n dead:$1::$2
+}
+
+# Delete every network namespace listed in $ns_all.
+cleanup() {
+	for n in $ns_all; do
+		ip netns del "$n"
+	done
+}
+
+check_counter()
+{
+ local ns_name=$1
+ local name="unknown"
+ local expect="packets 0 bytes 0"
+ local lret=0
+
+ cnt=$(ip netns exec $ns_name nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns_name has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns_name nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+ return $lret
+}
+
+# Run check_counter on both routers; return 1 at the first failure, else 0.
+check_unknown()
+{
+	for n in nsrouter1 nsrouter2; do
+		check_counter $n || return 1
+	done
+	return 0
+}
+
+check_conntrack()
+{
+ local ret=0
+
+ for p in ipv4 ipv6; do
+ cnt=$(ip netns exec nsrouter1 conntrack -f $p -L 2>&1)
+ # Check tracked connection was esp by port (conntrack shows unknown at the moment)
+ local num=$(echo -e "$cnt" | grep -cE "[a-zA-Z]+ +50")
+ if [ $? -ne 0 ] || [ "x$num" != "x2" ]; then
+ echo -e "ERROR: expect to see two conntrack esp flows for $p:\n $cnt" 1>&2
+ ret=1
+ fi
+ done
+ return $ret
+}
+
+for n in $ns_all; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+# nsclient1 and nsclient3 each get a veth pair into nsrouter1; the router-side
+# ends (veth0 and veth2) are attached to bridge br0.
+ip link add veth0 netns nsclient1 type veth peer name veth0 netns nsrouter1
+ip link add veth0 netns nsclient3 type veth peer name veth2 netns nsrouter1
+ip link add br0 netns nsrouter1 type bridge
+ip -net nsrouter1 link set veth0 master br0
+ip -net nsrouter1 link set veth2 master br0
+# nsrouter1 <-> nsrouter2 over veth1, then nsrouter2 <-> nsclient2 over veth0.
+ip link add veth1 netns nsrouter1 type veth peer name veth1 netns nsrouter2
+ip link add veth0 netns nsrouter2 type veth peer name veth0 netns nsclient2
+
+# Every namespace owns a veth0; the remaining router devices are enabled by name.
+for n in $ns_all; do
+ ip -net $n link set veth0 up
+done
+ip -net nsrouter1 link set veth1 up
+ip -net nsrouter1 link set veth2 up
+ip -net nsrouter1 link set br0 up
+ip -net nsrouter2 link set veth1 up
+
+# nsclient<i> takes host 2 in 192.168.<i>.0/24 and dead:<i>::/64 and uses host 1
+# of the same network as default gateway. The routers share 192.168.3.0/24 and
+# dead:3::/64 on veth1, nsrouter<i> taking host <i>.
+for i in 1 2; do
+ ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+ ip -net nsclient$i addr add $(ipv6 $i 2)/64 dev veth0
+ ip -net nsclient$i route add default via $(ipv4 $i 1)
+ ip -net nsclient$i -6 route add default via $(ipv6 $i 1)
+
+ ip -net nsrouter$i addr add $(ipv4 3 $i)/24 dev veth1
+ ip -net nsrouter$i addr add $(ipv6 3 $i)/64 dev veth1
+done
+# Gateway addresses: on br0 for nsrouter1, on veth0 for nsrouter2.
+ip -net nsrouter1 addr add $(ipv4 1 1)/24 dev br0
+ip -net nsrouter1 addr add $(ipv6 1 1)/64 dev br0
+ip -net nsrouter2 addr add $(ipv4 2 1)/24 dev veth0
+ip -net nsrouter2 addr add $(ipv6 2 1)/64 dev veth0
+
+# nsclient3 shares network 1 with nsclient1, as host 3.
+ip -net nsclient3 addr add $(ipv4 1 3)/24 dev veth0
+ip -net nsclient3 addr add $(ipv6 1 3)/64 dev veth0
+ip -net nsclient3 route add default via $(ipv4 1 1)
+ip -net nsclient3 -6 route add default via $(ipv6 1 1)
+
+# Each router uses the other router as its default gateway.
+ip -net nsrouter1 route add default via $(ipv4 3 2)
+ip -net nsrouter1 -6 route add default via $(ipv6 3 2)
+ip -net nsrouter2 route add default via $(ipv4 3 1)
+ip -net nsrouter2 -6 route add default via $(ipv6 3 1)
+
+# Turn on IPv4 and IPv6 forwarding in both router namespaces.
+for rtr in nsrouter1 nsrouter2; do
+	ip netns exec $rtr sysctl -q net.ipv4.conf.all.forwarding=1
+	ip netns exec $rtr sysctl -q net.ipv6.conf.all.forwarding=1
+done
+
+# On both routers, the forward chain accepts ESP packets whose conntrack state
+# is new or established; any other forwarded packet is accepted too but bumps
+# the "unknown" counter, which check_counter expects to stay at zero.
+for i in 1 2; do
+ ip netns exec nsrouter$i nft -f - <<-EOF
+ table inet filter {
+ counter unknown { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto esp ct state new,established accept
+ counter name "unknown" accept
+ }
+ }
+ EOF
+done
+
+# Masquerade everything nsrouter1 sends out of veth1, for IPv4 and IPv6.
+# The ruleset is loaded exactly once so that each postrouting chain holds a
+# single masquerade rule.
+ip netns exec nsrouter1 nft -f - <<-EOF
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		oifname "veth1" counter masquerade
+	}
+}
+table ip6 nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		oifname "veth1" counter masquerade
+	}
+}
+EOF
+sleep 2
+
+ip_tunnel() {
+ ip -net nsclient$2 tunnel add tunnel$1 mode vti${1%4} local $3 remote $4 key 0x$1
+ ip -net nsclient$2 link set tunnel$1 up
+}
+
+ip_xfrm() {
+ ip -net nsclient$2 xfrm state add src $4 dst $5 \
+ proto esp spi 0x$1$2$3 mode tunnel mark 0x$1 \
+ sel src $6 dst $7 \
+ auth-trunc 'hmac(sha256)' \
+ 0x0000000000000000000000000000000000000000000000000000000000000$1$2$3 128 \
+ enc 'cbc(aes)' \
+ 0x0000000000000000000000000000000000000000000000000000000000000$1$2$3
+
+ ip -net nsclient$2 xfrm state add src $5 dst $4 \
+ proto esp spi 0x$1$3$2 mode tunnel mark 0x$1 \
+ sel src $7 dst $6 \
+ auth-trunc 'hmac(sha256)' \
+ 0x0000000000000000000000000000000000000000000000000000000000000$1$3$2 128 \
+ enc 'cbc(aes)' \
+ 0x0000000000000000000000000000000000000000000000000000000000000$1$3$2
+
+ ip -net nsclient$2 xfrm policy add src $7 dst $6 dir in mark 0x$1 \
+ tmpl src $5 dst $4 proto esp mode tunnel
+ ip -net nsclient$2 xfrm policy add src $6 dst $7 dir out mark 0x$1 \
+ tmpl src $4 dst $5 proto esp mode tunnel
+}
+
+ip_tunnel 4 1 $(ipv4 1 2) $(ipv4 2 2)
+ip -net nsclient1 addr add $(ipv4 250 1)/24 dev tunnel4
+ip_xfrm 4 1 2 $(ipv4 1 2) $(ipv4 2 2) $(ipv4 250 1) $(ipv4 250 2)
+
+ip_tunnel 4 3 $(ipv4 1 3) $(ipv4 2 2)
+ip -net nsclient3 addr add $(ipv4 251 1)/24 dev tunnel4
+ip_xfrm 4 3 2 $(ipv4 1 3) $(ipv4 2 2) $(ipv4 251 1) $(ipv4 251 2)
+
+ip_tunnel 4 2 $(ipv4 2 2) $(ipv4 3 1)
+ip -net nsclient2 addr add $(ipv4 250 2)/24 dev tunnel4
+ip -net nsclient2 addr add $(ipv4 251 2)/24 dev tunnel4
+ip_xfrm 4 2 1 $(ipv4 2 2) $(ipv4 3 1) $(ipv4 250 2) $(ipv4 250 1)
+ip_xfrm 4 2 3 $(ipv4 2 2) $(ipv4 3 1) $(ipv4 251 2) $(ipv4 251 1)
+
+
+ip_tunnel 6 1 $(ipv6 1 2) $(ipv6 2 2)
+ip -net nsclient1 addr add $(ipv6 250 1)/64 dev tunnel6
+ip_xfrm 6 1 2 $(ipv6 1 2) $(ipv6 2 2) $(ipv6 250 1) $(ipv6 250 2)
+
+ip_tunnel 6 3 $(ipv6 1 3) $(ipv6 2 2)
+ip -net nsclient3 addr add $(ipv6 251 1)/64 dev tunnel6
+ip_xfrm 6 3 2 $(ipv6 1 3) $(ipv6 2 2) $(ipv6 251 1) $(ipv6 251 2)
+
+ip_tunnel 6 2 $(ipv6 2 2) $(ipv6 3 1)
+ip -net nsclient2 addr add $(ipv6 250 2)/64 dev tunnel6
+ip -net nsclient2 addr add $(ipv6 251 2)/64 dev tunnel6
+ip_xfrm 6 2 1 $(ipv6 2 2) $(ipv6 3 1) $(ipv6 250 2) $(ipv6 250 1)
+ip_xfrm 6 2 3 $(ipv6 2 2) $(ipv6 3 1) $(ipv6 251 2) $(ipv6 251 1)
+
+test_ping() {
+ ip netns exec $1 ping -q -c 1 $2 >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken from $1 to $2" 1>&2
+ fi
+}
+
+# Send one ping through each tunnel so that the routers see ESP traffic.
+test_ping nsclient1 $(ipv4 250 2)
+test_ping nsclient3 $(ipv4 251 2)
+test_ping nsclient1 $(ipv6 250 2)
+test_ping nsclient3 $(ipv6 251 2)
+
+# Either check failing marks the whole run as failed.
+check_conntrack || ret=1
+check_unknown || ret=1
+
+if [ $ret -ne 0 ]; then
+	echo "ERROR: ESP connections were not tracked"
+else
+	echo "PASS: ESP connections were tracked via SPIs"
+fi
+
+cleanup
+exit $ret