@@ -84,6 +84,7 @@ enum {
NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */
NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */
+ NETIF_F_HW_TCP_DDP_BIT, /* TCP direct data placement offload */
/*
* Add your fresh new feature above and remember to update
@@ -157,6 +158,7 @@ enum {
#define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST)
#define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST)
#define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC)
+#define NETIF_F_HW_TCP_DDP __NETIF_F(HW_TCP_DDP)
/* Finds the next feature with the highest number of the range of start till 0.
*/
@@ -935,6 +935,7 @@ struct dev_ifalias {
struct devlink;
struct tlsdev_ops;
+struct tcp_ddp_dev_ops;
struct netdev_name_node {
struct hlist_node hlist;
@@ -1922,6 +1923,10 @@ struct net_device {
const struct tlsdev_ops *tlsdev_ops;
#endif
+#ifdef CONFIG_TCP_DDP
+ const struct tcp_ddp_dev_ops *tcp_ddp_ops;
+#endif
+
const struct header_ops *header_ops;
unsigned int flags;
@@ -66,6 +66,8 @@ struct inet_connection_sock_af_ops {
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
* @icsk_clean_acked Clean acked data hook
+ * @icsk_ulp_ddp_ops Pluggable ULP direct data placement control hook
+ * @icsk_ulp_ddp_data ULP direct data placement private data
* @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
@@ -94,6 +96,8 @@ struct inet_connection_sock {
const struct tcp_ulp_ops *icsk_ulp_ops;
void __rcu *icsk_ulp_data;
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
+ const struct tcp_ddp_ulp_ops *icsk_ulp_ddp_ops;
+ void __rcu *icsk_ulp_ddp_data;
struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:5,
new file mode 100644
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tcp_ddp.h
+ * Author: Boris Pismenny <borisp@mellanox.com>
+ * Copyright (C) 2020 Mellanox Technologies.
+ */
+#ifndef _TCP_DDP_H
+#define _TCP_DDP_H
+
+#include <linux/blkdev.h>
+#include <linux/netdevice.h>
+#include <net/inet_connection_sock.h>
+#include <net/sock.h>
+
+/* Limits returned by the offload driver; a zero field means "don't care". */
+struct tcp_ddp_limits {
+ int max_ddp_sgl_len; /* presumably max SGL entries per DDP I/O; TODO confirm */
+};
+
+enum tcp_ddp_type {
+ TCP_DDP_NVME = 1, /* only type defined so far; 0 is left unassigned */
+};
+
+struct tcp_ddp_config { /* generic header; see struct nvme_tcp_config */
+ enum tcp_ddp_type type;
+ unsigned char buf[]; /* flexible array; presumably type-specific payload */
+};
+
+struct nvme_tcp_config {
+ struct tcp_ddp_config cfg; /* generic header (first member) */
+
+ u16 pfv; /* presumably NVMe/TCP PDU format version -- TODO confirm */
+ u8 cpda; /* presumably controller PDU data alignment -- TODO confirm */
+ u8 dgst; /* presumably digest enable bits -- TODO confirm */
+ int queue_size;
+ int queue_id;
+ int io_cpu;
+};
+
+struct tcp_ddp_io { /* passed to tcp_ddp_setup() and tcp_ddp_teardown() */
+ u32 command_id;
+ int nents; /* presumably number of scatterlist entries in use -- confirm */
+ struct sg_table sg_table;
+ struct scatterlist first_sgl[SG_CHUNK_SIZE]; /* inline scatterlist storage */
+};
+
+struct tcp_ddp_dev_ops { /* driver callbacks; see net_device::tcp_ddp_ops */
+ int (*tcp_ddp_limits)(struct net_device *netdev,
+ struct tcp_ddp_limits *limits);
+ int (*tcp_ddp_sk_add)(struct net_device *netdev,
+ struct sock *sk,
+ struct tcp_ddp_config *config);
+ void (*tcp_ddp_sk_del)(struct net_device *netdev,
+ struct sock *sk);
+ int (*tcp_ddp_setup)(struct net_device *netdev,
+ struct sock *sk,
+ struct tcp_ddp_io *io);
+ int (*tcp_ddp_teardown)(struct net_device *netdev,
+ struct sock *sk,
+ struct tcp_ddp_io *io,
+ void *ddp_ctx); /* presumably passed to ddp_teardown_done() -- confirm */
+ void (*tcp_ddp_resync)(struct net_device *netdev,
+ struct sock *sk, u32 seq);
+};
+
+#define TCP_DDP_RESYNC_REQ (1 << 0) /* presumably a resync_request() @flags bit */
+
+/*
+ * Interface to register upper layer Direct Data Placement (DDP) TCP offload
+ */
+struct tcp_ddp_ulp_ops {
+ /* NIC requests the ULP to indicate if @seq is the start of a message */
+ bool (*resync_request)(struct sock *sk, u32 seq, u32 flags);
+ /* NIC driver informs the ULP that DDP teardown is done */
+ void (*ddp_teardown_done)(void *ddp_ctx);
+};
+
+struct tcp_ddp_ctx { /* what tcp_ddp_get_ctx() reads from icsk_ulp_ddp_data */
+ enum tcp_ddp_type type;
+ unsigned char buf[]; /* flexible array member */
+};
+
+static inline struct tcp_ddp_ctx *tcp_ddp_get_ctx(const struct sock *sk)
+{
+ const struct inet_connection_sock *conn = inet_csk(sk);
+
+ return (__force struct tcp_ddp_ctx *)conn->icsk_ulp_ddp_data;
+}
+
+#endif /* _TCP_DDP_H */
@@ -460,6 +460,15 @@ config ETHTOOL_NETLINK
netlink. It provides better extensibility and some new features,
e.g. notification messages.
+config TCP_DDP
+ bool "TCP direct data placement offload"
+ default n
+ help
+ Direct Data Placement (DDP) offload for TCP enables a ULP, such as
+ NVMe-TCP or iSCSI, to request that the NIC place the TCP payload
+ data of a command response directly into kernel pages.
+
+
endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
@@ -69,6 +69,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#include <net/tcp_ddp.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -6059,9 +6060,15 @@ EXPORT_SYMBOL(pskb_extract);
*/
void skb_condense(struct sk_buff *skb)
{
+ bool is_ddp = false;
+
+#ifdef CONFIG_TCP_DDP
+ is_ddp = skb->sk && inet_csk(skb->sk) &&
+ inet_csk(skb->sk)->icsk_ulp_ddp_data;
+#endif
if (skb->data_len) {
if (skb->data_len > skb->end - skb->tail ||
- skb_cloned(skb))
+ skb_cloned(skb) || is_ddp)
return;
/* Nice, we can free page frag(s) right now */
@@ -68,6 +68,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
[NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list",
[NETIF_F_HW_MACSEC_BIT] = "macsec-hw-offload",
+ [NETIF_F_HW_TCP_DDP_BIT] = "tcp-ddp-offload",
};
const char