diff mbox series

[net-next,RFC,v1,03/10] net: Introduce crc offload for tcp ddp ulp

Message ID 20200930162010.21610-4-borisp@mellanox.com
State Superseded
Headers show
Series nvme-tcp receive offloads | expand

Commit Message

Boris Pismenny Sept. 30, 2020, 4:20 p.m. UTC
This commit itroduces support for CRC offload to direct data placement
ULP on the receive side. Both DDP and CRC share a common API to
initialize the offload for a TCP socket. But otherwise, both can
be executed independently.

On the receive side, CRC offload requires a new SKB bit that
indicates that no CRC error was encountered while processing this packet.
If all packets of a ULP message have this bit set, then the CRC
verification for the message can be skipped, as hardware already checked
it.

The following patches will set and use this bit to perform NVME-TCP
CRC offload.

A subsequent series, will add NVMe-TCP transmit side CRC support.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
 include/linux/netdev_features.h | 2 ++
 include/linux/skbuff.h          | 4 ++++
 net/Kconfig                     | 8 ++++++++
 net/ethtool/common.c            | 1 +
 net/ipv4/tcp_input.c            | 7 +++++++
 net/ipv4/tcp_ipv4.c             | 3 +++
 net/ipv4/tcp_offload.c          | 3 +++
 7 files changed, 28 insertions(+)

Comments

Sagi Grimberg Oct. 8, 2020, 9:51 p.m. UTC | #1
> This commit itroduces support for CRC offload to direct data placement
               introduces
> ULP on the receive side. Both DDP and CRC share a common API to
> initialize the offload for a TCP socket. But otherwise, both can
> be executed independently.

 From the API it not clear that the offload engine does crc32c, do you
see this extended to other crc types in the future?

Other than this the patch looks good to me,
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Boris Pismenny Oct. 11, 2020, 2:58 p.m. UTC | #2
On 09/10/2020 0:51, Sagi Grimberg wrote:
>> This commit itroduces support for CRC offload to direct data placement
>                introduces
>> ULP on the receive side. Both DDP and CRC share a common API to
>> initialize the offload for a TCP socket. But otherwise, both can
>> be executed independently.
>  From the API it not clear that the offload engine does crc32c, do you
> see this extended to other crc types in the future?

Yes, it is somewhat implicit, and it depends on the tcp ddp configuration. At the moment we only do nvme-tcp, and that has only CRC32C. If in the future, there would be other CRC variants, or data digest algorithms, then the code could be easily extended.

In general, any data digest over TCP can leverage this code and SKB member. Not only other CRC types can benefit from it, but even more complex data digest algorithms like SHA can use this. In essence this bit is similar to the TLS skb->decrypted bit. In TLS, skb->decrypted also indicates that the authentication has no error, exactly like ddp_crc indicates that there is no CRC32C error. The only reason we didn't use the same bit for both is that these two protocol offloads can be combined and that will benefit from two independent bits.

>
> Other than this the patch looks good to me,
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
diff mbox series

Patch

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index a0074b244372..27001f5c0be1 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -85,6 +85,7 @@  enum {
 
 	NETIF_F_HW_MACSEC_BIT,		/* Offload MACsec operations */
 	NETIF_F_HW_TCP_DDP_BIT,		/* TCP direct data placement offload */
+	NETIF_F_HW_TCP_DDP_CRC_BIT,	/* TCP DDP CRC offload */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -159,6 +160,7 @@  enum {
 #define NETIF_F_GSO_FRAGLIST	__NETIF_F(GSO_FRAGLIST)
 #define NETIF_F_HW_MACSEC	__NETIF_F(HW_MACSEC)
 #define NETIF_F_HW_TCP_DDP	__NETIF_F(HW_TCP_DDP)
+#define NETIF_F_HW_TCP_DDP_CRC	__NETIF_F(HW_TCP_DDP_CRC)
 
 /* Finds the next feature with the highest number of the range of start till 0.
  */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 04a18e01b362..0530d849ebf2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -858,6 +858,10 @@  struct sk_buff {
 #ifdef CONFIG_TLS_DEVICE
 	__u8			decrypted:1;
 #endif
+#ifdef CONFIG_TCP_DDP_CRC
+	__u8                    ddp_crc:1;
+#endif
+
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
diff --git a/net/Kconfig b/net/Kconfig
index 346e9fa7a6ec..7891b4380e56 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -468,6 +468,14 @@  config TCP_DDP
 	  NVMe-TCP/iSCSI, to request the NIC to place TCP payload data
 	  of a command response directly into kernel pages.
 
+config TCP_DDP_CRC
+	bool "TCP direct data placement CRC offload"
+	default n
+	help
+	  Direct Data Placement (DDP) CRC32C offload for TCP enables ULP, such as
+	  NVMe-TCP/iSCSI, to request the NIC to calculate/verify the data digest
+	  of commands as they go through the NIC. Thus avoiding the costly
+	  per-byte overhead.
 
 endif   # if NET
 
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index a2ff7a4a6bbf..4941670dd25d 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -69,6 +69,7 @@  const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
 	[NETIF_F_GRO_FRAGLIST_BIT] =	 "rx-gro-list",
 	[NETIF_F_HW_MACSEC_BIT] =	 "macsec-hw-offload",
 	[NETIF_F_HW_TCP_DDP_BIT] =	 "tcp-ddp-offload",
+	[NETIF_F_HW_TCP_DDP_CRC_BIT] =	 "tcp-ddp-crc-offload",
 };
 
 const char
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f7b3e37d2198..f8e1e8ebef2c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5099,6 +5099,9 @@  tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 #ifdef CONFIG_TLS_DEVICE
 		nskb->decrypted = skb->decrypted;
+#endif
+#ifdef CONFIG_TCP_DDP_CRC
+		nskb->ddp_crc = skb->ddp_crc;
 #endif
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		if (list)
@@ -5132,6 +5135,10 @@  tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 #ifdef CONFIG_TLS_DEVICE
 				if (skb->decrypted != nskb->decrypted)
 					goto end;
+#endif
+#ifdef CONFIG_TCP_DDP_CRC
+				if (skb->ddp_crc != nskb->ddp_crc)
+					goto end;
 #endif
 			}
 		}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ace48b2790ff..2693029e6ee7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1791,6 +1791,9 @@  bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
 #ifdef CONFIG_TLS_DEVICE
 	    tail->decrypted != skb->decrypted ||
+#endif
+#ifdef CONFIG_TCP_DDP_CRC
+	    tail->ddp_crc != skb->ddp_crc ||
 #endif
 	    thtail->doff != th->doff ||
 	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index e09147ac9a99..39f5f0bcf181 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -262,6 +262,9 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 #ifdef CONFIG_TLS_DEVICE
 	flush |= p->decrypted ^ skb->decrypted;
 #endif
+#ifdef CONFIG_TCP_DDP_CRC
+	flush |= p->ddp_crc ^ skb->ddp_crc;
+#endif
 
 	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;