Message ID | 94ca05ca-2871-3da6-e14f-0a9cb48ed2a5@solarflare.com |
---|---|
State | New |
Headers | show |
Series | sfc: EF100 TSO enhancements | expand |
On Wed, Oct 28, 2020 at 9:39 PM Edward Cree <ecree@solarflare.com> wrote: > > The NIC only needs to know where the headers it has to edit (TCP and > inner and outer IPv4) are, which fits GSO_PARTIAL nicely. > It also supports non-PARTIAL offload of UDP tunnels, again just > needing to be told the outer transport offset so that it can edit > the UDP length field. > (It's not clear to me whether the stack will ever use the non-PARTIAL > version with the netdev feature flags we're setting here.) > > Signed-off-by: Edward Cree <ecree@solarflare.com> > --- > drivers/net/ethernet/sfc/ef100_nic.c | 13 ++++++-- > drivers/net/ethernet/sfc/ef100_tx.c | 45 ++++++++++++++++------------ > 2 files changed, 37 insertions(+), 21 deletions(-) > > diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c > index 3148fe770356..bf92cdc60cda 100644 > --- a/drivers/net/ethernet/sfc/ef100_nic.c > +++ b/drivers/net/ethernet/sfc/ef100_nic.c > @@ -182,8 +182,16 @@ static int efx_ef100_init_datapath_caps(struct efx_nic *efx) > if (rc) > return rc; > > - if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) > - efx->net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6; > + if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) { > + struct net_device *net_dev = efx->net_dev; > + netdev_features_t tso = NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_PARTIAL | > + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; > + > + net_dev->features |= tso; > + net_dev->hw_features |= tso; > + net_dev->hw_enc_features |= tso; > + net_dev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; > + } > efx->num_mac_stats = MCDI_WORD(outbuf, > GET_CAPABILITIES_V4_OUT_MAC_STATS_NUM_STATS); > netif_dbg(efx, probe, efx->net_dev, > @@ -1101,6 +1109,7 @@ static int ef100_probe_main(struct efx_nic *efx) > nic_data->efx = efx; > net_dev->features |= efx->type->offload_features; > net_dev->hw_features |= efx->type->offload_features; > + net_dev->hw_enc_features |= efx->type->offload_features; > > /* Populate design-parameter defaults */ > nic_data->tso_max_hdr_len = ESE_EF100_DP_GZ_TSO_MAX_HDR_LEN_DEFAULT; > diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c > index a90e5a9d2a37..d267b12bdaa0 100644 > --- a/drivers/net/ethernet/sfc/ef100_tx.c > +++ b/drivers/net/ethernet/sfc/ef100_tx.c > @@ -54,8 +54,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) > struct efx_nic *efx = tx_queue->efx; > struct ef100_nic_data *nic_data; > struct efx_tx_buffer *buffer; > - struct tcphdr *tcphdr; > - struct iphdr *iphdr; > size_t header_len; > u32 mss; > > @@ -98,20 +96,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) > buffer->unmap_len = 0; > buffer->skb = skb; > ++tx_queue->insert_count; > - > - /* Adjust the TCP checksum to exclude the total length, since we set > - * ED_INNER_IP_LEN in the descriptor. > - */ > - tcphdr = tcp_hdr(skb); > - if (skb_is_gso_v6(skb)) { > - tcphdr->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, > - &ipv6_hdr(skb)->daddr, > - 0, IPPROTO_TCP, 0); > - } else { > - iphdr = ip_hdr(skb); > - tcphdr->check = ~csum_tcpudp_magic(iphdr->saddr, iphdr->daddr, > - 0, IPPROTO_TCP, 0); > - } > return true; > } > > @@ -209,17 +193,35 @@ static void ef100_make_tso_desc(struct efx_nic *efx, > ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; > u16 vlan_enable = efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX ? > skb_vlan_tag_present(skb) : 0; > + bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; > unsigned int len, ip_offset, tcp_offset, payload_segs; > + unsigned int outer_ip_offset, outer_l4_offset; > u16 vlan_tci = skb_vlan_tag_get(skb); > u32 mss = skb_shinfo(skb)->gso_size; > + bool encap = skb->encapsulation; > + struct tcphdr *tcp; > + u32 paylen; > > len = skb->len - buffer->len; > /* We use 1 for the TSO descriptor and 1 for the header */ > payload_segs = segment_count - 2; > - ip_offset = skb_network_offset(skb); > - tcp_offset = skb_transport_offset(skb); > + if (encap) { > + outer_ip_offset = skb_network_offset(skb); > + outer_l4_offset = skb_transport_offset(skb); > + ip_offset = skb_inner_network_offset(skb); > + tcp_offset = skb_inner_transport_offset(skb); > + } else { > + ip_offset = skb_network_offset(skb); > + tcp_offset = skb_transport_offset(skb); > + outer_ip_offset = outer_l4_offset = 0; > + } > + > + /* subtract TCP payload length from inner checksum */ > + tcp = (void *)skb->data + tcp_offset; > + paylen = skb->len - tcp_offset; > + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); > > - EFX_POPULATE_OWORD_13(*txd, > + EFX_POPULATE_OWORD_17(*txd, > ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, > ESF_GZ_TX_TSO_MSS, mss, > ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, > @@ -231,6 +233,11 @@ static void ef100_make_tso_desc(struct efx_nic *efx, > ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, > ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, > ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, > + ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, > + ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, > + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, encap && !gso_partial, This is a boolean field to signal whether the NIC needs to fix up the udp length field ? Which in the case of GSO_PARTIAL has already been resolved by the gso layer (in __skb_udp_tunnel_segment). Just curious, is this ever expected to be true? Not based on current advertised features, right? > + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : > + ESE_GZ_TX_DESC_IP4_ID_NO_OP, > ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, > ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci > ); >
On 30/10/2020 15:49, Willem de Bruijn wrote: > On Wed, Oct 28, 2020 at 9:39 PM Edward Cree <ecree@solarflare.com> wrote: >> + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, encap && !gso_partial, > > This is a boolean field to signal whether the NIC needs to fix up the > udp length field ? Yes. > Which in the case of GSO_PARTIAL has already been resolved by the gso > layer (in __skb_udp_tunnel_segment). Indeed. > Just curious, is this ever expected to be true? Not based on current > advertised features, right? As I mentioned in the patch description and cover letter, I'm not entirely certain. I don't _think_ the stack will ever give us an encap skb without GSO_PARTIAL with the features we've advertised, but since the hardware supports it I thought it better to handle that case anyway, just in case I'm mistaken. -ed
On Fri, Oct 30, 2020 at 12:16 PM Edward Cree <ecree@solarflare.com> wrote: > > On 30/10/2020 15:49, Willem de Bruijn wrote: > > On Wed, Oct 28, 2020 at 9:39 PM Edward Cree <ecree@solarflare.com> wrote: > >> + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, encap && !gso_partial, > > > > This is a boolean field to signal whether the NIC needs to fix up the > > udp length field ? > Yes. Thanks > > Which in the case of GSO_PARTIAL has already been resolved by the gso > > layer (in __skb_udp_tunnel_segment). > Indeed. > > > Just curious, is this ever expected to be true? Not based on current > > advertised features, right? > As I mentioned in the patch description and cover letter, I'm not > entirely certain. I don't _think_ the stack will ever give us an > encap skb without GSO_PARTIAL with the features we've advertised, That's my understanding too. > but since the hardware supports it I thought it better to handle > that case anyway, just in case I'm mistaken. Then you could (as follow-up) advertise without GSO_PARTIAL and avoid the whole transition through the gso layer?
On 30/10/2020 16:26, Willem de Bruijn wrote: > Then you could (as follow-up) advertise without GSO_PARTIAL and avoid > the whole transition through the gso layer? The thing is, non-PARTIAL offload only supports tunnels that the NIC understands (single-layer UDP tunnels), but AIUI GSO_PARTIAL can support all sorts of other things, such as gretaps (though we'd need some more advertised features, I haven't figured out quite which ones yet but when I do and can test it I'll send a follow-up) and nested tunnels (as long as we don't need to edit the 'middle' IP ID, e.g. if it's DF or IPv6). So we definitely want to advertise GSO_PARTIAL support. But possibly I don't need to have NETIF_F_GSO_UDP_TUNNEL[_CSUM] in net_dev->gso_partial_features? -ed
On Fri, Oct 30, 2020 at 12:43 PM Edward Cree <ecree@solarflare.com> wrote: > > On 30/10/2020 16:26, Willem de Bruijn wrote: > > Then you could (as follow-up) advertise without GSO_PARTIAL and avoid > > the whole transition through the gso layer? > > The thing is, non-PARTIAL offload only supports tunnels that the NIC > understands (single-layer UDP tunnels), but AIUI GSO_PARTIAL can > support all sorts of other things, such as gretaps (though we'd need > some more advertised features, I haven't figured out quite which > ones yet but when I do and can test it I'll send a follow-up) and > nested tunnels (as long as we don't need to edit the 'middle' IP ID, > e.g. if it's DF or IPv6). So we definitely want to advertise > GSO_PARTIAL support. > But possibly I don't need to have NETIF_F_GSO_UDP_TUNNEL[_CSUM] in > net_dev->gso_partial_features? If the device can handle fixing the odd last segment length, indeed. If you remove them from gso_partial_flags, then gso_features_check won't mask them out /* Support for GSO partial features requires software * intervention before we can actually process the packets * so we need to strip support for any partial features now * and we can pull them back in after we have partially * segmented the frame. */ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) features &= ~dev->gso_partial_features; as called in validate_xmit_skb prior to testing whether the packet can be passed to the nic as is in netif_needs_gso. Until adding other tunnel types like NETIF_F_GSO_GRE_CSUM, for this device gso_partial_features would then be 0 and thus NETIF_F_GSO_PARTIAL is not needed at all?
On 30/10/2020 17:33, Willem de Bruijn wrote: > On Fri, Oct 30, 2020 at 12:43 PM Edward Cree <ecree@solarflare.com> wrote: >> But possibly I don't need to have NETIF_F_GSO_UDP_TUNNEL[_CSUM] in >> net_dev->gso_partial_features? > If the device can handle fixing the odd last segment length, indeed. It can, but... I thought Linux didn't give drivers odd last segments any more. At least I'm fairly sure that in the (non-PARTIAL) non-encap TSO tests I've done, the GSO skbs we've been given have all been a whole number of mss long. Which means I haven't been able to test that the device gets it right. > Until adding other tunnel types like NETIF_F_GSO_GRE_CSUM, for this > device gso_partial_features would then be 0 and thus > NETIF_F_GSO_PARTIAL is not needed at all? Unless the kernel supports GSO_PARTIAL of nested tunnels. The device will handle (say) VxLAN-in-VxLAN just fine, as long as you don't want it to update the middle IPID; and being able to cope with crazy things like that was (I thought) part of the point of GSO_PARTIAL. Indeed, given that GSO_PARTIAL is supposed to be a non-protocol- ossified design, it seems a bit silly to me that we have to specify a list of other NETIF_F_GSO_foo, rather than just being able to say "I can handle anything of the form ETH-IP-gubbins-IP-TCP" and let the kernel put whatever it likes — VxLAN, GRE, fou-over-geneve with cherry and sprinkles — in the 'gubbins'. Wasn't that what 'less is more' was supposed to be all about? -ed
On Fri, Oct 30, 2020 at 2:14 PM Edward Cree <ecree@solarflare.com> wrote: > > On 30/10/2020 17:33, Willem de Bruijn wrote: > > On Fri, Oct 30, 2020 at 12:43 PM Edward Cree <ecree@solarflare.com> wrote: > >> But possibly I don't need to have NETIF_F_GSO_UDP_TUNNEL[_CSUM] in > >> net_dev->gso_partial_features? > > If the device can handle fixing the odd last segment length, indeed. > It can, but... > I thought Linux didn't give drivers odd last segments any more. At > least I'm fairly sure that in the (non-PARTIAL) non-encap TSO tests > I've done, the GSO skbs we've been given have all been a whole > number of mss long. > Which means I haven't been able to test that the device gets it right. Perhaps the local TCP stack tries to avoid it. Off the top of my head not sure if this is assured in all edge cases. The forwarding path is another wildcard. > > Until adding other tunnel types like NETIF_F_GSO_GRE_CSUM, for this > > device gso_partial_features would then be 0 and thus > > NETIF_F_GSO_PARTIAL is not needed at all? > Unless the kernel supports GSO_PARTIAL of nested tunnels. The device > will handle (say) VxLAN-in-VxLAN just fine, as long as you don't want > it to update the middle IPID; and being able to cope with crazy > things like that was (I thought) part of the point of GSO_PARTIAL. Oh right. > Indeed, given that GSO_PARTIAL is supposed to be a non-protocol- > ossified design, it seems a bit silly to me that we have to specify > a list of other NETIF_F_GSO_foo, rather than just being able to say > "I can handle anything of the form ETH-IP-gubbins-IP-TCP" and let > the kernel put whatever it likes — VxLAN, GRE, fou-over-geneve with > cherry and sprinkles — in the 'gubbins'. Wasn't that what 'less is > more' was supposed to be all about? > > -ed
diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 3148fe770356..bf92cdc60cda 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -182,8 +182,16 @@ static int efx_ef100_init_datapath_caps(struct efx_nic *efx) if (rc) return rc; - if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) - efx->net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6; + if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) { + struct net_device *net_dev = efx->net_dev; + netdev_features_t tso = NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; + + net_dev->features |= tso; + net_dev->hw_features |= tso; + net_dev->hw_enc_features |= tso; + net_dev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; + } efx->num_mac_stats = MCDI_WORD(outbuf, GET_CAPABILITIES_V4_OUT_MAC_STATS_NUM_STATS); netif_dbg(efx, probe, efx->net_dev, @@ -1101,6 +1109,7 @@ static int ef100_probe_main(struct efx_nic *efx) nic_data->efx = efx; net_dev->features |= efx->type->offload_features; net_dev->hw_features |= efx->type->offload_features; + net_dev->hw_enc_features |= efx->type->offload_features; /* Populate design-parameter defaults */ nic_data->tso_max_hdr_len = ESE_EF100_DP_GZ_TSO_MAX_HDR_LEN_DEFAULT; diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index a90e5a9d2a37..d267b12bdaa0 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -54,8 +54,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct efx_nic *efx = tx_queue->efx; struct ef100_nic_data *nic_data; struct efx_tx_buffer *buffer; - struct tcphdr *tcphdr; - struct iphdr *iphdr; size_t header_len; u32 mss; @@ -98,20 +96,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) buffer->unmap_len = 0; buffer->skb = skb; ++tx_queue->insert_count; - - /* Adjust the TCP checksum to exclude the total length, since we set - * ED_INNER_IP_LEN in the descriptor. - */ - tcphdr = tcp_hdr(skb); - if (skb_is_gso_v6(skb)) { - tcphdr->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); - } else { - iphdr = ip_hdr(skb); - tcphdr->check = ~csum_tcpudp_magic(iphdr->saddr, iphdr->daddr, - 0, IPPROTO_TCP, 0); - } return true; } @@ -209,17 +193,35 @@ static void ef100_make_tso_desc(struct efx_nic *efx, ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; u16 vlan_enable = efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX ? skb_vlan_tag_present(skb) : 0; + bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; unsigned int len, ip_offset, tcp_offset, payload_segs; + unsigned int outer_ip_offset, outer_l4_offset; u16 vlan_tci = skb_vlan_tag_get(skb); u32 mss = skb_shinfo(skb)->gso_size; + bool encap = skb->encapsulation; + struct tcphdr *tcp; + u32 paylen; len = skb->len - buffer->len; /* We use 1 for the TSO descriptor and 1 for the header */ payload_segs = segment_count - 2; - ip_offset = skb_network_offset(skb); - tcp_offset = skb_transport_offset(skb); + if (encap) { + outer_ip_offset = skb_network_offset(skb); + outer_l4_offset = skb_transport_offset(skb); + ip_offset = skb_inner_network_offset(skb); + tcp_offset = skb_inner_transport_offset(skb); + } else { + ip_offset = skb_network_offset(skb); + tcp_offset = skb_transport_offset(skb); + outer_ip_offset = outer_l4_offset = 0; + } + + /* subtract TCP payload length from inner checksum */ + tcp = (void *)skb->data + tcp_offset; + paylen = skb->len - tcp_offset; + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); - EFX_POPULATE_OWORD_13(*txd, + EFX_POPULATE_OWORD_17(*txd, ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, ESF_GZ_TX_TSO_MSS, mss, ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, @@ -231,6 +233,11 @@ static void ef100_make_tso_desc(struct efx_nic *efx, ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, + ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, + ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, encap && !gso_partial, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : + ESE_GZ_TX_DESC_IP4_ID_NO_OP, ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci );
The NIC only needs to know where the headers it has to edit (TCP and inner and outer IPv4) are, which fits GSO_PARTIAL nicely. It also supports non-PARTIAL offload of UDP tunnels, again just needing to be told the outer transport offset so that it can edit the UDP length field. (It's not clear to me whether the stack will ever use the non-PARTIAL version with the netdev feature flags we're setting here.) Signed-off-by: Edward Cree <ecree@solarflare.com> --- drivers/net/ethernet/sfc/ef100_nic.c | 13 ++++++-- drivers/net/ethernet/sfc/ef100_tx.c | 45 ++++++++++++++++------------ 2 files changed, 37 insertions(+), 21 deletions(-)