Message ID | 20221108061731.1589327-2-paulliu@debian.org |
---|---|
State | Accepted |
Commit | a3bf193bf4ea8703bcf96b1a34713fb2ae87aa39 |
Headers | show |
Series | add TCP and HTTP for downloading images | expand |
On Tue, Nov 8, 2022 at 8:17 AM Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org> wrote: > > From: "Ying-Chun Liu (PaulLiu)" <paul.liu@linaro.org> > > Currently file transfers are done using tftp or NFS both > over udp. This requires a request to be sent from client > (u-boot) to the boot server. > > The current standard is TCP with selective acknowledgment. > > Signed-off-by: Duncan Hare <DH@Synoia.com> > Signed-off-by: Duncan Hare <DuncanCHare@yahoo.com> > Signed-off-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org> > Reviewed-by: Simon Glass <sjg@chromium.org> > Cc: Christian Gmeiner <christian.gmeiner@gmail.com> > Cc: Joe Hershberger <joe.hershberger@ni.com> > Cc: Michal Simek <michal.simek@xilinx.com> > Cc: Ramon Fried <rfried.dev@gmail.com> > --- > v1-v12: Made by Duncan, didn't tracked. > v13: Fix some issues which is reviewed by Christian > v14: Add options to enable/disable SACK. > v15: Fix various syntax errors reviewed by Michal. > Remove magic numbers. Use kernel-doc format. > v16: Add more kernel-doc. Fix more double spaces. > v19: export tcp_set_pseudo_header() for unit test. > --- > include/net.h | 36 ++- > include/net/tcp.h | 299 +++++++++++++++++++ > net/Kconfig | 16 ++ > net/Makefile | 1 + > net/net.c | 30 ++ > net/tcp.c | 720 ++++++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 1093 insertions(+), 9 deletions(-) > create mode 100644 include/net/tcp.h > create mode 100644 net/tcp.c > > diff --git a/include/net.h b/include/net.h > index 32364ed0ce..f4140523c2 100644 > --- a/include/net.h > +++ b/include/net.h > @@ -365,6 +365,7 @@ struct vlan_ethernet_hdr { > #define PROT_NCSI 0x88f8 /* NC-SI control packets */ > > #define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ > +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ > #define IPPROTO_UDP 17 /* User Datagram Protocol */ > > /* > @@ -690,19 +691,36 @@ static inline void net_send_packet(uchar *pkt, int len) > (void) eth_send(pkt, len); > } > > -/* > - * Transmit "net_tx_packet" as UDP packet, performing ARP request if needed > - * (ether will be populated) > - * > - * @param ether Raw packet buffer > - * @param dest IP address to send the datagram to > - * @param dport Destination UDP port > - * @param sport Source UDP port > - * @param payload_len Length of data after the UDP header > +/** > + * net_send_ip_packet() - Transmit "net_tx_packet" as UDP or TCP packet, > + * send ARP request if needed (ether will be populated) > + * @ether: Raw packet buffer > + * @dest: IP address to send the datagram to > + * @dport: Destination UDP port > + * @sport: Source UDP port > + * @payload_len: Length of data after the UDP header > + * @action: TCP action to be performed > + * @tcp_seq_num: TCP sequence number of this transmission > + * @tcp_ack_num: TCP stream acknolegement number > + * > + * Return: 0 on success, other value on failure > */ > int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > int payload_len, int proto, u8 action, u32 tcp_seq_num, > u32 tcp_ack_num); > +/** > + * net_send_tcp_packet() - Transmit TCP packet. > + * @payload_len: length of payload > + * @dport: Destination TCP port > + * @sport: Source TCP port > + * @action: TCP action to be performed > + * @tcp_seq_num: TCP sequence number of this transmission > + * @tcp_ack_num: TCP stream acknolegement number > + * > + * Return: 0 on success, other value on failure > + */ > +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, > + u32 tcp_seq_num, u32 tcp_ack_num); > int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, > int sport, int payload_len); > > diff --git a/include/net/tcp.h b/include/net/tcp.h > new file mode 100644 > index 0000000000..322551694f > --- /dev/null > +++ b/include/net/tcp.h > @@ -0,0 +1,299 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * TCP Support with SACK for file transfer. > + * > + * Copyright 2017 Duncan Hare, All rights reserved. > + */ > + > +#define TCP_ACTIVITY 127 /* Number of packets received */ > + /* before console progress mark */ > +/** > + * struct ip_tcp_hdr - IP and TCP header > + * @ip_hl_v: header length and version > + * @ip_tos: type of service > + * @ip_len: total length > + * @ip_id: identification > + * @ip_off: fragment offset field > + * @ip_ttl: time to live > + * @ip_p: protocol > + * @ip_sum: checksum > + * @ip_src: Source IP address > + * @ip_dst: Destination IP address > + * @tcp_src: TCP source port > + * @tcp_dst: TCP destination port > + * @tcp_seq: TCP sequence number > + * @tcp_ack: TCP Acknowledgment number > + * @tcp_hlen: 4 bits TCP header Length/4, 4 bits reserved, 2 more bits reserved > + * @tcp_flag: flags of TCP > + * @tcp_win: TCP windows size > + * @tcp_xsum: Checksum > + * @tcp_ugr: Pointer to urgent data > + */ > +struct ip_tcp_hdr { > + u8 ip_hl_v; > + u8 ip_tos; > + u16 ip_len; > + u16 ip_id; > + u16 ip_off; > + u8 ip_ttl; > + u8 ip_p; > + u16 ip_sum; > + struct in_addr ip_src; > + struct in_addr ip_dst; > + u16 tcp_src; > + u16 tcp_dst; > + u32 tcp_seq; > + u32 tcp_ack; > + u8 tcp_hlen; > + u8 tcp_flags; > + u16 tcp_win; > + u16 tcp_xsum; > + u16 tcp_ugr; > +} __packed; > + > +#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) > +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE) > + > +#define TCP_DATA 0x00 /* Data Packet - internal use only */ > +#define TCP_FIN 0x01 /* Finish flag */ > +#define TCP_SYN 0x02 /* Synch (start) flag */ > +#define TCP_RST 0x04 /* reset flag */ > +#define TCP_PUSH 0x08 /* Push - Notify app */ > +#define TCP_ACK 0x10 /* Acknowledgment of data received */ > +#define TCP_URG 0x20 /* Urgent */ > +#define TCP_ECE 0x40 /* Congestion control */ > +#define TCP_CWR 0x80 /* Congestion Control */ > + > +/* > + * TCP header options, Seq, MSS, and SACK > + */ > + > +#define TCP_SACK 32 /* Number of packets analyzed */ > + /* on leading edge of stream */ > + > +#define TCP_O_END 0x00 /* End of option list */ > +#define TCP_1_NOP 0x01 /* Single padding NOP */ > +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ > +#define TCP_O_MSS 0x02 /* MSS Size option */ > +#define TCP_O_SCL 0x03 /* Window Scale option */ > +#define TCP_P_SACK 0x04 /* SACK permitted */ > +#define TCP_V_SACK 0x05 /* SACK values */ > +#define TCP_O_TS 0x08 /* Timestamp option */ > +#define TCP_OPT_LEN_2 0x02 > +#define TCP_OPT_LEN_3 0x03 > +#define TCP_OPT_LEN_4 0x04 > +#define TCP_OPT_LEN_6 0x06 > +#define TCP_OPT_LEN_8 0x08 > +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */ > +#define TCP_MSS 1460 /* Max segment size */ > +#define TCP_SCALE 0x01 /* Scale */ > + > +/** > + * struct tcp_mss - TCP option structure for MSS (Max segment size) > + * @kind: Field ID > + * @len: Field length > + * @mss: Segment size value > + */ > +struct tcp_mss { > + u8 kind; > + u8 len; > + u16 mss; > +} __packed; > + > +/** > + * struct tcp_scale - TCP option structure for Windows scale > + * @kind: Field ID > + * @len: Field length > + * @scale: windows shift value used for networks with many hops. > + * Typically 4 or more hops > + */ > +struct tcp_scale { > + u8 kind; > + u8 len; > + u8 scale; > +} __packed; > + > +/** > + * struct tcp_sack_p - TCP option structure for SACK permitted > + * @kind: Field ID > + * @len: Field length > + */ > +struct tcp_sack_p { > + u8 kind; > + u8 len; > +} __packed; > + > +/** > + * struct sack_edges - structure for SACK edges > + * @l: Left edge of stream > + * @r: right edge of stream > + */ > +struct sack_edges { > + u32 l; > + u32 r; > +} __packed; > + > +#define TCP_SACK_SIZE (sizeof(struct sack_edges)) > + > +/* > + * A TCP stream has holes when packets are missing or disordered. > + * A hill is the inverse of a hole, and is data received. > + * TCP received hills (a sequence of data), and inferrs Holes > + * from the "hills" or packets received. > + */ > + > +#define TCP_SACK_HILLS 4 > + > +/** > + * struct tcp_sack_v - TCP option structure for SACK > + * @kind: Field ID > + * @len: Field length > + * @hill: L & R window edges > + */ > +struct tcp_sack_v { > + u8 kind; > + u8 len; > + struct sack_edges hill[TCP_SACK_HILLS]; > +} __packed; > + > +/** > + * struct tcp_t_opt - TCP option structure for time stamps > + * @kind: Field ID > + * @len: Field length > + * @t_snd: Sender timestamp > + * @t_rcv: Receiver timestamp > + */ > +struct tcp_t_opt { > + u8 kind; > + u8 len; > + u32 t_snd; > + u32 t_rcv; > +} __packed; > + > +#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt)) > + > +/* > + * ip tcp structure with options > + */ > + > +/** > + * struct ip_tcp_hdr_o - IP + TCP header + TCP options > + * @hdr: IP + TCP header > + * @mss: TCP MSS Option > + * @scale: TCP Windows Scale Option > + * @sack_p: TCP Sack-Permitted Option > + * @t_opt: TCP Timestamp Option > + * @end: end of options > + */ > +struct ip_tcp_hdr_o { > + struct ip_tcp_hdr hdr; > + struct tcp_mss mss; > + struct tcp_scale scale; > + struct tcp_sack_p sack_p; > + struct tcp_t_opt t_opt; > + u8 end; > +} __packed; > + > +#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o)) > + > +/** > + * struct ip_tcp_hdr_s - IP + TCP header + TCP options > + * @hdr: IP + TCP header > + * @t_opt: TCP Timestamp Option > + * @sack_v: TCP SACK Option > + * @end: end of options > + */ > +struct ip_tcp_hdr_s { > + struct ip_tcp_hdr hdr; > + struct tcp_t_opt t_opt; > + struct tcp_sack_v sack_v; > + u8 end; > +} __packed; > + > +#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s)) > + > +/* > + * TCP pseudo header definitions > + */ > +#define PSEUDO_PAD_SIZE 8 > + > +/** > + * struct pseudo_hdr - Pseudo Header > + * @padding: pseudo hdr size = ip_tcp hdr size > + * @p_src: Source IP address > + * @p_dst: Destination IP address > + * @rsvd: reserved > + * @p: protocol > + * @len: length of header > + */ > +struct pseudo_hdr { > + u8 padding[PSEUDO_PAD_SIZE]; > + struct in_addr p_src; > + struct in_addr p_dst; > + u8 rsvd; > + u8 p; > + u16 len; > +} __packed; > + > +#define PSEUDO_HDR_SIZE (sizeof(struct pseudo_hdr)) - PSEUDO_PAD_SIZE > + > +/** > + * union tcp_build_pkt - union for building TCP/IP packet. > + * @ph: pseudo header > + * @ip: IP and TCP header plus TCP options > + * @sack: IP and TCP header plus SACK options > + * @raw: buffer > + * > + * Build Pseudo header in packed buffer > + * first, calculate TCP checksum, then build IP header in packed buffer. > + * > + */ > +union tcp_build_pkt { > + struct pseudo_hdr ph; > + struct ip_tcp_hdr_o ip; > + struct ip_tcp_hdr_s sack; > + uchar raw[1600]; > +} __packed; > + > +/** > + * enum tcp_state - TCP State machine states for connection > + * @TCP_CLOSED: Need to send SYN to connect > + * @TCP_SYN_SENT: Trying to connect, waiting for SYN ACK > + * @TCP_ESTABLISHED: both server & client have a connection > + * @TCP_CLOSE_WAIT: Rec FIN, passed to app for FIN, ACK rsp > + * @TCP_CLOSING: Rec FIN, sent FIN, ACK waiting for ACK > + * @TCP_FIN_WAIT_1: Sent FIN waiting for response > + * @TCP_FIN_WAIT_2: Rec ACK from FIN sent, waiting for FIN > + */ > +enum tcp_state { > + TCP_CLOSED, > + TCP_SYN_SENT, > + TCP_ESTABLISHED, > + TCP_CLOSE_WAIT, > + TCP_CLOSING, > + TCP_FIN_WAIT_1, > + TCP_FIN_WAIT_2 > +}; > + > +enum tcp_state tcp_get_tcp_state(void); > +void tcp_set_tcp_state(enum tcp_state new_state); > +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, > + u8 action, u32 tcp_seq_num, u32 tcp_ack_num); > + > +/** > + * rxhand_tcp() - An incoming packet handler. > + * @pkt: pointer to the application packet > + * @dport: destination UDP port > + * @sip: source IP address > + * @sport: source UDP port > + * @len: packet length > + */ > +typedef void rxhand_tcp(uchar *pkt, unsigned int dport, > + struct in_addr sip, unsigned int sport, > + unsigned int len); > +void tcp_set_tcp_handler(rxhand_tcp *f); > + > +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len); > + > +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, > + int tcp_len, int pkt_len); > diff --git a/net/Kconfig b/net/Kconfig > index 52e261884d..cb600fe5eb 100644 > --- a/net/Kconfig > +++ b/net/Kconfig > @@ -174,6 +174,22 @@ config BOOTP_MAX_ROOT_PATH_LEN > help > Select maximal length of option 17 root path. > > +config PROT_TCP > + bool "TCP stack" > + help > + Enable a generic tcp framework that allows defining a custom > + handler for tcp protocol. > + > +config PROT_TCP_SACK > + bool "TCP SACK support" > + depends on PROT_TCP > + help > + TCP protocol with SACK. SACK means selective acknowledgements. > + By turning this option on TCP will learn what segments are already > + received. So that it improves TCP's retransmission efficiency. > + This option should be turn on if you want to achieve the fastest > + file transfer possible. > + > endif # if NET > > config SYS_RX_ETH_BUFFER > diff --git a/net/Makefile b/net/Makefile > index 6c812502d3..d131d1cb1a 100644 > --- a/net/Makefile > +++ b/net/Makefile > @@ -30,6 +30,7 @@ obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o > obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o > obj-$(CONFIG_CMD_WOL) += wol.o > obj-$(CONFIG_PROT_UDP) += udp.o > +obj-$(CONFIG_PROT_TCP) += tcp.o > > # Disable this warning as it is triggered by: > # sprintf(buf, index ? "foo%d" : "foo", index) > diff --git a/net/net.c b/net/net.c > index b27b021d07..7878a9970b 100644 > --- a/net/net.c > +++ b/net/net.c > @@ -117,6 +117,7 @@ > #if defined(CONFIG_CMD_WOL) > #include "wol.h" > #endif > +#include <net/tcp.h> > > /** BOOTP EXTENTIONS **/ > > @@ -387,6 +388,8 @@ int net_init(void) > > /* Only need to setup buffer pointers once. */ > first_call = 0; > + if (IS_ENABLED(CONFIG_PROT_TCP)) > + tcp_set_tcp_state(TCP_CLOSED); > } > > return net_init_loop(); > @@ -833,6 +836,16 @@ int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, > IPPROTO_UDP, 0, 0, 0); > } > > +#if defined(CONFIG_PROT_TCP) > +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, > + u32 tcp_seq_num, u32 tcp_ack_num) > +{ > + return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport, > + sport, payload_len, IPPROTO_TCP, action, > + tcp_seq_num, tcp_ack_num); > +} > +#endif > + > int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > int payload_len, int proto, u8 action, u32 tcp_seq_num, > u32 tcp_ack_num) > @@ -864,6 +877,14 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > payload_len); > pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; > break; > +#if defined(CONFIG_PROT_TCP) > + case IPPROTO_TCP: > + pkt_hdr_size = eth_hdr_size > + + tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport, > + payload_len, action, tcp_seq_num, > + tcp_ack_num); > + break; > +#endif > default: > return -EINVAL; > } > @@ -1273,6 +1294,15 @@ void net_process_received_packet(uchar *in_packet, int len) > if (ip->ip_p == IPPROTO_ICMP) { > receive_icmp(ip, len, src_ip, et); > return; > +#if defined(CONFIG_PROT_TCP) > + } else if (ip->ip_p == IPPROTO_TCP) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP PH (to=%pI4, from=%pI4, len=%d)\n", > + &dst_ip, &src_ip, len); > + > + rxhand_tcp_f((union tcp_build_pkt *)ip, len); > + return; > +#endif > } else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ > return; > } > diff --git a/net/tcp.c b/net/tcp.c > new file mode 100644 > index 0000000000..8d338c72e8 > --- /dev/null > +++ b/net/tcp.c > @@ -0,0 +1,720 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright 2017 Duncan Hare, all rights reserved. > + */ > + > +/* > + * General Desription: > + * > + * TCP support for the wget command, for fast file downloading. > + * > + * HTTP/TCP Receiver: > + * > + * Prerequisites: - own ethernet address > + * - own IP address > + * - Server IP address > + * - Server with TCP > + * - TCP application (eg wget) > + * Next Step HTTPS? > + */ > +#include <common.h> > +#include <command.h> > +#include <console.h> > +#include <env_internal.h> > +#include <errno.h> > +#include <net.h> > +#include <net/tcp.h> > + > +/* > + * TCP sliding window control used by us to request re-TX > + */ > +static struct tcp_sack_v tcp_lost; > + > +/* TCP option timestamp */ > +static u32 loc_timestamp; > +static u32 rmt_timestamp; > + > +static u32 tcp_seq_init; > +static u32 tcp_ack_edge; > +static u32 tcp_seq_max; > + > +static int tcp_activity_count; > + > +/* > + * Search for TCP_SACK and review the comments before the code section > + * TCP_SACK is the number of packets at the front of the stream > + */ > + > +enum pkt_state {PKT, NOPKT}; > +struct sack_r { > + struct sack_edges se; > + enum pkt_state st; > +}; > + > +static struct sack_r edge_a[TCP_SACK]; > +static unsigned int sack_idx; > +static unsigned int prev_len; > + > +/* > + * TCP lengths are stored as a rounded up number of 32 bit words. > + * Add 3 to length round up, rounded, then divided into the > + * length in 32 bit words. > + */ > +#define LEN_B_TO_DW(x) ((x) >> 2) > +#define ROUND_TCPHDR_LEN(x) (LEN_B_TO_DW((x) + 3)) > +#define SHIFT_TO_TCPHDRLEN_FIELD(x) ((x) << 4) > +#define GET_TCP_HDR_LEN_IN_BYTES(x) ((x) >> 2) > + > +/* TCP connection state */ > +static enum tcp_state current_tcp_state; > + > +/* Current TCP RX packet handler */ > +static rxhand_tcp *tcp_packet_handler; > + > +/** > + * tcp_get_tcp_state() - get current TCP state > + * > + * Return: Current TCP state > + */ > +enum tcp_state tcp_get_tcp_state(void) > +{ > + return current_tcp_state; > +} > + > +/** > + * tcp_set_tcp_state() - set current TCP state > + * @new_state: new TCP state > + */ > +void tcp_set_tcp_state(enum tcp_state new_state) > +{ > + current_tcp_state = new_state; > +} > + > +static void dummy_handler(uchar *pkt, unsigned int dport, > + struct in_addr sip, unsigned int sport, > + unsigned int len) > +{ > +} > + > +/** > + * tcp_set_tcp_handler() - set a handler to receive data > + * @f: handler > + */ > +void tcp_set_tcp_handler(rxhand_tcp *f) > +{ > + debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); > + if (!f) > + tcp_packet_handler = dummy_handler; > + else > + tcp_packet_handler = f; > +} > + > +/** > + * tcp_set_pseudo_header() - set TCP pseudo header > + * @pkt: the packet > + * @src: source IP address > + * @dest: destinaion IP address > + * @tcp_len: tcp length > + * @pkt_len: packet length > + * > + * Return: the checksum of the packet > + */ > +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, > + int tcp_len, int pkt_len) > +{ > + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; > + int checksum_len; > + > + /* > + * Pseudo header > + * > + * Zero the byte after the last byte so that the header checksum > + * will always work. > + */ > + pkt[pkt_len] = 0; > + > + net_copy_ip((void *)&b->ph.p_src, &src); > + net_copy_ip((void *)&b->ph.p_dst, &dest); > + b->ph.rsvd = 0; > + b->ph.p = IPPROTO_TCP; > + b->ph.len = htons(tcp_len); > + checksum_len = tcp_len + PSEUDO_HDR_SIZE; > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n", > + &b->ph.p_dst, &b->ph.p_src, checksum_len); > + > + return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len); > +} > + > +/** > + * net_set_ack_options() - set TCP options in acknowledge packets > + * @b: the packet > + * > + * Return: TCP header length > + */ > +int net_set_ack_options(union tcp_build_pkt *b) > +{ > + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); > + > + b->sack.t_opt.kind = TCP_O_TS; > + b->sack.t_opt.len = TCP_OPT_LEN_A; > + b->sack.t_opt.t_snd = htons(loc_timestamp); > + b->sack.t_opt.t_rcv = rmt_timestamp; > + b->sack.sack_v.kind = TCP_1_NOP; > + b->sack.sack_v.len = 0; > + > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { > + if (tcp_lost.len > TCP_OPT_LEN_2) { > + debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", > + tcp_lost.len); > + b->sack.sack_v.len = tcp_lost.len; > + b->sack.sack_v.kind = TCP_V_SACK; > + b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); > + b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); > + > + /* > + * These SACK structures are initialized with NOPs to > + * provide TCP header alignment padding. There are 4 > + * SACK structures used for both header padding and > + * internally. > + */ > + b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); > + b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); > + b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); > + b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); > + b->sack.sack_v.hill[3].l = TCP_O_NOP; > + b->sack.sack_v.hill[3].r = TCP_O_NOP; > + } > + > + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + > + TCP_TSOPT_SIZE + > + tcp_lost.len)); > + } else { > + b->sack.sack_v.kind = 0; > + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + > + TCP_TSOPT_SIZE)); > + } > + > + /* > + * This returns the actual rounded up length of the > + * TCP header to add to the total packet length > + */ > + > + return GET_TCP_HDR_LEN_IN_BYTES(b->sack.hdr.tcp_hlen); > +} > + > +/** > + * net_set_ack_options() - set TCP options in SYN packets > + * @b: the packet > + */ > +void net_set_syn_options(union tcp_build_pkt *b) > +{ > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) > + tcp_lost.len = 0; > + > + b->ip.hdr.tcp_hlen = 0xa0; > + > + b->ip.mss.kind = TCP_O_MSS; > + b->ip.mss.len = TCP_OPT_LEN_4; > + b->ip.mss.mss = htons(TCP_MSS); > + b->ip.scale.kind = TCP_O_SCL; > + b->ip.scale.scale = TCP_SCALE; > + b->ip.scale.len = TCP_OPT_LEN_3; > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { > + b->ip.sack_p.kind = TCP_P_SACK; > + b->ip.sack_p.len = TCP_OPT_LEN_2; > + } else { > + b->ip.sack_p.kind = TCP_1_NOP; > + b->ip.sack_p.len = TCP_1_NOP; > + } > + b->ip.t_opt.kind = TCP_O_TS; > + b->ip.t_opt.len = TCP_OPT_LEN_A; > + loc_timestamp = get_ticks(); > + rmt_timestamp = 0; > + b->ip.t_opt.t_snd = 0; > + b->ip.t_opt.t_rcv = 0; > + b->ip.end = TCP_O_END; > +} > + > +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, > + u8 action, u32 tcp_seq_num, u32 tcp_ack_num) > +{ > + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; > + int pkt_hdr_len; > + int pkt_len; > + int tcp_len; > + > + /* > + * Header: 5 32 bit words. 4 bits TCP header Length, > + * 4 bits reserved options > + */ > + b->ip.hdr.tcp_flags = action; > + pkt_hdr_len = IP_TCP_HDR_SIZE; > + b->ip.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); > + > + switch (action) { > + case TCP_SYN: > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_num); > + tcp_activity_count = 0; > + net_set_syn_options(b); > + tcp_seq_num = 0; > + tcp_ack_num = 0; > + pkt_hdr_len = IP_TCP_O_SIZE; > + if (current_tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ > + action = TCP_FIN; > + current_tcp_state = TCP_FIN_WAIT_1; > + } else { > + current_tcp_state = TCP_SYN_SENT; > + } > + break; > + case TCP_ACK: > + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); > + b->ip.hdr.tcp_flags = action; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, > + action); > + break; > + case TCP_FIN: > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n", > + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); > + payload_len = 0; > + pkt_hdr_len = IP_TCP_HDR_SIZE; > + current_tcp_state = TCP_FIN_WAIT_1; > + break; > + > + /* Notify connection closing */ > + > + case (TCP_FIN | TCP_ACK): > + case (TCP_FIN | TCP_ACK | TCP_PUSH): > + if (current_tcp_state == TCP_CLOSE_WAIT) > + current_tcp_state = TCP_CLOSING; > + > + tcp_ack_edge++; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_edge, action); > + fallthrough; > + default: > + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); > + b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_num, action); > + } > + > + pkt_len = pkt_hdr_len + payload_len; > + tcp_len = pkt_len - IP_HDR_SIZE; > + > + /* TCP Header */ > + b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); > + b->ip.hdr.tcp_src = htons(sport); > + b->ip.hdr.tcp_dst = htons(dport); > + b->ip.hdr.tcp_seq = htonl(tcp_seq_num); > + tcp_seq_num = tcp_seq_num + payload_len; > + > + /* > + * TCP window size - TCP header variable tcp_win. > + * Change tcp_win only if you have an understanding of network > + * overrun, congestion, TCP segment sizes, TCP windows, TCP scale, > + * queuing theory and packet buffering. If there are too few buffers, > + * there will be data loss, recovery may work or the sending TCP, > + * the server, could abort the stream transmission. > + * MSS is governed by maximum Ethernet frame length. > + * The number of buffers is governed by the desire to have a queue of > + * full buffers to be processed at the destination to maximize > + * throughput. Temporary memory use for the boot phase on modern > + * SOCs is may not be considered a constraint to buffer space, if > + * it is, then the u-boot tftp or nfs kernel netboot should be > + * considered. > + */ > + b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); > + > + b->ip.hdr.tcp_xsum = 0; > + b->ip.hdr.tcp_ugr = 0; > + > + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, > + tcp_len, pkt_len); > + > + net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, > + pkt_len, IPPROTO_TCP); > + > + return pkt_hdr_len; > +} > + > +/** > + * tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer) > + * @tcp_seq_num: TCP sequence start number > + * @len: the length of sequence numbers > + * @tcp_seq_max: maximum of sequence numbers > + */ > +void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) > +{ > + u32 idx_sack, sack_in; > + u32 sack_end = TCP_SACK - 1; > + u32 hill = 0; > + enum pkt_state expect = PKT; > + u32 seq = tcp_seq_num - tcp_seq_init; > + u32 hol_l = tcp_ack_edge - tcp_seq_init; > + u32 hol_r = 0; > + > + /* Place new seq number in correct place in receive array */ > + if (prev_len == 0) > + prev_len = len; > + > + idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); > + if (idx_sack < TCP_SACK) { > + edge_a[idx_sack].se.l = tcp_seq_num; > + edge_a[idx_sack].se.r = tcp_seq_num + len; > + edge_a[idx_sack].st = PKT; > + > + /* > + * The fin (last) packet is not the same length as data > + * packets, and if it's length is recorded and used for > + * array index calculation, calculation breaks. > + */ > + if (prev_len < len) > + prev_len = len; > + } > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", > + seq, hol_l, len, sack_idx, sack_end); > + > + /* Right edge of contiguous stream, is the left edge of first hill */ > + hol_l = tcp_seq_num - tcp_seq_init; > + hol_r = hol_l + len; > + > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) > + tcp_lost.len = TCP_OPT_LEN_2; > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", > + idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); > + > + for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; > + sack_in++) { > + switch (expect) { > + case NOPKT: > + switch (edge_a[sack_in].st) { > + case NOPKT: > + debug_cond(DEBUG_INT_STATE, "N"); > + break; > + case PKT: > + debug_cond(DEBUG_INT_STATE, "n"); > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { > + tcp_lost.hill[hill].l = > + edge_a[sack_in].se.l; > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + } > + expect = PKT; > + break; > + } > + break; > + case PKT: > + switch (edge_a[sack_in].st) { > + case NOPKT: > + debug_cond(DEBUG_INT_STATE, "p"); > + if (sack_in > sack_idx && > + hill < TCP_SACK_HILLS) { > + hill++; > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) > + tcp_lost.len += TCP_OPT_LEN_8; > + } > + expect = NOPKT; > + break; > + case PKT: > + debug_cond(DEBUG_INT_STATE, "P"); > + > + if (tcp_ack_edge == edge_a[sack_in].se.l) { > + tcp_ack_edge = edge_a[sack_in].se.r; > + edge_a[sack_in].st = NOPKT; > + sack_idx++; > + } else { > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && > + hill < TCP_SACK_HILLS) > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && > + sack_in == sack_end - 1) > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + } > + break; > + } > + break; > + } > + } > + debug_cond(DEBUG_INT_STATE, "\n"); > + if (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || tcp_lost.len <= TCP_OPT_LEN_2) > + sack_idx = 0; > +} > + > +/** > + * tcp_parse_options() - parsing TCP options > + * @o: pointer to the option field. > + * @o_len: length of the option field. > + */ > +void tcp_parse_options(uchar *o, int o_len) > +{ > + struct tcp_t_opt *tsopt; > + uchar *p = o; > + > + /* > + * NOPs are options with a zero length, and thus are special. > + * All other options have length fields. > + */ > + for (p = o; p < (o + o_len); p = p + p[1]) { > + if (!p[1]) > + return; /* Finished processing options */ > + > + switch (p[0]) { > + case TCP_O_END: > + return; > + case TCP_O_MSS: > + case TCP_O_SCL: > + case TCP_P_SACK: > + case TCP_V_SACK: > + break; > + case TCP_O_TS: > + tsopt = (struct tcp_t_opt *)p; > + rmt_timestamp = tsopt->t_snd; > + return; > + } > + > + /* Process optional NOPs */ > + if (p[0] == TCP_O_NOP) > + p++; > + } > +} > + > +static u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) > +{ > + u8 tcp_fin = tcp_flags & TCP_FIN; > + u8 tcp_syn = tcp_flags & TCP_SYN; > + u8 tcp_rst = tcp_flags & TCP_RST; > + u8 tcp_push = tcp_flags & TCP_PUSH; > + u8 tcp_ack = tcp_flags & TCP_ACK; > + u8 action = TCP_DATA; > + int i; > + > + /* > + * tcp_flags are examined to determine TX action in a given state > + * tcp_push is interpreted to mean "inform the app" > + * urg, ece, cer and nonce flags are not supported. > + * > + * exe and crw are use to signal and confirm knowledge of congestion. > + * This TCP only sends a file request and acks. If it generates > + * congestion, the network is broken. > + */ > + debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); > + if (tcp_rst) { > + action = TCP_DATA; > + current_tcp_state = TCP_CLOSED; > + net_set_state(NETLOOP_FAIL); > + debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); > + return TCP_RST; > + } > + > + switch (current_tcp_state) { > + case TCP_CLOSED: > + debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); > + if (tcp_ack) > + action = TCP_DATA; > + else if (tcp_syn) > + action = TCP_RST; > + else if (tcp_fin) > + action = TCP_DATA; > + break; > + case TCP_SYN_SENT: > + debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n", > + tcp_flags, *tcp_seq_num); > + if (tcp_fin) { > + action = action | TCP_PUSH; > + current_tcp_state = TCP_CLOSE_WAIT; > + } > + if (tcp_syn) { > + action = action | TCP_ACK | TCP_PUSH; > + if (tcp_ack) { > + tcp_seq_init = *tcp_seq_num; > + *tcp_seq_num = *tcp_seq_num + 1; > + tcp_seq_max = *tcp_seq_num; > + tcp_ack_edge = *tcp_seq_num; > + sack_idx = 0; > + edge_a[sack_idx].se.l = *tcp_seq_num; > + edge_a[sack_idx].se.r = *tcp_seq_num; > + prev_len = 0; > + current_tcp_state = TCP_ESTABLISHED; > + for (i = 0; i < TCP_SACK; i++) > + edge_a[i].st = NOPKT; > + } > + } else if (tcp_ack) { > + action = TCP_DATA; > + } > + > + break; > + case TCP_ESTABLISHED: > + debug_cond(DEBUG_INT_STATE, "TCP_ESTABLISHED %x\n", tcp_flags); > + if (*tcp_seq_num > tcp_seq_max) > + tcp_seq_max = *tcp_seq_num; > + if (payload_len > 0) { > + tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max); > + tcp_fin = TCP_DATA; /* cause standalone FIN */ > + } > + > + if ((tcp_fin) && > + (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || > + tcp_lost.len <= TCP_OPT_LEN_2)) { > + action = action | TCP_FIN | TCP_PUSH | TCP_ACK; > + current_tcp_state = TCP_CLOSE_WAIT; > + } else if (tcp_ack) { > + action = TCP_DATA; > + } > + > + if (tcp_syn) > + action = TCP_ACK + TCP_RST; > + else if (tcp_push) > + action = action | TCP_PUSH; > + break; > + case TCP_CLOSE_WAIT: > + debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); > + action = TCP_DATA; > + break; > + case TCP_FIN_WAIT_2: > + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); > + if (tcp_ack) { > + action = TCP_PUSH | TCP_ACK; > + current_tcp_state = TCP_CLOSED; > + puts("\n"); > + } else if (tcp_syn) { > + action = TCP_DATA; > + } else if (tcp_fin) { > + action = TCP_DATA; > + } > + break; > + case TCP_FIN_WAIT_1: > + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); > + if (tcp_fin) { > + action = TCP_ACK | TCP_FIN; > + current_tcp_state = TCP_FIN_WAIT_2; > + } > + if (tcp_syn) > + action = TCP_RST; > + if (tcp_ack) { > + current_tcp_state = TCP_CLOSED; > + tcp_seq_num = tcp_seq_num + 1; > + } > + break; > + case TCP_CLOSING: > + debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); > + if (tcp_ack) { > + action = TCP_PUSH; > + current_tcp_state = TCP_CLOSED; > + puts("\n"); > + } else if (tcp_syn) { > + action = TCP_RST; > + } else if (tcp_fin) { > + action = TCP_DATA; > + } > + break; > + } > + return action; > +} > + > +/** > + * rxhand_tcp_f() - process receiving data and call data handler. > + * @b: the packet > + * @pkt_len: the length of packet. > + */ > +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) > +{ > + int tcp_len = pkt_len - IP_HDR_SIZE; > + u16 tcp_rx_xsum = b->ip.hdr.ip_sum; > + u8 tcp_action = TCP_DATA; > + u32 tcp_seq_num, tcp_ack_num; > + struct in_addr action_and_state; > + int tcp_hdr_len, payload_len; > + > + /* Verify IP header */ > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", > + &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); > + > + b->ip.hdr.ip_src = net_server_ip; > + b->ip.hdr.ip_dst = net_ip; > + b->ip.hdr.ip_sum = 0; > + if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", > + &net_ip, &net_server_ip, pkt_len); > + return; > + } > + > + /* Build pseudo header and verify TCP header */ > + tcp_rx_xsum = b->ip.hdr.tcp_xsum; > + b->ip.hdr.tcp_xsum = 0; > + if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src, > + b->ip.hdr.ip_dst, tcp_len, > + pkt_len)) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", > + &net_ip, &net_server_ip, tcp_len); > + return; > + } > + > + tcp_hdr_len = GET_TCP_HDR_LEN_IN_BYTES(b->ip.hdr.tcp_hlen); > + payload_len = tcp_len - tcp_hdr_len; > + > + if (tcp_hdr_len > TCP_HDR_SIZE) > + tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, > + tcp_hdr_len - TCP_HDR_SIZE); > + /* > + * Incoming sequence and ack numbers are server's view of the numbers. > + * The app must swap the numbers when responding. > + */ > + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); > + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); > + > + /* Packets are not ordered. Send to app as received. */ > + tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, > + &tcp_seq_num, payload_len); > + > + tcp_activity_count++; > + if (tcp_activity_count > TCP_ACTIVITY) { > + puts("| "); > + tcp_activity_count = 0; > + } > + > + if ((tcp_action & TCP_PUSH) || payload_len > 0) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n", > + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); > + > + action_and_state.s_addr = tcp_action; > + (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, > + tcp_seq_num, action_and_state, > + tcp_ack_num, payload_len); > + > + } else if (tcp_action != TCP_DATA) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n", > + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); > + > + /* > + * Warning: Incoming Ack & Seq sequence numbers are transposed > + * here to outgoing Seq & Ack sequence numbers > + */ > + net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), > + ntohs(b->ip.hdr.tcp_dst), > + (tcp_action & (~TCP_PUSH)), > + tcp_seq_num, tcp_ack_num); > + } > +} > -- > 2.35.1 > Reviewed-by: Ramon Fried <rfried.dev@gmail.com>
On Tue, Nov 08, 2022 at 02:17:28PM +0800, Ying-Chun Liu (PaulLiu) wrote: > From: "Ying-Chun Liu (PaulLiu)" <paul.liu@linaro.org> > > Currently file transfers are done using tftp or NFS both > over udp. This requires a request to be sent from client > (u-boot) to the boot server. > > The current standard is TCP with selective acknowledgment. > > Signed-off-by: Duncan Hare <DH@Synoia.com> > Signed-off-by: Duncan Hare <DuncanCHare@yahoo.com> > Signed-off-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org> > Reviewed-by: Simon Glass <sjg@chromium.org> > Cc: Christian Gmeiner <christian.gmeiner@gmail.com> > Cc: Joe Hershberger <joe.hershberger@ni.com> > Cc: Michal Simek <michal.simek@xilinx.com> > Cc: Ramon Fried <rfried.dev@gmail.com> > Reviewed-by: Ramon Fried <rfried.dev@gmail.com> Applied to u-boot/master, thanks!
On 28.11.2022 20:52, Tom Rini wrote: > On Tue, Nov 08, 2022 at 02:17:28PM +0800, Ying-Chun Liu (PaulLiu) wrote: > >> From: "Ying-Chun Liu (PaulLiu)" <paul.liu@linaro.org> >> >> Currently file transfers are done using tftp or NFS both >> over udp. This requires a request to be sent from client >> (u-boot) to the boot server. >> >> The current standard is TCP with selective acknowledgment. >> >> Signed-off-by: Duncan Hare <DH@Synoia.com> >> Signed-off-by: Duncan Hare <DuncanCHare@yahoo.com> >> Signed-off-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org> >> Reviewed-by: Simon Glass <sjg@chromium.org> >> Cc: Christian Gmeiner <christian.gmeiner@gmail.com> >> Cc: Joe Hershberger <joe.hershberger@ni.com> >> Cc: Michal Simek <michal.simek@xilinx.com> >> Cc: Ramon Fried <rfried.dev@gmail.com> >> Reviewed-by: Ramon Fried <rfried.dev@gmail.com> > > Applied to u-boot/master, thanks! Thanks for your work on this feature and congratulations on getting it through the review process! It's really nice to see official TCP support.
Thank you all for taking this over the finish line. I Duncan Hare 714 931 7952DRE# 01350926 On Monday, November 28, 2022 at 11:52:23 AM PST, Tom Rini <trini@konsulko.com> wrote: On Tue, Nov 08, 2022 at 02:17:28PM +0800, Ying-Chun Liu (PaulLiu) wrote: > From: "Ying-Chun Liu (PaulLiu)" <paul.liu@linaro.org> > > Currently file transfers are done using tftp or NFS both > over udp. This requires a request to be sent from client > (u-boot) to the boot server. > > The current standard is TCP with selective acknowledgment. > > Signed-off-by: Duncan Hare <DH@Synoia.com> > Signed-off-by: Duncan Hare <DuncanCHare@yahoo.com> > Signed-off-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org> > Reviewed-by: Simon Glass <sjg@chromium.org> > Cc: Christian Gmeiner <christian.gmeiner@gmail.com> > Cc: Joe Hershberger <joe.hershberger@ni.com> > Cc: Michal Simek <michal.simek@xilinx.com> > Cc: Ramon Fried <rfried.dev@gmail.com> > Reviewed-by: Ramon Fried <rfried.dev@gmail.com> Applied to u-boot/master, thanks!
diff --git a/include/net.h b/include/net.h index 32364ed0ce..f4140523c2 100644 --- a/include/net.h +++ b/include/net.h @@ -365,6 +365,7 @@ struct vlan_ethernet_hdr { #define PROT_NCSI 0x88f8 /* NC-SI control packets */ #define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ #define IPPROTO_UDP 17 /* User Datagram Protocol */ /* @@ -690,19 +691,36 @@ static inline void net_send_packet(uchar *pkt, int len) (void) eth_send(pkt, len); } -/* - * Transmit "net_tx_packet" as UDP packet, performing ARP request if needed - * (ether will be populated) - * - * @param ether Raw packet buffer - * @param dest IP address to send the datagram to - * @param dport Destination UDP port - * @param sport Source UDP port - * @param payload_len Length of data after the UDP header +/** + * net_send_ip_packet() - Transmit "net_tx_packet" as UDP or TCP packet, + * send ARP request if needed (ether will be populated) + * @ether: Raw packet buffer + * @dest: IP address to send the datagram to + * @dport: Destination UDP port + * @sport: Source UDP port + * @payload_len: Length of data after the UDP header + * @action: TCP action to be performed + * @tcp_seq_num: TCP sequence number of this transmission + * @tcp_ack_num: TCP stream acknolegement number + * + * Return: 0 on success, other value on failure */ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num); +/** + * net_send_tcp_packet() - Transmit TCP packet. + * @payload_len: length of payload + * @dport: Destination TCP port + * @sport: Source TCP port + * @action: TCP action to be performed + * @tcp_seq_num: TCP sequence number of this transmission + * @tcp_ack_num: TCP stream acknolegement number + * + * Return: 0 on success, other value on failure + */ +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num); int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len); diff --git a/include/net/tcp.h b/include/net/tcp.h new file mode 100644 index 0000000000..322551694f --- /dev/null +++ b/include/net/tcp.h @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP Support with SACK for file transfer. + * + * Copyright 2017 Duncan Hare, All rights reserved. + */ + +#define TCP_ACTIVITY 127 /* Number of packets received */ + /* before console progress mark */ +/** + * struct ip_tcp_hdr - IP and TCP header + * @ip_hl_v: header length and version + * @ip_tos: type of service + * @ip_len: total length + * @ip_id: identification + * @ip_off: fragment offset field + * @ip_ttl: time to live + * @ip_p: protocol + * @ip_sum: checksum + * @ip_src: Source IP address + * @ip_dst: Destination IP address + * @tcp_src: TCP source port + * @tcp_dst: TCP destination port + * @tcp_seq: TCP sequence number + * @tcp_ack: TCP Acknowledgment number + * @tcp_hlen: 4 bits TCP header Length/4, 4 bits reserved, 2 more bits reserved + * @tcp_flag: flags of TCP + * @tcp_win: TCP windows size + * @tcp_xsum: Checksum + * @tcp_ugr: Pointer to urgent data + */ +struct ip_tcp_hdr { + u8 ip_hl_v; + u8 ip_tos; + u16 ip_len; + u16 ip_id; + u16 ip_off; + u8 ip_ttl; + u8 ip_p; + u16 ip_sum; + struct in_addr ip_src; + struct in_addr ip_dst; + u16 tcp_src; + u16 tcp_dst; + u32 tcp_seq; + u32 tcp_ack; + u8 tcp_hlen; + u8 tcp_flags; + u16 tcp_win; + u16 tcp_xsum; + u16 tcp_ugr; +} __packed; + +#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE) + +#define TCP_DATA 0x00 /* Data Packet - internal use only */ +#define TCP_FIN 0x01 /* Finish flag */ +#define TCP_SYN 0x02 /* Synch (start) flag */ +#define TCP_RST 0x04 /* reset flag */ +#define TCP_PUSH 0x08 /* Push - Notify app */ +#define TCP_ACK 0x10 /* Acknowledgment of data received */ +#define TCP_URG 0x20 /* Urgent */ +#define TCP_ECE 0x40 /* Congestion control */ +#define TCP_CWR 0x80 /* Congestion Control */ + +/* + * TCP header options, Seq, MSS, and SACK + */ + +#define TCP_SACK 32 /* Number of packets analyzed */ + /* on leading edge of stream */ + +#define TCP_O_END 0x00 /* End of option list */ +#define TCP_1_NOP 0x01 /* Single padding NOP */ +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ +#define TCP_O_MSS 0x02 /* MSS Size option */ +#define TCP_O_SCL 0x03 /* Window Scale option */ +#define TCP_P_SACK 0x04 /* SACK permitted */ +#define TCP_V_SACK 0x05 /* SACK values */ +#define TCP_O_TS 0x08 /* Timestamp option */ +#define TCP_OPT_LEN_2 0x02 +#define TCP_OPT_LEN_3 0x03 +#define TCP_OPT_LEN_4 0x04 +#define TCP_OPT_LEN_6 0x06 +#define TCP_OPT_LEN_8 0x08 +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */ +#define TCP_MSS 1460 /* Max segment size */ +#define TCP_SCALE 0x01 /* Scale */ + +/** + * struct tcp_mss - TCP option structure for MSS (Max segment size) + * @kind: Field ID + * @len: Field length + * @mss: Segment size value + */ +struct tcp_mss { + u8 kind; + u8 len; + u16 mss; +} __packed; + +/** + * struct tcp_scale - TCP option structure for Windows scale + * @kind: Field ID + * @len: Field length + * @scale: windows shift value used for networks with many hops. + * Typically 4 or more hops + */ +struct tcp_scale { + u8 kind; + u8 len; + u8 scale; +} __packed; + +/** + * struct tcp_sack_p - TCP option structure for SACK permitted + * @kind: Field ID + * @len: Field length + */ +struct tcp_sack_p { + u8 kind; + u8 len; +} __packed; + +/** + * struct sack_edges - structure for SACK edges + * @l: Left edge of stream + * @r: right edge of stream + */ +struct sack_edges { + u32 l; + u32 r; +} __packed; + +#define TCP_SACK_SIZE (sizeof(struct sack_edges)) + +/* + * A TCP stream has holes when packets are missing or disordered. + * A hill is the inverse of a hole, and is data received. + * TCP received hills (a sequence of data), and inferrs Holes + * from the "hills" or packets received. + */ + +#define TCP_SACK_HILLS 4 + +/** + * struct tcp_sack_v - TCP option structure for SACK + * @kind: Field ID + * @len: Field length + * @hill: L & R window edges + */ +struct tcp_sack_v { + u8 kind; + u8 len; + struct sack_edges hill[TCP_SACK_HILLS]; +} __packed; + +/** + * struct tcp_t_opt - TCP option structure for time stamps + * @kind: Field ID + * @len: Field length + * @t_snd: Sender timestamp + * @t_rcv: Receiver timestamp + */ +struct tcp_t_opt { + u8 kind; + u8 len; + u32 t_snd; + u32 t_rcv; +} __packed; + +#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt)) + +/* + * ip tcp structure with options + */ + +/** + * struct ip_tcp_hdr_o - IP + TCP header + TCP options + * @hdr: IP + TCP header + * @mss: TCP MSS Option + * @scale: TCP Windows Scale Option + * @sack_p: TCP Sack-Permitted Option + * @t_opt: TCP Timestamp Option + * @end: end of options + */ +struct ip_tcp_hdr_o { + struct ip_tcp_hdr hdr; + struct tcp_mss mss; + struct tcp_scale scale; + struct tcp_sack_p sack_p; + struct tcp_t_opt t_opt; + u8 end; +} __packed; + +#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o)) + +/** + * struct ip_tcp_hdr_s - IP + TCP header + TCP options + * @hdr: IP + TCP header + * @t_opt: TCP Timestamp Option + * @sack_v: TCP SACK Option + * @end: end of options + */ +struct ip_tcp_hdr_s { + struct ip_tcp_hdr hdr; + struct tcp_t_opt t_opt; + struct tcp_sack_v sack_v; + u8 end; +} __packed; + +#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s)) + +/* + * TCP pseudo header definitions + */ +#define PSEUDO_PAD_SIZE 8 + +/** + * struct pseudo_hdr - Pseudo Header + * @padding: pseudo hdr size = ip_tcp hdr size + * @p_src: Source IP address + * @p_dst: Destination IP address + * @rsvd: reserved + * @p: protocol + * @len: length of header + */ +struct pseudo_hdr { + u8 padding[PSEUDO_PAD_SIZE]; + struct in_addr p_src; + struct in_addr p_dst; + u8 rsvd; + u8 p; + u16 len; +} __packed; + +#define PSEUDO_HDR_SIZE (sizeof(struct pseudo_hdr)) - PSEUDO_PAD_SIZE + +/** + * union tcp_build_pkt - union for building TCP/IP packet. + * @ph: pseudo header + * @ip: IP and TCP header plus TCP options + * @sack: IP and TCP header plus SACK options + * @raw: buffer + * + * Build Pseudo header in packed buffer + * first, calculate TCP checksum, then build IP header in packed buffer. + * + */ +union tcp_build_pkt { + struct pseudo_hdr ph; + struct ip_tcp_hdr_o ip; + struct ip_tcp_hdr_s sack; + uchar raw[1600]; +} __packed; + +/** + * enum tcp_state - TCP State machine states for connection + * @TCP_CLOSED: Need to send SYN to connect + * @TCP_SYN_SENT: Trying to connect, waiting for SYN ACK + * @TCP_ESTABLISHED: both server & client have a connection + * @TCP_CLOSE_WAIT: Rec FIN, passed to app for FIN, ACK rsp + * @TCP_CLOSING: Rec FIN, sent FIN, ACK waiting for ACK + * @TCP_FIN_WAIT_1: Sent FIN waiting for response + * @TCP_FIN_WAIT_2: Rec ACK from FIN sent, waiting for FIN + */ +enum tcp_state { + TCP_CLOSED, + TCP_SYN_SENT, + TCP_ESTABLISHED, + TCP_CLOSE_WAIT, + TCP_CLOSING, + TCP_FIN_WAIT_1, + TCP_FIN_WAIT_2 +}; + +enum tcp_state tcp_get_tcp_state(void); +void tcp_set_tcp_state(enum tcp_state new_state); +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num); + +/** + * rxhand_tcp() - An incoming packet handler. + * @pkt: pointer to the application packet + * @dport: destination UDP port + * @sip: source IP address + * @sport: source UDP port + * @len: packet length + */ +typedef void rxhand_tcp(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len); +void tcp_set_tcp_handler(rxhand_tcp *f); + +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len); + +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, + int tcp_len, int pkt_len); diff --git a/net/Kconfig b/net/Kconfig index 52e261884d..cb600fe5eb 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -174,6 +174,22 @@ config BOOTP_MAX_ROOT_PATH_LEN help Select maximal length of option 17 root path. +config PROT_TCP + bool "TCP stack" + help + Enable a generic tcp framework that allows defining a custom + handler for tcp protocol. + +config PROT_TCP_SACK + bool "TCP SACK support" + depends on PROT_TCP + help + TCP protocol with SACK. SACK means selective acknowledgements. + By turning this option on TCP will learn what segments are already + received. So that it improves TCP's retransmission efficiency. + This option should be turn on if you want to achieve the fastest + file transfer possible. + endif # if NET config SYS_RX_ETH_BUFFER diff --git a/net/Makefile b/net/Makefile index 6c812502d3..d131d1cb1a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o obj-$(CONFIG_CMD_WOL) += wol.o obj-$(CONFIG_PROT_UDP) += udp.o +obj-$(CONFIG_PROT_TCP) += tcp.o # Disable this warning as it is triggered by: # sprintf(buf, index ? "foo%d" : "foo", index) diff --git a/net/net.c b/net/net.c index b27b021d07..7878a9970b 100644 --- a/net/net.c +++ b/net/net.c @@ -117,6 +117,7 @@ #if defined(CONFIG_CMD_WOL) #include "wol.h" #endif +#include <net/tcp.h> /** BOOTP EXTENTIONS **/ @@ -387,6 +388,8 @@ int net_init(void) /* Only need to setup buffer pointers once. */ first_call = 0; + if (IS_ENABLED(CONFIG_PROT_TCP)) + tcp_set_tcp_state(TCP_CLOSED); } return net_init_loop(); @@ -833,6 +836,16 @@ int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, IPPROTO_UDP, 0, 0, 0); } +#if defined(CONFIG_PROT_TCP) +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num) +{ + return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport, + sport, payload_len, IPPROTO_TCP, action, + tcp_seq_num, tcp_ack_num); +} +#endif + int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num) @@ -864,6 +877,14 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, payload_len); pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; break; +#if defined(CONFIG_PROT_TCP) + case IPPROTO_TCP: + pkt_hdr_size = eth_hdr_size + + tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport, + payload_len, action, tcp_seq_num, + tcp_ack_num); + break; +#endif default: return -EINVAL; } @@ -1273,6 +1294,15 @@ void net_process_received_packet(uchar *in_packet, int len) if (ip->ip_p == IPPROTO_ICMP) { receive_icmp(ip, len, src_ip, et); return; +#if defined(CONFIG_PROT_TCP) + } else if (ip->ip_p == IPPROTO_TCP) { + debug_cond(DEBUG_DEV_PKT, + "TCP PH (to=%pI4, from=%pI4, len=%d)\n", + &dst_ip, &src_ip, len); + + rxhand_tcp_f((union tcp_build_pkt *)ip, len); + return; +#endif } else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ return; } diff --git a/net/tcp.c b/net/tcp.c new file mode 100644 index 0000000000..8d338c72e8 --- /dev/null +++ b/net/tcp.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 Duncan Hare, all rights reserved. + */ + +/* + * General Desription: + * + * TCP support for the wget command, for fast file downloading. + * + * HTTP/TCP Receiver: + * + * Prerequisites: - own ethernet address + * - own IP address + * - Server IP address + * - Server with TCP + * - TCP application (eg wget) + * Next Step HTTPS? + */ +#include <common.h> +#include <command.h> +#include <console.h> +#include <env_internal.h> +#include <errno.h> +#include <net.h> +#include <net/tcp.h> + +/* + * TCP sliding window control used by us to request re-TX + */ +static struct tcp_sack_v tcp_lost; + +/* TCP option timestamp */ +static u32 loc_timestamp; +static u32 rmt_timestamp; + +static u32 tcp_seq_init; +static u32 tcp_ack_edge; +static u32 tcp_seq_max; + +static int tcp_activity_count; + +/* + * Search for TCP_SACK and review the comments before the code section + * TCP_SACK is the number of packets at the front of the stream + */ + +enum pkt_state {PKT, NOPKT}; +struct sack_r { + struct sack_edges se; + enum pkt_state st; +}; + +static struct sack_r edge_a[TCP_SACK]; +static unsigned int sack_idx; +static unsigned int prev_len; + +/* + * TCP lengths are stored as a rounded up number of 32 bit words. + * Add 3 to length round up, rounded, then divided into the + * length in 32 bit words. + */ +#define LEN_B_TO_DW(x) ((x) >> 2) +#define ROUND_TCPHDR_LEN(x) (LEN_B_TO_DW((x) + 3)) +#define SHIFT_TO_TCPHDRLEN_FIELD(x) ((x) << 4) +#define GET_TCP_HDR_LEN_IN_BYTES(x) ((x) >> 2) + +/* TCP connection state */ +static enum tcp_state current_tcp_state; + +/* Current TCP RX packet handler */ +static rxhand_tcp *tcp_packet_handler; + +/** + * tcp_get_tcp_state() - get current TCP state + * + * Return: Current TCP state + */ +enum tcp_state tcp_get_tcp_state(void) +{ + return current_tcp_state; +} + +/** + * tcp_set_tcp_state() - set current TCP state + * @new_state: new TCP state + */ +void tcp_set_tcp_state(enum tcp_state new_state) +{ + current_tcp_state = new_state; +} + +static void dummy_handler(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len) +{ +} + +/** + * tcp_set_tcp_handler() - set a handler to receive data + * @f: handler + */ +void tcp_set_tcp_handler(rxhand_tcp *f) +{ + debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); + if (!f) + tcp_packet_handler = dummy_handler; + else + tcp_packet_handler = f; +} + +/** + * tcp_set_pseudo_header() - set TCP pseudo header + * @pkt: the packet + * @src: source IP address + * @dest: destinaion IP address + * @tcp_len: tcp length + * @pkt_len: packet length + * + * Return: the checksum of the packet + */ +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, + int tcp_len, int pkt_len) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int checksum_len; + + /* + * Pseudo header + * + * Zero the byte after the last byte so that the header checksum + * will always work. + */ + pkt[pkt_len] = 0; + + net_copy_ip((void *)&b->ph.p_src, &src); + net_copy_ip((void *)&b->ph.p_dst, &dest); + b->ph.rsvd = 0; + b->ph.p = IPPROTO_TCP; + b->ph.len = htons(tcp_len); + checksum_len = tcp_len + PSEUDO_HDR_SIZE; + + debug_cond(DEBUG_DEV_PKT, + "TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n", + &b->ph.p_dst, &b->ph.p_src, checksum_len); + + return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len); +} + +/** + * net_set_ack_options() - set TCP options in acknowledge packets + * @b: the packet + * + * Return: TCP header length + */ +int net_set_ack_options(union tcp_build_pkt *b) +{ + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + + b->sack.t_opt.kind = TCP_O_TS; + b->sack.t_opt.len = TCP_OPT_LEN_A; + b->sack.t_opt.t_snd = htons(loc_timestamp); + b->sack.t_opt.t_rcv = rmt_timestamp; + b->sack.sack_v.kind = TCP_1_NOP; + b->sack.sack_v.len = 0; + + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + if (tcp_lost.len > TCP_OPT_LEN_2) { + debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", + tcp_lost.len); + b->sack.sack_v.len = tcp_lost.len; + b->sack.sack_v.kind = TCP_V_SACK; + b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); + b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); + + /* + * These SACK structures are initialized with NOPs to + * provide TCP header alignment padding. There are 4 + * SACK structures used for both header padding and + * internally. + */ + b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); + b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); + b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); + b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); + b->sack.sack_v.hill[3].l = TCP_O_NOP; + b->sack.sack_v.hill[3].r = TCP_O_NOP; + } + + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + + TCP_TSOPT_SIZE + + tcp_lost.len)); + } else { + b->sack.sack_v.kind = 0; + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + + TCP_TSOPT_SIZE)); + } + + /* + * This returns the actual rounded up length of the + * TCP header to add to the total packet length + */ + + return GET_TCP_HDR_LEN_IN_BYTES(b->sack.hdr.tcp_hlen); +} + +/** + * net_set_ack_options() - set TCP options in SYN packets + * @b: the packet + */ +void net_set_syn_options(union tcp_build_pkt *b) +{ + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len = 0; + + b->ip.hdr.tcp_hlen = 0xa0; + + b->ip.mss.kind = TCP_O_MSS; + b->ip.mss.len = TCP_OPT_LEN_4; + b->ip.mss.mss = htons(TCP_MSS); + b->ip.scale.kind = TCP_O_SCL; + b->ip.scale.scale = TCP_SCALE; + b->ip.scale.len = TCP_OPT_LEN_3; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + b->ip.sack_p.kind = TCP_P_SACK; + b->ip.sack_p.len = TCP_OPT_LEN_2; + } else { + b->ip.sack_p.kind = TCP_1_NOP; + b->ip.sack_p.len = TCP_1_NOP; + } + b->ip.t_opt.kind = TCP_O_TS; + b->ip.t_opt.len = TCP_OPT_LEN_A; + loc_timestamp = get_ticks(); + rmt_timestamp = 0; + b->ip.t_opt.t_snd = 0; + b->ip.t_opt.t_rcv = 0; + b->ip.end = TCP_O_END; +} + +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int pkt_hdr_len; + int pkt_len; + int tcp_len; + + /* + * Header: 5 32 bit words. 4 bits TCP header Length, + * 4 bits reserved options + */ + b->ip.hdr.tcp_flags = action; + pkt_hdr_len = IP_TCP_HDR_SIZE; + b->ip.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + + switch (action) { + case TCP_SYN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num); + tcp_activity_count = 0; + net_set_syn_options(b); + tcp_seq_num = 0; + tcp_ack_num = 0; + pkt_hdr_len = IP_TCP_O_SIZE; + if (current_tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ + action = TCP_FIN; + current_tcp_state = TCP_FIN_WAIT_1; + } else { + current_tcp_state = TCP_SYN_SENT; + } + break; + case TCP_ACK: + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, + action); + break; + case TCP_FIN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); + payload_len = 0; + pkt_hdr_len = IP_TCP_HDR_SIZE; + current_tcp_state = TCP_FIN_WAIT_1; + break; + + /* Notify connection closing */ + + case (TCP_FIN | TCP_ACK): + case (TCP_FIN | TCP_ACK | TCP_PUSH): + if (current_tcp_state == TCP_CLOSE_WAIT) + current_tcp_state = TCP_CLOSING; + + tcp_ack_edge++; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_edge, action); + fallthrough; + default: + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num, action); + } + + pkt_len = pkt_hdr_len + payload_len; + tcp_len = pkt_len - IP_HDR_SIZE; + + /* TCP Header */ + b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); + b->ip.hdr.tcp_src = htons(sport); + b->ip.hdr.tcp_dst = htons(dport); + b->ip.hdr.tcp_seq = htonl(tcp_seq_num); + tcp_seq_num = tcp_seq_num + payload_len; + + /* + * TCP window size - TCP header variable tcp_win. + * Change tcp_win only if you have an understanding of network + * overrun, congestion, TCP segment sizes, TCP windows, TCP scale, + * queuing theory and packet buffering. If there are too few buffers, + * there will be data loss, recovery may work or the sending TCP, + * the server, could abort the stream transmission. + * MSS is governed by maximum Ethernet frame length. + * The number of buffers is governed by the desire to have a queue of + * full buffers to be processed at the destination to maximize + * throughput. Temporary memory use for the boot phase on modern + * SOCs is may not be considered a constraint to buffer space, if + * it is, then the u-boot tftp or nfs kernel netboot should be + * considered. + */ + b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + + b->ip.hdr.tcp_xsum = 0; + b->ip.hdr.tcp_ugr = 0; + + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, + tcp_len, pkt_len); + + net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, + pkt_len, IPPROTO_TCP); + + return pkt_hdr_len; +} + +/** + * tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer) + * @tcp_seq_num: TCP sequence start number + * @len: the length of sequence numbers + * @tcp_seq_max: maximum of sequence numbers + */ +void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) +{ + u32 idx_sack, sack_in; + u32 sack_end = TCP_SACK - 1; + u32 hill = 0; + enum pkt_state expect = PKT; + u32 seq = tcp_seq_num - tcp_seq_init; + u32 hol_l = tcp_ack_edge - tcp_seq_init; + u32 hol_r = 0; + + /* Place new seq number in correct place in receive array */ + if (prev_len == 0) + prev_len = len; + + idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); + if (idx_sack < TCP_SACK) { + edge_a[idx_sack].se.l = tcp_seq_num; + edge_a[idx_sack].se.r = tcp_seq_num + len; + edge_a[idx_sack].st = PKT; + + /* + * The fin (last) packet is not the same length as data + * packets, and if it's length is recorded and used for + * array index calculation, calculation breaks. + */ + if (prev_len < len) + prev_len = len; + } + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", + seq, hol_l, len, sack_idx, sack_end); + + /* Right edge of contiguous stream, is the left edge of first hill */ + hol_l = tcp_seq_num - tcp_seq_init; + hol_r = hol_l + len; + + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len = TCP_OPT_LEN_2; + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", + idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); + + for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; + sack_in++) { + switch (expect) { + case NOPKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "N"); + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "n"); + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + tcp_lost.hill[hill].l = + edge_a[sack_in].se.l; + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + } + expect = PKT; + break; + } + break; + case PKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "p"); + if (sack_in > sack_idx && + hill < TCP_SACK_HILLS) { + hill++; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len += TCP_OPT_LEN_8; + } + expect = NOPKT; + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "P"); + + if (tcp_ack_edge == edge_a[sack_in].se.l) { + tcp_ack_edge = edge_a[sack_in].se.r; + edge_a[sack_in].st = NOPKT; + sack_idx++; + } else { + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && + hill < TCP_SACK_HILLS) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && + sack_in == sack_end - 1) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + } + break; + } + break; + } + } + debug_cond(DEBUG_INT_STATE, "\n"); + if (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || tcp_lost.len <= TCP_OPT_LEN_2) + sack_idx = 0; +} + +/** + * tcp_parse_options() - parsing TCP options + * @o: pointer to the option field. + * @o_len: length of the option field. + */ +void tcp_parse_options(uchar *o, int o_len) +{ + struct tcp_t_opt *tsopt; + uchar *p = o; + + /* + * NOPs are options with a zero length, and thus are special. + * All other options have length fields. + */ + for (p = o; p < (o + o_len); p = p + p[1]) { + if (!p[1]) + return; /* Finished processing options */ + + switch (p[0]) { + case TCP_O_END: + return; + case TCP_O_MSS: + case TCP_O_SCL: + case TCP_P_SACK: + case TCP_V_SACK: + break; + case TCP_O_TS: + tsopt = (struct tcp_t_opt *)p; + rmt_timestamp = tsopt->t_snd; + return; + } + + /* Process optional NOPs */ + if (p[0] == TCP_O_NOP) + p++; + } +} + +static u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) +{ + u8 tcp_fin = tcp_flags & TCP_FIN; + u8 tcp_syn = tcp_flags & TCP_SYN; + u8 tcp_rst = tcp_flags & TCP_RST; + u8 tcp_push = tcp_flags & TCP_PUSH; + u8 tcp_ack = tcp_flags & TCP_ACK; + u8 action = TCP_DATA; + int i; + + /* + * tcp_flags are examined to determine TX action in a given state + * tcp_push is interpreted to mean "inform the app" + * urg, ece, cer and nonce flags are not supported. + * + * exe and crw are use to signal and confirm knowledge of congestion. + * This TCP only sends a file request and acks. If it generates + * congestion, the network is broken. + */ + debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); + if (tcp_rst) { + action = TCP_DATA; + current_tcp_state = TCP_CLOSED; + net_set_state(NETLOOP_FAIL); + debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); + return TCP_RST; + } + + switch (current_tcp_state) { + case TCP_CLOSED: + debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); + if (tcp_ack) + action = TCP_DATA; + else if (tcp_syn) + action = TCP_RST; + else if (tcp_fin) + action = TCP_DATA; + break; + case TCP_SYN_SENT: + debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n", + tcp_flags, *tcp_seq_num); + if (tcp_fin) { + action = action | TCP_PUSH; + current_tcp_state = TCP_CLOSE_WAIT; + } + if (tcp_syn) { + action = action | TCP_ACK | TCP_PUSH; + if (tcp_ack) { + tcp_seq_init = *tcp_seq_num; + *tcp_seq_num = *tcp_seq_num + 1; + tcp_seq_max = *tcp_seq_num; + tcp_ack_edge = *tcp_seq_num; + sack_idx = 0; + edge_a[sack_idx].se.l = *tcp_seq_num; + edge_a[sack_idx].se.r = *tcp_seq_num; + prev_len = 0; + current_tcp_state = TCP_ESTABLISHED; + for (i = 0; i < TCP_SACK; i++) + edge_a[i].st = NOPKT; + } + } else if (tcp_ack) { + action = TCP_DATA; + } + + break; + case TCP_ESTABLISHED: + debug_cond(DEBUG_INT_STATE, "TCP_ESTABLISHED %x\n", tcp_flags); + if (*tcp_seq_num > tcp_seq_max) + tcp_seq_max = *tcp_seq_num; + if (payload_len > 0) { + tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max); + tcp_fin = TCP_DATA; /* cause standalone FIN */ + } + + if ((tcp_fin) && + (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || + tcp_lost.len <= TCP_OPT_LEN_2)) { + action = action | TCP_FIN | TCP_PUSH | TCP_ACK; + current_tcp_state = TCP_CLOSE_WAIT; + } else if (tcp_ack) { + action = TCP_DATA; + } + + if (tcp_syn) + action = TCP_ACK + TCP_RST; + else if (tcp_push) + action = action | TCP_PUSH; + break; + case TCP_CLOSE_WAIT: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); + action = TCP_DATA; + break; + case TCP_FIN_WAIT_2: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); + if (tcp_ack) { + action = TCP_PUSH | TCP_ACK; + current_tcp_state = TCP_CLOSED; + puts("\n"); + } else if (tcp_syn) { + action = TCP_DATA; + } else if (tcp_fin) { + action = TCP_DATA; + } + break; + case TCP_FIN_WAIT_1: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); + if (tcp_fin) { + action = TCP_ACK | TCP_FIN; + current_tcp_state = TCP_FIN_WAIT_2; + } + if (tcp_syn) + action = TCP_RST; + if (tcp_ack) { + current_tcp_state = TCP_CLOSED; + tcp_seq_num = tcp_seq_num + 1; + } + break; + case TCP_CLOSING: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); + if (tcp_ack) { + action = TCP_PUSH; + current_tcp_state = TCP_CLOSED; + puts("\n"); + } else if (tcp_syn) { + action = TCP_RST; + } else if (tcp_fin) { + action = TCP_DATA; + } + break; + } + return action; +} + +/** + * rxhand_tcp_f() - process receiving data and call data handler. + * @b: the packet + * @pkt_len: the length of packet. + */ +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) +{ + int tcp_len = pkt_len - IP_HDR_SIZE; + u16 tcp_rx_xsum = b->ip.hdr.ip_sum; + u8 tcp_action = TCP_DATA; + u32 tcp_seq_num, tcp_ack_num; + struct in_addr action_and_state; + int tcp_hdr_len, payload_len; + + /* Verify IP header */ + debug_cond(DEBUG_DEV_PKT, + "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", + &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); + + b->ip.hdr.ip_src = net_server_ip; + b->ip.hdr.ip_dst = net_ip; + b->ip.hdr.ip_sum = 0; + if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", + &net_ip, &net_server_ip, pkt_len); + return; + } + + /* Build pseudo header and verify TCP header */ + tcp_rx_xsum = b->ip.hdr.tcp_xsum; + b->ip.hdr.tcp_xsum = 0; + if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src, + b->ip.hdr.ip_dst, tcp_len, + pkt_len)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", + &net_ip, &net_server_ip, tcp_len); + return; + } + + tcp_hdr_len = GET_TCP_HDR_LEN_IN_BYTES(b->ip.hdr.tcp_hlen); + payload_len = tcp_len - tcp_hdr_len; + + if (tcp_hdr_len > TCP_HDR_SIZE) + tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, + tcp_hdr_len - TCP_HDR_SIZE); + /* + * Incoming sequence and ack numbers are server's view of the numbers. + * The app must swap the numbers when responding. + */ + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); + + /* Packets are not ordered. Send to app as received. */ + tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, + &tcp_seq_num, payload_len); + + tcp_activity_count++; + if (tcp_activity_count > TCP_ACTIVITY) { + puts("| "); + tcp_activity_count = 0; + } + + if ((tcp_action & TCP_PUSH) || payload_len > 0) { + debug_cond(DEBUG_DEV_PKT, + "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + action_and_state.s_addr = tcp_action; + (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, + tcp_seq_num, action_and_state, + tcp_ack_num, payload_len); + + } else if (tcp_action != TCP_DATA) { + debug_cond(DEBUG_DEV_PKT, + "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + /* + * Warning: Incoming Ack & Seq sequence numbers are transposed + * here to outgoing Seq & Ack sequence numbers + */ + net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), + ntohs(b->ip.hdr.tcp_dst), + (tcp_action & (~TCP_PUSH)), + tcp_seq_num, tcp_ack_num); + } +}