@@ -28,6 +28,12 @@ extern "C" {
#include <odp/hints.h>
#include <net/if.h>
+/** Determine if a socket read/write error should be reported. Transient errors
+ * that simply require the caller to retry are ignored, the _send/_recv APIs
+ * are non-blocking and it is the caller's responsibility to retry if the
+ * requested number of packets were not handled. */
+#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e != EINTR)
+
/* Forward declaration */
struct pktio_if_ops;
@@ -373,9 +373,7 @@ static int sock_mmsg_send(pktio_entry_t *pktio_entry,
struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX][ODP_BUFFER_MAX_SEG];
int ret;
int sockfd;
- unsigned i;
- unsigned sent_msgs = 0;
- unsigned flags;
+ unsigned n, i;
if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
return -1;
@@ -389,17 +387,24 @@ static int sock_mmsg_send(pktio_entry_t *pktio_entry,
iovecs[i]);
}
- flags = MSG_DONTWAIT;
- for (i = 0; i < len; i += sent_msgs) {
- ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
- sent_msgs = ret > 0 ? (unsigned)ret : 0;
- flags = 0; /* blocking for next rounds */
+ for (i = 0; i < len; ) {
+ ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
+ if (odp_unlikely(ret <= -1)) {
+ if (i == 0 && SOCK_ERR_REPORT(errno)) {
+ __odp_errno = errno;
+ ODP_ERR("sendmmsg(): %s\n", strerror(errno));
+ return -1;
+ }
+ break;
+ }
+
+ i += ret;
}
- for (i = 0; i < len; i++)
- odp_packet_free(pkt_table[i]);
+ for (n = 0; n < i; ++n)
+ odp_packet_free(pkt_table[n]);
- return len;
+ return i;
}
/*
@@ -174,49 +174,70 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring,
{
union frame_map ppd;
uint32_t pkt_len;
- unsigned frame_num, next_frame_num;
+ unsigned first_frame_num, frame_num, frame_count;
int ret;
- unsigned i = 0;
+ uint8_t *buf;
+ unsigned n, i = 0;
+ unsigned nb_tx = 0;
+ int send_errno;
- frame_num = ring->frame_num;
+ first_frame_num = ring->frame_num;
+ frame_num = first_frame_num;
+ frame_count = ring->rd_num;
while (i < len) {
- if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
- ppd.raw = ring->rd[frame_num].iov_base;
+ ppd.raw = ring->rd[frame_num].iov_base;
+ if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
+ break;
- next_frame_num = (frame_num + 1) % ring->rd_num;
+ pkt_len = odp_packet_len(pkt_table[i]);
+ ppd.v2->tp_h.tp_snaplen = pkt_len;
+ ppd.v2->tp_h.tp_len = pkt_len;
- pkt_len = odp_packet_len(pkt_table[i]);
- ppd.v2->tp_h.tp_snaplen = pkt_len;
- ppd.v2->tp_h.tp_len = pkt_len;
+ buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN -
+ sizeof(struct sockaddr_ll);
+ odp_packet_copydata_out(pkt_table[i], 0, pkt_len, buf);
- odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
- (uint8_t *)ppd.raw +
- TPACKET2_HDRLEN -
- sizeof(struct sockaddr_ll));
+ mmap_tx_user_ready(ppd.raw);
- mmap_tx_user_ready(ppd.raw);
+ if (++frame_num >= frame_count)
+ frame_num = 0;
- odp_packet_free(pkt_table[i]);
- frame_num = next_frame_num;
- i++;
- } else {
+ i++;
+ }
+
+ ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
+ send_errno = errno;
+
+ /* On success, the return value indicates the number of bytes sent. On
+ * failure a value of -1 is returned, even if the failure occurred
+ * after some of the packets in the ring have already been sent, so we
+ * need to inspect the packet status to determine which were sent. */
+ for (n = first_frame_num; n < first_frame_num + i; ++n) {
+ struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
+
+ if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
+ nb_tx++;
+ } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
+ /* status will be cleared on the next send request */
break;
}
}
- ring->frame_num = frame_num;
+ ring->frame_num = (ring->frame_num + nb_tx) % frame_count;
- ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret == -1) {
- if (errno != EAGAIN) {
- __odp_errno = errno;
- ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno));
- return -1;
- }
+ if (odp_unlikely(ret == -1 &&
+ nb_tx == 0 &&
+ SOCK_ERR_REPORT(send_errno))) {
+ __odp_errno = send_errno;
+ ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
+ return -1;
}
- return i;
+ for (i = 0; i < nb_tx; ++i)
+ odp_packet_free(pkt_table[i]);
+
+ return nb_tx;
}
static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
@@ -260,21 +281,6 @@ static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
ring->flen = ring->req.tp_frame_size;
}
-static int mmap_set_packet_loss_discard(int sock)
-{
- int ret, discard = 1;
-
- ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
- sizeof(discard));
- if (ret == -1) {
- __odp_errno = errno;
- ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
static int mmap_setup_ring(int sock, struct ring *ring, int type,
odp_pool_t pool_hdl, int fanout)
{
@@ -284,12 +290,6 @@ static int mmap_setup_ring(int sock, struct ring *ring, int type,
ring->type = type;
ring->version = TPACKET_V2;
- if (type == PACKET_TX_RING) {
- ret = mmap_set_packet_loss_discard(sock);
- if (ret != 0)
- return -1;
- }
-
mmap_fill_ring(ring, pool_hdl, fanout);
ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req));
Errors which occur while sending packets via odp_pktio_send() aren't handled correctly or even consistently across the two socket based implementations. The problems being addressed are; - calls may block indefinitely in certain error conditions (mmsg) - packets may be freed and reported as being sent correctly even though they weren't really sent (mmap) - return value doesn't always accurately reflect number of packets sent - inconsistent use of __odp_errno Signed-off-by: Stuart Haslam <stuart.haslam@linaro.org> --- .../linux-generic/include/odp_packet_io_internal.h | 6 ++ platform/linux-generic/pktio/socket.c | 27 +++--- platform/linux-generic/pktio/socket_mmap.c | 96 +++++++++++----------- 3 files changed, 70 insertions(+), 59 deletions(-)