@@ -36,6 +36,7 @@ struct qdisc_rate_table {
enum qdisc_state_t {
__QDISC_STATE_SCHED,
__QDISC_STATE_DEACTIVATED,
+ __QDISC_STATE_MISSED,
};
struct qdisc_size_table {
@@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
+ if (spin_trylock(&qdisc->seqlock))
+ goto nolock_empty;
+
+ /* If the MISSED flag is set, it means other thread has
+ * set the MISSED flag before second spin_trylock(), so
+ * we can return false here to avoid multi cpus doing
+ * the set_bit() and second spin_trylock() concurrently.
+ */
+ if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+ return false;
+
+ /* Set the MISSED flag before the second spin_trylock(),
+ * if the second spin_trylock() return false, it means
+ * other cpu holding the lock will do dequeuing for us
+ * or it will see the MISSED flag set after releasing
+ * lock and reschedule the net_tx_action() to do the
+ * dequeuing.
+ */
+ set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+ /* Retry again in case other CPU may not see the new flag
+ * after it releases the lock at the end of qdisc_run_end().
+ */
if (!spin_trylock(&qdisc->seqlock))
return false;
+
+nolock_empty:
WRITE_ONCE(qdisc->empty, false);
} else if (qdisc_is_running(qdisc)) {
return false;
@@ -176,8 +202,13 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
write_seqcount_end(&qdisc->running);
- if (qdisc->flags & TCQ_F_NOLOCK)
+ if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
+
+ if (unlikely(test_bit(__QDISC_STATE_MISSED,
+ &qdisc->state)))
+ __netif_schedule(qdisc);
+ }
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff *skb = NULL;
+ bool need_retry = true;
int band;
+retry:
for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
struct skb_array *q = band2list(priv, band);
@@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
}
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
+ } else if (need_retry &&
+ test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ /* Delay clearing the STATE_MISSED here to reduce
+ * the overhead of the second spin_trylock() in
+ * qdisc_run_begin() and __netif_schedule() calling
+ * in qdisc_run_end().
+ */
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+ /* Make sure dequeuing happens after clearing
+ * STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ need_retry = false;
+
+ goto retry;
} else {
WRITE_ONCE(qdisc->empty, true);
}