Message ID | 20210129181812.256216-3-weiwan@google.com |
---|---|
State | Superseded |
Headers | show |
Series | implement kthread based napi poll | expand |
On Fri, Jan 29, 2021 at 10:22 AM Wei Wang <weiwan@google.com> wrote: > > This patch allows running each napi poll loop inside its own > kernel thread. > The kthread is created during netif_napi_add() if dev->threaded > is set. And threaded mode is enabled in napi_enable(). We will > provide a way to set dev->threaded and enable threaded mode > without a device up/down in the following patch. > > Once that threaded mode is enabled and the kthread is > started, napi_schedule() will wake-up such thread instead > of scheduling the softirq. > > The threaded poll loop behaves quite likely the net_rx_action, > but it does not have to manipulate local irqs and uses > an explicit scheduling point based on netdev_budget. > > Co-developed-by: Paolo Abeni <pabeni@redhat.com> > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Co-developed-by: Jakub Kicinski <kuba@kernel.org> > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > Signed-off-by: Wei Wang <weiwan@google.com> > --- > include/linux/netdevice.h | 21 +++---- > net/core/dev.c | 117 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 124 insertions(+), 14 deletions(-) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 02dcef4d66e2..f1e9fe9017ac 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -347,6 +347,7 @@ struct napi_struct { > struct list_head dev_list; > struct hlist_node napi_hash_node; > unsigned int napi_id; > + struct task_struct *thread; > }; > > enum { > @@ -358,6 +359,7 @@ enum { > NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ > NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ > NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ > + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ > }; > > enum { > @@ -369,6 +371,7 @@ enum { > NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), > NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), > NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), > + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), > }; > > enum gro_result { > @@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n) > */ > void napi_disable(struct napi_struct *n); > > -/** > - * napi_enable - enable NAPI scheduling > - * @n: NAPI context > - * > - * Resume NAPI from being scheduled on this context. > - * Must be paired with napi_disable. > - */ > -static inline void napi_enable(struct napi_struct *n) > -{ > - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); > - smp_mb__before_atomic(); > - clear_bit(NAPI_STATE_SCHED, &n->state); > - clear_bit(NAPI_STATE_NPSVC, &n->state); > -} > +void napi_enable(struct napi_struct *n); > > /** > * napi_synchronize - wait until NAPI is not running > @@ -1826,6 +1816,8 @@ enum netdev_priv_flags { > * > * @wol_enabled: Wake-on-LAN is enabled > * > + * @threaded: napi threaded mode is enabled > + * > * @net_notifier_list: List of per-net netdev notifier block > * that follow this device when it is moved > * to another network namespace. > @@ -2143,6 +2135,7 @@ struct net_device { > struct lock_class_key *qdisc_running_key; > bool proto_down; > unsigned wol_enabled:1; > + unsigned threaded:1; > > struct list_head net_notifier_list; > > diff --git a/net/core/dev.c b/net/core/dev.c > index 7d23bff03864..743dd69fba19 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -91,6 +91,7 @@ > #include <linux/etherdevice.h> > #include <linux/ethtool.h> > #include <linux/skbuff.h> > +#include <linux/kthread.h> > #include <linux/bpf.h> > #include <linux/bpf_trace.h> > #include <net/net_namespace.h> > @@ -1493,6 +1494,37 @@ void netdev_notify_peers(struct net_device *dev) > } > EXPORT_SYMBOL(netdev_notify_peers); > > +static int napi_threaded_poll(void *data); > + > +static int napi_kthread_create(struct napi_struct *n) > +{ > + int err = 0; > + > + /* Create and wake up the kthread once to put it in > + * TASK_INTERRUPTIBLE mode to avoid the blocked task > + * warning and work with loadavg. > + */ > + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", > + n->dev->name, n->napi_id); > + if (IS_ERR(n->thread)) { > + err = PTR_ERR(n->thread); > + pr_err("kthread_run failed with err %d\n", err); > + n->thread = NULL; > + } > + > + return err; > +} > + > +static void napi_kthread_stop(struct napi_struct *n) > +{ > + if (!n->thread) > + return; > + > + kthread_stop(n->thread); > + clear_bit(NAPI_STATE_THREADED, &n->state); > + n->thread = NULL; > +} > + So I think the napi_kthread_stop should also be split into two parts and distributed between the napi_disable and netif_napi_del functions. We should probably be clearing the NAPI_STATE_THREADED bit in napi_disable, and freeing the thread in netif_napi_del. > static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) > { > const struct net_device_ops *ops = dev->netdev_ops; > @@ -4252,6 +4284,21 @@ int gro_normal_batch __read_mostly = 8; > static inline void ____napi_schedule(struct softnet_data *sd, > struct napi_struct *napi) > { > + struct task_struct *thread; > + > + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { > + /* Paired with smp_mb__before_atomic() in > + * napi_enable(). Use READ_ONCE() to guarantee > + * a complete read on napi->thread. Only call > + * wake_up_process() when it's not NULL. > + */ > + thread = READ_ONCE(napi->thread); > + if (thread) { > + wake_up_process(thread); > + return; > + } > + } > + > list_add_tail(&napi->poll_list, &sd->poll_list); > __raise_softirq_irqoff(NET_RX_SOFTIRQ); > } > @@ -6720,6 +6767,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, > set_bit(NAPI_STATE_NPSVC, &napi->state); > list_add_rcu(&napi->dev_list, &dev->napi_list); > napi_hash_add(napi); > + /* Create kthread for this napi if dev->threaded is set. > + * Clear dev->threaded if kthread creation failed so that > + * threaded mode will not be enabled in napi_enable(). > + */ > + if (dev->threaded && napi_kthread_create(napi)) > + dev->threaded = 0; > } > EXPORT_SYMBOL(netif_napi_add); > > @@ -6734,12 +6787,31 @@ void napi_disable(struct napi_struct *n) > msleep(1); > > hrtimer_cancel(&n->timer); > + napi_kthread_stop(n); > So I think there may be an issue here since we had netif_napi_add create the thread, but you are freeing it in napi_kthread_stop if I am not mistaken. That is why I suggested making this only a clear_bit call like the ones below to just clear the threaded flag from the state. > clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); > clear_bit(NAPI_STATE_DISABLE, &n->state); > } > EXPORT_SYMBOL(napi_disable); > > +/** > + * napi_enable - enable NAPI scheduling > + * @n: NAPI context > + * > + * Resume NAPI from being scheduled on this context. > + * Must be paired with napi_disable. > + */ > +void napi_enable(struct napi_struct *n) > +{ > + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); > + smp_mb__before_atomic(); > + clear_bit(NAPI_STATE_SCHED, &n->state); > + clear_bit(NAPI_STATE_NPSVC, &n->state); > + if (n->dev->threaded && n->thread) > + set_bit(NAPI_STATE_THREADED, &n->state); > +} > +EXPORT_SYMBOL(napi_enable); > + > static void flush_gro_hash(struct napi_struct *napi) > { > int i; > @@ -6862,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) > return work; > } > > +static int napi_thread_wait(struct napi_struct *napi) > +{ > + set_current_state(TASK_INTERRUPTIBLE); > + > + while (!kthread_should_stop() && !napi_disable_pending(napi)) { > + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { > + WARN_ON(!list_empty(&napi->poll_list)); > + __set_current_state(TASK_RUNNING); > + return 0; > + } > + > + schedule(); > + set_current_state(TASK_INTERRUPTIBLE); > + } > + __set_current_state(TASK_RUNNING); > + return -1; > +} > + > +static int napi_threaded_poll(void *data) > +{ > + struct napi_struct *napi = data; > + void *have; > + > + while (!napi_thread_wait(napi)) { > + for (;;) { > + bool repoll = false; > + > + local_bh_disable(); > + > + have = netpoll_poll_lock(napi); > + __napi_poll(napi, &repoll); > + netpoll_poll_unlock(have); > + > + __kfree_skb_flush(); > + local_bh_enable(); > + > + if (!repoll) > + break; > + > + cond_resched(); > + } > + } > + return 0; > +} > + > static __latent_entropy void net_rx_action(struct softirq_action *h) > { > struct softnet_data *sd = this_cpu_ptr(&softnet_data); > -- > 2.30.0.365.g02bc693789-goog >
On Wed, Feb 3, 2021 at 9:20 AM Alexander Duyck <alexander.duyck@gmail.com> wrote: > > On Fri, Jan 29, 2021 at 10:22 AM Wei Wang <weiwan@google.com> wrote: > > > > This patch allows running each napi poll loop inside its own > > kernel thread. > > The kthread is created during netif_napi_add() if dev->threaded > > is set. And threaded mode is enabled in napi_enable(). We will > > provide a way to set dev->threaded and enable threaded mode > > without a device up/down in the following patch. > > > > Once that threaded mode is enabled and the kthread is > > started, napi_schedule() will wake-up such thread instead > > of scheduling the softirq. > > > > The threaded poll loop behaves quite likely the net_rx_action, > > but it does not have to manipulate local irqs and uses > > an explicit scheduling point based on netdev_budget. > > > > Co-developed-by: Paolo Abeni <pabeni@redhat.com> > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > > Co-developed-by: Jakub Kicinski <kuba@kernel.org> > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > > Signed-off-by: Wei Wang <weiwan@google.com> > > --- > > include/linux/netdevice.h | 21 +++---- > > net/core/dev.c | 117 ++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 124 insertions(+), 14 deletions(-) > > > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > > index 02dcef4d66e2..f1e9fe9017ac 100644 > > --- a/include/linux/netdevice.h > > +++ b/include/linux/netdevice.h > > @@ -347,6 +347,7 @@ struct napi_struct { > > struct list_head dev_list; > > struct hlist_node napi_hash_node; > > unsigned int napi_id; > > + struct task_struct *thread; > > }; > > > > enum { > > @@ -358,6 +359,7 @@ enum { > > NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ > > NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ > > NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ > > + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ > > }; > > > > enum { > > @@ -369,6 +371,7 @@ enum { > > NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), > > NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), > > NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), > > + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), > > }; > > > > enum gro_result { > > @@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n) > > */ > > void napi_disable(struct napi_struct *n); > > > > -/** > > - * napi_enable - enable NAPI scheduling > > - * @n: NAPI context > > - * > > - * Resume NAPI from being scheduled on this context. > > - * Must be paired with napi_disable. > > - */ > > -static inline void napi_enable(struct napi_struct *n) > > -{ > > - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); > > - smp_mb__before_atomic(); > > - clear_bit(NAPI_STATE_SCHED, &n->state); > > - clear_bit(NAPI_STATE_NPSVC, &n->state); > > -} > > +void napi_enable(struct napi_struct *n); > > > > /** > > * napi_synchronize - wait until NAPI is not running > > @@ -1826,6 +1816,8 @@ enum netdev_priv_flags { > > * > > * @wol_enabled: Wake-on-LAN is enabled > > * > > + * @threaded: napi threaded mode is enabled > > + * > > * @net_notifier_list: List of per-net netdev notifier block > > * that follow this device when it is moved > > * to another network namespace. > > @@ -2143,6 +2135,7 @@ struct net_device { > > struct lock_class_key *qdisc_running_key; > > bool proto_down; > > unsigned wol_enabled:1; > > + unsigned threaded:1; > > > > struct list_head net_notifier_list; > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > index 7d23bff03864..743dd69fba19 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -91,6 +91,7 @@ > > #include <linux/etherdevice.h> > > #include <linux/ethtool.h> > > #include <linux/skbuff.h> > > +#include <linux/kthread.h> > > #include <linux/bpf.h> > > #include <linux/bpf_trace.h> > > #include <net/net_namespace.h> > > @@ -1493,6 +1494,37 @@ void netdev_notify_peers(struct net_device *dev) > > } > > EXPORT_SYMBOL(netdev_notify_peers); > > > > +static int napi_threaded_poll(void *data); > > + > > +static int napi_kthread_create(struct napi_struct *n) > > +{ > > + int err = 0; > > + > > + /* Create and wake up the kthread once to put it in > > + * TASK_INTERRUPTIBLE mode to avoid the blocked task > > + * warning and work with loadavg. > > + */ > > + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", > > + n->dev->name, n->napi_id); > > + if (IS_ERR(n->thread)) { > > + err = PTR_ERR(n->thread); > > + pr_err("kthread_run failed with err %d\n", err); > > + n->thread = NULL; > > + } > > + > > + return err; > > +} > > + > > +static void napi_kthread_stop(struct napi_struct *n) > > +{ > > + if (!n->thread) > > + return; > > + > > + kthread_stop(n->thread); > > + clear_bit(NAPI_STATE_THREADED, &n->state); > > + n->thread = NULL; > > +} > > + > > So I think the napi_kthread_stop should also be split into two parts > and distributed between the napi_disable and netif_napi_del functions. > > We should probably be clearing the NAPI_STATE_THREADED bit in > napi_disable, and freeing the thread in netif_napi_del. > > > static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) > > { > > const struct net_device_ops *ops = dev->netdev_ops; > > @@ -4252,6 +4284,21 @@ int gro_normal_batch __read_mostly = 8; > > static inline void ____napi_schedule(struct softnet_data *sd, > > struct napi_struct *napi) > > { > > + struct task_struct *thread; > > + > > + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { > > + /* Paired with smp_mb__before_atomic() in > > + * napi_enable(). Use READ_ONCE() to guarantee > > + * a complete read on napi->thread. Only call > > + * wake_up_process() when it's not NULL. > > + */ > > + thread = READ_ONCE(napi->thread); > > + if (thread) { > > + wake_up_process(thread); > > + return; > > + } > > + } > > + > > list_add_tail(&napi->poll_list, &sd->poll_list); > > __raise_softirq_irqoff(NET_RX_SOFTIRQ); > > } > > @@ -6720,6 +6767,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, > > set_bit(NAPI_STATE_NPSVC, &napi->state); > > list_add_rcu(&napi->dev_list, &dev->napi_list); > > napi_hash_add(napi); > > + /* Create kthread for this napi if dev->threaded is set. > > + * Clear dev->threaded if kthread creation failed so that > > + * threaded mode will not be enabled in napi_enable(). > > + */ > > + if (dev->threaded && napi_kthread_create(napi)) > > + dev->threaded = 0; > > } > > EXPORT_SYMBOL(netif_napi_add); > > > > @@ -6734,12 +6787,31 @@ void napi_disable(struct napi_struct *n) > > msleep(1); > > > > hrtimer_cancel(&n->timer); > > + napi_kthread_stop(n); > > > > So I think there may be an issue here since we had netif_napi_add > create the thread, but you are freeing it in napi_kthread_stop if I am > not mistaken. That is why I suggested making this only a clear_bit > call like the ones below to just clear the threaded flag from the > state. Makes sense. I will split napi_kthread_stop(). > > > clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); > > clear_bit(NAPI_STATE_DISABLE, &n->state); > > } > > EXPORT_SYMBOL(napi_disable); > > > > +/** > > + * napi_enable - enable NAPI scheduling > > + * @n: NAPI context > > + * > > + * Resume NAPI from being scheduled on this context. > > + * Must be paired with napi_disable. > > + */ > > +void napi_enable(struct napi_struct *n) > > +{ > > + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); > > + smp_mb__before_atomic(); > > + clear_bit(NAPI_STATE_SCHED, &n->state); > > + clear_bit(NAPI_STATE_NPSVC, &n->state); > > + if (n->dev->threaded && n->thread) > > + set_bit(NAPI_STATE_THREADED, &n->state); > > +} > > +EXPORT_SYMBOL(napi_enable); > > + > > static void flush_gro_hash(struct napi_struct *napi) > > { > > int i; > > @@ -6862,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) > > return work; > > } > > > > +static int napi_thread_wait(struct napi_struct *napi) > > +{ > > + set_current_state(TASK_INTERRUPTIBLE); > > + > > + while (!kthread_should_stop() && !napi_disable_pending(napi)) { > > + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { > > + WARN_ON(!list_empty(&napi->poll_list)); > > + __set_current_state(TASK_RUNNING); > > + return 0; > > + } > > + > > + schedule(); > > + set_current_state(TASK_INTERRUPTIBLE); > > + } > > + __set_current_state(TASK_RUNNING); > > + return -1; > > +} > > + > > +static int napi_threaded_poll(void *data) > > +{ > > + struct napi_struct *napi = data; > > + void *have; > > + > > + while (!napi_thread_wait(napi)) { > > + for (;;) { > > + bool repoll = false; > > + > > + local_bh_disable(); > > + > > + have = netpoll_poll_lock(napi); > > + __napi_poll(napi, &repoll); > > + netpoll_poll_unlock(have); > > + > > + __kfree_skb_flush(); > > + local_bh_enable(); > > + > > + if (!repoll) > > + break; > > + > > + cond_resched(); > > + } > > + } > > + return 0; > > +} > > + > > static __latent_entropy void net_rx_action(struct softirq_action *h) > > { > > struct softnet_data *sd = this_cpu_ptr(&softnet_data); > > -- > > 2.30.0.365.g02bc693789-goog > >
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02dcef4d66e2..f1e9fe9017ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -347,6 +347,7 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; unsigned int napi_id; + struct task_struct *thread; }; enum { @@ -358,6 +359,7 @@ enum { NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ }; enum { @@ -369,6 +371,7 @@ enum { NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), }; enum gro_result { @@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n) */ void napi_disable(struct napi_struct *n); -/** - * napi_enable - enable NAPI scheduling - * @n: NAPI context - * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. - */ -static inline void napi_enable(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_atomic(); - clear_bit(NAPI_STATE_SCHED, &n->state); - clear_bit(NAPI_STATE_NPSVC, &n->state); -} +void napi_enable(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running @@ -1826,6 +1816,8 @@ enum netdev_priv_flags { * * @wol_enabled: Wake-on-LAN is enabled * + * @threaded: napi threaded mode is enabled + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2143,6 +2135,7 @@ struct net_device { struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; + unsigned threaded:1; struct list_head net_notifier_list; diff --git a/net/core/dev.c b/net/core/dev.c index 7d23bff03864..743dd69fba19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -91,6 +91,7 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/kthread.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <net/net_namespace.h> @@ -1493,6 +1494,37 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); +static int napi_threaded_poll(void *data); + +static int napi_kthread_create(struct napi_struct *n) +{ + int err = 0; + + /* Create and wake up the kthread once to put it in + * TASK_INTERRUPTIBLE mode to avoid the blocked task + * warning and work with loadavg. + */ + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", + n->dev->name, n->napi_id); + if (IS_ERR(n->thread)) { + err = PTR_ERR(n->thread); + pr_err("kthread_run failed with err %d\n", err); + n->thread = NULL; + } + + return err; +} + +static void napi_kthread_stop(struct napi_struct *n) +{ + if (!n->thread) + return; + + kthread_stop(n->thread); + clear_bit(NAPI_STATE_THREADED, &n->state); + n->thread = NULL; +} + static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; @@ -4252,6 +4284,21 @@ int gro_normal_batch __read_mostly = 8; static inline void ____napi_schedule(struct softnet_data *sd, struct napi_struct *napi) { + struct task_struct *thread; + + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { + /* Paired with smp_mb__before_atomic() in + * napi_enable(). Use READ_ONCE() to guarantee + * a complete read on napi->thread. Only call + * wake_up_process() when it's not NULL. + */ + thread = READ_ONCE(napi->thread); + if (thread) { + wake_up_process(thread); + return; + } + } + list_add_tail(&napi->poll_list, &sd->poll_list); __raise_softirq_irqoff(NET_RX_SOFTIRQ); } @@ -6720,6 +6767,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); + /* Create kthread for this napi if dev->threaded is set. + * Clear dev->threaded if kthread creation failed so that + * threaded mode will not be enabled in napi_enable(). + */ + if (dev->threaded && napi_kthread_create(napi)) + dev->threaded = 0; } EXPORT_SYMBOL(netif_napi_add); @@ -6734,12 +6787,31 @@ void napi_disable(struct napi_struct *n) msleep(1); hrtimer_cancel(&n->timer); + napi_kthread_stop(n); clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); +/** + * napi_enable - enable NAPI scheduling + * @n: NAPI context + * + * Resume NAPI from being scheduled on this context. + * Must be paired with napi_disable. + */ +void napi_enable(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + smp_mb__before_atomic(); + clear_bit(NAPI_STATE_SCHED, &n->state); + clear_bit(NAPI_STATE_NPSVC, &n->state); + if (n->dev->threaded && n->thread) + set_bit(NAPI_STATE_THREADED, &n->state); +} +EXPORT_SYMBOL(napi_enable); + static void flush_gro_hash(struct napi_struct *napi) { int i; @@ -6862,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) return work; } +static int napi_thread_wait(struct napi_struct *napi) +{ + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop() && !napi_disable_pending(napi)) { + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + WARN_ON(!list_empty(&napi->poll_list)); + __set_current_state(TASK_RUNNING); + return 0; + } + + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return -1; +} + +static int napi_threaded_poll(void *data) +{ + struct napi_struct *napi = data; + void *have; + + while (!napi_thread_wait(napi)) { + for (;;) { + bool repoll = false; + + local_bh_disable(); + + have = netpoll_poll_lock(napi); + __napi_poll(napi, &repoll); + netpoll_poll_unlock(have); + + __kfree_skb_flush(); + local_bh_enable(); + + if (!repoll) + break; + + cond_resched(); + } + } + return 0; +} + static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data);