mbox series

[net-next,0/3] flow_offload: hardware offload of TC actions

Message ID 20210722091938.12956-1-simon.horman@corigine.com
Headers show
Series flow_offload: hardware offload of TC actions | expand

Message

Simon Horman July 22, 2021, 9:19 a.m. UTC
Baowen Zheng says:

Allow use of flow_indr_dev_register/flow_indr_dev_setup_offload to offload
tc actions independent of flows.

The motivation for this work is to prepare for using TC police action
instances to provide hardware offload of OVS metering feature - which calls
for policers that may be used by multiple flows and whose lifecycle is
independent of any flows that use them.

This patch includes basic changes to offload drivers to return EOPNOTSUPP
if this feature is used - it is not yet supported by any driver. 

Changes since RFC:
- Fix robot test failure.
- Change actions offload process in action add function rather than action
  init.
- Change actions offload delete process after tcf_del_notify to keep
  undeleted actions.
- Add process to update actions stats from hardware.

Baowen Zheng (3):
  flow_offload: allow user to offload tc action to net device
  flow_offload: add process to delete offloaded actions from net device
  flow_offload: add process to update action stats from hardware

 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  |   2 +-
 .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |   3 +
 .../ethernet/netronome/nfp/flower/offload.c   |   3 +
 include/linux/netdevice.h                     |   1 +
 include/net/act_api.h                         |   1 +
 include/net/flow_offload.h                    |  15 ++
 include/net/pkt_cls.h                         |  20 +++
 net/core/flow_offload.c                       |  26 ++-
 net/sched/act_api.c                           | 162 +++++++++++++++++-
 net/sched/cls_api.c                           |  42 ++++-
 10 files changed, 264 insertions(+), 11 deletions(-)

Comments

Roi Dayan July 22, 2021, 12:24 p.m. UTC | #1
On 2021-07-22 12:19 PM, Simon Horman wrote:
> From: Baowen Zheng <baowen.zheng@corigine.com>
> 
> Use flow_indr_dev_register/flow_indr_dev_setup_offload to
> offload tc action.
> 
> We offload the tc action mainly for ovs meter configuration.
> Make some basic changes for different vendors to return EOPNOTSUPP.
> 
> We need to call tc_cleanup_flow_action to clean up tc action entry since
> in tc_setup_action, some actions may hold dev refcnt, especially the mirror
> action.
> 
> As per review from the RFC, the kernel test robot will fail to run, so
> we add CONFIG_NET_CLS_ACT control for the action offload.
> 
> Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
> Signed-off-by: Louis Peens <louis.peens@corigine.com>
> Signed-off-by: Simon Horman <simon.horman@corigine.com>
> ---
>   drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  |  2 +-
>   .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  3 ++
>   .../ethernet/netronome/nfp/flower/offload.c   |  3 ++
>   include/linux/netdevice.h                     |  1 +
>   include/net/flow_offload.h                    | 15 ++++++++
>   include/net/pkt_cls.h                         | 15 ++++++++
>   net/core/flow_offload.c                       | 26 +++++++++++++-
>   net/sched/act_api.c                           | 33 +++++++++++++++++
>   net/sched/cls_api.c                           | 36 ++++++++++++++++---
>   9 files changed, 128 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> index 5e4429b14b8c..edbbf7b4df77 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> @@ -1951,7 +1951,7 @@ static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, v
>   				 void *data,
>   				 void (*cleanup)(struct flow_block_cb *block_cb))
>   {
> -	if (!bnxt_is_netdev_indr_offload(netdev))
> +	if (!netdev || !bnxt_is_netdev_indr_offload(netdev))
>   		return -EOPNOTSUPP;
>   
>   	switch (type) {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> index 059799e4f483..111daacc4cc3 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> @@ -486,6 +486,9 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *
>   			    void *data,
>   			    void (*cleanup)(struct flow_block_cb *block_cb))
>   {
> +	if (!netdev)
> +		return -EOPNOTSUPP;
> +
>   	switch (type) {
>   	case TC_SETUP_BLOCK:
>   		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
> diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
> index 2406d33356ad..88bbc86347b4 100644
> --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
> +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
> @@ -1869,6 +1869,9 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *
>   			    void *data,
>   			    void (*cleanup)(struct flow_block_cb *block_cb))
>   {
> +	if (!netdev)
> +		return -EOPNOTSUPP;
> +
>   	if (!nfp_fl_is_netdev_to_offload(netdev))
>   		return -EOPNOTSUPP;
>   
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 42f6f866d5f3..b138219baf6f 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -923,6 +923,7 @@ enum tc_setup_type {
>   	TC_SETUP_QDISC_TBF,
>   	TC_SETUP_QDISC_FIFO,
>   	TC_SETUP_QDISC_HTB,
> +	TC_SETUP_ACT,
>   };
>   
>   /* These structures hold the attributes of bpf state that are being passed
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 69c9eabf8325..26644596fd54 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -553,6 +553,21 @@ struct flow_cls_offload {
>   	u32 classid;
>   };
>   
> +enum flow_act_command {
> +	FLOW_ACT_REPLACE,
> +	FLOW_ACT_DESTROY,
> +	FLOW_ACT_STATS,
> +};
> +
> +struct flow_offload_action {
> +	struct netlink_ext_ack *extack;
> +	enum flow_act_command command;
> +	struct flow_stats stats;
> +	struct flow_action action;
> +};
> +
> +struct flow_offload_action *flow_action_alloc(unsigned int num_actions);
> +
>   static inline struct flow_rule *
>   flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
>   {
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index ec7823921bd2..cd4cf6b10f5d 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -266,6 +266,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
>   	for (; 0; (void)(i), (void)(a), (void)(exts))
>   #endif
>   
> +#define tcf_act_for_each_action(i, a, actions) \
> +	for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
> +
>   static inline void
>   tcf_exts_stats_update(const struct tcf_exts *exts,
>   		      u64 bytes, u64 packets, u64 drops, u64 lastuse,
> @@ -536,8 +539,19 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
>   	return ifindex == skb->skb_iif;
>   }
>   
> +#ifdef CONFIG_NET_CLS_ACT
>   int tc_setup_flow_action(struct flow_action *flow_action,
>   			 const struct tcf_exts *exts);
> +#else
> +static inline int tc_setup_flow_action(struct flow_action *flow_action,
> +				       const struct tcf_exts *exts)
> +{
> +		return 0;
> +}
> +#endif
> +
> +int tc_setup_action(struct flow_action *flow_action,
> +		    struct tc_action *actions[]);
>   void tc_cleanup_flow_action(struct flow_action *flow_action);
>   
>   int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
> @@ -558,6 +572,7 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
>   			  enum tc_setup_type type, void *type_data,
>   			  void *cb_priv, u32 *flags, unsigned int *in_hw_count);
>   unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
> +unsigned int tcf_act_num_actions(struct tc_action *actions[]);
>   
>   #ifdef CONFIG_NET_CLS_ACT
>   int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
> diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
> index 715b67f6c62f..0fa2f75cc9b3 100644
> --- a/net/core/flow_offload.c
> +++ b/net/core/flow_offload.c
> @@ -27,6 +27,27 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
>   }
>   EXPORT_SYMBOL(flow_rule_alloc);
>   
> +struct flow_offload_action *flow_action_alloc(unsigned int num_actions)
> +{
> +	struct flow_offload_action *fl_action;
> +	int i;
> +
> +	fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions),
> +			    GFP_KERNEL);
> +	if (!fl_action)
> +		return NULL;

Hi Simon,

Our automatic tests got a trace from flow_action_alloc()
introduced in this series.
I don't have specific commands right now but maybe its easy
to reproduce with option CONFIG_DEBUG_ATOMIC_SLEEP=y

fl_dump->fl_hw_update_stats->fl_hw_update_stats->tcf_exts_stats_update
   ->tcf_action_update_hw_stats->tcf_action_offload_cmd_pre->
     ->flow_action_alloc

Thanks,
Roi



[  761.008866] BUG: sleeping function called from invalid context at 
include/linux/sched/mm.h:201
[  761.011047] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 
20194, name: handler1
[  761.013052] INFO: lockdep is turned off.
[  761.014102] CPU: 1 PID: 20194 Comm: handler1 Not tainted 
5.14.0-rc1_2021_07_22_09_58_22_ #1
[  761.015891] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[  761.017190] Call Trace:
[  761.017556]  dump_stack_lvl+0x57/0x7d
[  761.018038]  ___might_sleep.cold+0x15a/0x1b7
[  761.018583]  __kmalloc+0x259/0x430
[  761.019044]  ? flow_action_alloc+0x28/0xd0
[  761.019574]  ? lock_release+0x460/0x750
[  761.020068]  flow_action_alloc+0x28/0xd0
[  761.020586]  tcf_action_offload_cmd_pre+0x25/0x130
[  761.021188]  tcf_action_update_hw_stats+0x9e/0x2e0
[  761.021791]  ? tcf_action_offload_cmd+0xe0/0xe0
[  761.022358]  ? is_bpf_text_address+0x73/0x110
[  761.022918]  ? mlx5e_delete_flower+0xf50/0xf50 [mlx5_core]
[  761.023713]  ? tc_setup_cb_call+0x181/0x270
[  761.024254]  fl_hw_update_stats+0x2e4/0x4e0 [cls_flower]
[  761.024908]  ? fl_hw_replace_filter+0x6a0/0x6a0 [cls_flower]
[  761.025595]  ? lock_release+0x460/0x750
[  761.026095]  ? memcpy+0x39/0x60
[  761.026537]  fl_dump+0x499/0x5f0 [cls_flower]
[  761.027106]  ? fl_tmplt_dump+0x220/0x220 [cls_flower]
[  761.027746]  ? memcpy+0x39/0x60
[  761.028173]  ? nla_put+0x15f/0x1c0
[  761.028634]  tcf_fill_node+0x50f/0x930
[  761.029137]  ? __tcf_get_next_proto+0x470/0x470
[  761.029725]  ? memset+0x20/0x40
[  761.030167]  tfilter_notify+0x15e/0x2d0
[  761.030681]  tc_new_tfilter+0x1128/0x1ea0
[  761.031218]  ? tc_del_tfilter+0x13e0/0x13e0
[  761.031768]  ? security_capable+0x51/0x90
[  761.032280]  ? tc_del_tfilter+0x13e0/0x13e0
[  761.032826]  rtnetlink_rcv_msg+0x637/0x950



> +
> +	fl_action->action.num_entries = num_actions;
> +	/* Pre-fill each action hw_stats with DONT_CARE.
> +	 * Caller can override this if it wants stats for a given action.
> +	 */
> +	for (i = 0; i < num_actions; i++)
> +		fl_action->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
> +
> +	return fl_action;
> +}
> +EXPORT_SYMBOL(flow_action_alloc);
> +
>   #define FLOW_DISSECTOR_MATCH(__rule, __type, __out)				\
>   	const struct flow_match *__m = &(__rule)->match;			\
>   	struct flow_dissector *__d = (__m)->dissector;				\
> @@ -476,6 +497,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
>   
>   	mutex_unlock(&flow_indr_block_lock);
>   
> -	return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
> +	if (bo)
> +		return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
> +	else
> +		return 0;
>   }
>   EXPORT_SYMBOL(flow_indr_dev_setup_offload);
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 998a2374f7ae..185f17ea60d5 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -1060,6 +1060,36 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
>   	return ERR_PTR(err);
>   }
>   
> +/* offload the tc command after inserted */
> +int tcf_action_offload_cmd(struct tc_action *actions[],
> +			   struct netlink_ext_ack *extack)
> +{
> +	struct flow_offload_action *fl_act;
> +	int err = 0;
> +
> +	fl_act = flow_action_alloc(tcf_act_num_actions(actions));
> +	if (!fl_act)
> +		return -ENOMEM;
> +
> +	fl_act->extack = extack;
> +	err = tc_setup_action(&fl_act->action, actions);
> +	if (err) {
> +		NL_SET_ERR_MSG_MOD(extack,
> +				   "Failed to setup tc actions for offload\n");
> +		goto err_out;
> +	}
> +	fl_act->command = FLOW_ACT_REPLACE;
> +
> +	flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, fl_act, NULL, NULL);
> +
> +	tc_cleanup_flow_action(&fl_act->action);
> +
> +err_out:
> +	kfree(fl_act);
> +	return err;
> +}
> +EXPORT_SYMBOL(tcf_action_offload_cmd);
> +
>   /* Returns numbers of initialized actions or negative error. */
>   
>   int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
> @@ -1514,6 +1544,9 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
>   		return ret;
>   	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
>   
> +	/* offload actions to hardware if possible */
> +	tcf_action_offload_cmd(actions, extack);
> +
>   	/* only put existing actions */
>   	for (i = 0; i < TCA_ACT_MAX_PRIO; i++)
>   		if (init_res[i] == ACT_P_CREATED)
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index c8cb59a11098..9b9770dab5e8 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -3544,8 +3544,8 @@ static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
>   	return hw_stats;
>   }
>   
> -int tc_setup_flow_action(struct flow_action *flow_action,
> -			 const struct tcf_exts *exts)
> +int tc_setup_action(struct flow_action *flow_action,
> +		    struct tc_action *actions[])
>   {
>   	struct tc_action *act;
>   	int i, j, k, err = 0;
> @@ -3554,11 +3554,11 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>   	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
>   	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
>   
> -	if (!exts)
> +	if (!actions)
>   		return 0;
>   
>   	j = 0;
> -	tcf_exts_for_each_action(i, act, exts) {
> +	tcf_act_for_each_action(i, act, actions) {
>   		struct flow_action_entry *entry;
>   
>   		entry = &flow_action->entries[j];
> @@ -3725,7 +3725,19 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>   	spin_unlock_bh(&act->tcfa_lock);
>   	goto err_out;
>   }
> +EXPORT_SYMBOL(tc_setup_action);
> +
> +#ifdef CONFIG_NET_CLS_ACT
> +int tc_setup_flow_action(struct flow_action *flow_action,
> +			 const struct tcf_exts *exts)
> +{
> +	if (!exts)
> +		return 0;
> +
> +	return tc_setup_action(flow_action, exts->actions);
> +}
>   EXPORT_SYMBOL(tc_setup_flow_action);
> +#endif
>   
>   unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
>   {
> @@ -3743,6 +3755,22 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
>   }
>   EXPORT_SYMBOL(tcf_exts_num_actions);
>   
> +unsigned int tcf_act_num_actions(struct tc_action *actions[])
> +{
> +	unsigned int num_acts = 0;
> +	struct tc_action *act;
> +	int i;
> +
> +	tcf_act_for_each_action(i, act, actions) {
> +		if (is_tcf_pedit(act))
> +			num_acts += tcf_pedit_nkeys(act);
> +		else
> +			num_acts++;
> +	}
> +	return num_acts;
> +}
> +EXPORT_SYMBOL(tcf_act_num_actions);
> +
>   #ifdef CONFIG_NET_CLS_ACT
>   static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
>   					u32 *p_block_index,
>
Simon Horman July 22, 2021, 1:19 p.m. UTC | #2
On Thu, Jul 22, 2021 at 03:24:07PM +0300, Roi Dayan wrote:
> 
> 
> On 2021-07-22 12:19 PM, Simon Horman wrote:
> > From: Baowen Zheng <baowen.zheng@corigine.com>
> > 
> > Use flow_indr_dev_register/flow_indr_dev_setup_offload to
> > offload tc action.
> > 
> > We offload the tc action mainly for ovs meter configuration.
> > Make some basic changes for different vendors to return EOPNOTSUPP.
> > 
> > We need to call tc_cleanup_flow_action to clean up tc action entry since
> > in tc_setup_action, some actions may hold dev refcnt, especially the mirror
> > action.
> > 
> > As per review from the RFC, the kernel test robot will fail to run, so
> > we add CONFIG_NET_CLS_ACT control for the action offload.
> > 
> > Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
> > Signed-off-by: Louis Peens <louis.peens@corigine.com>
> > Signed-off-by: Simon Horman <simon.horman@corigine.com>
> > ---
> >   drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  |  2 +-
> >   .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  3 ++
> >   .../ethernet/netronome/nfp/flower/offload.c   |  3 ++
> >   include/linux/netdevice.h                     |  1 +
> >   include/net/flow_offload.h                    | 15 ++++++++
> >   include/net/pkt_cls.h                         | 15 ++++++++
> >   net/core/flow_offload.c                       | 26 +++++++++++++-
> >   net/sched/act_api.c                           | 33 +++++++++++++++++
> >   net/sched/cls_api.c                           | 36 ++++++++++++++++---
> >   9 files changed, 128 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> > index 5e4429b14b8c..edbbf7b4df77 100644
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
> > @@ -1951,7 +1951,7 @@ static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, v
> >   				 void *data,
> >   				 void (*cleanup)(struct flow_block_cb *block_cb))
> >   {
> > -	if (!bnxt_is_netdev_indr_offload(netdev))
> > +	if (!netdev || !bnxt_is_netdev_indr_offload(netdev))
> >   		return -EOPNOTSUPP;
> >   	switch (type) {
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> > index 059799e4f483..111daacc4cc3 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> > @@ -486,6 +486,9 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *
> >   			    void *data,
> >   			    void (*cleanup)(struct flow_block_cb *block_cb))
> >   {
> > +	if (!netdev)
> > +		return -EOPNOTSUPP;
> > +
> >   	switch (type) {
> >   	case TC_SETUP_BLOCK:
> >   		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
> > diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
> > index 2406d33356ad..88bbc86347b4 100644
> > --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
> > +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
> > @@ -1869,6 +1869,9 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *
> >   			    void *data,
> >   			    void (*cleanup)(struct flow_block_cb *block_cb))
> >   {
> > +	if (!netdev)
> > +		return -EOPNOTSUPP;
> > +
> >   	if (!nfp_fl_is_netdev_to_offload(netdev))
> >   		return -EOPNOTSUPP;
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 42f6f866d5f3..b138219baf6f 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -923,6 +923,7 @@ enum tc_setup_type {
> >   	TC_SETUP_QDISC_TBF,
> >   	TC_SETUP_QDISC_FIFO,
> >   	TC_SETUP_QDISC_HTB,
> > +	TC_SETUP_ACT,
> >   };
> >   /* These structures hold the attributes of bpf state that are being passed
> > diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> > index 69c9eabf8325..26644596fd54 100644
> > --- a/include/net/flow_offload.h
> > +++ b/include/net/flow_offload.h
> > @@ -553,6 +553,21 @@ struct flow_cls_offload {
> >   	u32 classid;
> >   };
> > +enum flow_act_command {
> > +	FLOW_ACT_REPLACE,
> > +	FLOW_ACT_DESTROY,
> > +	FLOW_ACT_STATS,
> > +};
> > +
> > +struct flow_offload_action {
> > +	struct netlink_ext_ack *extack;
> > +	enum flow_act_command command;
> > +	struct flow_stats stats;
> > +	struct flow_action action;
> > +};
> > +
> > +struct flow_offload_action *flow_action_alloc(unsigned int num_actions);
> > +
> >   static inline struct flow_rule *
> >   flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
> >   {
> > diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> > index ec7823921bd2..cd4cf6b10f5d 100644
> > --- a/include/net/pkt_cls.h
> > +++ b/include/net/pkt_cls.h
> > @@ -266,6 +266,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
> >   	for (; 0; (void)(i), (void)(a), (void)(exts))
> >   #endif
> > +#define tcf_act_for_each_action(i, a, actions) \
> > +	for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
> > +
> >   static inline void
> >   tcf_exts_stats_update(const struct tcf_exts *exts,
> >   		      u64 bytes, u64 packets, u64 drops, u64 lastuse,
> > @@ -536,8 +539,19 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
> >   	return ifindex == skb->skb_iif;
> >   }
> > +#ifdef CONFIG_NET_CLS_ACT
> >   int tc_setup_flow_action(struct flow_action *flow_action,
> >   			 const struct tcf_exts *exts);
> > +#else
> > +static inline int tc_setup_flow_action(struct flow_action *flow_action,
> > +				       const struct tcf_exts *exts)
> > +{
> > +		return 0;
> > +}
> > +#endif
> > +
> > +int tc_setup_action(struct flow_action *flow_action,
> > +		    struct tc_action *actions[]);
> >   void tc_cleanup_flow_action(struct flow_action *flow_action);
> >   int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
> > @@ -558,6 +572,7 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
> >   			  enum tc_setup_type type, void *type_data,
> >   			  void *cb_priv, u32 *flags, unsigned int *in_hw_count);
> >   unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
> > +unsigned int tcf_act_num_actions(struct tc_action *actions[]);
> >   #ifdef CONFIG_NET_CLS_ACT
> >   int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
> > diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
> > index 715b67f6c62f..0fa2f75cc9b3 100644
> > --- a/net/core/flow_offload.c
> > +++ b/net/core/flow_offload.c
> > @@ -27,6 +27,27 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
> >   }
> >   EXPORT_SYMBOL(flow_rule_alloc);
> > +struct flow_offload_action *flow_action_alloc(unsigned int num_actions)
> > +{
> > +	struct flow_offload_action *fl_action;
> > +	int i;
> > +
> > +	fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions),
> > +			    GFP_KERNEL);
> > +	if (!fl_action)
> > +		return NULL;
> 
> Hi Simon,
> 
> Our automatic tests got a trace from flow_action_alloc()
> introduced in this series.
> I don't have specific commands right now but maybe its easy
> to reproduce with option CONFIG_DEBUG_ATOMIC_SLEEP=y
> 
> fl_dump->fl_hw_update_stats->fl_hw_update_stats->tcf_exts_stats_update
>   ->tcf_action_update_hw_stats->tcf_action_offload_cmd_pre->
>     ->flow_action_alloc
> 
> Thanks,
> Roi

Thanks Roi,

we'll look into this.
Vlad Buslov July 22, 2021, 2:55 p.m. UTC | #3
On Thu 22 Jul 2021 at 12:19, Simon Horman <simon.horman@corigine.com> wrote:
> From: Baowen Zheng <baowen.zheng@corigine.com>
>
> When collecting stats for actions update them using both
> both hardware and software counters.
>
> Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
> Signed-off-by: Louis Peens <louis.peens@corigine.com>
> Signed-off-by: Simon Horman <simon.horman@corigine.com>
> ---
>  include/net/act_api.h      |  1 +
>  include/net/flow_offload.h |  2 +-
>  include/net/pkt_cls.h      |  4 ++++
>  net/sched/act_api.c        | 49 ++++++++++++++++++++++++++++++++++----
>  4 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/act_api.h b/include/net/act_api.h
> index 086b291e9530..fe8331b5efce 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -233,6 +233,7 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
>  void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
>  			     u64 drops, bool hw);
>  int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
> +int tcf_action_update_hw_stats(struct tc_action *action);
>  
>  int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
>  			     struct tcf_chain **handle,
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 26644596fd54..467688fff7ce 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -560,7 +560,7 @@ enum flow_act_command {
>  };
>  
>  struct flow_offload_action {
> -	struct netlink_ext_ack *extack;
> +	struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/
>  	enum flow_act_command command;
>  	struct flow_stats stats;
>  	struct flow_action action;
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index 03dae225d64f..569c9294b15b 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -282,6 +282,10 @@ tcf_exts_stats_update(const struct tcf_exts *exts,
>  	for (i = 0; i < exts->nr_actions; i++) {
>  		struct tc_action *a = exts->actions[i];
>  
> +		/* if stats from hw, just skip */
> +		if (!tcf_action_update_hw_stats(a))
> +			continue;
> +

Is it okay to call this inside preempt disable section?

>  		tcf_action_stats_update(a, bytes, packets, drops,
>  					lastuse, true);
>  		a->used_hw_stats = used_hw_stats;
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 23a4538916af..7d5535bc2c13 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -1089,15 +1089,18 @@ int tcf_action_offload_cmd_pre(struct tc_action *actions[],
>  EXPORT_SYMBOL(tcf_action_offload_cmd_pre);
>  
>  int tcf_action_offload_cmd_post(struct flow_offload_action *fl_act,
> -				struct netlink_ext_ack *extack)
> +				struct netlink_ext_ack *extack,
> +				bool keep_fl_act)
>  {
>  	if (IS_ERR(fl_act))
>  		return PTR_ERR(fl_act);
>  
>  	flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, fl_act, NULL, NULL);
>  
> -	tc_cleanup_flow_action(&fl_act->action);
> -	kfree(fl_act);
> +	if (!keep_fl_act) {
> +		tc_cleanup_flow_action(&fl_act->action);
> +		kfree(fl_act);
> +	}
>  	return 0;
>  }
>  
> @@ -1115,10 +1118,45 @@ int tcf_action_offload_cmd(struct tc_action *actions[],
>  	if (err)
>  		return err;
>  
> -	return tcf_action_offload_cmd_post(fl_act, extack);
> +	return tcf_action_offload_cmd_post(fl_act, extack, false);
>  }
>  EXPORT_SYMBOL(tcf_action_offload_cmd);
>  
> +int tcf_action_update_hw_stats(struct tc_action *action)
> +{
> +	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
> +		[0] = action,
> +	};
> +	struct flow_offload_action *fl_act;
> +	int err = 0;
> +

Having some way to distinguish offloaded actions would also be useful
here to skip this function. I wonder how this affects dump rate when
executed for every single action, even when none of them were offloaded
through action API.

> +	err = tcf_action_offload_cmd_pre(actions,
> +					 FLOW_ACT_STATS,
> +					 NULL,
> +					 &fl_act);
> +	if (err)
> +		goto err_out;
> +
> +	err = tcf_action_offload_cmd_post(fl_act, NULL, true);
> +
> +	if (fl_act->stats.lastused) {
> +		tcf_action_stats_update(action, fl_act->stats.bytes,
> +					fl_act->stats.pkts,
> +					fl_act->stats.drops,
> +					fl_act->stats.lastused,
> +					true);
> +		err = 0;
> +	} else {
> +		err = -EOPNOTSUPP;
> +	}
> +	tc_cleanup_flow_action(&fl_act->action);
> +	kfree(fl_act);
> +
> +err_out:
> +	return err;
> +}
> +EXPORT_SYMBOL(tcf_action_update_hw_stats);
> +
>  /* offload the tc command after deleted */
>  int tcf_action_offload_del_post(struct flow_offload_action *fl_act,
>  				struct tc_action *actions[],
> @@ -1255,6 +1293,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
>  	if (p == NULL)
>  		goto errout;
>  
> +	/* update hw stats for this action */
> +	tcf_action_update_hw_stats(p);
> +
>  	/* compat_mode being true specifies a call that is supposed
>  	 * to add additional backward compatibility statistic TLVs.
>  	 */