Message ID | 86b8a508d7e782b003d60acb06536681f0d4c721.1631660727.git.leonro@nvidia.com |
---|---|
State | New |
Headers | show |
Series | Optional counter statistics support | expand |
On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: > +static int stat_get_doit_default_counter(struct sk_buff *skb, > + struct nlmsghdr *nlh, > + struct netlink_ext_ack *extack, > + struct nlattr *tb[]) > +{ > + struct rdma_hw_stats *stats; > + struct ib_device *device; > + u32 index, port; > + int ret; > + > + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) > + return -EINVAL; > + > + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); > + device = ib_device_get_by_index(sock_net(skb->sk), index); > + if (!device) > + return -EINVAL; > + > + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); > + if (!rdma_is_port_valid(device, port)) { > + ret = -EINVAL; > + goto end; > + } > + > + stats = ib_get_hw_stats_port(device, port); > + if (!stats) { > + ret = -EINVAL; > + goto end; > + } > + > + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) > + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, > + device, port, stats); > + else > + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, > + port, stats); This seems strange, why is the output of a get contingent on an ignored input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be emitted? Jason
On 9/28/2021 1:30 AM, Jason Gunthorpe wrote: > On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: >> +static int stat_get_doit_default_counter(struct sk_buff *skb, >> + struct nlmsghdr *nlh, >> + struct netlink_ext_ack *extack, >> + struct nlattr *tb[]) >> +{ >> + struct rdma_hw_stats *stats; >> + struct ib_device *device; >> + u32 index, port; >> + int ret; >> + >> + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) >> + return -EINVAL; >> + >> + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); >> + device = ib_device_get_by_index(sock_net(skb->sk), index); >> + if (!device) >> + return -EINVAL; >> + >> + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); >> + if (!rdma_is_port_valid(device, port)) { >> + ret = -EINVAL; >> + goto end; >> + } >> + >> + stats = ib_get_hw_stats_port(device, port); >> + if (!stats) { >> + ret = -EINVAL; >> + goto end; >> + } >> + >> + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) >> + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, >> + device, port, stats); >> + else >> + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, >> + port, stats); > > This seems strange, why is the output of a get contingent on a ignored > input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be > emitted? The CMD_STAT_GET is originally used to get the default hwcounter statistic (the value of all hwstats), now we also want to use this command to get a list of counters (just name and status), so kernel differentiates these 2 cases based on HWCOUNTER_DYNAMIC attr.
On Tue, Sep 28, 2021 at 05:12:39PM +0800, Mark Zhang wrote: > On 9/28/2021 1:30 AM, Jason Gunthorpe wrote: > > On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: > > > +static int stat_get_doit_default_counter(struct sk_buff *skb, > > > + struct nlmsghdr *nlh, > > > + struct netlink_ext_ack *extack, > > > + struct nlattr *tb[]) > > > +{ > > > + struct rdma_hw_stats *stats; > > > + struct ib_device *device; > > > + u32 index, port; > > > + int ret; > > > + > > > + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) > > > + return -EINVAL; > > > + > > > + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); > > > + device = ib_device_get_by_index(sock_net(skb->sk), index); > > > + if (!device) > > > + return -EINVAL; > > > + > > > + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); > > > + if (!rdma_is_port_valid(device, port)) { > > > + ret = -EINVAL; > > > + goto end; > > > + } > > > + > > > + stats = ib_get_hw_stats_port(device, port); > > > + if (!stats) { > > > + ret = -EINVAL; > > > + goto end; > > > + } > > > + > > > + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) > > > + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, > > > + device, port, stats); > > > + else > > > + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, > > > + port, stats); > > > > This seems strange, why is the output of a get contingent on a ignored > > input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be > > emitted? > > The CMD_STAT_GET is originally used to get the default hwcounter statistic > (the value of all hwstats), now we also want to use this command to get a > list of counters (just name and status), so kernel differentiates these 2 > cases based on HWCOUNTER_DYNAMIC attr. Don't do that, it is not how netlink works. Either the whole attribute should be returned or you need a new get command Jason
On 9/28/2021 7:52 PM, Jason Gunthorpe wrote: > On Tue, Sep 28, 2021 at 05:12:39PM +0800, Mark Zhang wrote: >> On 9/28/2021 1:30 AM, Jason Gunthorpe wrote: >>> On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: >>>> +static int stat_get_doit_default_counter(struct sk_buff *skb, >>>> + struct nlmsghdr *nlh, >>>> + struct netlink_ext_ack *extack, >>>> + struct nlattr *tb[]) >>>> +{ >>>> + struct rdma_hw_stats *stats; >>>> + struct ib_device *device; >>>> + u32 index, port; >>>> + int ret; >>>> + >>>> + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) >>>> + return -EINVAL; >>>> + >>>> + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); >>>> + device = ib_device_get_by_index(sock_net(skb->sk), index); >>>> + if (!device) >>>> + return -EINVAL; >>>> + >>>> + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); >>>> + if (!rdma_is_port_valid(device, port)) { >>>> + ret = -EINVAL; >>>> + goto end; >>>> + } >>>> + >>>> + stats = ib_get_hw_stats_port(device, port); >>>> + if (!stats) { >>>> + ret = -EINVAL; >>>> + goto end; >>>> + } >>>> + >>>> + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) >>>> + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, >>>> + device, port, stats); >>>> + else >>>> + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, >>>> + port, stats); >>> >>> This seems strange, why is the output of a get contingent on a ignored >>> input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be >>> emitted? >> >> The CMD_STAT_GET is originally used to get the default hwcounter statistic >> (the value of all hwstats), now we also want to use this command to get a >> list of counters (just name and status), so kernel differentiates these 2 >> cases based on HWCOUNTER_DYNAMIC attr. > > Don't do that, it is not how netlink works. Either the whole attribute > should be returned or you need a new get command Will add a new get command for backward compatibility, thanks.
On Tue, Sep 28, 2021 at 08:52:17AM -0300, Jason Gunthorpe wrote: > On Tue, Sep 28, 2021 at 05:12:39PM +0800, Mark Zhang wrote: > > On 9/28/2021 1:30 AM, Jason Gunthorpe wrote: > > > On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: > > > > +static int stat_get_doit_default_counter(struct sk_buff *skb, > > > > + struct nlmsghdr *nlh, > > > > + struct netlink_ext_ack *extack, > > > > + struct nlattr *tb[]) > > > > +{ > > > > + struct rdma_hw_stats *stats; > > > > + struct ib_device *device; > > > > + u32 index, port; > > > > + int ret; > > > > + > > > > + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) > > > > + return -EINVAL; > > > > + > > > > + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); > > > > + device = ib_device_get_by_index(sock_net(skb->sk), index); > > > > + if (!device) > > > > + return -EINVAL; > > > > + > > > > + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); > > > > + if (!rdma_is_port_valid(device, port)) { > > > > + ret = -EINVAL; > > > > + goto end; > > > > + } > > > > + > > > > + stats = ib_get_hw_stats_port(device, port); > > > > + if (!stats) { > > > > + ret = -EINVAL; > > > > + goto end; > > > > + } > > > > + > > > > + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) > > > > + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, > > > > + device, port, stats); > > > > + else > > > > + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, > > > > + port, stats); > > > > > > This seems strange, why is the output of a get contingent on a ignored > > > input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be > > > emitted? > > > > The CMD_STAT_GET is originally used to get the default hwcounter statistic > > (the value of all hwstats), now we also want to use this command to get a > > list of counters (just name and status), so kernel differentiates these 2 > > cases based on HWCOUNTER_DYNAMIC attr. > > Don't do that, it is not how netlink works. 
Either the whole attribute > should be returned or you need a new get command The netlink way is to be independent of the returned parameter: if it is not supported, this parameter won't be available at all. This makes HWCOUNTER_DYNAMIC work exactly as netlink would do. Thanks > > Jason
On Wed, Sep 29, 2021 at 03:26:25PM +0300, Leon Romanovsky wrote: > On Tue, Sep 28, 2021 at 08:52:17AM -0300, Jason Gunthorpe wrote: > > On Tue, Sep 28, 2021 at 05:12:39PM +0800, Mark Zhang wrote: > > > On 9/28/2021 1:30 AM, Jason Gunthorpe wrote: > > > > On Wed, Sep 15, 2021 at 02:07:25AM +0300, Leon Romanovsky wrote: > > > > > +static int stat_get_doit_default_counter(struct sk_buff *skb, > > > > > + struct nlmsghdr *nlh, > > > > > + struct netlink_ext_ack *extack, > > > > > + struct nlattr *tb[]) > > > > > +{ > > > > > + struct rdma_hw_stats *stats; > > > > > + struct ib_device *device; > > > > > + u32 index, port; > > > > > + int ret; > > > > > + > > > > > + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) > > > > > + return -EINVAL; > > > > > + > > > > > + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); > > > > > + device = ib_device_get_by_index(sock_net(skb->sk), index); > > > > > + if (!device) > > > > > + return -EINVAL; > > > > > + > > > > > + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); > > > > > + if (!rdma_is_port_valid(device, port)) { > > > > > + ret = -EINVAL; > > > > > + goto end; > > > > > + } > > > > > + > > > > > + stats = ib_get_hw_stats_port(device, port); > > > > > + if (!stats) { > > > > > + ret = -EINVAL; > > > > > + goto end; > > > > > + } > > > > > + > > > > > + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) > > > > > + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, > > > > > + device, port, stats); > > > > > + else > > > > > + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, > > > > > + port, stats); > > > > > > > > This seems strange, why is the output of a get contingent on a ignored > > > > input attribute? Shouldn't the HWCOUNTER_DYNAMIC just always be > > > > emitted? 
> > > > > > The CMD_STAT_GET is originally used to get the default hwcounter statistic > > > (the value of all hwstats), now we also want to use this command to get a > > > list of counters (just name and status), so kernel differentiates these 2 > > > cases based on HWCOUNTER_DYNAMIC attr. > > > > Don't do that, it is not how netlink works. Either the whole attribute > > should be returned or you need a new get command > > The netlink way is to be independent on returned parameter, if it not > supported, this parameter won't be available at all. This makes HWCOUNTER_DYNAMIC > to work exactly as netlink would do. The issue is making the output dependent on the input: + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) Setting HWCOUNTER_DYNAMIC as an input flag to get the GET to return a completely different output format is not netlinky Either always return HWCOUNTER_DYNAMIC or make another query to get it Jason
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 67519730b1ac..d9443983efdc 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -154,6 +154,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -2046,49 +2048,90 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } -static int stat_get_doit_default_counter(struct sk_buff *skb, - struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, - struct nlattr *tb[]) +static int stat_get_doit_stats_list(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port, + struct rdma_hw_stats *stats) { - struct rdma_hw_stats *stats; - struct nlattr *table_attr; - struct ib_device *device; - int ret, num_cnts, i; + struct nlattr *table, *entry; struct sk_buff *msg; - u32 index, port; - u64 v; + int i; - if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) - return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; - index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(sock_net(skb->sk), index); - if (!device) - return -EINVAL; + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); - if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { - ret = -EINVAL; - goto err; - } + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + goto err_msg; - port = 
nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - stats = ib_get_hw_stats_port(device, port); - if (!stats) { - ret = -EINVAL; - goto err; + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table) + goto err_msg; + + mutex_lock(&stats->lock); + for (i = 0; i < stats->num_counters; i++) { + entry = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry) + goto err_msg_table; + + if (nla_put_string(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + stats->descs[i].name) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) + goto err_msg_entry; + + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, + !test_bit(i, stats->is_disabled)))) + goto err_msg_entry; + + nla_nest_end(msg, entry); } + mutex_unlock(&stats->lock); + + nla_nest_end(msg, table); + nlmsg_end(msg, nlh); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); + +err_msg_entry: + nla_nest_cancel(msg, entry); +err_msg_table: + mutex_unlock(&stats->lock); + nla_nest_cancel(msg, table); +err_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +static int stat_get_doit_stats_values(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port, + struct rdma_hw_stats *stats) +{ + struct nlattr *table_attr; + int ret, num_cnts, i; + struct sk_buff *msg; + u64 v; + + if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) + return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto err; - } + if (!msg) + return -ENOMEM; - nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, - RDMA_NLDEV_CMD_STAT_GET), - 0, 0); + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); if (fill_nldev_handle(msg, device) || nla_put_u32(msg, 
RDMA_NLDEV_ATTR_PORT_INDEX, port)) { @@ -2098,7 +2141,8 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, mutex_lock(&stats->lock); - num_cnts = device->ops.get_hw_stats(device, stats, port, 0); + num_cnts = device->ops.get_hw_stats(device, stats, port, + stats->num_counters); if (num_cnts < 0) { ret = -EINVAL; goto err_stats; @@ -2125,7 +2169,6 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, mutex_unlock(&stats->lock); nlmsg_end(msg, nlh); - ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_table: @@ -2134,7 +2177,46 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, mutex_unlock(&stats->lock); err_msg: nlmsg_free(msg); -err: + return ret; +} + +static int stat_get_doit_default_counter(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr *tb[]) +{ + struct rdma_hw_stats *stats; + struct ib_device *device; + u32 index, port; + int ret; + + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto end; + } + + stats = ib_get_hw_stats_port(device, port); + if (!stats) { + ret = -EINVAL; + goto end; + } + + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC]) + ret = stat_get_doit_stats_list(skb, nlh, extack, tb, + device, port, stats); + else + ret = stat_get_doit_stats_values(skb, nlh, extack, tb, device, + port, stats); +end: ib_device_put(device); return ret; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 75a1ae2311d8..2017970279ed 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -549,6 +549,9 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* 
u8 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, /* u32 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, /* u8 */ + /* * Always the end */