Message ID | 893e22e2ad6413a98ca76134b332c8962fcd3b6a.1612815058.git.petrm@nvidia.com |
---|---|
State | Superseded |
Headers | show |
Series | nexthop: Resilient next-hop groups | expand |
On 2/8/21 1:42 PM, Petr Machata wrote: > @@ -52,8 +53,50 @@ enum { > NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ > /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ > > + /* nested; resilient nexthop group attributes */ > + NHA_RES_GROUP, > + /* nested; nexthop bucket attributes */ > + NHA_RES_BUCKET, > + > __NHA_MAX, > }; > > #define NHA_MAX (__NHA_MAX - 1) > + > +enum { > + NHA_RES_GROUP_UNSPEC, > + /* Pad attribute for 64-bit alignment. */ > + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, > + > + /* u32; number of nexthop buckets in a resilient nexthop group */ > + NHA_RES_GROUP_BUCKETS, u32 is overkill; arguably u16 (64k) should be more than enough buckets for any real use case.
On Sat, Feb 13, 2021 at 12:16:45PM -0700, David Ahern wrote: > On 2/8/21 1:42 PM, Petr Machata wrote: > > @@ -52,8 +53,50 @@ enum { > > NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ > > /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ > > > > + /* nested; resilient nexthop group attributes */ > > + NHA_RES_GROUP, > > + /* nested; nexthop bucket attributes */ > > + NHA_RES_BUCKET, > > + > > __NHA_MAX, > > }; > > > > #define NHA_MAX (__NHA_MAX - 1) > > + > > +enum { > > + NHA_RES_GROUP_UNSPEC, > > + /* Pad attribute for 64-bit alignment. */ > > + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, > > + > > + /* u32; number of nexthop buckets in a resilient nexthop group */ > > + NHA_RES_GROUP_BUCKETS, > > u32 is overkill; arguably u16 (64k) should be more than enough buckets > for any real use case. We wanted to make it future-proof, but I think we can live with 64k. At least in Spectrum the maximum is 4k. I don't know about other devices, but I guess it is not more than 64k.
Ido Schimmel <idosch@idosch.org> writes: > On Sat, Feb 13, 2021 at 12:16:45PM -0700, David Ahern wrote: >> On 2/8/21 1:42 PM, Petr Machata wrote: >> > @@ -52,8 +53,50 @@ enum { >> > NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ >> > /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ >> > >> > + /* nested; resilient nexthop group attributes */ >> > + NHA_RES_GROUP, >> > + /* nested; nexthop bucket attributes */ >> > + NHA_RES_BUCKET, >> > + >> > __NHA_MAX, >> > }; >> > >> > #define NHA_MAX (__NHA_MAX - 1) >> > + >> > +enum { >> > + NHA_RES_GROUP_UNSPEC, >> > + /* Pad attribute for 64-bit alignment. */ >> > + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, >> > + >> > + /* u32; number of nexthop buckets in a resilient nexthop group */ >> > + NHA_RES_GROUP_BUCKETS, >> >> u32 is overkill; arguably u16 (64k) should be more than enough buckets >> for any real use case. > > We wanted to make it future-proof, but I think we can live with 64k. At > least in Spectrum the maximum is 4k. I don't know about other devices, > but I guess it is not more than 64k. OK, no problem. I was thinking of keeping the API as u32, and tracking as u16 internally, but let's not add baggage at this stage already. If push comes to shove, there can be another u32 attribute mutually exclusive with this one.
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 2d4a1e784cf0..624460bc2d93 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -22,6 +22,7 @@ struct nexthop_grp { enum { NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ + NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */ __NEXTHOP_GRP_TYPE_MAX, }; @@ -52,8 +53,50 @@ enum { NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + /* nested; resilient nexthop group attributes */ + NHA_RES_GROUP, + /* nested; nexthop bucket attributes */ + NHA_RES_BUCKET, + __NHA_MAX, }; #define NHA_MAX (__NHA_MAX - 1) + +enum { + NHA_RES_GROUP_UNSPEC, + /* Pad attribute for 64-bit alignment. */ + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, + + /* u32; number of nexthop buckets in a resilient nexthop group */ + NHA_RES_GROUP_BUCKETS, + /* clock_t as u32; nexthop bucket idle timer (per-group) */ + NHA_RES_GROUP_IDLE_TIMER, + /* clock_t as u32; nexthop unbalanced timer */ + NHA_RES_GROUP_UNBALANCED_TIMER, + /* clock_t as u64; nexthop unbalanced time */ + NHA_RES_GROUP_UNBALANCED_TIME, + + __NHA_RES_GROUP_MAX, +}; + +#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) + +enum { + NHA_RES_BUCKET_UNSPEC, + /* Pad attribute for 64-bit alignment. 
*/ + NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC, + + /* u32; nexthop bucket index */ + NHA_RES_BUCKET_INDEX, + /* clock_t as u64; nexthop bucket idle time */ + NHA_RES_BUCKET_IDLE_TIME, + /* u32; nexthop id assigned to the nexthop bucket */ + NHA_RES_BUCKET_NH_ID, + + __NHA_RES_BUCKET_MAX, +}; + +#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) + #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 91e4ca064d61..d35953bc7d53 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -178,6 +178,13 @@ enum { RTM_GETVLAN, #define RTM_GETVLAN RTM_GETVLAN + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7b687bca0b87..5d560d381070 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1486,6 +1486,8 @@ static struct nexthop *nexthop_create_group(struct net *net, if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) nhg->mpath = 1; + else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) + goto out_no_nh; WARN_ON_ONCE(nhg->mpath != 1); diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index b69231918686..d59276f48d4f 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -88,6 +88,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] = { RTM_NEWVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETVLAN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_NEWNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_GETNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = @@ -171,7 +174,7 @@ int 
selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ - BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3)); + BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break;