@@ -179,6 +179,39 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
return true;
}
+static inline int red_get_flags(unsigned char qopt_flags, /* in: flags byte passed through qopt */
+ unsigned char historic_mask, /* in: bits of qopt_flags that are treated as flags */
+ struct nlattr *flags_attr, /* in: dedicated bitfield32 flags attribute, or NULL */
+ unsigned char supported_mask, /* in: not referenced in this function body */
+ struct nla_bitfield32 *p_flags, /* out: selector/value pair for the caller to apply */
+ unsigned char *p_userbits, /* out: qopt_flags bits outside historic_mask */
+ struct netlink_ext_ack *extack) /* out: error message on failure */
+{ /* Split the passed flags into a flag bitfield and user bits; returns 0, or -EINVAL if both flag sources are used. */
+ struct nla_bitfield32 flags; /* built locally so the outputs are written only on success */
+
+ if (qopt_flags && flags_attr) { /* nonzero qopt flags plus the attribute: rejected, only one source may be used */
+ NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
+ return -EINVAL;
+ }
+
+ if (flags_attr) {
+ flags = nla_get_bitfield32(flags_attr); /* attribute supplies both selector and value */
+ } else {
+ flags.selector = historic_mask; /* legacy path: every historic bit is selected ... */
+ flags.value = qopt_flags & historic_mask; /* ... and takes the state given in qopt_flags */
+ }
+
+ *p_flags = flags;
+ *p_userbits = qopt_flags & ~historic_mask; /* zero whenever flags_attr is set, since qopt_flags must then be 0 */
+ return 0;
+}
+
+static inline int red_validate_flags(unsigned char flags, /* in: resulting flag combination to check */
+ struct netlink_ext_ack *extack) /* not used by the current body */
+{ /* Validation step for a resulting flag configuration; the current body accepts every value. */
+ return 0; /* always success: no combination is rejected here */
+}
+
static inline void red_set_parms(struct red_parms *p,
u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
u8 Scell_log, u8 *stab, u32 max_P)
@@ -256,6 +256,7 @@ enum {
TCA_RED_PARMS,
TCA_RED_STAB,
TCA_RED_MAX_P,
+ TCA_RED_FLAGS, /* bitfield32 */
__TCA_RED_MAX,
};
@@ -268,12 +269,27 @@ struct tc_red_qopt {
unsigned char Wlog; /* log(W) */
unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
unsigned char Scell_log; /* cell size for idle damping */
+
+ /* This field can be used for flags that a RED-like qdisc has
+ * historically supported. E.g. when configuring RED, it can be used for
+ * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN,
+ * HARDDROP. Etc. Because this field has not been validated, and is
+ * copied back on dump, any bits besides those to which a given qdisc
+ * has assigned a historical meaning need to be considered for free use
+ * by userspace tools.
+ *
+ * Any further flags need to be passed differently, e.g. through an
+ * attribute (such as TCA_RED_FLAGS above). Such attribute should allow
+ * passing both recent and historic flags in one value.
+ */
unsigned char flags;
#define TC_RED_ECN 1
#define TC_RED_HARDDROP 2
#define TC_RED_ADAPTATIVE 4
};
+#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
+
struct tc_red_xstats {
__u32 early; /* Early drops */
__u32 pdrop; /* Drops due to queue limits */
@@ -35,7 +35,11 @@
struct red_sched_data {
u32 limit; /* HARD maximal queue length */
+
unsigned char flags;
+ /* Non-flags in tc_red_qopt.flags. */
+ unsigned char userbits;
+
struct timer_list adapt_timer;
struct Qdisc *sch;
struct red_parms parms;
@@ -44,6 +48,8 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
+static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS; /* used as TCA_RED_FLAGS validation data and as the selector reported by red_dump() */
+
static inline int red_use_ecn(struct red_sched_data *q)
{
return q->flags & TC_RED_ECN;
@@ -183,9 +189,12 @@ static void red_destroy(struct Qdisc *sch)
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { /* Netlink attribute policy, indexed by TCA_RED_* attribute type. */
+ [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS }, /* declared at index zero; NOTE(review): presumably strict validation applies from TCA_RED_FLAGS onward -- confirm against the netlink policy docs */
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
+ [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32, /* flags passed as a selector/value bitfield */
+ .validation_data = &red_supported_flags }, /* presumably the mask of bits the bitfield may use -- confirm */
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -194,7 +203,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *old_child = NULL, *child = NULL;
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
+ struct nla_bitfield32 flags_bf;
struct tc_red_qopt *ctl;
+ unsigned char userbits;
+ unsigned char flags;
int err;
u32 max_P;
@@ -216,6 +228,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
return -EINVAL;
+ err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
+ tb[TCA_RED_FLAGS], red_supported_flags,
+ &flags_bf, &userbits, extack);
+ if (err)
+ return err;
+
if (ctl->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
extack);
@@ -227,7 +245,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_lock(sch);
- q->flags = ctl->flags;
+
+ flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
+ err = red_validate_flags(flags, extack);
+ if (err)
+ goto unlock_out;
+
+ q->flags = flags;
+ q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
qdisc_tree_flush_backlog(q->qdisc);
@@ -256,6 +281,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (old_child)
qdisc_put(old_child);
return 0;
+
+unlock_out:
+ sch_tree_unlock(sch);
+ if (child)
+ qdisc_put(child);
+ return err;
}
static inline void red_adaptative_timer(struct timer_list *t)
@@ -299,10 +330,15 @@ static int red_dump_offload_stats(struct Qdisc *sch)
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct nla_bitfield32 flags_bf = {
+ .selector = red_supported_flags,
+ .value = q->flags,
+ };
struct nlattr *opts = NULL;
struct tc_red_qopt opt = {
.limit = q->limit,
- .flags = q->flags,
+ .flags = (q->flags & TC_RED_HISTORIC_FLAGS) |
+ q->userbits,
.qth_min = q->parms.qth_min >> q->parms.Wlog,
.qth_max = q->parms.qth_max >> q->parms.Wlog,
.Wlog = q->parms.Wlog,
@@ -319,7 +355,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
- nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
+ nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
+ nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf))
goto nla_put_failure;
return nla_nest_end(skb, opts);
The qdiscs RED, GRED, SFQ and CHOKE use different subsets of the same pool
of global RED flags. These are passed in tc_red_qopt.flags. However, none of
these qdiscs validates the flags field; they just copy it over wholesale to
internal structures, and later dump it back. (An exception is GRED, which
does validate for VQs -- however not for the main setup.)

A broken userspace can therefore configure a qdisc with arbitrary
unsupported flags, and later expect to see the flags on qdisc dump. The
current ABI therefore allows storage of several bits of custom data to qdisc
instances of the types mentioned above. How many bits depends on which flags
are meaningful for the qdisc in question. E.g. SFQ recognizes flags ECN and
HARDDROP, and the rest is not interpreted.

If SFQ ever needs to support ADAPTATIVE, it needs another way of doing it,
and at the same time it needs to retain the possibility to store 6 bits of
uninterpreted data. Likewise RED, which adds a new flag later in this
patchset.

To that end, this patch adds a new function, red_get_flags(), to split the
passed flags of RED-like qdiscs into flags and user bits, and
red_validate_flags() to validate the resulting configuration. It further
adds a new attribute, TCA_RED_FLAGS, to pass arbitrary flags.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---

Notes:
    v4:
    - Declare .strict_start_type at element with index zero, not at the
      first array element declaration.

    v3:
    - Change TCA_RED_FLAGS from NLA_U32 to NLA_BITFIELD32. Change
      RED_SUPPORTED_FLAGS the macro to red_supported_flags the constant
      and use as .validation_data.
    - Set policy's .strict_start_type to TCA_RED_FLAGS.
    - red_get_flags(): Don't modify the passed-in flags until the end of
      the function. Return errno instead of bool.
    - Keep red_sched_data.flags as unsigned char.
    - Because bitfield32 allows only a subset of flags to be set, move the
      validation of the resulting configuration in red_change() into the
      critical section. Add a function red_validate_flags() specifically
      for the validation.
    - Remove braces when setting tc_red_qopt.flags in red_dump().
    - Check nla_put()'s return code when dumping TCA_RED_FLAGS.
    - Always dump TCA_RED_FLAGS, even if only old flags are active. The
      BITFIELD32 interface is richer and this way we can communicate to
      the client which flags are actually supported.

    v2:
    - This patch is new.

 include/net/red.h              | 33 ++++++++++++++++++++++++++
 include/uapi/linux/pkt_sched.h | 16 +++++++++++++
 net/sched/sch_red.c            | 43 +++++++++++++++++++++++++++++++---
 3 files changed, 89 insertions(+), 3 deletions(-)