diff mbox series

[v4,net-next,16/16] net: dsa: tag_ocelot_8021q: add support for PTP timestamping

Message ID 20210119230749.1178874-17-olteanv@gmail.com
State New
Headers show
Series tag_8021q for Ocelot switches | expand

Commit Message

Vladimir Oltean Jan. 19, 2021, 11:07 p.m. UTC
From: Vladimir Oltean <vladimir.oltean@nxp.com>

On TX, use the result of the ptp_classify_raw() BPF classifier from
dsa_skb_tx_timestamp() to divert some frames over to the MMIO-based
injection registers.

On RX, set up a VCAP IS2 rule that redirects the frames with an
EtherType for 1588 to the CPU port module (for MMIO based extraction)
and, if the "no XTR IRQ" workaround is in place, copies them to the
dsa_8021q CPU port as well (for notification).

There is a conflict between the VCAP IS2 trapping rule and the semantics
of the BPF classifier. Namely, ptp_classify_raw() deems general messages
as non-timestampable, but still, those are trapped to the CPU port
module since they have an EtherType of ETH_P_1588. So, if the "no XTR
IRQ" workaround is in place, we need to run another BPF classifier on
the frames extracted over MMIO, to avoid duplicates being sent to the
stack (once over Ethernet, once over MMIO). It doesn't look like it's
possible to install VCAP IS2 rules based on keys extracted from the 1588
frame headers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
Changes in v4:
Now draining the CPU queues by continuously reading QS_XTR_READ, same as
Ocelot, instead of one-time asserting QS_XTR_FLUSH, which actually
needed a sleep to be effective.

Changes in v3:
None.

Changes in v2:
Patch is new.

 drivers/net/dsa/ocelot/felix.c             | 81 ++++++++++++++++++++++
 drivers/net/ethernet/mscc/ocelot.c         |  7 ++
 drivers/net/ethernet/mscc/ocelot_vsc7514.c |  3 +-
 include/soc/mscc/ocelot.h                  |  1 +
 net/dsa/tag_ocelot_8021q.c                 | 24 +++++++
 5 files changed, 114 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index cb05d795ef05..72ab34194897 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -16,6 +16,7 @@ 
 #include <linux/dsa/8021q.h>
 #include <linux/platform_device.h>
 #include <linux/packing.h>
+#include <linux/ptp_classify.h>
 #include <linux/module.h>
 #include <linux/of_net.h>
 #include <linux/pci.h>
@@ -312,6 +313,15 @@  static int felix_setup_mmio_filtering(struct felix *felix)
 		return ret;
 	}
 
+	/* The ownership of the CPU port module's queues might have just been
+	 * transferred to the tag_8021q tagger from the NPI-based tagger.
+	 * So there might still be all sorts of crap in the queues. On the
+	 * other hand, the MMIO-based matching of PTP frames is very brittle,
+	 * so we need to be careful that there are no extra frames to be
+	 * dequeued over MMIO, since we would never know to discard them.
+	 */
+	ocelot_drain_cpu_queue(ocelot, 0);
+
 	return 0;
 }
 
@@ -1159,6 +1169,67 @@  static int felix_hwtstamp_set(struct dsa_switch *ds, int port,
 	return ocelot_hwstamp_set(ocelot, port, ifr);
 }
 
+static bool felix_check_xtr_pkt(struct ocelot *ocelot, unsigned int ptp_type)
+{
+	struct felix *felix = ocelot_to_felix(ocelot);
+	int err, grp = 0;
+
+	if (felix->tag_proto == DSA_TAG_PROTO_OCELOT)
+		return false;
+
+	if (!felix->info->quirk_no_xtr_irq)
+		return false;
+
+	if (ptp_type == PTP_CLASS_NONE)
+		return false;
+
+	while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) {
+		struct ocelot_frame_info info = {};
+		struct dsa_port *dp;
+		struct sk_buff *skb;
+		unsigned int type;
+
+		err = ocelot_xtr_poll_xfh(ocelot, grp, &info);
+		if (err)
+			break;
+
+		if (WARN_ON(info.port >= ocelot->num_phys_ports))
+			goto out;
+
+		dp = dsa_to_port(felix->ds, info.port);
+
+		err = ocelot_xtr_poll_frame(ocelot, grp, dp->slave,
+					    &info, &skb);
+		if (err)
+			break;
+
+		/* We trap to the CPU port module all PTP frames, but
+		 * felix_rxtstamp() only gets called for event frames.
+		 * So we need to avoid sending duplicate general
+		 * message frames by running a second BPF classifier
+		 * here and dropping those.
+		 */
+		__skb_push(skb, ETH_HLEN);
+
+		type = ptp_classify_raw(skb);
+
+		__skb_pull(skb, ETH_HLEN);
+
+		if (type == PTP_CLASS_NONE) {
+			kfree_skb(skb);
+			continue;
+		}
+
+		netif_rx(skb);
+	}
+
+out:
+	if (err < 0)
+		ocelot_drain_cpu_queue(ocelot, 0);
+
+	return true;
+}
+
 static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb, unsigned int type)
 {
@@ -1169,6 +1240,16 @@  static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 	struct timespec64 ts;
 	u64 tstamp, val;
 
+	/* If the "no XTR IRQ" workaround is in use, tell DSA to defer this skb
+	 * for RX timestamping. Then free it, and poll for its copy through
+	 * MMIO in the CPU port module, and inject that into the stack from
+	 * ocelot_xtr_poll().
+	 */
+	if (felix_check_xtr_pkt(ocelot, type)) {
+		kfree_skb(skb);
+		return true;
+	}
+
 	ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
 	tstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
 
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 25f51b82e1b8..f41fa0a8e883 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -806,6 +806,13 @@  void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 }
 EXPORT_SYMBOL(ocelot_port_inject_frame);
 
+void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
+{
+	while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp))
+		ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+}
+EXPORT_SYMBOL(ocelot_drain_cpu_queue);
+
 int ocelot_fdb_add(struct ocelot *ocelot, int port,
 		   const unsigned char *addr, u16 vid)
 {
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 317ee2395b89..1e56d12deb0e 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -566,8 +566,7 @@  static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
 
 out:
 	if (err < 0)
-		while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp))
-			ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+		ocelot_drain_cpu_queue(ocelot, 0);
 
 	return IRQ_HANDLED;
 }
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 3036ffc6b79d..2a115e7b4610 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -850,5 +850,6 @@  int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp,
 			  struct net_device *dev,
 			  struct ocelot_frame_info *info,
 			  struct sk_buff **skb);
+void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
 
 #endif
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 7bd45b66e9a4..e6f2e7931efa 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -9,6 +9,8 @@ 
  *   that on egress
  */
 #include <linux/dsa/8021q.h>
+#include <soc/mscc/ocelot.h>
+#include <soc/mscc/ocelot_ptp.h>
 #include "dsa_priv.h"
 
 static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
@@ -18,6 +20,28 @@  static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+
+	/* TX timestamping was requested, so inject through MMIO */
+	if (clone) {
+		struct ocelot *ocelot = dp->ds->priv;
+		struct ocelot_port *ocelot_port;
+		int port = dp->index;
+		u32 rew_op;
+
+		ocelot_port = ocelot->ports[port];
+		rew_op = ocelot_port->ptp_cmd;
+
+		/* Retrieve timestamp ID populated inside skb->cb[0] of the
+		 * clone by ocelot_port_add_txtstamp_skb
+		 */
+		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+			rew_op |= clone->cb[0] << 3;
+
+		ocelot_port_inject_frame(ocelot, dp->index, 0, rew_op, skb);
+
+		return NULL;
+	}
 
 	return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
 			      ((pcp << VLAN_PRIO_SHIFT) | tx_vid));