[v2,06/15] bus/fslmc: support memory backed portals with QBMAN 5.0

Message ID 20180926180440.31726-7-shreyansh.jain@nxp.com
State Superseded
Series [v2,01/15] net/dpaa2: fix IOVA conversion for congestion memory

Commit Message

Shreyansh Jain Sept. 26, 2018, 6:04 p.m. UTC
From: Nipun Gupta <nipun.gupta@nxp.com>


This new mode is available on the LX2160 platform. The code
dynamically detects the underlying QBMan version and chooses
the appropriate mode at runtime.
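
Concretely, the patch routes each fast-path operation (enqueue, pull,
DQRR next, release) through a function pointer that defaults to the
pre-5.0 "direct" handler and is switched once, in qbman_swp_init(), to
the memory-backed handler when a QMan 5.0+ revision is detected. A
minimal standalone C sketch of that dispatch pattern follows; names such
as pull_direct, pull_mem_back and select_mode are simplified
placeholders rather than the real qbman_swp_* symbols, while the
QMAN_REV_* constants match the ones used in the patch:

#include <stdio.h>
#include <stdint.h>

#define QMAN_REV_MASK  0xffff0000
#define QMAN_REV_5000  0x05000000

struct swp; /* opaque portal handle (placeholder) */

static int pull_direct(struct swp *s)   { (void)s; puts("direct pull");   return 0; }
static int pull_mem_back(struct swp *s) { (void)s; puts("mem-back pull"); return 0; }

/* Default to the pre-5.0 handler; the pointer is flipped at most once,
 * at portal-init time, so the per-packet fast path stays branch-free.
 */
static int (*pull_ptr)(struct swp *) = pull_direct;

static void select_mode(uint32_t qman_version)
{
	if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
		pull_ptr = pull_mem_back;
}

int main(void)
{
	select_mode(0x05000000); /* e.g. a portal reporting a QMan 5.0 revision */
	return pull_ptr(NULL);
}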

Signed-off-by: Youri Querry <youri.querry_1@nxp.com>
Signed-off-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>

---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c      | 180 ++---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h       |   4 +
 drivers/bus/fslmc/qbman/include/compat.h      |   3 +-
 .../fslmc/qbman/include/fsl_qbman_portal.h    |  33 +-
 drivers/bus/fslmc/qbman/qbman_portal.c        | 764 +++++++++++++++---
 drivers/bus/fslmc/qbman/qbman_portal.h        |  30 +-
 drivers/bus/fslmc/qbman/qbman_sys.h           | 100 ++-
 drivers/bus/fslmc/qbman/qbman_sys_decl.h      |   4 +
 8 files changed, 868 insertions(+), 250 deletions(-)
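
Reviewer note: qbman_swp_init() below grows the EQCR producer ring from
8 to 32 entries on QMan 5.0 and derives eqcr.pi_mask from the ring size
with one extra bit, so the producer index can carry the wrap/valid
polarity (half of the mask, pi_mask >> 1, equals ring_size - 1 and is
what indexes the ring slots). A standalone sketch of that mask
computation, assuming simplified naming:

#include <assert.h>
#include <stdint.h>

static uint32_t pi_mask_for(uint32_t pi_ring_size)
{
	uint32_t mask = 0, sz;

	/* One mask bit per halving of the ring size, plus one extra bit:
	 * 8 entries -> 0xF, 32 entries -> 0x3F.
	 */
	for (sz = pi_ring_size; sz > 0; sz >>= 1)
		mask = (mask << 1) + 1;
	return mask;
}

int main(void)
{
	assert(pi_mask_for(8) == 0xF);   /* pre-5.0 portals */
	assert(pi_mask_for(32) == 0x3F); /* QMan 5.0 memory-backed portals */
	return 0;
}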

-- 
2.17.1

Patch

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 99f70be1c..76f80b951 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP
+ *   Copyright 2016-2018 NXP
  *
  */
 #include <unistd.h>
@@ -177,68 +177,6 @@  static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
 }
 #endif
 
-static int
-configure_dpio_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
-{
-	struct qbman_swp_desc p_des;
-	struct dpio_attr attr;
-
-	dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
-	if (!dpio_dev->dpio) {
-		DPAA2_BUS_ERR("Memory allocation failure");
-		return -1;
-	}
-
-	dpio_dev->dpio->regs = dpio_dev->mc_portal;
-	if (dpio_open(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->hw_id,
-		      &dpio_dev->token)) {
-		DPAA2_BUS_ERR("Failed to allocate IO space");
-		free(dpio_dev->dpio);
-		return -1;
-	}
-
-	if (dpio_reset(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-		DPAA2_BUS_ERR("Failed to reset dpio");
-		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-		free(dpio_dev->dpio);
-		return -1;
-	}
-
-	if (dpio_enable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-		DPAA2_BUS_ERR("Failed to Enable dpio");
-		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-		free(dpio_dev->dpio);
-		return -1;
-	}
-
-	if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW,
-				dpio_dev->token, &attr)) {
-		DPAA2_BUS_ERR("DPIO Get attribute failed");
-		dpio_disable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-		dpio_close(dpio_dev->dpio, CMD_PRI_LOW,  dpio_dev->token);
-		free(dpio_dev->dpio);
-		return -1;
-	}
-
-	/* Configure & setup SW portal */
-	p_des.block = NULL;
-	p_des.idx = attr.qbman_portal_id;
-	p_des.cena_bar = (void *)(dpio_dev->qbman_portal_ce_paddr);
-	p_des.cinh_bar = (void *)(dpio_dev->qbman_portal_ci_paddr);
-	p_des.irq = -1;
-	p_des.qman_version = attr.qbman_version;
-
-	dpio_dev->sw_portal = qbman_swp_init(&p_des);
-	if (dpio_dev->sw_portal == NULL) {
-		DPAA2_BUS_ERR("QBMan SW Portal Init failed");
-		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-		free(dpio_dev->dpio);
-		return -1;
-	}
-
-	return 0;
-}
-
 static int
 dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
 {
@@ -402,15 +340,17 @@  dpaa2_create_dpio_device(int vdev_fd,
 			 struct vfio_device_info *obj_info,
 			 int object_id)
 {
-	struct dpaa2_dpio_dev *dpio_dev;
+	struct dpaa2_dpio_dev *dpio_dev = NULL;
 	struct vfio_region_info reg_info = { .argsz = sizeof(reg_info)};
+	struct qbman_swp_desc p_des;
+	struct dpio_attr attr;
 
 	if (obj_info->num_regions < NUM_DPIO_REGIONS) {
 		DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
 		return -1;
 	}
 
-	dpio_dev = rte_malloc(NULL, sizeof(struct dpaa2_dpio_dev),
+	dpio_dev = rte_zmalloc(NULL, sizeof(struct dpaa2_dpio_dev),
 			      RTE_CACHE_LINE_SIZE);
 	if (!dpio_dev) {
 		DPAA2_BUS_ERR("Memory allocation failed for DPIO Device");
@@ -423,45 +363,33 @@  dpaa2_create_dpio_device(int vdev_fd,
 	/* Using single portal  for all devices */
 	dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
 
-	reg_info.index = 0;
-	if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-		DPAA2_BUS_ERR("vfio: error getting region info");
-		rte_free(dpio_dev);
-		return -1;
+	dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
+	if (!dpio_dev->dpio) {
+		DPAA2_BUS_ERR("Memory allocation failure");
+		goto err;
 	}
 
-	dpio_dev->ce_size = reg_info.size;
-	dpio_dev->qbman_portal_ce_paddr = (size_t)mmap(NULL, reg_info.size,
-				PROT_WRITE | PROT_READ, MAP_SHARED,
-				vdev_fd, reg_info.offset);
-
-	reg_info.index = 1;
-	if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-		DPAA2_BUS_ERR("vfio: error getting region info");
-		rte_free(dpio_dev);
-		return -1;
+	dpio_dev->dpio->regs = dpio_dev->mc_portal;
+	if (dpio_open(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->hw_id,
+		      &dpio_dev->token)) {
+		DPAA2_BUS_ERR("Failed to allocate IO space");
+		goto err;
 	}
 
-	dpio_dev->ci_size = reg_info.size;
-	dpio_dev->qbman_portal_ci_paddr = (size_t)mmap(NULL, reg_info.size,
-				PROT_WRITE | PROT_READ, MAP_SHARED,
-				vdev_fd, reg_info.offset);
-
-	if (configure_dpio_qbman_swp(dpio_dev)) {
-		DPAA2_BUS_ERR(
-			     "Fail to configure the dpio qbman portal for %d",
-			     dpio_dev->hw_id);
-		rte_free(dpio_dev);
-		return -1;
+	if (dpio_reset(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
+		DPAA2_BUS_ERR("Failed to reset dpio");
+		goto err;
 	}
 
-	io_space_count++;
-	dpio_dev->index = io_space_count;
+	if (dpio_enable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
+		DPAA2_BUS_ERR("Failed to Enable dpio");
+		goto err;
+	}
 
-	if (rte_dpaa2_vfio_setup_intr(&dpio_dev->intr_handle, vdev_fd, 1)) {
-		DPAA2_BUS_ERR("Fail to setup interrupt for %d",
-			      dpio_dev->hw_id);
-		rte_free(dpio_dev);
+	if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW,
+				dpio_dev->token, &attr)) {
+		DPAA2_BUS_ERR("DPIO Get attribute failed");
+		goto err;
 	}
 
 	/* find the SoC type for the first time */
@@ -483,9 +411,67 @@  dpaa2_create_dpio_device(int vdev_fd,
 		dpaa2_svr_family = (mc_plat_info.svr & 0xffff0000);
 	}
 
+	if (dpaa2_svr_family == SVR_LX2160A)
+		reg_info.index = DPAA2_SWP_CENA_MEM_REGION;
+	else
+		reg_info.index = DPAA2_SWP_CENA_REGION;
+
+	if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+		DPAA2_BUS_ERR("vfio: error getting region info");
+		goto err;
+	}
+
+	dpio_dev->ce_size = reg_info.size;
+	dpio_dev->qbman_portal_ce_paddr = (size_t)mmap(NULL, reg_info.size,
+				PROT_WRITE | PROT_READ, MAP_SHARED,
+				vdev_fd, reg_info.offset);
+
+	reg_info.index = DPAA2_SWP_CINH_REGION;
+	if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+		DPAA2_BUS_ERR("vfio: error getting region info");
+		goto err;
+	}
+
+	dpio_dev->ci_size = reg_info.size;
+	dpio_dev->qbman_portal_ci_paddr = (size_t)mmap(NULL, reg_info.size,
+				PROT_WRITE | PROT_READ, MAP_SHARED,
+				vdev_fd, reg_info.offset);
+
+	/* Configure & setup SW portal */
+	p_des.block = NULL;
+	p_des.idx = attr.qbman_portal_id;
+	p_des.cena_bar = (void *)(dpio_dev->qbman_portal_ce_paddr);
+	p_des.cinh_bar = (void *)(dpio_dev->qbman_portal_ci_paddr);
+	p_des.irq = -1;
+	p_des.qman_version = attr.qbman_version;
+
+	dpio_dev->sw_portal = qbman_swp_init(&p_des);
+	if (dpio_dev->sw_portal == NULL) {
+		DPAA2_BUS_ERR("QBMan SW Portal Init failed");
+		goto err;
+	}
+
+	io_space_count++;
+	dpio_dev->index = io_space_count;
+
+	if (rte_dpaa2_vfio_setup_intr(&dpio_dev->intr_handle, vdev_fd, 1)) {
+		DPAA2_BUS_ERR("Fail to setup interrupt for %d",
+			      dpio_dev->hw_id);
+		goto err;
+	}
+
 	TAILQ_INSERT_TAIL(&dpio_dev_list, dpio_dev, next);
 
 	return 0;
+
+err:
+	if (dpio_dev->dpio) {
+		dpio_disable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
+		dpio_close(dpio_dev->dpio, CMD_PRI_LOW,  dpio_dev->token);
+		free(dpio_dev->dpio);
+	}
+	rte_free(dpio_dev);
+	return -1;
 }
 
 void
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 820759360..f2eebe65d 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -37,6 +37,10 @@ 
 #define DPAA2_DQRR_RING_SIZE	16
 	/** <Maximum number of slots available in RX ring*/
 
+#define DPAA2_SWP_CENA_REGION		0
+#define DPAA2_SWP_CINH_REGION		1
+#define DPAA2_SWP_CENA_MEM_REGION	2
+
 #define MC_PORTAL_INDEX		0
 #define NUM_DPIO_REGIONS	2
 #define NUM_DQS_PER_QUEUE       2
diff --git a/drivers/bus/fslmc/qbman/include/compat.h b/drivers/bus/fslmc/qbman/include/compat.h
index 7be8f54c5..655bff4b6 100644
--- a/drivers/bus/fslmc/qbman/include/compat.h
+++ b/drivers/bus/fslmc/qbman/include/compat.h
@@ -78,13 +78,14 @@  do { \
 #define lower_32_bits(x) ((uint32_t)(x))
 #define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
 
-
 #define __iomem
 
 #define __raw_readb(p)	(*(const volatile unsigned char *)(p))
 #define __raw_readl(p)	(*(const volatile unsigned int *)(p))
 #define __raw_writel(v, p) {*(volatile unsigned int *)(p) = (v); }
 
+#define dma_wmb()		rte_smp_mb()
+
 #define atomic_t                rte_atomic32_t
 #define atomic_read(v)          rte_atomic32_read(v)
 #define atomic_set(v, i)        rte_atomic32_set(v, i)
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index 3e63db3ab..10c72e048 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -42,6 +42,15 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
  */
 void qbman_swp_finish(struct qbman_swp *p);
 
+/**
+ * qbman_swp_invalidate() - Invalidate the cache-enabled area of the QBMan
+ * portal. This must be called if a portal is moved to another core,
+ * because the QBMan portal area is non-coherent
+ * @p: the qbman_swp object to be invalidated
+ *
+ */
+void qbman_swp_invalidate(struct qbman_swp *p);
+
 /**
  * qbman_swp_get_desc() - Get the descriptor of the given portal object.
  * @p: the given portal object.
@@ -172,7 +181,7 @@  void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit);
 /**
  * struct qbman_result - structure for qbman dequeue response and/or
  * notification.
- * @donot_manipulate_directly: the 16 32bit data to represent the whole
+ * @dont_manipulate_directly: the 16 32bit data to represent the whole
  * possible qbman dequeue result.
  */
 struct qbman_result {
@@ -262,7 +271,7 @@  void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable);
  */
 struct qbman_pull_desc {
 	union {
-		uint32_t donot_manipulate_directly[16];
+		uint32_t dont_manipulate_directly[16];
 		struct pull {
 			uint8_t verb;
 			uint8_t numf;
@@ -355,6 +364,14 @@  void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, uint32_t wqid,
 void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid,
 				 enum qbman_pull_type_e dct);
 
+/**
+ * qbman_pull_desc_set_rad() - Decide whether to reschedule the FQ after dequeue
+ *
+ * @rad: 1 = Reschedule the FQ after dequeue.
+ *	 0 = Allow the FQ to remain active after dequeue.
+ */
+void qbman_pull_desc_set_rad(struct qbman_pull_desc *d, int rad);
+
 /**
  * qbman_swp_pull() - Issue the pull dequeue command
  * @s: the software portal object.
@@ -775,7 +792,7 @@  uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn);
 /* struct qbman_eq_desc - structure of enqueue descriptor */
 struct qbman_eq_desc {
 	union {
-		uint32_t donot_manipulate_directly[8];
+		uint32_t dont_manipulate_directly[8];
 		struct eq {
 			uint8_t verb;
 			uint8_t dca;
@@ -796,11 +813,11 @@  struct qbman_eq_desc {
 
 /**
  * struct qbman_eq_response - structure of enqueue response
- * @donot_manipulate_directly: the 16 32bit data to represent the whole
+ * @dont_manipulate_directly: the 16 32bit data to represent the whole
  * enqueue response.
  */
 struct qbman_eq_response {
-	uint32_t donot_manipulate_directly[16];
+	uint32_t dont_manipulate_directly[16];
 };
 
 /**
@@ -958,6 +975,7 @@  int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
  * @s: the software portal used for enqueue.
  * @d: the enqueue descriptor.
  * @fd: the frame descriptor to be enqueued.
+ * @flags: bit-mask of QBMAN_ENQUEUE_FLAG_*** options
  * @num_frames: the number of the frames to be enqueued.
  *
  * Return the number of enqueued frames, -EBUSY if the EQCR is not ready.
@@ -973,7 +991,6 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
  * @s: the software portal used for enqueue.
  * @d: the enqueue descriptor.
  * @fd: the frame descriptor to be enqueued.
- * @flags: bit-mask of QBMAN_ENQUEUE_FLAG_*** options
  * @num_frames: the number of the frames to be enqueued.
  *
  * Return the number of enqueued frames, -EBUSY if the EQCR is not ready.
@@ -998,12 +1015,12 @@  int qbman_swp_enqueue_thresh(struct qbman_swp *s, unsigned int thresh);
 	/*******************/
 /**
  * struct qbman_release_desc - The structure for buffer release descriptor
- * @donot_manipulate_directly: the 32bit data to represent the whole
+ * @dont_manipulate_directly: the 32bit data to represent the whole
  * possible settings of qbman release descriptor.
  */
 struct qbman_release_desc {
 	union {
-		uint32_t donot_manipulate_directly[16];
+		uint32_t dont_manipulate_directly[16];
 		struct br {
 			uint8_t verb;
 			uint8_t reserved;
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index 071450052..3380e54f5 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -1,39 +1,17 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2018 NXP
  *
  */
 
+#include "qbman_sys.h"
 #include "qbman_portal.h"
 
 /* QBMan portal management command codes */
 #define QBMAN_MC_ACQUIRE       0x30
 #define QBMAN_WQCHAN_CONFIGURE 0x46
 
-/* CINH register offsets */
-#define QBMAN_CINH_SWP_EQCR_PI 0x800
-#define QBMAN_CINH_SWP_EQCR_CI 0x840
-#define QBMAN_CINH_SWP_EQAR    0x8c0
-#define QBMAN_CINH_SWP_DQPI    0xa00
-#define QBMAN_CINH_SWP_DCAP    0xac0
-#define QBMAN_CINH_SWP_SDQCR   0xb00
-#define QBMAN_CINH_SWP_RAR     0xcc0
-#define QBMAN_CINH_SWP_ISR     0xe00
-#define QBMAN_CINH_SWP_IER     0xe40
-#define QBMAN_CINH_SWP_ISDR    0xe80
-#define QBMAN_CINH_SWP_IIR     0xec0
-#define QBMAN_CINH_SWP_DQRR_ITR    0xa80
-#define QBMAN_CINH_SWP_ITPR    0xf40
-
-/* CENA register offsets */
-#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_CR      0x600
-#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((uint32_t)(vb) >> 1))
-#define QBMAN_CENA_SWP_VDQCR   0x780
-#define QBMAN_CENA_SWP_EQCR_CI 0x840
-
 /* Reverse mapping of QBMAN_CENA_SWP_DQRR() */
 #define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)p & 0x1ff) >> 6)
 
@@ -83,6 +61,102 @@  enum qbman_sdqcr_fc {
 #define MAX_QBMAN_PORTALS  64
 static struct qbman_swp *portal_idx_map[MAX_QBMAN_PORTALS];
 
+/* Internal Function declaration */
+static int
+qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+static int
+qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+
+static int
+qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+static int
+qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+
+static int
+qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames);
+static int
+qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames);
+
+static int
+qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames);
+static int
+qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames);
+
+static int
+qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
+qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
+
+const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
+
+static int
+qbman_swp_release_direct(struct qbman_swp *s,
+		const struct qbman_release_desc *d,
+		const uint64_t *buffers, unsigned int num_buffers);
+static int
+qbman_swp_release_mem_back(struct qbman_swp *s,
+		const struct qbman_release_desc *d,
+		const uint64_t *buffers, unsigned int num_buffers);
+
+/* Function pointers */
+static int (*qbman_swp_enqueue_array_mode_ptr)(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd)
+	= qbman_swp_enqueue_array_mode_direct;
+
+static int (*qbman_swp_enqueue_ring_mode_ptr)(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd)
+	= qbman_swp_enqueue_ring_mode_direct;
+
+static int (*qbman_swp_enqueue_multiple_ptr)(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames)
+	= qbman_swp_enqueue_multiple_direct;
+
+static int (*qbman_swp_enqueue_multiple_desc_ptr)(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames)
+	= qbman_swp_enqueue_multiple_desc_direct;
+
+static int (*qbman_swp_pull_ptr)(struct qbman_swp *s,
+		struct qbman_pull_desc *d)
+	= qbman_swp_pull_direct;
+
+const struct qbman_result *(*qbman_swp_dqrr_next_ptr)(struct qbman_swp *s)
+		= qbman_swp_dqrr_next_direct;
+
+static int (*qbman_swp_release_ptr)(struct qbman_swp *s,
+			const struct qbman_release_desc *d,
+			const uint64_t *buffers, unsigned int num_buffers)
+			= qbman_swp_release_direct;
+
 /*********************************/
 /* Portal constructor/destructor */
 /*********************************/
@@ -104,25 +178,30 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 {
 	int ret;
 	uint32_t eqcr_pi;
+	uint32_t mask_size;
 	struct qbman_swp *p = malloc(sizeof(*p));
 
 	if (!p)
 		return NULL;
+
+	memset(p, 0, sizeof(struct qbman_swp));
+
 	p->desc = *d;
 #ifdef QBMAN_CHECKING
 	p->mc.check = swp_mc_can_start;
 #endif
 	p->mc.valid_bit = QB_VALID_BIT;
-	p->sdq = 0;
 	p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
 	p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
 	p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+		p->mr.valid_bit = QB_VALID_BIT;
 
 	atomic_set(&p->vdq.busy, 1);
 	p->vdq.valid_bit = QB_VALID_BIT;
-	p->dqrr.next_idx = 0;
 	p->dqrr.valid_bit = QB_VALID_BIT;
-	if ((p->desc.qman_version & 0xFFFF0000) < QMAN_REV_4100) {
+	qman_version = p->desc.qman_version;
+	if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) {
 		p->dqrr.dqrr_size = 4;
 		p->dqrr.reset_bug = 1;
 	} else {
@@ -136,18 +215,54 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 		pr_err("qbman_swp_sys_init() failed %d\n", ret);
 		return NULL;
 	}
+
+	/* Verify that the DQRR PI is 0 - if it is not, the portal isn't
+	 * in its default state, which is an error
+	 */
+	if (qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_DQPI) & 0xF) {
+		pr_err("qbman DQRR PI is not zero, portal is not clean\n");
+		free(p);
+		return NULL;
+	}
+
 	/* SDQCR needs to be initialized to 0 when no channels are
 	 * being dequeued from or else the QMan HW will indicate an
 	 * error.  The values that were calculated above will be
 	 * applied when dequeues from a specific channel are enabled.
 	 */
 	qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_SDQCR, 0);
+
+	p->eqcr.pi_ring_size = 8;
+	if ((qman_version & 0xFFFF0000) >= QMAN_REV_5000) {
+		p->eqcr.pi_ring_size = 32;
+		qbman_swp_enqueue_array_mode_ptr =
+				qbman_swp_enqueue_array_mode_mem_back;
+		qbman_swp_enqueue_ring_mode_ptr =
+				qbman_swp_enqueue_ring_mode_mem_back;
+		qbman_swp_enqueue_multiple_ptr =
+				qbman_swp_enqueue_multiple_mem_back;
+		qbman_swp_enqueue_multiple_desc_ptr =
+				qbman_swp_enqueue_multiple_desc_mem_back;
+		qbman_swp_pull_ptr = qbman_swp_pull_mem_back;
+		qbman_swp_dqrr_next_ptr = qbman_swp_dqrr_next_mem_back;
+		qbman_swp_release_ptr = qbman_swp_release_mem_back;
+	}
+
+	for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
+		p->eqcr.pi_mask = (p->eqcr.pi_mask<<1) + 1;
 	eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI);
-	p->eqcr.pi = eqcr_pi & 0xF;
+	p->eqcr.pi = eqcr_pi & p->eqcr.pi_mask;
 	p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
-	p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) & 0xF;
-	p->eqcr.available = QBMAN_EQCR_SIZE - qm_cyc_diff(QBMAN_EQCR_SIZE,
-						p->eqcr.ci, p->eqcr.pi);
+	if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+		p->eqcr.ci = qbman_cinh_read(&p->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & p->eqcr.pi_mask;
+	else
+		p->eqcr.ci = qbman_cinh_read(&p->sys,
+				QBMAN_CINH_SWP_EQCR_PI) & p->eqcr.pi_mask;
+	p->eqcr.available = p->eqcr.pi_ring_size -
+				qm_cyc_diff(p->eqcr.pi_ring_size,
+				p->eqcr.ci & (p->eqcr.pi_mask<<1),
+				p->eqcr.pi & (p->eqcr.pi_mask<<1));
 
 	portal_idx_map[p->desc.idx] = p;
 	return p;
@@ -229,7 +344,8 @@  int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p)
 
 void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit)
 {
-	qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0);
+	qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR,
+			 inhibit ? 0xffffffff : 0);
 }
 
 /***********************/
@@ -246,7 +362,10 @@  void *qbman_swp_mc_start(struct qbman_swp *p)
 #ifdef QBMAN_CHECKING
 	QBMAN_BUG_ON(p->mc.check != swp_mc_can_start);
 #endif
-	ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
+	if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+		ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
+	else
+		ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR_MEM);
 #ifdef QBMAN_CHECKING
 	if (!ret)
 		p->mc.check = swp_mc_can_submit;
@@ -266,8 +385,17 @@  void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
 	 * caller wants to OR but has forgotten to do so.
 	 */
 	QBMAN_BUG_ON((*v & cmd_verb) != *v);
-	*v = cmd_verb | p->mc.valid_bit;
-	qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+	if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
+		dma_wmb();
+		*v = cmd_verb | p->mc.valid_bit;
+		qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+		clean(cmd);
+	} else {
+		*v = cmd_verb | p->mr.valid_bit;
+		qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR_MEM, cmd);
+		dma_wmb();
+		qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_CR_RT, QMAN_RT_MODE);
+	}
 #ifdef QBMAN_CHECKING
 	p->mc.check = swp_mc_can_poll;
 #endif
@@ -279,17 +407,34 @@  void *qbman_swp_mc_result(struct qbman_swp *p)
 #ifdef QBMAN_CHECKING
 	QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
 #endif
-	qbman_cena_invalidate_prefetch(&p->sys,
-				       QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-	ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-	/* Remove the valid-bit - command completed if the rest is non-zero */
-	verb = ret[0] & ~QB_VALID_BIT;
-	if (!verb)
-		return NULL;
+	if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
+		qbman_cena_invalidate_prefetch(&p->sys,
+				QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+		ret = qbman_cena_read(&p->sys,
+				QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+		/* Remove the valid-bit -
+		 * command completed iff the rest is non-zero
+		 */
+		verb = ret[0] & ~QB_VALID_BIT;
+		if (!verb)
+			return NULL;
+		p->mc.valid_bit ^= QB_VALID_BIT;
+	} else {
+		ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR_MEM);
+		/* Command completed if the valid bit is toggled */
+		if (p->mr.valid_bit != (ret[0] & QB_VALID_BIT))
+			return NULL;
+		/* Remove the valid-bit -
+		 * command completed iff the rest is non-zero
+		 */
+		verb = ret[0] & ~QB_VALID_BIT;
+		if (!verb)
+			return NULL;
+		p->mr.valid_bit ^= QB_VALID_BIT;
+	}
 #ifdef QBMAN_CHECKING
 	p->mc.check = swp_mc_can_start;
 #endif
-	p->mc.valid_bit ^= QB_VALID_BIT;
 	return ret;
 }
 
@@ -417,13 +562,26 @@  void qbman_eq_desc_set_dca(struct qbman_eq_desc *d, int enable,
 	}
 }
 
-#define EQAR_IDX(eqar)     ((eqar) & 0x7)
+#define EQAR_IDX(eqar)     ((eqar) & 0x1f)
 #define EQAR_VB(eqar)      ((eqar) & 0x80)
 #define EQAR_SUCCESS(eqar) ((eqar) & 0x100)
 
-static int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
-					const struct qbman_eq_desc *d,
-					const struct qbman_fd *fd)
+static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
+						   uint8_t idx)
+{
+	if (idx < 16)
+		qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT + idx * 4,
+				     QMAN_RT_MODE);
+	else
+		qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT2 +
+				     (idx - 16) * 4,
+				     QMAN_RT_MODE);
+}
+
+
+static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
+					       const struct qbman_eq_desc *d,
+					       const struct qbman_fd *fd)
 {
 	uint32_t *p;
 	const uint32_t *cl = qb_cl(d);
@@ -433,39 +591,69 @@  static int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
 	if (!EQAR_SUCCESS(eqar))
 		return -EBUSY;
 	p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+			QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
 	memcpy(&p[1], &cl[1], 28);
 	memcpy(&p[8], fd, sizeof(*fd));
+
 	/* Set the verb byte, have to substitute in the valid-bit */
-	lwsync();
+	dma_wmb();
 	p[0] = cl[0] | EQAR_VB(eqar);
 	qbman_cena_write_complete_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+				QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
 	return 0;
 }
+static int qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s,
+						 const struct qbman_eq_desc *d,
+						 const struct qbman_fd *fd)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_EQAR);
 
-static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
-				       const struct qbman_eq_desc *d,
-				       const struct qbman_fd *fd)
+	pr_debug("EQAR=%08x\n", eqar);
+	if (!EQAR_SUCCESS(eqar))
+		return -EBUSY;
+	p = qbman_cena_write_start_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+	memcpy(&p[1], &cl[1], 28);
+	memcpy(&p[8], fd, sizeof(*fd));
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p[0] = cl[0] | EQAR_VB(eqar);
+	dma_wmb();
+	qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar));
+	return 0;
+}
+
+static inline int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
+					       const struct qbman_eq_desc *d,
+					       const struct qbman_fd *fd)
+{
+	return qbman_swp_enqueue_array_mode_ptr(s, d, fd);
+}
+
+static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
+					      const struct qbman_eq_desc *d,
+					      const struct qbman_fd *fd)
 {
 	uint32_t *p;
 	const uint32_t *cl = qb_cl(d);
-	uint32_t eqcr_ci;
-	uint8_t diff;
+	uint32_t eqcr_ci, full_mask, half_mask;
 
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
 	if (!s->eqcr.available) {
 		eqcr_ci = s->eqcr.ci;
 		s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-				QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-		diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-				   eqcr_ci, s->eqcr.ci);
-		s->eqcr.available += diff;
-		if (!diff)
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
 			return -EBUSY;
 	}
 
 	p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7));
+			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
 	memcpy(&p[1], &cl[1], 28);
 	memcpy(&p[8], fd, sizeof(*fd));
 	lwsync();
@@ -473,16 +661,61 @@  static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
 	/* Set the verb byte, have to substitute in the valid-bit */
 	p[0] = cl[0] | s->eqcr.pi_vb;
 	qbman_cena_write_complete_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7));
+			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
 	s->eqcr.pi++;
-	s->eqcr.pi &= 0xF;
+	s->eqcr.pi &= full_mask;
 	s->eqcr.available--;
-	if (!(s->eqcr.pi & 7))
+	if (!(s->eqcr.pi & half_mask))
 		s->eqcr.pi_vb ^= QB_VALID_BIT;
 
 	return 0;
 }
 
+static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
+						const struct qbman_eq_desc *d,
+						const struct qbman_fd *fd)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, full_mask, half_mask;
+
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return -EBUSY;
+	}
+
+	p = qbman_cena_write_start_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+	memcpy(&p[1], &cl[1], 28);
+	memcpy(&p[8], fd, sizeof(*fd));
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p[0] = cl[0] | s->eqcr.pi_vb;
+	s->eqcr.pi++;
+	s->eqcr.pi &= full_mask;
+	s->eqcr.available--;
+	if (!(s->eqcr.pi & half_mask))
+		s->eqcr.pi_vb ^= QB_VALID_BIT;
+	dma_wmb();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+				(QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+	return 0;
+}
+
+static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
+				       const struct qbman_eq_desc *d,
+				       const struct qbman_fd *fd)
+{
+	return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+}
+
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
 		      const struct qbman_fd *fd)
 {
@@ -492,27 +725,27 @@  int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
 		return qbman_swp_enqueue_ring_mode(s, d, fd);
 }
 
-int qbman_swp_enqueue_multiple(struct qbman_swp *s,
-			       const struct qbman_eq_desc *d,
-			       const struct qbman_fd *fd,
-			       uint32_t *flags,
-			       int num_frames)
+static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
+					     const struct qbman_eq_desc *d,
+					     const struct qbman_fd *fd,
+					     uint32_t *flags,
+					     int num_frames)
 {
-	uint32_t *p;
+	uint32_t *p = NULL;
 	const uint32_t *cl = qb_cl(d);
-	uint32_t eqcr_ci, eqcr_pi;
-	uint8_t diff;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
 	int i, num_enqueued = 0;
 	uint64_t addr_cena;
 
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
 	if (!s->eqcr.available) {
 		eqcr_ci = s->eqcr.ci;
 		s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-				QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-		diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-				   eqcr_ci, s->eqcr.ci);
-		s->eqcr.available += diff;
-		if (!diff)
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
 			return 0;
 	}
 
@@ -523,11 +756,10 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 	/* Fill in the EQCR ring */
 	for (i = 0; i < num_enqueued; i++) {
 		p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
 		memcpy(&p[1], &cl[1], 28);
 		memcpy(&p[8], &fd[i], sizeof(*fd));
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
 	}
 
 	lwsync();
@@ -536,7 +768,7 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 	eqcr_pi = s->eqcr.pi;
 	for (i = 0; i < num_enqueued; i++) {
 		p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
 		p[0] = cl[0] | s->eqcr.pi_vb;
 		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
 			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
@@ -545,8 +777,7 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
 		}
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
-		if (!(eqcr_pi & 7))
+		if (!(eqcr_pi & half_mask))
 			s->eqcr.pi_vb ^= QB_VALID_BIT;
 	}
 
@@ -554,35 +785,104 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 	eqcr_pi = s->eqcr.pi;
 	addr_cena = (size_t)s->sys.addr_cena;
 	for (i = 0; i < num_enqueued; i++) {
-		dcbf((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)));
+		dcbf((uintptr_t)(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
 	}
-	s->eqcr.pi = eqcr_pi;
+	s->eqcr.pi = eqcr_pi & full_mask;
 
 	return num_enqueued;
 }
 
-int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
-				    const struct qbman_eq_desc *d,
-				    const struct qbman_fd *fd,
-				    int num_frames)
+static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+					       const struct qbman_eq_desc *d,
+					       const struct qbman_fd *fd,
+					       uint32_t *flags,
+					       int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		memcpy(&p[1], &cl[1], 28);
+		memcpy(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	dma_wmb();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+				(QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+	return num_enqueued;
+}
+
+inline int qbman_swp_enqueue_multiple(struct qbman_swp *s,
+				      const struct qbman_eq_desc *d,
+				      const struct qbman_fd *fd,
+				      uint32_t *flags,
+				      int num_frames)
+{
+	return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+}
+
+static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+					const struct qbman_eq_desc *d,
+					const struct qbman_fd *fd,
+					int num_frames)
 {
 	uint32_t *p;
 	const uint32_t *cl;
-	uint32_t eqcr_ci, eqcr_pi;
-	uint8_t diff;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
 	int i, num_enqueued = 0;
 	uint64_t addr_cena;
 
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
 	if (!s->eqcr.available) {
 		eqcr_ci = s->eqcr.ci;
 		s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-				QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-		diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-				   eqcr_ci, s->eqcr.ci);
-		s->eqcr.available += diff;
-		if (!diff)
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
 			return 0;
 	}
 
@@ -593,12 +893,11 @@  int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
 	/* Fill in the EQCR ring */
 	for (i = 0; i < num_enqueued; i++) {
 		p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
 		cl = qb_cl(&d[i]);
 		memcpy(&p[1], &cl[1], 28);
 		memcpy(&p[8], &fd[i], sizeof(*fd));
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
 	}
 
 	lwsync();
@@ -607,12 +906,11 @@  int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
 	eqcr_pi = s->eqcr.pi;
 	for (i = 0; i < num_enqueued; i++) {
 		p = qbman_cena_write_start_wo_shadow(&s->sys,
-					QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
 		cl = qb_cl(&d[i]);
 		p[0] = cl[0] | s->eqcr.pi_vb;
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
-		if (!(eqcr_pi & 7))
+		if (!(eqcr_pi & half_mask))
 			s->eqcr.pi_vb ^= QB_VALID_BIT;
 	}
 
@@ -620,14 +918,78 @@  int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
 	eqcr_pi = s->eqcr.pi;
 	addr_cena = (size_t)s->sys.addr_cena;
 	for (i = 0; i < num_enqueued; i++) {
-		dcbf((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)));
+		dcbf((uintptr_t)(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
+static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
+					const struct qbman_eq_desc *d,
+					const struct qbman_fd *fd,
+					int num_frames)
+{
+	uint32_t *p;
+	const uint32_t *cl;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_mask>>1);
+	full_mask = s->eqcr.pi_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		memcpy(&p[1], &cl[1], 28);
+		memcpy(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		p[0] = cl[0] | s->eqcr.pi_vb;
 		eqcr_pi++;
-		eqcr_pi &= 0xF;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
 	}
-	s->eqcr.pi = eqcr_pi;
+
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	dma_wmb();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+				(QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
 
 	return num_enqueued;
 }
+inline int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
+					   const struct qbman_eq_desc *d,
+					   const struct qbman_fd *fd,
+					   int num_frames)
+{
+	return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+}
 
 /*************************/
 /* Static (push) dequeue */
@@ -670,6 +1032,7 @@  void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable)
 #define QB_VDQCR_VERB_DT_SHIFT     2
 #define QB_VDQCR_VERB_RLS_SHIFT    4
 #define QB_VDQCR_VERB_WAE_SHIFT    5
+#define QB_VDQCR_VERB_RAD_SHIFT    6
 
 enum qb_pull_dt_e {
 	qb_pull_dt_channel,
@@ -702,7 +1065,8 @@  void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
 	d->pull.rsp_addr = storage_phys;
 }
 
-void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, uint8_t numframes)
+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d,
+				   uint8_t numframes)
 {
 	d->pull.numf = numframes - 1;
 }
@@ -735,7 +1099,20 @@  void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid,
 	d->pull.dq_src = chid;
 }
 
-int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+void qbman_pull_desc_set_rad(struct qbman_pull_desc *d, int rad)
+{
+	if (d->pull.verb & (1 << QB_VDQCR_VERB_RLS_SHIFT)) {
+		if (rad)
+			d->pull.verb |= 1 << QB_VDQCR_VERB_RAD_SHIFT;
+		else
+			d->pull.verb &= ~(1 << QB_VDQCR_VERB_RAD_SHIFT);
+	} else {
+		printf("The RAD feature is not valid when RLS = 0\n");
+	}
+}
+
+static int qbman_swp_pull_direct(struct qbman_swp *s,
+				 struct qbman_pull_desc *d)
 {
 	uint32_t *p;
 	uint32_t *cl = qb_cl(d);
@@ -759,6 +1136,36 @@  int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
 	return 0;
 }
 
+static int qbman_swp_pull_mem_back(struct qbman_swp *s,
+				   struct qbman_pull_desc *d)
+{
+	uint32_t *p;
+	uint32_t *cl = qb_cl(d);
+
+	if (!atomic_dec_and_test(&s->vdq.busy)) {
+		atomic_inc(&s->vdq.busy);
+		return -EBUSY;
+	}
+
+	d->pull.tok = s->sys.idx + 1;
+	s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+	p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR_MEM);
+	memcpy(&p[1], &cl[1], 12);
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p[0] = cl[0] | s->vdq.valid_bit;
+	s->vdq.valid_bit ^= QB_VALID_BIT;
+	dma_wmb();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_VDQCR_RT, QMAN_RT_MODE);
+
+	return 0;
+}
+
+inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+{
+	return qbman_swp_pull_ptr(s, d);
+}
+
 /****************/
 /* Polling DQRR */
 /****************/
@@ -791,7 +1198,12 @@  void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
  * only once, so repeated calls can return a sequence of DQRR entries, without
  * requiring they be consumed immediately or in any particular order.
  */
-const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
+inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
+{
+	return qbman_swp_dqrr_next_ptr(s);
+}
+
+const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
 {
 	uint32_t verb;
 	uint32_t response_verb;
@@ -801,7 +1213,7 @@  const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 	/* Before using valid-bit to detect if something is there, we have to
 	 * handle the case of the DQRR reset bug...
 	 */
-	if (unlikely(s->dqrr.reset_bug)) {
+	if (s->dqrr.reset_bug) {
 		/* We pick up new entries by cache-inhibited producer index,
 		 * which means that a non-coherent mapping would require us to
 		 * invalidate and read *only* once that PI has indicated that
@@ -833,7 +1245,8 @@  const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 					QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
 	}
 	p = qbman_cena_read_wo_shadow(&s->sys,
-				      QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+			QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
 	verb = p->dq.verb;
 
 	/* If the valid-bit isn't of the expected polarity, nothing there. Note,
@@ -867,11 +1280,54 @@  const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 	return p;
 }
 
+const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
+{
+	uint32_t verb;
+	uint32_t response_verb;
+	uint32_t flags;
+	const struct qbman_result *p;
+
+	p = qbman_cena_read_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_DQRR_MEM(s->dqrr.next_idx));
+
+	verb = p->dq.verb;
+
+	/* If the valid-bit isn't of the expected polarity, nothing there. Note,
+	 * in the DQRR reset bug workaround, we shouldn't need to skip this
+	 * check, because we've already determined that a new entry is available
+	 * and we've invalidated the cacheline before reading it, so the
+	 * valid-bit behaviour is repaired and should tell us what we already
+	 * knew from reading PI.
+	 */
+	if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+		return NULL;
+
+	/* There's something there. Move "next_idx" attention to the next ring
+	 * entry (and prefetch it) before returning what we found.
+	 */
+	s->dqrr.next_idx++;
+	if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+		s->dqrr.next_idx = 0;
+		s->dqrr.valid_bit ^= QB_VALID_BIT;
+	}
+	/* If this is the final response to a volatile dequeue command
+	 * indicate that the vdq is no longer busy
+	 */
+	flags = p->dq.stat;
+	response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+	if ((response_verb == QBMAN_RESULT_DQ) &&
+	    (flags & QBMAN_DQ_STAT_VOLATILE) &&
+	    (flags & QBMAN_DQ_STAT_EXPIRED))
+		atomic_inc(&s->vdq.busy);
+	return p;
+}
+
 /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */
 void qbman_swp_dqrr_consume(struct qbman_swp *s,
 			    const struct qbman_result *dq)
 {
-	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
+	qbman_cinh_write(&s->sys,
+			QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
 }
 
 /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */
@@ -884,6 +1340,7 @@  void qbman_swp_dqrr_idx_consume(struct qbman_swp *s,
 /*********************************/
 /* Polling user-provided storage */
 /*********************************/
+
 int qbman_result_has_new_result(struct qbman_swp *s,
 				struct qbman_result *dq)
 {
@@ -898,11 +1355,11 @@  int qbman_result_has_new_result(struct qbman_swp *s,
 	((struct qbman_result *)dq)->dq.tok = 0;
 
 	/*
-	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-	 * fact "VDQCR" shows busy doesn't mean that we hold the result that
-	 * makes it available. Eg. we may be looking at our 10th dequeue result,
-	 * having released VDQCR after the 1st result and it is now busy due to
-	 * some other command!
+	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+	 * the fact "VDQCR" shows busy doesn't mean that we hold the result
+	 * that makes it available. Eg. we may be looking at our 10th dequeue
+	 * result, having released VDQCR after the 1st result and it is now
+	 * busy due to some other command!
 	 */
 	if (s->vdq.storage == dq) {
 		s->vdq.storage = NULL;
@@ -936,11 +1393,11 @@  int qbman_check_command_complete(struct qbman_result *dq)
 
 	s = portal_idx_map[dq->dq.tok - 1];
 	/*
-	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-	 * fact "VDQCR" shows busy doesn't mean that we hold the result that
-	 * makes it available. Eg. we may be looking at our 10th dequeue result,
-	 * having released VDQCR after the 1st result and it is now busy due to
-	 * some other command!
+	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+	 * the fact "VDQCR" shows busy doesn't mean that we hold the result
+	 * that makes it available. Eg. we may be looking at our 10th dequeue
+	 * result, having released VDQCR after the 1st result and it is now
+	 * busy due to some other command!
 	 */
 	if (s->vdq.storage == dq) {
 		s->vdq.storage = NULL;
@@ -1142,8 +1599,10 @@  void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable)
 #define RAR_VB(rar)      ((rar) & 0x80)
 #define RAR_SUCCESS(rar) ((rar) & 0x100)
 
-int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-		      const uint64_t *buffers, unsigned int num_buffers)
+static int qbman_swp_release_direct(struct qbman_swp *s,
+				    const struct qbman_release_desc *d,
+				    const uint64_t *buffers,
+				    unsigned int num_buffers)
 {
 	uint32_t *p;
 	const uint32_t *cl = qb_cl(d);
@@ -1157,22 +1616,63 @@  int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
 
 	/* Start the release command */
 	p = qbman_cena_write_start_wo_shadow(&s->sys,
-					     QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+				     QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
 
 	/* Copy the caller's buffer pointers to the command */
 	u64_to_le32_copy(&p[2], buffers, num_buffers);
 
-	/* Set the verb byte, have to substitute in the valid-bit and the number
-	 * of buffers.
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
 	 */
 	lwsync();
 	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
 	qbman_cena_write_complete_wo_shadow(&s->sys,
-					    QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+				    QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
 
 	return 0;
 }
 
+static int qbman_swp_release_mem_back(struct qbman_swp *s,
+				      const struct qbman_release_desc *d,
+				      const uint64_t *buffers,
+				      unsigned int num_buffers)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+	pr_debug("RAR=%08x\n", rar);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+	/* Start the release command */
+	p = qbman_cena_write_start_wo_shadow(&s->sys,
+		QBMAN_CENA_SWP_RCR_MEM(RAR_IDX(rar)));
+
+	/* Copy the caller's buffer pointers to the command */
+	u64_to_le32_copy(&p[2], buffers, num_buffers);
+
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
+	 */
+	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+	lwsync();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_RCR_AM_RT +
+		RAR_IDX(rar) * 4, QMAN_RT_MODE);
+
+	return 0;
+}
+
+inline int qbman_swp_release(struct qbman_swp *s,
+			     const struct qbman_release_desc *d,
+			     const uint64_t *buffers,
+			     unsigned int num_buffers)
+{
+	return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+}
+
 /*******************/
 /* Buffer acquires */
 /*******************/
@@ -1214,7 +1714,7 @@  int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 
 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, QBMAN_MC_ACQUIRE);
-	if (unlikely(!r)) {
+	if (!r) {
 		pr_err("qbman: acquire from BPID %d failed, no response\n",
 		       bpid);
 		return -EIO;
@@ -1224,7 +1724,7 @@  int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
 
 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
 		       bpid, r->rslt);
 		return -EIO;
@@ -1271,7 +1771,7 @@  static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,
 
 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, alt_fq_verb);
-	if (unlikely(!r)) {
+	if (!r) {
 		pr_err("qbman: mgmt cmd failed, no response (verb=0x%x)\n",
 		       alt_fq_verb);
 		return -EIO;
@@ -1281,7 +1781,7 @@  static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,
 	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != alt_fq_verb);
 
 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("ALT FQID %d failed: verb = 0x%08x, code = 0x%02x\n",
 		       fqid, alt_fq_verb, r->rslt);
 		return -EIO;
@@ -1362,7 +1862,7 @@  static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,
 
 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, QBMAN_WQCHAN_CONFIGURE);
-	if (unlikely(!r)) {
+	if (!r) {
 		pr_err("qbman: wqchan config failed, no response\n");
 		return -EIO;
 	}
@@ -1372,7 +1872,7 @@  static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,
 		     != QBMAN_WQCHAN_CONFIGURE);
 
 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("CDAN cQID %d failed: code = 0x%02x\n",
 		       channelid, r->rslt);
 		return -EIO;
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h b/drivers/bus/fslmc/qbman/qbman_portal.h
index dbea22a1b..3b0fc540b 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -1,12 +1,17 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2018 NXP
  *
  */
 
+#ifndef _QBMAN_PORTAL_H_
+#define _QBMAN_PORTAL_H_
+
 #include "qbman_sys.h"
 #include <fsl_qbman_portal.h>
 
+uint32_t qman_version;
 #define QMAN_REV_4000   0x04000000
 #define QMAN_REV_4100   0x04010000
 #define QMAN_REV_4101   0x04010001
@@ -14,13 +19,14 @@ 
 /* All QBMan command and result structures use this "valid bit" encoding */
 #define QB_VALID_BIT ((uint32_t)0x80)
 
+/* All QBMan command use this "Read trigger bit" encoding */
+#define QB_RT_BIT ((uint32_t)0x100)
+
 /* Management command result codes */
 #define QBMAN_MC_RSLT_OK      0xf0
 
 /* QBMan DQRR size is set at runtime in qbman_portal.c */
 
-#define QBMAN_EQCR_SIZE 8
-
 static inline uint8_t qm_cyc_diff(uint8_t ringsize, uint8_t first,
 				  uint8_t last)
 {
@@ -51,6 +57,10 @@  struct qbman_swp {
 #endif
 		uint32_t valid_bit; /* 0x00 or 0x80 */
 	} mc;
+	/* Management response */
+	struct {
+		uint32_t valid_bit; /* 0x00 or 0x80 */
+	} mr;
 	/* Push dequeues */
 	uint32_t sdq;
 	/* Volatile dequeues */
@@ -87,6 +97,8 @@  struct qbman_swp {
 	struct {
 		uint32_t pi;
 		uint32_t pi_vb;
+		uint32_t pi_ring_size;
+		uint32_t pi_mask;
 		uint32_t ci;
 		int available;
 	} eqcr;
@@ -141,4 +153,16 @@  static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
  * an inline) is necessary to work with different descriptor types and to work
  * correctly with const and non-const inputs (and similarly-qualified outputs).
  */
-#define qb_cl(d) (&(d)->donot_manipulate_directly[0])
+#define qb_cl(d) (&(d)->dont_manipulate_directly[0])
+
+#ifdef RTE_ARCH_ARM64
+	#define clean(p) \
+			{ asm volatile("dc cvac, %0;" : : "r" (p) : "memory"); }
+	#define invalidate(p) \
+			{ asm volatile("dc ivac, %0" : : "r"(p) : "memory"); }
+#else
+	#define clean(p)
+	#define invalidate(p)
+#endif
+
+#endif
diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h b/drivers/bus/fslmc/qbman/qbman_sys.h
index 2bd33ea56..d41af8358 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys.h
@@ -18,11 +18,51 @@ 
  * *not* to provide linux compatibility.
  */
 
+#ifndef _QBMAN_SYS_H_
+#define _QBMAN_SYS_H_
+
 #include "qbman_sys_decl.h"
 
 #define CENA_WRITE_ENABLE 0
 #define CINH_WRITE_ENABLE 1
 
+/* CINH register offsets */
+#define QBMAN_CINH_SWP_EQCR_PI      0x800
+#define QBMAN_CINH_SWP_EQCR_CI      0x840
+#define QBMAN_CINH_SWP_EQAR         0x8c0
+#define QBMAN_CINH_SWP_CR_RT        0x900
+#define QBMAN_CINH_SWP_VDQCR_RT     0x940
+#define QBMAN_CINH_SWP_EQCR_AM_RT   0x980
+#define QBMAN_CINH_SWP_RCR_AM_RT    0x9c0
+#define QBMAN_CINH_SWP_DQPI         0xa00
+#define QBMAN_CINH_SWP_DQRR_ITR     0xa80
+#define QBMAN_CINH_SWP_DCAP         0xac0
+#define QBMAN_CINH_SWP_SDQCR        0xb00
+#define QBMAN_CINH_SWP_EQCR_AM_RT2  0xb40
+#define QBMAN_CINH_SWP_RCR_PI       0xc00
+#define QBMAN_CINH_SWP_RAR          0xcc0
+#define QBMAN_CINH_SWP_ISR          0xe00
+#define QBMAN_CINH_SWP_IER          0xe40
+#define QBMAN_CINH_SWP_ISDR         0xe80
+#define QBMAN_CINH_SWP_IIR          0xec0
+#define QBMAN_CINH_SWP_ITPR         0xf40
+
+/* CENA register offsets */
+#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_CR      0x600
+#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((uint32_t)(vb) >> 1))
+#define QBMAN_CENA_SWP_VDQCR   0x780
+#define QBMAN_CENA_SWP_EQCR_CI 0x840
+
+/* CENA register offsets in memory-backed mode */
+#define QBMAN_CENA_SWP_DQRR_MEM(n)  (0x800 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_RCR_MEM(n)   (0x1400 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_CR_MEM       0x1600
+#define QBMAN_CENA_SWP_RR_MEM       0x1680
+#define QBMAN_CENA_SWP_VDQCR_MEM    0x1780
+
 /* Debugging assists */
 static inline void __hexdump(unsigned long start, unsigned long end,
 			     unsigned long p, size_t sz, const unsigned char *c)
@@ -125,8 +165,8 @@  struct qbman_swp_sys {
 	 * place-holder.
 	 */
 	uint8_t *cena;
-	uint8_t __iomem *addr_cena;
-	uint8_t __iomem *addr_cinh;
+	uint8_t *addr_cena;
+	uint8_t *addr_cinh;
 	uint32_t idx;
 	enum qbman_eqcr_mode eqcr_mode;
 };
@@ -292,13 +332,16 @@  static inline void qbman_cena_prefetch(struct qbman_swp_sys *s,
  * qbman_portal.c. So use of it is declared locally here.
  */
 #define QBMAN_CINH_SWP_CFG   0xd00
-#define QBMAN_CINH_SWP_CFG   0xd00
+
 #define SWP_CFG_DQRR_MF_SHIFT 20
 #define SWP_CFG_EST_SHIFT     16
+#define SWP_CFG_CPBS_SHIFT    15
 #define SWP_CFG_WN_SHIFT      14
 #define SWP_CFG_RPM_SHIFT     12
 #define SWP_CFG_DCM_SHIFT     10
 #define SWP_CFG_EPM_SHIFT     8
+#define SWP_CFG_VPM_SHIFT     7
+#define SWP_CFG_CPM_SHIFT     6
 #define SWP_CFG_SD_SHIFT      5
 #define SWP_CFG_SP_SHIFT      4
 #define SWP_CFG_SE_SHIFT      3
@@ -329,11 +372,20 @@  static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn,
 	return reg;
 }
 
+#define QMAN_RT_MODE	0x00000100
+
+#define QMAN_REV_4000	0x04000000
+#define QMAN_REV_4100	0x04010000
+#define QMAN_REV_4101	0x04010001
+#define QMAN_REV_5000	0x05000000
+#define QMAN_REV_MASK	0xffff0000
+
 static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 				     const struct qbman_swp_desc *d,
 				     uint8_t dqrr_size)
 {
 	uint32_t reg;
+	int i;
 #ifdef RTE_ARCH_64
 	uint8_t wn = CENA_WRITE_ENABLE;
 #else
@@ -343,7 +395,7 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 	s->addr_cena = d->cena_bar;
 	s->addr_cinh = d->cinh_bar;
 	s->idx = (uint32_t)d->idx;
-	s->cena = malloc(4096);
+	s->cena = malloc(64*1024);
 	if (!s->cena) {
 		pr_err("Could not allocate page for cena shadow\n");
 		return -1;
@@ -358,12 +410,34 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
 	QBMAN_BUG_ON(reg);
 #endif
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+		memset(s->addr_cena, 0, 64*1024);
+	else {
+		/* Invalidate the portal memory.
+		 * This ensures no stale cache lines
+		 */
+		for (i = 0; i < 0x1000; i += 64)
+			dccivac(s->addr_cena + i);
+	}
+
 	if (s->eqcr_mode == qman_eqcr_vb_array)
-		reg = qbman_set_swp_cfg(dqrr_size, wn, 0, 3, 2, 3, 1, 1, 1, 1,
-					1, 1);
-	else
-		reg = qbman_set_swp_cfg(dqrr_size, wn, 1, 3, 2, 2, 1, 1, 1, 1,
-					1, 1);
+		reg = qbman_set_swp_cfg(dqrr_size, wn,
+					0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+	else {
+		if ((d->qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						1, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+		else
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
+		reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+		       1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
+		       1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+	}
+
 	qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg);
 	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
 	if (!reg) {
@@ -371,6 +445,12 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 		free(s->cena);
 		return -1;
 	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
+		qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+		qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+	}
+
 	return 0;
 }
 
@@ -378,3 +458,5 @@  static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)
 {
 	free(s->cena);
 }
+
+#endif /* _QBMAN_SYS_H_ */
diff --git a/drivers/bus/fslmc/qbman/qbman_sys_decl.h b/drivers/bus/fslmc/qbman/qbman_sys_decl.h
index fa6977fee..a29f5b469 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys_decl.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys_decl.h
@@ -3,6 +3,9 @@ 
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
  *
  */
+#ifndef _QBMAN_SYS_DECL_H_
+#define _QBMAN_SYS_DECL_H_
+
 #include <compat.h>
 #include <fsl_qbman_base.h>
 
@@ -51,3 +54,4 @@  static inline void prefetch_for_store(void *p)
 	RTE_SET_USED(p);
 }
 #endif
+#endif /* _QBMAN_SYS_DECL_H_ */