@@ -53,10 +53,6 @@ static uint32_t io_space_count;
/* Variable to store DPAA2 platform type */
uint32_t dpaa2_svr_family;
-/* Physical core id for lcores running on dpaa2. */
-/* DPAA2 only support 1 lcore to 1 phy cpu mapping */
-static unsigned int dpaa2_cpu[RTE_MAX_LCORE];
-
/* Variable to store DPAA2 DQRR size */
uint8_t dpaa2_dqrr_size;
/* Variable to store DPAA2 EQCR size */
@@ -159,7 +155,7 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
return;
}
- cpu_mask = cpu_mask << dpaa2_cpu[cpu_id];
+ cpu_mask = cpu_mask << cpu_id;
snprintf(command, COMMAND_LEN, "echo %X > /proc/irq/%s/smp_affinity",
cpu_mask, token);
ret = system(command);
@@ -228,17 +224,9 @@ static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
#endif
static int
-dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
+dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
{
int sdest, ret;
- int cpu_id;
-
- /* Set the Stashing Destination */
- cpu_id = dpaa2_get_core_id();
- if (cpu_id < 0) {
- DPAA2_BUS_ERR("Thread not affined to a single core");
- return -1;
- }
/* Set the STASH Destination depending on Current CPU ID.
* Valid values of SDEST are 4,5,6,7. Where,
@@ -277,6 +265,7 @@ static void dpaa2_put_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
{
struct dpaa2_dpio_dev *dpio_dev = NULL;
+ int cpu_id;
int ret;
/* Get DPIO dev handle from list using index */
@@ -292,11 +281,19 @@ static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
DPAA2_BUS_DEBUG("New Portal %p (%d) affined thread - %lu",
dpio_dev, dpio_dev->index, syscall(SYS_gettid));
- ret = dpaa2_configure_stashing(dpio_dev);
- if (ret) {
- DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
- rte_atomic16_clear(&dpio_dev->ref_count);
- return NULL;
+ /* Set the Stashing Destination */
+ cpu_id = dpaa2_get_core_id();
+ if (cpu_id < 0) {
+ DPAA2_BUS_WARN("Thread not affined to a single core");
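+ /* Fall back to cache-inhibited (non-stashing) portal access. LX2160A
+ * portals are memory backed, so no update is needed there.
+ */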
+ if (dpaa2_svr_family != SVR_LX2160A)
+ qbman_swp_update(dpio_dev->sw_portal, 1);
+ } else {
+ ret = dpaa2_configure_stashing(dpio_dev, cpu_id);
+ if (ret) {
+ DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
+ rte_atomic16_clear(&dpio_dev->ref_count);
+ return NULL;
+ }
}
ret = pthread_setspecific(dpaa2_portal_key, (void *)dpio_dev);
@@ -363,46 +360,6 @@ static void dpaa2_portal_finish(void *arg)
pthread_setspecific(dpaa2_portal_key, NULL);
}
-/*
- * This checks for not supported lcore mappings as well as get the physical
- * cpuid for the lcore.
- * one lcore can only map to 1 cpu i.e. 1@10-14 not supported.
- * one cpu can be mapped to more than one lcores.
- */
-static int
-dpaa2_check_lcore_cpuset(void)
-{
- unsigned int lcore_id, i;
- int ret = 0;
-
- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
- dpaa2_cpu[lcore_id] = 0xffffffff;
-
- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
- rte_cpuset_t cpuset = rte_lcore_cpuset(lcore_id);
-
- for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &cpuset))
- continue;
- if (i >= RTE_MAX_LCORE) {
- DPAA2_BUS_ERR("ERR:lcore map to core %u (>= %u) not supported",
- i, RTE_MAX_LCORE);
- ret = -1;
- continue;
- }
- RTE_LOG(DEBUG, EAL, "lcore id = %u cpu=%u\n",
- lcore_id, i);
- if (dpaa2_cpu[lcore_id] != 0xffffffff) {
- DPAA2_BUS_ERR("ERR:lcore map to multi-cpu not supported");
- ret = -1;
- continue;
- }
- dpaa2_cpu[lcore_id] = i;
- }
- }
- return ret;
-}
-
static int
dpaa2_create_dpio_device(int vdev_fd,
struct vfio_device_info *obj_info,
@@ -413,7 +370,6 @@ dpaa2_create_dpio_device(int vdev_fd,
struct qbman_swp_desc p_des;
struct dpio_attr attr;
int ret;
- static int check_lcore_cpuset;
if (obj_info->num_regions < NUM_DPIO_REGIONS) {
DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
@@ -433,13 +389,6 @@ dpaa2_create_dpio_device(int vdev_fd,
/* Using single portal for all devices */
dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
- if (!check_lcore_cpuset) {
- check_lcore_cpuset = 1;
-
- if (dpaa2_check_lcore_cpuset() < 0)
- goto err;
- }
-
dpio_dev->dpio = rte_zmalloc(NULL, sizeof(struct fsl_mc_io),
RTE_CACHE_LINE_SIZE);
if (!dpio_dev->dpio) {
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 2014 Freescale Semiconductor, Inc.
- * Copyright 2015-2019 NXP
+ * Copyright 2015-2020 NXP
*
*/
#ifndef _FSL_QBMAN_PORTAL_H
@@ -43,6 +43,12 @@ extern uint32_t dpaa2_svr_family;
*/
struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
+/**
+ * qbman_swp_update() - Update portal cacheability attributes.
+ * @p: the given qbman swp portal
+ * @stash_off: set to non-zero to disable stashing and access the portal
+ *             through the cache-inhibited mapping
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off);
+
/**
* qbman_swp_finish() - Create and destroy a functional object representing
* the given QBMan portal descriptor.
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
*
*/
@@ -82,6 +82,10 @@ qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd);
static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd);
+static int
qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd);
@@ -99,6 +103,12 @@ qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
uint32_t *flags,
int num_frames);
static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd,
+ uint32_t *flags,
+ int num_frames);
+static int
qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@@ -118,6 +128,12 @@ qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
uint32_t *flags,
int num_frames);
static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ struct qbman_fd **fd,
+ uint32_t *flags,
+ int num_frames);
+static int
qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
struct qbman_fd **fd,
@@ -135,6 +151,11 @@ qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
const struct qbman_fd *fd,
int num_frames);
static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd,
+ int num_frames);
+static int
qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@@ -143,9 +164,12 @@ qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
static int
qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
static int
+qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s);
const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
static int
@@ -153,6 +177,10 @@ qbman_swp_release_direct(struct qbman_swp *s,
const struct qbman_release_desc *d,
const uint64_t *buffers, unsigned int num_buffers);
static int
+qbman_swp_release_cinh_direct(struct qbman_swp *s,
+ const struct qbman_release_desc *d,
+ const uint64_t *buffers, unsigned int num_buffers);
+static int
qbman_swp_release_mem_back(struct qbman_swp *s,
const struct qbman_release_desc *d,
const uint64_t *buffers, unsigned int num_buffers);
@@ -327,6 +355,28 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
return p;
}
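+/*
+ * Re-program an already initialised portal so it can be used without
+ * stashing, e.g. from a thread that is not affined to a single core.
+ * No-op for memory-backed portals (QBMan >= 5000 with fastest access).
+ */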
+int qbman_swp_update(struct qbman_swp *p, int stash_off)
+{
+ const struct qbman_swp_desc *d = &p->desc;
+ struct qbman_swp_sys *s = &p->sys;
+ int ret;
+
+ /* Nothing needs to be done for QBMAN rev >= 5000 with fast access */
+ if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+ && (d->cena_access_mode == qman_cena_fastest_access))
+ return 0;
+
+ ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off);
+ if (ret) {
+ pr_err("qbman_swp_sys_update() failed %d\n", ret);
+ return ret;
+ }
+
+ p->stash_off = stash_off;
+
+ return 0;
+}
+
void qbman_swp_finish(struct qbman_swp *p)
{
#ifdef QBMAN_CHECKING
@@ -462,6 +512,27 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
#endif
}
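+/* Same as qbman_swp_mc_submit(), but the command is written out through the
+ * cache-inhibited window instead of the cache-enabled one.
+ */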
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
+{
+ uint8_t *v = cmd;
+#ifdef QBMAN_CHECKING
+ QBMAN_BUG_ON(p->mc.check != swp_mc_can_submit);
+#endif
+ /* TBD: "|=" is going to hurt performance. Need to move as many fields
+ * out of word zero, and for those that remain, the "OR" needs to occur
+ * at the caller side. This debug check helps to catch cases where the
+ * caller wants to OR but has forgotten to do so.
+ */
+ QBMAN_BUG_ON((*v & cmd_verb) != *v);
+ dma_wmb();
+ *v = cmd_verb | p->mc.valid_bit;
+ qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+ clean(cmd);
+#ifdef QBMAN_CHECKING
+ p->mc.check = swp_mc_can_poll;
+#endif
+}
+
void *qbman_swp_mc_result(struct qbman_swp *p)
{
uint32_t *ret, verb;
@@ -500,6 +571,27 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
return ret;
}
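+/* Cache-inhibited counterpart of qbman_swp_mc_result() */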
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p)
+{
+ uint32_t *ret, verb;
+#ifdef QBMAN_CHECKING
+ QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
+#endif
+ ret = qbman_cinh_read_shadow(&p->sys,
+ QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+ /* Remove the valid-bit -
+ * command completed iff the rest is non-zero
+ */
+ verb = ret[0] & ~QB_VALID_BIT;
+ if (!verb)
+ return NULL;
+ p->mc.valid_bit ^= QB_VALID_BIT;
+#ifdef QBMAN_CHECKING
+ p->mc.check = swp_mc_can_start;
+#endif
+ return ret;
+}
+
/***********/
/* Enqueue */
/***********/
@@ -640,6 +732,16 @@ static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
QMAN_RT_MODE);
}
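+/* Byte-by-byte copy through a volatile pointer, so the compiler cannot turn
+ * it into wide or merged accesses to the cache-inhibited portal mapping.
+ */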
+static void memcpy_byte_by_byte(void *to, const void *from, size_t n)
+{
+ const uint8_t *src = from;
+ volatile uint8_t *dest = to;
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ dest[i] = src[i];
+}
+
static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
@@ -754,7 +856,7 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(
return -EBUSY;
}
- p = qbman_cena_write_start_wo_shadow(&s->sys,
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
memcpy(&p[1], &cl[1], 28);
memcpy(&p[8], fd, sizeof(*fd));
@@ -762,8 +864,44 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(
/* Set the verb byte, have to substitute in the valid-bit */
p[0] = cl[0] | s->eqcr.pi_vb;
- qbman_cena_write_complete_wo_shadow(&s->sys,
+ s->eqcr.pi++;
+ s->eqcr.pi &= full_mask;
+ s->eqcr.available--;
+ if (!(s->eqcr.pi & half_mask))
+ s->eqcr.pi_vb ^= QB_VALID_BIT;
+
+ return 0;
+}
+
+static int qbman_swp_enqueue_ring_mode_cinh_direct(
+ struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd)
+{
+ uint32_t *p;
+ const uint32_t *cl = qb_cl(d);
+ uint32_t eqcr_ci, full_mask, half_mask;
+
+ half_mask = (s->eqcr.pi_ci_mask>>1);
+ full_mask = s->eqcr.pi_ci_mask;
+ if (!s->eqcr.available) {
+ eqcr_ci = s->eqcr.ci;
+ s->eqcr.ci = qbman_cinh_read(&s->sys,
+ QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+ eqcr_ci, s->eqcr.ci);
+ if (!s->eqcr.available)
+ return -EBUSY;
+ }
+
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+ memcpy_byte_by_byte(&p[1], &cl[1], 28);
+ memcpy_byte_by_byte(&p[8], fd, sizeof(*fd));
+ lwsync();
+
+ /* Set the verb byte, have to substitute in the valid-bit */
+ p[0] = cl[0] | s->eqcr.pi_vb;
s->eqcr.pi++;
s->eqcr.pi &= full_mask;
s->eqcr.available--;
@@ -815,7 +953,10 @@ static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd)
{
- return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
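+ /* A portal switched to non-stashing mode must use the cache-inhibited
+ * enqueue path.
+ */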
+ if (!s->stash_off)
+ return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+ else
+ return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd);
}
int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
@@ -966,6 +1107,67 @@ static int qbman_swp_enqueue_multiple_cinh_direct(
return num_enqueued;
}
+static int qbman_swp_enqueue_multiple_cinh_direct(
+ struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd,
+ uint32_t *flags,
+ int num_frames)
+{
+ uint32_t *p = NULL;
+ const uint32_t *cl = qb_cl(d);
+ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+ int i, num_enqueued = 0;
+
+ half_mask = (s->eqcr.pi_ci_mask>>1);
+ full_mask = s->eqcr.pi_ci_mask;
+ if (!s->eqcr.available) {
+ eqcr_ci = s->eqcr.ci;
+ s->eqcr.ci = qbman_cinh_read(&s->sys,
+ QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+ eqcr_ci, s->eqcr.ci);
+ if (!s->eqcr.available)
+ return 0;
+ }
+
+ eqcr_pi = s->eqcr.pi;
+ num_enqueued = (s->eqcr.available < num_frames) ?
+ s->eqcr.available : num_frames;
+ s->eqcr.available -= num_enqueued;
+ /* Fill in the EQCR ring */
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ memcpy_byte_by_byte(&p[1], &cl[1], 28);
+ memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+ eqcr_pi++;
+ }
+
+ lwsync();
+
+ /* Set the verb byte, have to substitute in the valid-bit */
+ eqcr_pi = s->eqcr.pi;
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ p[0] = cl[0] | s->eqcr.pi_vb;
+ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+ d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+ }
+ eqcr_pi++;
+ if (!(eqcr_pi & half_mask))
+ s->eqcr.pi_vb ^= QB_VALID_BIT;
+ }
+
+ s->eqcr.pi = eqcr_pi & full_mask;
+
+ return num_enqueued;
+}
+
static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@@ -1025,7 +1227,12 @@ inline int qbman_swp_enqueue_multiple(struct qbman_swp *s,
uint32_t *flags,
int num_frames)
{
- return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+ if (!s->stash_off)
+ return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags,
+ num_frames);
+ else
+ return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags,
+ num_frames);
}
static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
@@ -1167,6 +1374,67 @@ static int qbman_swp_enqueue_multiple_fd_cinh_direct(
return num_enqueued;
}
+static int qbman_swp_enqueue_multiple_fd_cinh_direct(
+ struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ struct qbman_fd **fd,
+ uint32_t *flags,
+ int num_frames)
+{
+ uint32_t *p = NULL;
+ const uint32_t *cl = qb_cl(d);
+ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+ int i, num_enqueued = 0;
+
+ half_mask = (s->eqcr.pi_ci_mask>>1);
+ full_mask = s->eqcr.pi_ci_mask;
+ if (!s->eqcr.available) {
+ eqcr_ci = s->eqcr.ci;
+ s->eqcr.ci = qbman_cinh_read(&s->sys,
+ QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+ eqcr_ci, s->eqcr.ci);
+ if (!s->eqcr.available)
+ return 0;
+ }
+
+ eqcr_pi = s->eqcr.pi;
+ num_enqueued = (s->eqcr.available < num_frames) ?
+ s->eqcr.available : num_frames;
+ s->eqcr.available -= num_enqueued;
+ /* Fill in the EQCR ring */
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ memcpy_byte_by_byte(&p[1], &cl[1], 28);
+ memcpy_byte_by_byte(&p[8], fd[i], sizeof(struct qbman_fd));
+ eqcr_pi++;
+ }
+
+ lwsync();
+
+ /* Set the verb byte, have to substitute in the valid-bit */
+ eqcr_pi = s->eqcr.pi;
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ p[0] = cl[0] | s->eqcr.pi_vb;
+ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+ d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+ }
+ eqcr_pi++;
+ if (!(eqcr_pi & half_mask))
+ s->eqcr.pi_vb ^= QB_VALID_BIT;
+ }
+
+ s->eqcr.pi = eqcr_pi & full_mask;
+
+ return num_enqueued;
+}
+
static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
struct qbman_fd **fd,
@@ -1233,7 +1501,12 @@ inline int qbman_swp_enqueue_multiple_fd(struct qbman_swp *s,
uint32_t *flags,
int num_frames)
{
- return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, num_frames);
+ if (!s->stash_off)
+ return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags,
+ num_frames);
+ else
+ return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd,
+ flags, num_frames);
}
static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
@@ -1365,6 +1638,62 @@ static int qbman_swp_enqueue_multiple_desc_cinh_direct(
return num_enqueued;
}
+static int qbman_swp_enqueue_multiple_desc_cinh_direct(
+ struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct qbman_fd *fd,
+ int num_frames)
+{
+ uint32_t *p;
+ const uint32_t *cl;
+ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+ int i, num_enqueued = 0;
+
+ half_mask = (s->eqcr.pi_ci_mask>>1);
+ full_mask = s->eqcr.pi_ci_mask;
+ if (!s->eqcr.available) {
+ eqcr_ci = s->eqcr.ci;
+ s->eqcr.ci = qbman_cinh_read(&s->sys,
+ QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+ eqcr_ci, s->eqcr.ci);
+ if (!s->eqcr.available)
+ return 0;
+ }
+
+ eqcr_pi = s->eqcr.pi;
+ num_enqueued = (s->eqcr.available < num_frames) ?
+ s->eqcr.available : num_frames;
+ s->eqcr.available -= num_enqueued;
+ /* Fill in the EQCR ring */
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ cl = qb_cl(&d[i]);
+ memcpy_byte_by_byte(&p[1], &cl[1], 28);
+ memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+ eqcr_pi++;
+ }
+
+ lwsync();
+
+ /* Set the verb byte, have to substitute in the valid-bit */
+ eqcr_pi = s->eqcr.pi;
+ for (i = 0; i < num_enqueued; i++) {
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+ cl = qb_cl(&d[i]);
+ p[0] = cl[0] | s->eqcr.pi_vb;
+ eqcr_pi++;
+ if (!(eqcr_pi & half_mask))
+ s->eqcr.pi_vb ^= QB_VALID_BIT;
+ }
+
+ s->eqcr.pi = eqcr_pi & full_mask;
+
+ return num_enqueued;
+}
+
static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@@ -1426,7 +1755,13 @@ inline int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
const struct qbman_fd *fd,
int num_frames)
{
- return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+ if (!s->stash_off)
+ return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd,
+ num_frames);
+ else
+ return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd,
+ num_frames);
}
/*************************/
@@ -1574,6 +1909,30 @@ static int qbman_swp_pull_direct(struct qbman_swp *s,
return 0;
}
+static int qbman_swp_pull_cinh_direct(struct qbman_swp *s,
+ struct qbman_pull_desc *d)
+{
+ uint32_t *p;
+ uint32_t *cl = qb_cl(d);
+
+ if (!atomic_dec_and_test(&s->vdq.busy)) {
+ atomic_inc(&s->vdq.busy);
+ return -EBUSY;
+ }
+
+ d->pull.tok = s->sys.idx + 1;
+ s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+ p = qbman_cinh_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR);
+ memcpy_byte_by_byte(&p[1], &cl[1], 12);
+
+ /* Set the verb byte, have to substitute in the valid-bit */
+ lwsync();
+ p[0] = cl[0] | s->vdq.valid_bit;
+ s->vdq.valid_bit ^= QB_VALID_BIT;
+
+ return 0;
+}
+
static int qbman_swp_pull_mem_back(struct qbman_swp *s,
struct qbman_pull_desc *d)
{
@@ -1601,7 +1960,10 @@ static int qbman_swp_pull_mem_back(struct qbman_swp *s,
inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
{
- return qbman_swp_pull_ptr(s, d);
+ if (!s->stash_off)
+ return qbman_swp_pull_ptr(s, d);
+ else
+ return qbman_swp_pull_cinh_direct(s, d);
}
/****************/
@@ -1638,7 +2000,10 @@ void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
*/
inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
{
- return qbman_swp_dqrr_next_ptr(s);
+ if (!s->stash_off)
+ return qbman_swp_dqrr_next_ptr(s);
+ else
+ return qbman_swp_dqrr_next_cinh_direct(s);
}
const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
@@ -1718,6 +2083,81 @@ const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
return p;
}
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s)
+{
+ uint32_t verb;
+ uint32_t response_verb;
+ uint32_t flags;
+ const struct qbman_result *p;
+
+ /* Before using valid-bit to detect if something is there, we have to
+ * handle the case of the DQRR reset bug...
+ */
+ if (s->dqrr.reset_bug) {
+ /* We pick up new entries by cache-inhibited producer index,
+ * which means that a non-coherent mapping would require us to
+ * invalidate and read *only* once that PI has indicated that
+ * there's an entry here. The first trip around the DQRR ring
+ * will be much less efficient than all subsequent trips around
+ * it...
+ */
+ uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) &
+ QMAN_DQRR_PI_MASK;
+
+ /* there are new entries if pi != next_idx */
+ if (pi == s->dqrr.next_idx)
+ return NULL;
+
+ /* if next_idx is/was the last ring index, and 'pi' is
+ * different, we can disable the workaround as all the ring
+ * entries have now been DMA'd to so valid-bit checking is
+ * repaired. Note: this logic needs to be based on next_idx
+ * (which increments one at a time), rather than on pi (which
+ * can burst and wrap-around between our snapshots of it).
+ */
+ QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0);
+ if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) {
+ pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n",
+ s->dqrr.next_idx, pi);
+ s->dqrr.reset_bug = 0;
+ }
+ }
+ p = qbman_cinh_read_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
+ verb = p->dq.verb;
+
+ /* If the valid-bit isn't of the expected polarity, nothing there. Note,
+ * in the DQRR reset bug workaround, we shouldn't need to skip this
+ * check, because we've already determined that a new entry is available
+ * and we read the entry through the cache-inhibited mapping, so the
+ * valid-bit behaviour is repaired and should tell us what we already
+ * knew from reading PI.
+ */
+ if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+ return NULL;
+
+ /* There's something there. Move "next_idx" attention to the next ring
+ * entry (and prefetch it) before returning what we found.
+ */
+ s->dqrr.next_idx++;
+ if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+ s->dqrr.next_idx = 0;
+ s->dqrr.valid_bit ^= QB_VALID_BIT;
+ }
+ /* If this is the final response to a volatile dequeue command
+ * indicate that the vdq is no longer busy
+ */
+ flags = p->dq.stat;
+ response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+ if ((response_verb == QBMAN_RESULT_DQ) &&
+ (flags & QBMAN_DQ_STAT_VOLATILE) &&
+ (flags & QBMAN_DQ_STAT_EXPIRED))
+ atomic_inc(&s->vdq.busy);
+
+ return p;
+}
+
const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
{
uint32_t verb;
@@ -2096,6 +2536,37 @@ static int qbman_swp_release_direct(struct qbman_swp *s,
return 0;
}
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+ const struct qbman_release_desc *d,
+ const uint64_t *buffers,
+ unsigned int num_buffers)
+{
+ uint32_t *p;
+ const uint32_t *cl = qb_cl(d);
+ uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+ pr_debug("RAR=%08x\n", rar);
+ if (!RAR_SUCCESS(rar))
+ return -EBUSY;
+
+ QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+ /* Start the release command */
+ p = qbman_cinh_write_start_wo_shadow(&s->sys,
+ QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+ /* Copy the caller's buffer pointers to the command */
+ memcpy_byte_by_byte(&p[2], buffers, num_buffers * sizeof(uint64_t));
+
+ /* Set the verb byte, have to substitute in the valid-bit and the
+ * number of buffers.
+ */
+ lwsync();
+ p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+
+ return 0;
+}
+
static int qbman_swp_release_mem_back(struct qbman_swp *s,
const struct qbman_release_desc *d,
const uint64_t *buffers,
@@ -2134,7 +2605,11 @@ inline int qbman_swp_release(struct qbman_swp *s,
const uint64_t *buffers,
unsigned int num_buffers)
{
- return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+ if (!s->stash_off)
+ return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+ else
+ return qbman_swp_release_cinh_direct(s, d, buffers,
+ num_buffers);
}
/*******************/
@@ -2157,8 +2632,8 @@ struct qbman_acquire_rslt {
uint64_t buf[7];
};
-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
- unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+ uint64_t *buffers, unsigned int num_buffers)
{
struct qbman_acquire_desc *p;
struct qbman_acquire_rslt *r;
@@ -2202,6 +2677,61 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
return (int)r->num;
}
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+ uint64_t *buffers, unsigned int num_buffers)
+{
+ struct qbman_acquire_desc *p;
+ struct qbman_acquire_rslt *r;
+
+ if (!num_buffers || (num_buffers > 7))
+ return -EINVAL;
+
+ /* Start the management command */
+ p = qbman_swp_mc_start(s);
+
+ if (!p)
+ return -EBUSY;
+
+ /* Encode the caller-provided attributes */
+ p->bpid = bpid;
+ p->num = num_buffers;
+
+ /* Complete the management command */
+ r = qbman_swp_mc_complete_cinh(s, p, QBMAN_MC_ACQUIRE);
+ if (!r) {
+ pr_err("qbman: acquire from BPID %d failed, no response\n",
+ bpid);
+ return -EIO;
+ }
+
+ /* Decode the outcome */
+ QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
+
+ /* Determine success or failure */
+ if (r->rslt != QBMAN_MC_RSLT_OK) {
+ pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+ bpid, r->rslt);
+ return -EIO;
+ }
+
+ QBMAN_BUG_ON(r->num > num_buffers);
+
+ /* Copy the acquired buffers to the caller's array */
+ u64_from_le32_copy(buffers, &r->buf[0], r->num);
+
+ return (int)r->num;
+}
+
+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+ unsigned int num_buffers)
+{
+ if (!s->stash_off)
+ return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+ else
+ return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+ num_buffers);
+}
+
/*****************/
/* FQ management */
/*****************/
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
*
*/
@@ -102,6 +102,7 @@ struct qbman_swp {
uint32_t ci;
int available;
} eqcr;
+ uint8_t stash_off; /* set when the portal is accessed without stashing */
};
/* -------------------------- */
@@ -118,7 +119,9 @@ struct qbman_swp {
*/
void *qbman_swp_mc_start(struct qbman_swp *p);
void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
void *qbman_swp_mc_result(struct qbman_swp *p);
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p);
/* Wraps up submit + poll-for-result */
static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
@@ -135,6 +138,20 @@ static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
return cmd;
}
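+/* Wraps up submit + poll-for-result over the cache-inhibited window */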
+static inline void *qbman_swp_mc_complete_cinh(struct qbman_swp *swp, void *cmd,
+ uint8_t cmd_verb)
+{
+ int loopvar = 1000;
+
+ qbman_swp_mc_submit_cinh(swp, cmd, cmd_verb);
+ do {
+ cmd = qbman_swp_mc_result_cinh(swp);
+ } while (!cmd && loopvar--);
+ QBMAN_BUG_ON(!loopvar);
+
+ return cmd;
+}
+
/* ---------------------- */
/* Descriptors/cachelines */
/* ---------------------- */
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2019 NXP
+ * Copyright 2019-2020 NXP
*/
/* qbman_sys_decl.h and qbman_sys.h are the two platform-specific files in the
* driver. They are only included via qbman_private.h, which is itself a
@@ -190,6 +190,34 @@ static inline void qbman_cinh_write(struct qbman_swp_sys *s, uint32_t offset,
#endif
}
+static inline void *qbman_cinh_write_start_wo_shadow(struct qbman_swp_sys *s,
+ uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+ pr_info("qbman_cinh_write_start(%p:%d:0x%03x)\n",
+ s->addr_cinh, s->idx, offset);
+#endif
+ QBMAN_BUG_ON(offset & 63);
+ return (s->addr_cinh + offset);
+}
+
+static inline void qbman_cinh_write_complete(struct qbman_swp_sys *s,
+ uint32_t offset, void *cmd)
+{
+ const uint32_t *shadow = cmd;
+ int loop;
+#ifdef QBMAN_CINH_TRACE
+ pr_info("qbman_cinh_write_complete(%p:%d:0x%03x) %p\n",
+ s->addr_cinh, s->idx, offset, shadow);
+ hexdump(cmd, 64);
+#endif
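+ /* Write words 1..15 first, then the verb word last, so the command only
+ * becomes valid for hardware once it is complete.
+ */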
+ for (loop = 15; loop >= 1; loop--)
+ __raw_writel(shadow[loop], s->addr_cinh +
+ offset + loop * 4);
+ lwsync();
+ __raw_writel(shadow[0], s->addr_cinh + offset);
+}
+
static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
{
uint32_t reg = __raw_readl(s->addr_cinh + offset);
@@ -200,6 +228,35 @@ static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
return reg;
}
+static inline void *qbman_cinh_read_shadow(struct qbman_swp_sys *s,
+ uint32_t offset)
+{
+ uint32_t *shadow = (uint32_t *)(s->cena + offset);
+ unsigned int loop;
+#ifdef QBMAN_CINH_TRACE
+ pr_info(" %s (%p:%d:0x%03x) %p\n", __func__,
+ s->addr_cinh, s->idx, offset, shadow);
+#endif
+
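+ /* Pull the 64-byte response word by word from the cache-inhibited region
+ * into the shadow buffer so the caller can parse it as normal memory.
+ */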
+ for (loop = 0; loop < 16; loop++)
+ shadow[loop] = __raw_readl(s->addr_cinh + offset
+ + loop * 4);
+#ifdef QBMAN_CINH_TRACE
+ hexdump(shadow, 64);
+#endif
+ return shadow;
+}
+
+static inline void *qbman_cinh_read_wo_shadow(struct qbman_swp_sys *s,
+ uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+ pr_info("qbman_cinh_read(%p:%d:0x%03x)\n",
+ s->addr_cinh, s->idx, offset);
+#endif
+ return s->addr_cinh + offset;
+}
+
static inline void *qbman_cena_write_start(struct qbman_swp_sys *s,
uint32_t offset)
{
@@ -476,6 +533,82 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
return 0;
}
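+/*
+ * Like qbman_swp_sys_init(), but re-programs the CFG register of an already
+ * enabled portal, switching it between stashing and cache-inhibited writes.
+ */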
+static inline int qbman_swp_sys_update(struct qbman_swp_sys *s,
+ const struct qbman_swp_desc *d,
+ uint8_t dqrr_size,
+ int stash_off)
+{
+ uint32_t reg;
+ int i;
+ int cena_region_size = 4*1024;
+ uint8_t est = 1;
+#ifdef RTE_ARCH_64
+ uint8_t wn = CENA_WRITE_ENABLE;
+#else
+ uint8_t wn = CINH_WRITE_ENABLE;
+#endif
+
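+ /* A portal running without stashing must use cache-inhibited writes */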
+ if (stash_off)
+ wn = CINH_WRITE_ENABLE;
+
+ QBMAN_BUG_ON(d->idx < 0);
+#ifdef QBMAN_CHECKING
+ /* We should never be asked to initialise for a portal that isn't in
+ * the power-on state. (Ie. don't forget to reset portals when they are
+ * decommissioned!)
+ */
+ reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+ QBMAN_BUG_ON(reg);
+#endif
+ if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+ && (d->cena_access_mode == qman_cena_fastest_access))
+ memset(s->addr_cena, 0, cena_region_size);
+ else {
+ /* Invalidate the portal memory.
+ * This ensures no stale cache lines
+ */
+ for (i = 0; i < cena_region_size; i += 64)
+ dccivac(s->addr_cena + i);
+ }
+
+ if (dpaa2_svr_family == SVR_LS1080A)
+ est = 0;
+
+ if (s->eqcr_mode == qman_eqcr_vb_array) {
+ reg = qbman_set_swp_cfg(dqrr_size, wn,
+ 0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+ } else {
+ if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 &&
+ (d->cena_access_mode == qman_cena_fastest_access))
+ reg = qbman_set_swp_cfg(dqrr_size, wn,
+ 1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+ else
+ reg = qbman_set_swp_cfg(dqrr_size, wn,
+ est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+ }
+
+ if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+ && (d->cena_access_mode == qman_cena_fastest_access))
+ reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+ 1 << SWP_CFG_VPM_SHIFT | /* VDQCR read triggered mode */
+ 1 << SWP_CFG_CPM_SHIFT; /* CR read triggered mode */
+
+ qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg);
+ reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+ if (!reg) {
+ pr_err("The portal %d is not enabled!\n", s->idx);
+ return -1;
+ }
+
+ if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+ && (d->cena_access_mode == qman_cena_fastest_access)) {
+ qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+ qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+ }
+
+ return 0;
+}
+
static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)
{
free(s->cena);