Message ID | 1369893156-3954-1-git-send-email-rajeshwari.s@samsung.com |
---|---|
State | New |
Headers | show |
On Thu, May 30, 2013 at 11:22 AM, Rajeshwari Shinde <rajeshwari.s@samsung.com> wrote: > Since SPI register access is so expensive, it is worth transferring data > a word at a time if we can. This complicates the driver unfortunately. > > Use the byte-swapping feature to avoid having to convert to/from big > endian in software. > > This change increases speed from about 2MB/s to about 4.5MB/s. > > Based on "[PATCH V2] spi: exynos: Minimise access to SPI FIFO level" > > Signed-off-by: Simon Glass <sjg@chromium.org> > Signed-off-by: Rajeshwari Shinde <rajeshwari.s@samsung.com> > --- > Changes in V2: > - Rebased on [PATCH V2] spi: exynos: Minimise access to SPI FIFO level > arch/arm/include/asm/arch-exynos/spi.h | 11 ++++- > drivers/spi/exynos_spi.c | 79 ++++++++++++++++++++++++++------ > 2 files changed, 74 insertions(+), 16 deletions(-) > > diff --git a/arch/arm/include/asm/arch-exynos/spi.h b/arch/arm/include/asm/arch-exynos/spi.h > index 7cab1e9..e67ad27 100644 > --- a/arch/arm/include/asm/arch-exynos/spi.h > +++ b/arch/arm/include/asm/arch-exynos/spi.h > @@ -34,7 +34,7 @@ struct exynos_spi { > unsigned int rx_data; /* 0x1c */ > unsigned int pkt_cnt; /* 0x20 */ > unsigned char reserved2[4]; > - unsigned char reserved3[4]; > + unsigned int swap_cfg; /* 0x28 */ > unsigned int fb_clk; /* 0x2c */ > unsigned char padding[0xffd0]; > }; > @@ -74,5 +74,14 @@ struct exynos_spi { > /* Packet Count */ > #define SPI_PACKET_CNT_EN (1 << 16) > > +/* Swap config */ > +#define SPI_TX_SWAP_EN (1 << 0) > +#define SPI_TX_BYTE_SWAP (1 << 2) > +#define SPI_TX_HWORD_SWAP (1 << 3) > +#define SPI_TX_BYTE_SWAP (1 << 2) > +#define SPI_RX_SWAP_EN (1 << 4) > +#define SPI_RX_BYTE_SWAP (1 << 6) > +#define SPI_RX_HWORD_SWAP (1 << 7) > + > #endif /* __ASSEMBLY__ */ > #endif > diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c > index bcca3d6..4dda23b 100644 > --- a/drivers/spi/exynos_spi.c > +++ b/drivers/spi/exynos_spi.c > @@ -216,12 +216,29 @@ static void spi_get_fifo_levels(struct exynos_spi *regs, > * > * @param regs SPI peripheral registers > * @param count Number of bytes to transfer > + * @param step Number of bytes to transfer in each packet (1 or 4) > */ > -static void spi_request_bytes(struct exynos_spi *regs, int count) > +static void spi_request_bytes(struct exynos_spi *regs, int count, int step) > { > + /* For word address we need to swap bytes */ > + if (step == 4) { > + setbits_le32(®s->mode_cfg, > + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); > + count /= 4; > + setbits_le32(®s->swap_cfg, SPI_TX_SWAP_EN | SPI_RX_SWAP_EN | > + SPI_TX_BYTE_SWAP | SPI_RX_BYTE_SWAP | > + SPI_TX_HWORD_SWAP | SPI_RX_HWORD_SWAP); > + } else { > + /* Select byte access and clear the swap configuration */ > + clrbits_le32(®s->mode_cfg, > + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); > + writel(0, ®s->swap_cfg); > + } > + > assert(count && count < (1 << 16)); > setbits_le32(®s->ch_cfg, SPI_CH_RST); > clrbits_le32(®s->ch_cfg, SPI_CH_RST); > + > writel(count | SPI_PACKET_CNT_EN, ®s->pkt_cnt); > } > > @@ -236,6 +253,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, > int toread; > unsigned start = get_timer(0); > int stopping; > + int step; > > out_bytes = in_bytes = todo; > > @@ -243,10 +261,19 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, > !(spi_slave->mode & SPI_SLAVE); > > /* > + * Try to transfer words if we can. This helps read performance at > + * SPI clock speeds above about 20MHz. > + */ > + step = 1; > + if (!((todo | (uintptr_t)rxp | (uintptr_t)txp) & 3) && > + !spi_slave->skip_preamble) > + step = 4; > + > + /* > * If there's something to send, do a software reset and set a > * transaction size. > */ > - spi_request_bytes(regs, todo); > + spi_request_bytes(regs, todo, step); > > /* > * Bytes are transmitted/received in pairs. Wait to receive all the > @@ -259,14 +286,26 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, > > /* Keep the fifos full/empty. */ > spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl); > + > + /* > + * Don't completely fill the txfifo, since we don't want our > + * rxfifo to overflow, and it may already contain data. > + */ > while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) { > - temp = txp ? *txp++ : 0xff; > + if (!txp) > + temp = -1; > + else if (step == 4) > + temp = *(uint32_t *)txp; > + else > + temp = *txp; > writel(temp, ®s->tx_data); > - out_bytes--; > - tx_lvl++; > + out_bytes -= step; > + if (txp) > + txp += step; > + tx_lvl += step; > } > - if (rx_lvl > 0) { > - while (rx_lvl > 0) { > + if (rx_lvl >= step) { > + while (rx_lvl >= step) { > temp = readl(®s->rx_data); > if (spi_slave->skip_preamble) { > if (temp == SPI_PREAMBLE_END_BYTE) { > @@ -274,12 +313,18 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, > stopping = 0; > } > } else { > - if (rxp || stopping) > - *rxp++ = temp; > - in_bytes--; Please see below Tag code, seems like some code style issues with checkpatch. ---------------------- Tag+ > + if (rxp || stopping) { > + if (step == 4) > + *(uint32_t *)rxp = temp; > + else > + *rxp = temp; > + rxp += step; > + } ------------------------- Tag- > + in_bytes -= step; > } > - toread--; > - rx_lvl--; > + toread -= step; > + rx_lvl -= step; > + } > } else if (!toread) { > /* > * We have run out of input data, but haven't read > @@ -291,7 +336,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, > out_bytes = in_bytes; > toread = in_bytes; > txp = NULL; > - spi_request_bytes(regs, toread); > + spi_request_bytes(regs, toread, step); > } > if (spi_slave->skip_preamble && get_timer(start) > 100) { > printf("SPI timeout: in_bytes=%d, out_bytes=%d, ", > @@ -335,10 +380,14 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, > if ((flags & SPI_XFER_BEGIN)) > spi_cs_activate(slave); > > - /* Exynos SPI limits each transfer to 65535 bytes */ > + /* > + * Exynos SPI limits each transfer to 65535 transfers. To keep > + * things simple, allow a maximum of 65532 bytes. We could allow > + * more in word mode, but the performance difference is small. > + */ > bytelen = bitlen / 8; > for (upto = 0; !ret && upto < bytelen; upto += todo) { > - todo = min(bytelen - upto, (1 << 16) - 1); > + todo = min(bytelen - upto, (1 << 16) - 4); > ret = spi_rx_tx(spi_slave, todo, &din, &dout, flags); > if (ret) > break; > -- > 1.7.4.4 > -- Thanks, Jagan.
diff --git a/arch/arm/include/asm/arch-exynos/spi.h b/arch/arm/include/asm/arch-exynos/spi.h index 7cab1e9..e67ad27 100644 --- a/arch/arm/include/asm/arch-exynos/spi.h +++ b/arch/arm/include/asm/arch-exynos/spi.h @@ -34,7 +34,7 @@ struct exynos_spi { unsigned int rx_data; /* 0x1c */ unsigned int pkt_cnt; /* 0x20 */ unsigned char reserved2[4]; - unsigned char reserved3[4]; + unsigned int swap_cfg; /* 0x28 */ unsigned int fb_clk; /* 0x2c */ unsigned char padding[0xffd0]; }; @@ -74,5 +74,14 @@ struct exynos_spi { /* Packet Count */ #define SPI_PACKET_CNT_EN (1 << 16) +/* Swap config */ +#define SPI_TX_SWAP_EN (1 << 0) +#define SPI_TX_BYTE_SWAP (1 << 2) +#define SPI_TX_HWORD_SWAP (1 << 3) +#define SPI_TX_BYTE_SWAP (1 << 2) +#define SPI_RX_SWAP_EN (1 << 4) +#define SPI_RX_BYTE_SWAP (1 << 6) +#define SPI_RX_HWORD_SWAP (1 << 7) + #endif /* __ASSEMBLY__ */ #endif diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c index bcca3d6..4dda23b 100644 --- a/drivers/spi/exynos_spi.c +++ b/drivers/spi/exynos_spi.c @@ -216,12 +216,29 @@ static void spi_get_fifo_levels(struct exynos_spi *regs, * * @param regs SPI peripheral registers * @param count Number of bytes to transfer + * @param step Number of bytes to transfer in each packet (1 or 4) */ -static void spi_request_bytes(struct exynos_spi *regs, int count) +static void spi_request_bytes(struct exynos_spi *regs, int count, int step) { + /* For word address we need to swap bytes */ + if (step == 4) { + setbits_le32(®s->mode_cfg, + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); + count /= 4; + setbits_le32(®s->swap_cfg, SPI_TX_SWAP_EN | SPI_RX_SWAP_EN | + SPI_TX_BYTE_SWAP | SPI_RX_BYTE_SWAP | + SPI_TX_HWORD_SWAP | SPI_RX_HWORD_SWAP); + } else { + /* Select byte access and clear the swap configuration */ + clrbits_le32(®s->mode_cfg, + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); + writel(0, ®s->swap_cfg); + } + assert(count && count < (1 << 16)); setbits_le32(®s->ch_cfg, SPI_CH_RST); clrbits_le32(®s->ch_cfg, SPI_CH_RST); + writel(count | SPI_PACKET_CNT_EN, ®s->pkt_cnt); } @@ -236,6 +253,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, int toread; unsigned start = get_timer(0); int stopping; + int step; out_bytes = in_bytes = todo; @@ -243,10 +261,19 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, !(spi_slave->mode & SPI_SLAVE); /* + * Try to transfer words if we can. This helps read performance at + * SPI clock speeds above about 20MHz. + */ + step = 1; + if (!((todo | (uintptr_t)rxp | (uintptr_t)txp) & 3) && + !spi_slave->skip_preamble) + step = 4; + + /* * If there's something to send, do a software reset and set a * transaction size. */ - spi_request_bytes(regs, todo); + spi_request_bytes(regs, todo, step); /* * Bytes are transmitted/received in pairs. Wait to receive all the @@ -259,14 +286,26 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, /* Keep the fifos full/empty. */ spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl); + + /* + * Don't completely fill the txfifo, since we don't want our + * rxfifo to overflow, and it may already contain data. + */ while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) { - temp = txp ? *txp++ : 0xff; + if (!txp) + temp = -1; + else if (step == 4) + temp = *(uint32_t *)txp; + else + temp = *txp; writel(temp, ®s->tx_data); - out_bytes--; - tx_lvl++; + out_bytes -= step; + if (txp) + txp += step; + tx_lvl += step; } - if (rx_lvl > 0) { - while (rx_lvl > 0) { + if (rx_lvl >= step) { + while (rx_lvl >= step) { temp = readl(®s->rx_data); if (spi_slave->skip_preamble) { if (temp == SPI_PREAMBLE_END_BYTE) { @@ -274,12 +313,18 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, stopping = 0; } } else { - if (rxp || stopping) - *rxp++ = temp; - in_bytes--; + if (rxp || stopping) { + if (step == 4) + *(uint32_t *)rxp = temp; + else + *rxp = temp; + rxp += step; + } + in_bytes -= step; } - toread--; - rx_lvl--; + toread -= step; + rx_lvl -= step; + } } else if (!toread) { /* * We have run out of input data, but haven't read @@ -291,7 +336,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, out_bytes = in_bytes; toread = in_bytes; txp = NULL; - spi_request_bytes(regs, toread); + spi_request_bytes(regs, toread, step); } if (spi_slave->skip_preamble && get_timer(start) > 100) { printf("SPI timeout: in_bytes=%d, out_bytes=%d, ", @@ -335,10 +380,14 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, if ((flags & SPI_XFER_BEGIN)) spi_cs_activate(slave); - /* Exynos SPI limits each transfer to 65535 bytes */ + /* + * Exynos SPI limits each transfer to 65535 transfers. To keep + * things simple, allow a maximum of 65532 bytes. We could allow + * more in word mode, but the performance difference is small. + */ bytelen = bitlen / 8; for (upto = 0; !ret && upto < bytelen; upto += todo) { - todo = min(bytelen - upto, (1 << 16) - 1); + todo = min(bytelen - upto, (1 << 16) - 4); ret = spi_rx_tx(spi_slave, todo, &din, &dout, flags); if (ret) break;