diff mbox series

[3/3] spi: apple: Add driver for Apple SPI controller

Message ID 20211212034726.26306-4-marcan@marcan.st
State New
Headers show
Series Apple SPI controller driver | expand

Commit Message

Hector Martin Dec. 12, 2021, 3:47 a.m. UTC
This SPI controller is present in Apple SoCs such as the M1 (t8103) and
M1 Pro/Max (t600x). It is a relatively straightforward design with two
16-entry FIFOs, arbitrary transfer sizes (up to 2**32 - 1) and fully
configurable word size up to 32 bits. It supports one hardware CS line
which can also be driven via the pinctrl/GPIO driver instead, if
desired. TX and RX can be independently enabled.

There are a surprising number of knobs for tweaking details of the
transfer, most of which we do not use right now. Hardware CS control
is available, but we haven't found a way to make it stay low across
multiple logical transfers, so we just use software CS control for now.

There is also a shared DMA offload coprocessor that can be used to handle
larger transfers without requiring an IRQ every 8-16 words, but that
feature depends on a bunch of scaffolding that isn't ready to be
upstreamed yet, so leave it for later.

The hardware shares some register bit definitions with spi-s3c24xx which
suggests it has a shared legacy with Samsung SoCs, but it is too
different to warrant sharing a driver.

Signed-off-by: Hector Martin <marcan@marcan.st>
---
 drivers/spi/Kconfig     |   8 +
 drivers/spi/Makefile    |   1 +
 drivers/spi/spi-apple.c | 566 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 575 insertions(+)
 create mode 100644 drivers/spi/spi-apple.c

Comments

Sven Peter Dec. 12, 2021, 12:39 p.m. UTC | #1
On Sun, Dec 12, 2021, at 04:47, Hector Martin wrote:
> This SPI controller is present in Apple SoCs such as the M1 (t8103) and
> M1 Pro/Max (t600x). It is a relatively straightforward design with two
> 16-entry FIFOs, arbitrary transfer sizes (up to 2**32 - 1) and fully
> configurable word size up to 32 bits. It supports one hardware CS line
> which can also be driven via the pinctrl/GPIO driver instead, if
> desired. TX and RX can be independently enabled.
>
> There are a surprising number of knobs for tweaking details of the
> transfer, most of which we do not use right now. Hardware CS control
> is available, but we haven't found a way to make it stay low across
> multiple logical transfers, so we just use software CS control for now.
>
> There is also a shared DMA offload coprocessor that can be used to handle
> larger transfers without requiring an IRQ every 8-16 words, but that
> feature depends on a bunch of scaffolding that isn't ready to be
> upstreamed yet, so leave it for later.
>
> The hardware shares some register bit definitions with spi-s3c24xx which
> suggests it has a shared legacy with Samsung SoCs, but it is too
> different to warrant sharing a driver.
>
> Signed-off-by: Hector Martin <marcan@marcan.st>
> ---

Looks good to me, I just have a few nits below.

>  drivers/spi/Kconfig     |   8 +
>  drivers/spi/Makefile    |   1 +
>  drivers/spi/spi-apple.c | 566 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 575 insertions(+)
>  create mode 100644 drivers/spi/spi-apple.c
>
> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> index b2a8821971e1..d4369c73d4ea 100644
> --- a/drivers/spi/Kconfig
> +++ b/drivers/spi/Kconfig
> @@ -79,6 +79,14 @@ config SPI_ALTERA_DFL
>  	  Altera SPI master controller.  The SPI master is connected
>  	  to a SPI slave to Avalon bridge in a Intel MAX BMC.
> 
> +config SPI_APPLE
> +	tristate "Apple SoC SPI Controller platform driver"
> +	depends on ARCH_APPLE || COMPILE_TEST
> +	help
> +	  This enables support for the SPI controller present on
> +	  many Apple SoCs, including the t8103 (M1) and t600x
> +	  (M1 Pro/Max).
> +
>  config SPI_AR934X
>  	tristate "Qualcomm Atheros AR934X/QCA95XX SPI controller driver"
>  	depends on ATH79 || COMPILE_TEST
> diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
> index dd7393a6046f..35624999d6aa 100644
> --- a/drivers/spi/Makefile
> +++ b/drivers/spi/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_SPI_LOOPBACK_TEST)		+= 
> spi-loopback-test.o
>  obj-$(CONFIG_SPI_ALTERA)		+= spi-altera-platform.o
>  obj-$(CONFIG_SPI_ALTERA_CORE)		+= spi-altera-core.o
>  obj-$(CONFIG_SPI_ALTERA_DFL)		+= spi-altera-dfl.o
> +obj-$(CONFIG_SPI_APPLE)			+= spi-apple.o
>  obj-$(CONFIG_SPI_AR934X)		+= spi-ar934x.o
>  obj-$(CONFIG_SPI_ARMADA_3700)		+= spi-armada-3700.o
>  obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
> diff --git a/drivers/spi/spi-apple.c b/drivers/spi/spi-apple.c
> new file mode 100644
> index 000000000000..67d93048bb58
> --- /dev/null
> +++ b/drivers/spi/spi-apple.c
> @@ -0,0 +1,566 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Apple SoC SPI device driver
> + *
> + * Copyright The Asahi Linux Contributors
> + *
> + * Based on spi-sifive.c, Copyright 2018 SiFive, Inc.
> + */
> +

#include <linux/bits.h> for GENMASK even though it's probably
pulled in by one of the #includes below

> +#include <linux/bitfield.h>
> +#include <linux/clk.h>
> +#include <linux/module.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/spi/spi.h>
> +
> +#define APPLE_SPI_DRIVER_NAME           "apple_spi"
> +
> +#define APPLE_SPI_CTRL			0x000
> +#define APPLE_SPI_CTRL_RUN		BIT(0)
> +#define APPLE_SPI_CTRL_TX_RESET		BIT(2)
> +#define APPLE_SPI_CTRL_RX_RESET		BIT(3)
> +
> +#define APPLE_SPI_CFG			0x004
> +#define APPLE_SPI_CFG_CPHA		BIT(1)
> +#define APPLE_SPI_CFG_CPOL		BIT(2)
> +#define APPLE_SPI_CFG_MODE		GENMASK(6, 5)
> +#define APPLE_SPI_CFG_MODE_POLLED	0
> +#define APPLE_SPI_CFG_MODE_IRQ		1
> +#define APPLE_SPI_CFG_MODE_DMA		2
> +#define APPLE_SPI_CFG_IE_RXCOMPLETE	BIT(7)
> +#define APPLE_SPI_CFG_IE_TXRXTHRESH	BIT(8)
> +#define APPLE_SPI_CFG_LSB_FIRST		BIT(13)
> +#define APPLE_SPI_CFG_WORD_SIZE		GENMASK(16, 15)
> +#define APPLE_SPI_CFG_WORD_SIZE_8B	0
> +#define APPLE_SPI_CFG_WORD_SIZE_16B	1
> +#define APPLE_SPI_CFG_WORD_SIZE_32B	2
> +#define APPLE_SPI_CFG_FIFO_THRESH	GENMASK(18, 17)
> +#define APPLE_SPI_CFG_FIFO_THRESH_8B	0
> +#define APPLE_SPI_CFG_FIFO_THRESH_4B	1
> +#define APPLE_SPI_CFG_FIFO_THRESH_1B	2
> +#define APPLE_SPI_CFG_IE_TXCOMPLETE	BIT(21)
> +
> +#define APPLE_SPI_STATUS		0x008
> +#define APPLE_SPI_STATUS_RXCOMPLETE	BIT(0)
> +#define APPLE_SPI_STATUS_TXRXTHRESH	BIT(1)
> +#define APPLE_SPI_STATUS_TXCOMPLETE	BIT(2)
> +
> +#define APPLE_SPI_PIN			0x00c
> +#define APPLE_SPI_PIN_KEEP_MOSI		BIT(0)
> +#define APPLE_SPI_PIN_CS		BIT(1)
> +
> +#define APPLE_SPI_TXDATA		0x010
> +#define APPLE_SPI_RXDATA		0x020
> +#define APPLE_SPI_CLKDIV		0x030
> +#define APPLE_SPI_CLKDIV_MAX		0x7ff
> +#define APPLE_SPI_RXCNT			0x034
> +#define APPLE_SPI_WORD_DELAY		0x038
> +#define APPLE_SPI_TXCNT			0x04c
> +
> +#define APPLE_SPI_FIFOSTAT		0x10c
> +#define APPLE_SPI_FIFOSTAT_TXFULL	BIT(4)
> +#define APPLE_SPI_FIFOSTAT_LEVEL_TX	GENMASK(15, 8)
> +#define APPLE_SPI_FIFOSTAT_RXEMPTY	BIT(20)
> +#define APPLE_SPI_FIFOSTAT_LEVEL_RX	GENMASK(31, 24)
> +
> +#define APPLE_SPI_IE_XFER		0x130
> +#define APPLE_SPI_IF_XFER		0x134
> +#define APPLE_SPI_XFER_RXCOMPLETE	BIT(0)
> +#define APPLE_SPI_XFER_TXCOMPLETE	BIT(1)
> +
> +#define APPLE_SPI_IE_FIFO		0x138
> +#define APPLE_SPI_IF_FIFO		0x13c
> +#define APPLE_SPI_FIFO_RXTHRESH		BIT(4)
> +#define APPLE_SPI_FIFO_TXTHRESH		BIT(5)
> +#define APPLE_SPI_FIFO_RXFULL		BIT(8)
> +#define APPLE_SPI_FIFO_TXEMPTY		BIT(9)
> +#define APPLE_SPI_FIFO_RXUNDERRUN	BIT(16)
> +#define APPLE_SPI_FIFO_TXOVERFLOW	BIT(17)
> +
> +#define APPLE_SPI_SHIFTCFG		0x150
> +#define APPLE_SPI_SHIFTCFG_CLK_ENABLE	BIT(0)
> +#define APPLE_SPI_SHIFTCFG_CS_ENABLE	BIT(1)
> +#define APPLE_SPI_SHIFTCFG_AND_CLK_DATA	BIT(8)
> +#define APPLE_SPI_SHIFTCFG_CS_AS_DATA	BIT(9)
> +#define APPLE_SPI_SHIFTCFG_TX_ENABLE	BIT(10)
> +#define APPLE_SPI_SHIFTCFG_RX_ENABLE	BIT(11)
> +#define APPLE_SPI_SHIFTCFG_BITS		GENMASK(21, 16)
> +#define APPLE_SPI_SHIFTCFG_OVERRIDE_CS	BIT(24)
> +
> +#define APPLE_SPI_PINCFG		0x154
> +#define APPLE_SPI_PINCFG_KEEP_CLK	BIT(0)
> +#define APPLE_SPI_PINCFG_KEEP_CS	BIT(1)
> +#define APPLE_SPI_PINCFG_KEEP_MOSI	BIT(2)
> +#define APPLE_SPI_PINCFG_CLK_IDLE_VAL	BIT(8)
> +#define APPLE_SPI_PINCFG_CS_IDLE_VAL	BIT(9)
> +#define APPLE_SPI_PINCFG_MOSI_IDLE_VAL	BIT(10)
> +
> +#define APPLE_SPI_DELAY_PRE		0x160
> +#define APPLE_SPI_DELAY_POST		0x168
> +#define APPLE_SPI_DELAY_ENABLE		BIT(0)
> +#define APPLE_SPI_DELAY_NO_INTERBYTE	BIT(1)
> +#define APPLE_SPI_DELAY_SET_SCK		BIT(4)
> +#define APPLE_SPI_DELAY_SET_MOSI	BIT(6)
> +#define APPLE_SPI_DELAY_SCK_VAL		BIT(8)
> +#define APPLE_SPI_DELAY_MOSI_VAL	BIT(12)
> +
> +#define APPLE_SPI_FIFO_DEPTH		16
> +
> +/*
> + * The slowest refclock available is 24MHz, the highest divider is 
> 0x7ff,
> + * the largest word size is 32 bits, the FIFO depth is 16, the maximum
> + * intra-word delay is 0xffff refclocks. So the maximum time a transfer
> + * cycle can take is:
> + *
> + * (0x7ff * 32 + 0xffff) * 16 / 24e6 Hz ~= 87ms
> + *
> + * Double it and round it up to 200ms for good measure.
> + */
> +#define APPLE_SPI_TIMEOUT_MS		200
> +
> +struct apple_spi {
> +	void __iomem      *regs;        /* MMIO register address */
> +	struct clk        *clk;         /* bus clock */
> +	struct completion done;         /* wake-up from interrupt */
> +};
> +
> +static inline void reg_write(struct apple_spi *spi, int offset, u32 
> value)
> +{
> +	writel_relaxed(value, spi->regs + offset);
> +}
> +
> +static inline u32 reg_read(struct apple_spi *spi, int offset)
> +{
> +	return readl_relaxed(spi->regs + offset);
> +}
> +
> +static inline void reg_mask(struct apple_spi *spi, int offset, u32 
> clear, u32 set)
> +{
> +	u32 val = reg_read(spi, offset);
> +
> +	val &= ~clear;
> +	val |= set;
> +	reg_write(spi, offset, val);
> +}
> +
> +static void apple_spi_init(struct apple_spi *spi)
> +{
> +	/* Set CS high (inactive) and disable override and auto-CS */
> +	reg_write(spi, APPLE_SPI_PIN, APPLE_SPI_PIN_CS);
> +	reg_mask(spi, APPLE_SPI_SHIFTCFG, APPLE_SPI_SHIFTCFG_OVERRIDE_CS, 0);
> +	reg_mask(spi, APPLE_SPI_PINCFG, APPLE_SPI_PINCFG_CS_IDLE_VAL, 
> APPLE_SPI_PINCFG_KEEP_CS);
> +
> +	/* Reset FIFOs */
> +	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RX_RESET | 
> APPLE_SPI_CTRL_TX_RESET);
> +
> +	/* Configure defaults */
> +	reg_write(spi, APPLE_SPI_CFG,
> +		  FIELD_PREP(APPLE_SPI_CFG_FIFO_THRESH, 
> APPLE_SPI_CFG_FIFO_THRESH_8B) |
> +		  FIELD_PREP(APPLE_SPI_CFG_MODE, APPLE_SPI_CFG_MODE_IRQ) |
> +		  FIELD_PREP(APPLE_SPI_CFG_WORD_SIZE, APPLE_SPI_CFG_WORD_SIZE_8B));
> +
> +	/* Disable IRQs */
> +	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +	reg_write(spi, APPLE_SPI_IE_XFER, 0);
> +
> +	/* Disable delays */
> +	reg_write(spi, APPLE_SPI_DELAY_PRE, 0);
> +	reg_write(spi, APPLE_SPI_DELAY_POST, 0);
> +}
> +
> +static int apple_spi_prepare_message(struct spi_controller *ctlr, 
> struct spi_message *msg)
> +{
> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +	struct spi_device *device = msg->spi;
> +
> +	u32 cfg = ((device->mode & SPI_CPHA ? APPLE_SPI_CFG_CPHA : 0) |
> +		   (device->mode & SPI_CPOL ? APPLE_SPI_CFG_CPOL : 0) |
> +		   (device->mode & SPI_LSB_FIRST ? APPLE_SPI_CFG_LSB_FIRST : 0));
> +
> +	/* Update core config */
> +	reg_mask(spi, APPLE_SPI_CFG,
> +		 APPLE_SPI_CFG_CPHA | APPLE_SPI_CFG_CPOL | APPLE_SPI_CFG_LSB_FIRST, 
> cfg);
> +
> +	return 0;
> +}
> +
> +static void apple_spi_set_cs(struct spi_device *device, bool is_high)
> +{
> +	struct apple_spi *spi = 
> spi_controller_get_devdata(device->controller);
> +
> +	reg_mask(spi, APPLE_SPI_PIN, APPLE_SPI_PIN_CS, is_high ? 
> APPLE_SPI_PIN_CS : 0);
> +}
> +
> +static bool apple_spi_prep_transfer(struct apple_spi *spi, struct 
> spi_transfer *t)
> +{
> +	u32 cr;
> +
> +	/* Calculate and program the clock rate */
> +	cr = DIV_ROUND_UP(clk_get_rate(spi->clk), t->speed_hz);
> +	reg_write(spi, APPLE_SPI_CLKDIV, min_t(u32, cr, 
> APPLE_SPI_CLKDIV_MAX));
> +
> +	/* Update bits per word */
> +	reg_mask(spi, APPLE_SPI_SHIFTCFG, APPLE_SPI_SHIFTCFG_BITS,
> +		 FIELD_PREP(APPLE_SPI_SHIFTCFG_BITS, t->bits_per_word));
> +
> +	/* We will want to poll if the time we need to wait is
> +	 * less than the context switching time.
> +	 * Let's call that threshold 5us. The operation will take:
> +	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
> +	 *    200000 * bits_per_word * fifo_threshold <= hz
> +	 */
> +	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <= 
> t->speed_hz;

Nice :-)

> +}
> +
> +static irqreturn_t apple_spi_irq(int irq, void *dev_id)
> +{
> +	struct apple_spi *spi = dev_id;
> +	u32 fifo = reg_read(spi, APPLE_SPI_IF_FIFO) & reg_read(spi, 
> APPLE_SPI_IE_FIFO);
> +	u32 xfer = reg_read(spi, APPLE_SPI_IF_XFER) & reg_read(spi, 
> APPLE_SPI_IE_XFER);
> +
> +	if (fifo || xfer) {
> +		/* Disable interrupts until next transfer */
> +		reg_write(spi, APPLE_SPI_IE_XFER, 0);
> +		reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +		complete(&spi->done);
> +		return IRQ_HANDLED;
> +	}
> +
> +	return IRQ_NONE;
> +}
> +
> +static int apple_spi_wait(struct apple_spi *spi, u32 fifo_bit, u32 
> xfer_bit, int poll)
> +{
> +	int ret = 0;
> +
> +	if (poll) {
> +		u32 fifo, xfer;
> +		unsigned long timeout = jiffies + APPLE_SPI_TIMEOUT_MS * HZ / 1000;
> +
> +		do {
> +			fifo = reg_read(spi, APPLE_SPI_IF_FIFO);
> +			xfer = reg_read(spi, APPLE_SPI_IF_XFER);
> +			if (time_after(jiffies, timeout)) {
> +				ret = -ETIMEDOUT;
> +				break;
> +			}
> +		} while (!((fifo & fifo_bit) || (xfer & xfer_bit)));
> +	} else {
> +		reinit_completion(&spi->done);
> +		reg_write(spi, APPLE_SPI_IE_XFER, xfer_bit);
> +		reg_write(spi, APPLE_SPI_IE_FIFO, fifo_bit);
> +
> +		if (!wait_for_completion_timeout(&spi->done,
> +						 msecs_to_jiffies(APPLE_SPI_TIMEOUT_MS)))
> +			ret = -ETIMEDOUT;
> +
> +		reg_write(spi, APPLE_SPI_IE_XFER, 0);
> +		reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +	}
> +
> +	return ret;
> +}
> +
> +static void apple_spi_tx(struct apple_spi *spi, const void **tx_ptr, 
> u32 *left,
> +			 unsigned int bytes_per_word)
> +{
> +	u32 inuse, words, wrote;
> +
> +	if (!*tx_ptr)
> +		return;
> +
> +	inuse = FIELD_GET(APPLE_SPI_FIFOSTAT_LEVEL_TX, reg_read(spi, 
> APPLE_SPI_FIFOSTAT));
> +	words = wrote = min_t(u32, *left, APPLE_SPI_FIFO_DEPTH - inuse);
> +
> +	if (!words)
> +		return;
> +
> +	*left -= words;
> +
> +	switch (bytes_per_word) {
> +	case 1: {
> +		const u8 *p = *tx_ptr;
> +
> +		while (words--)
> +			reg_write(spi, APPLE_SPI_TXDATA, *p++);
> +		break;
> +	}
> +	case 2: {
> +		const u16 *p = *tx_ptr;
> +
> +		while (words--)
> +			reg_write(spi, APPLE_SPI_TXDATA, *p++);
> +		break;
> +	}
> +	case 4: {
> +		const u32 *p = *tx_ptr;
> +
> +		while (words--)
> +			reg_write(spi, APPLE_SPI_TXDATA, *p++);
> +		break;
> +	}
> +	default:
> +		WARN_ON(1);
> +	}
> +
> +	*tx_ptr = ((u8 *)*tx_ptr) + bytes_per_word * wrote;
> +}
> +
> +static void apple_spi_rx(struct apple_spi *spi, void **rx_ptr, u32 
> *left,
> +			 unsigned int bytes_per_word)
> +{
> +	u32 words, read;
> +
> +	if (!*rx_ptr)
> +		return;
> +
> +	words = read = FIELD_GET(APPLE_SPI_FIFOSTAT_LEVEL_RX, reg_read(spi, 
> APPLE_SPI_FIFOSTAT));
> +	WARN_ON(words > *left);
> +
> +	if (!words)
> +		return;
> +
> +	*left -= min_t(u32, *left, words);
> +
> +	switch (bytes_per_word) {
> +	case 1: {
> +		u8 *p = *rx_ptr;
> +
> +		while (words--)
> +			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
> +		break;
> +	}
> +	case 2: {
> +		u16 *p = *rx_ptr;
> +
> +		while (words--)
> +			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
> +		break;
> +	}
> +	case 4: {
> +		u32 *p = *rx_ptr;
> +
> +		while (words--)
> +			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
> +		break;
> +	}
> +	default:
> +		WARN_ON(1);
> +	}
> +
> +	*rx_ptr = ((u8 *)*rx_ptr) + bytes_per_word * read;
> +}
> +
> +static int apple_spi_transfer_one(struct spi_controller *ctlr, struct 
> spi_device *device,
> +				  struct spi_transfer *t)
> +{
> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +	bool poll = apple_spi_prep_transfer(spi, t);
> +	const void *tx_ptr = t->tx_buf;
> +	void *rx_ptr = t->rx_buf;
> +	unsigned int bytes_per_word = t->bits_per_word > 16 ? 4 : 
> t->bits_per_word > 8 ? 2 : 1;
> +	u32 words = t->len / bytes_per_word;
> +	u32 remaining_tx = tx_ptr ? words : 0;
> +	u32 remaining_rx = rx_ptr ? words : 0;
> +	u32 xfer_flags = 0;
> +	u32 fifo_flags;
> +	int retries = 100;
> +	int ret = 0;
> +
> +	/* Reset FIFOs */
> +	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RX_RESET | 
> APPLE_SPI_CTRL_TX_RESET);
> +
> +	/* Clear IRQ flags */
> +	reg_write(spi, APPLE_SPI_IF_XFER, ~0);
> +	reg_write(spi, APPLE_SPI_IF_FIFO, ~0);
> +
> +	/* Determine transfer completion flags we wait for */
> +	if (tx_ptr)
> +		xfer_flags |= APPLE_SPI_XFER_TXCOMPLETE;
> +	if (rx_ptr)
> +		xfer_flags |= APPLE_SPI_XFER_RXCOMPLETE;
> +
> +	/* Set transfer length */
> +	reg_write(spi, APPLE_SPI_TXCNT, remaining_tx);
> +	reg_write(spi, APPLE_SPI_RXCNT, remaining_rx);
> +
> +	/* Prime transmit FIFO */
> +	apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
> +
> +	/* Start transfer */
> +	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RUN);
> +
> +	/* TX again since a few words get popped off immediately */
> +	apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
> +
> +	while (xfer_flags) {
> +		u32 fifo_flags = 0;
> +
> +		if (remaining_tx)
> +			fifo_flags |= APPLE_SPI_FIFO_TXTHRESH;
> +		if (remaining_rx)
> +			fifo_flags |= APPLE_SPI_FIFO_RXTHRESH;
> +
> +		/* Wait for anything to happen */
> +		ret = apple_spi_wait(spi, fifo_flags, xfer_flags, poll);
> +		if (ret) {
> +			dev_err(&ctlr->dev, "transfer timed out (remaining %d tx, %d rx)\n",
> +				remaining_tx, remaining_rx);
> +			goto err;
> +		}
> +
> +		/* Stop waiting on transfer halves once they complete */
> +		xfer_flags &= ~reg_read(spi, APPLE_SPI_IF_XFER);
> +
> +		/* Transmit and receive everything we can */
> +		apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
> +		apple_spi_rx(spi, &rx_ptr, &remaining_rx, bytes_per_word);
> +	}
> +
> +	/*
> +	 * Sometimes the transfer completes before the last word is in the RX 
> FIFO.
> +	 * Normally one retry is all it takes to get the last word out.
> +	 */
> +	while (remaining_rx && retries--)
> +		apple_spi_rx(spi, &rx_ptr, &remaining_rx, bytes_per_word);
> +
> +	if (remaining_tx)
> +		dev_err(&ctlr->dev, "transfer completed with %d words left to 
> transmit\n",
> +			remaining_tx);
> +	if (remaining_rx)
> +		dev_err(&ctlr->dev, "transfer completed with %d words left to 
> receive\n",
> +			remaining_rx);
> +
> +err:
> +	fifo_flags = reg_read(spi, APPLE_SPI_IF_FIFO);
> +	WARN_ON(fifo_flags & APPLE_SPI_FIFO_TXOVERFLOW);
> +	WARN_ON(fifo_flags & APPLE_SPI_FIFO_RXUNDERRUN);
> +
> +	/* Stop transfer */
> +	reg_write(spi, APPLE_SPI_CTRL, 0);
> +
> +	return ret;
> +}
> +
> +static int apple_spi_probe(struct platform_device *pdev)
> +{
> +	struct apple_spi *spi;
> +	int ret, irq;
> +	struct spi_controller *ctlr;
> +
> +	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct apple_spi));

devm_spi_alloc_master and then you can get rid of the spi_controller_put
error path.

> +	if (!ctlr) {
> +		dev_err(&pdev->dev, "out of memory\n");
> +		return -ENOMEM;
> +	}
> +
> +	spi = spi_controller_get_devdata(ctlr);
> +	init_completion(&spi->done);
> +	platform_set_drvdata(pdev, ctlr);
> +
> +	spi->regs = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(spi->regs)) {
> +		ret = PTR_ERR(spi->regs);
> +		goto put_ctlr;
> +	}
> +
> +	spi->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (IS_ERR(spi->clk)) {
> +		dev_err(&pdev->dev, "Unable to find bus clock\n");
> +		ret = PTR_ERR(spi->clk);
> +		goto put_ctlr;
> +	}

dev_err_probe can be used here in case devm_clk_get returns -EPROBE_DEFER.

> +
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0) {
> +		ret = irq;
> +		goto put_ctlr;
> +	}
> +
> +	ret = devm_request_irq(&pdev->dev, irq, apple_spi_irq, 0,
> +			       dev_name(&pdev->dev), spi);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Unable to bind to interrupt\n");
> +		goto put_ctlr;
> +	}
> +
> +	ret = clk_prepare_enable(spi->clk);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Unable to enable bus clock\n");
> +		goto put_ctlr;
> +	}

Unfortunately there's no devm_clk_prepare_enable but you could use
devm_add_action_or_reset like almost all watchdog drivers as well.

> +
> +	ctlr->dev.of_node = pdev->dev.of_node;
> +	ctlr->bus_num = pdev->id;
> +	ctlr->num_chipselect = 1;
> +	ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
> +	ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
> +	ctlr->flags = 0;
> +	ctlr->prepare_message = apple_spi_prepare_message;
> +	ctlr->set_cs = apple_spi_set_cs;
> +	ctlr->transfer_one = apple_spi_transfer_one;
> +	ctlr->auto_runtime_pm = true;
> +
> +	pm_runtime_set_active(&pdev->dev);
> +	pm_runtime_enable(&pdev->dev);

You could also use devm_pm_runtime_enable here and then everything
should be devres managed.

> +
> +	pdev->dev.dma_mask = NULL;
> +
> +	apple_spi_init(spi);
> +
> +	ret = devm_spi_register_controller(&pdev->dev, ctlr);
> +	if (ret < 0) {
> +		dev_err(&pdev->dev, "spi_register_ctlr failed\n");
> +		goto disable_pm;
> +	}
> +
> +	return 0;
> +
> +disable_pm:
> +	pm_runtime_disable(&pdev->dev);
> +	clk_disable_unprepare(spi->clk);
> +put_ctlr:
> +	spi_controller_put(ctlr);
> +
> +	return ret;
> +}
> +
> +static int apple_spi_remove(struct platform_device *pdev)
> +{
> +	struct spi_controller *ctlr = platform_get_drvdata(pdev);
> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +
> +	pm_runtime_disable(&pdev->dev);
> +
> +	/* Disable all the interrupts just in case */
> +	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +	reg_write(spi, APPLE_SPI_IE_XFER, 0);
> +
> +	clk_disable_unprepare(spi->clk);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id apple_spi_of_match[] = {
> +	{ .compatible = "apple,spi", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, apple_spi_of_match);
> +
> +static struct platform_driver apple_spi_driver = {
> +	.probe = apple_spi_probe,
> +	.remove = apple_spi_remove,
> +	.driver = {
> +		.name = APPLE_SPI_DRIVER_NAME,
> +		.of_match_table = apple_spi_of_match,
> +	},
> +};
> +module_platform_driver(apple_spi_driver);
> +
> +MODULE_AUTHOR("Hector Martin <marcan@marcan.st>");
> +MODULE_DESCRIPTION("Apple SoC SPI driver");
> +MODULE_LICENSE("GPL");
> -- 
> 2.33.0
Mark Brown Dec. 12, 2021, 11:41 p.m. UTC | #2
On Sun, Dec 12, 2021 at 12:47:26PM +0900, Hector Martin wrote:

This looks pretty good - one small issue and several stylistic nits
below.

> @@ -0,0 +1,566 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Apple SoC SPI device driver
> + *

Please keep the entire comment a C++ one so things look more
intentional.

> +#define APPLE_SPI_DRIVER_NAME           "apple_spi"

This is referenced exactly once, just inline it.

> +	/* We will want to poll if the time we need to wait is
> +	 * less than the context switching time.
> +	 * Let's call that threshold 5us. The operation will take:
> +	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
> +	 *    200000 * bits_per_word * fifo_threshold <= hz
> +	 */
> +	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <= t->speed_hz;

Some brackets or an intermediate variable wouldn't hurt here, especially
given the line length.

> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +	bool poll = apple_spi_prep_transfer(spi, t);
> +	const void *tx_ptr = t->tx_buf;
> +	void *rx_ptr = t->rx_buf;
> +	unsigned int bytes_per_word = t->bits_per_word > 16 ? 4 : t->bits_per_word > 8 ? 2 : 1;

Please don't abuse the ternery operator like this - just write normal
conditional statements, they're much easier to read.  In general the
driver is a bit too enthusiastic about them and this one is next level.

> +static int apple_spi_remove(struct platform_device *pdev)
> +{
> +	struct spi_controller *ctlr = platform_get_drvdata(pdev);
> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +
> +	pm_runtime_disable(&pdev->dev);
> +
> +	/* Disable all the interrupts just in case */
> +	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +	reg_write(spi, APPLE_SPI_IE_XFER, 0);

Since the driver registers with the SPI subsystem using devm and
remove() gets run before devm gets unwound we still potentially have
transfers running when the driver gets removed and this probably isn't
going to cause them to go well - obviously it's an edge case and it's
unclear when someone would be removing the driver but we still shouldn't
do this.

> +static const struct of_device_id apple_spi_of_match[] = {
> +	{ .compatible = "apple,spi", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, apple_spi_of_match);

This is an awfully generic compatible.  It's common to use the SoC name
for the IP compatibles when they're not otherwise identified?
Hector Martin Dec. 13, 2021, 3:32 a.m. UTC | #3
Thanks for the review!

On 12/12/2021 21.39, Sven Peter wrote:
>> +
> 
> #include <linux/bits.h> for GENMASK even though it's probably
> pulled in by one of the #includes below

Ack, fixed for v2.

>> +	/* We will want to poll if the time we need to wait is
>> +	 * less than the context switching time.
>> +	 * Let's call that threshold 5us. The operation will take:
>> +	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
>> +	 *    200000 * bits_per_word * fifo_threshold <= hz
>> +	 */
>> +	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <=
>> t->speed_hz;
> 
> Nice :-)

I stole this one from the sifive driver :-) (slightly adjusted)

>> +static int apple_spi_probe(struct platform_device *pdev)
>> +{
>> +	struct apple_spi *spi;
>> +	int ret, irq;
>> +	struct spi_controller *ctlr;
>> +
>> +	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct apple_spi));
> 
> devm_spi_alloc_master and then you can get rid of the spi_controller_put
> error path.

Ack, fixed for v2. That simplifies a bunch of the error handling.

> 
>> +	if (!ctlr) {
>> +		dev_err(&pdev->dev, "out of memory\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	spi = spi_controller_get_devdata(ctlr);
>> +	init_completion(&spi->done);
>> +	platform_set_drvdata(pdev, ctlr);
>> +
>> +	spi->regs = devm_platform_ioremap_resource(pdev, 0);
>> +	if (IS_ERR(spi->regs)) {
>> +		ret = PTR_ERR(spi->regs);
>> +		goto put_ctlr;
>> +	}
>> +
>> +	spi->clk = devm_clk_get(&pdev->dev, NULL);
>> +	if (IS_ERR(spi->clk)) {
>> +		dev_err(&pdev->dev, "Unable to find bus clock\n");
>> +		ret = PTR_ERR(spi->clk);
>> +		goto put_ctlr;
>> +	}
> 
> dev_err_probe can be used here in case devm_clk_get returns -EPROBE_DEFER.

Yup, good point. I switched most of the dev_errs to dev_err_probe.

> 
>> +
>> +	irq = platform_get_irq(pdev, 0);
>> +	if (irq < 0) {
>> +		ret = irq;
>> +		goto put_ctlr;
>> +	}
>> +
>> +	ret = devm_request_irq(&pdev->dev, irq, apple_spi_irq, 0,
>> +			       dev_name(&pdev->dev), spi);
>> +	if (ret) {
>> +		dev_err(&pdev->dev, "Unable to bind to interrupt\n");
>> +		goto put_ctlr;
>> +	}
>> +
>> +	ret = clk_prepare_enable(spi->clk);
>> +	if (ret) {
>> +		dev_err(&pdev->dev, "Unable to enable bus clock\n");
>> +		goto put_ctlr;
>> +	}
> 
> Unfortunately there's no devm_clk_prepare_enable but you could use
> devm_add_action_or_reset like almost all watchdog drivers as well.

Done.

>> +
>> +	ctlr->dev.of_node = pdev->dev.of_node;
>> +	ctlr->bus_num = pdev->id;
>> +	ctlr->num_chipselect = 1;
>> +	ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
>> +	ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
>> +	ctlr->flags = 0;
>> +	ctlr->prepare_message = apple_spi_prepare_message;
>> +	ctlr->set_cs = apple_spi_set_cs;
>> +	ctlr->transfer_one = apple_spi_transfer_one;
>> +	ctlr->auto_runtime_pm = true;
>> +
>> +	pm_runtime_set_active(&pdev->dev);
>> +	pm_runtime_enable(&pdev->dev);
> 
> You could also use devm_pm_runtime_enable here and then everything
> should be devres managed.

Done, though I still need to wrap the remove remove function in 
pm_runtime calls, since the device might be suspended when it gets called.
Hector Martin Dec. 13, 2021, 3:50 a.m. UTC | #4
Thanks for the review!

On 13/12/2021 08.41, Mark Brown wrote:
> On Sun, Dec 12, 2021 at 12:47:26PM +0900, Hector Martin wrote:
> 
> This looks pretty good - one small issue and several stylistic nits
> below.
> 
>> @@ -0,0 +1,566 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Apple SoC SPI device driver
>> + *
> 
> Please keep the entire comment a C++ one so things look more
> intentional.

I thought this pattern was pretty much the standard style.

$ grep -r -A1 "// SPDX" | grep '/\*' | wc -l
13512

$ grep -r -A1 "// SPDX" | grep -v SPDX | grep '//' | wc -l
705

>> +#define APPLE_SPI_DRIVER_NAME           "apple_spi"
> 
> This is referenced exactly once, just inline it.
Done. Also changed to "apple-spi" since all the other apple drivers use 
the dash convention.

> 
>> +	/* We will want to poll if the time we need to wait is
>> +	 * less than the context switching time.
>> +	 * Let's call that threshold 5us. The operation will take:
>> +	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
>> +	 *    200000 * bits_per_word * fifo_threshold <= hz
>> +	 */
>> +	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <= t->speed_hz;
> 
> Some brackets or an intermediate variable wouldn't hurt here, especially
> given the line length.

How about this?

return (200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2) <= 
t->speed_hz;

>> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
>> +	bool poll = apple_spi_prep_transfer(spi, t);
>> +	const void *tx_ptr = t->tx_buf;
>> +	void *rx_ptr = t->rx_buf;
>> +	unsigned int bytes_per_word = t->bits_per_word > 16 ? 4 : t->bits_per_word > 8 ? 2 : 1;
> 
> Please don't abuse the ternery operator like this - just write normal
> conditional statements, they're much easier to read.  In general the
> driver is a bit too enthusiastic about them and this one is next level.

Ack, I switched it to an if chain. That does mean I had to move the 
subsequent initializers out of the declarations section, so it's overall 
a bit more verbose.

	if (t->bits_per_word > 16)
		bytes_per_word = 4;
	else if (t->bits_per_word > 8)
		bytes_per_word = 2;
	else
		bytes_per_word = 1;

	words = t->len / bytes_per_word;
	remaining_tx = tx_ptr ? words : 0;
	remaining_rx = rx_ptr ? words : 0;

>> +static int apple_spi_remove(struct platform_device *pdev)
>> +{
>> +	struct spi_controller *ctlr = platform_get_drvdata(pdev);
>> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
>> +
>> +	pm_runtime_disable(&pdev->dev);
>> +
>> +	/* Disable all the interrupts just in case */
>> +	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
>> +	reg_write(spi, APPLE_SPI_IE_XFER, 0);
> 
> Since the driver registers with the SPI subsystem using devm and
> remove() gets run before devm gets unwound we still potentially have
> transfers running when the driver gets removed and this probably isn't
> going to cause them to go well - obviously it's an edge case and it's
> unclear when someone would be removing the driver but we still shouldn't
> do this.

With the other devm changes Sven suggested we don't need a remove 
function at all, so I've just gotten rid of it wholesale.

>> +static const struct of_device_id apple_spi_of_match[] = {
>> +	{ .compatible = "apple,spi", },
>> +	{}
>> +};
>> +MODULE_DEVICE_TABLE(of, apple_spi_of_match);
> 
> This is an awfully generic compatible.  It's common to use the SoC name
> for the IP compatibles when they're not otherwise identified?

Apple like to keep blocks compatible across SoC generations - I think 
this one dates, at least to some extent, to the original iPhone or 
thereabouts. We do use per-SoC compatibles in the DTs in case we need to 
throw in per-SoC quirks in the future ("apple,t8103-spi", "apple,spi"), 
but for drivers like this we prefer to use generic compatibles as long 
as backwards compatibility doesn't break. If Apple do something totally 
incompatible (like they did with AIC2 in the latest chips), we'll bump 
to something like apple,spi2. This happens quite rarely, so it makes 
sense to just keep things generic except for these instances. That way 
old kernels will happily bind to the block in newer SoCs if it is 
compatible.

If we had a detailed lineage of what SoCs used what blocks and when 
things changed we could try something else, like using the first SoC 
where the specific block version was introduced, but we don't... so I 
think it makes sense to just go with generic ones where we don't think 
things have changed much since the dark ages. FWIW, Apple calls this one 
spi-1,spimc and claims "spi-version = 1" and the driver has Samsung in 
the name... so the history of this block definitely goes back quite a 
ways :-)
Mark Brown Dec. 13, 2021, 3:56 p.m. UTC | #5
On Mon, Dec 13, 2021 at 12:50:49PM +0900, Hector Martin wrote:
> On 13/12/2021 08.41, Mark Brown wrote:
> > On Sun, Dec 12, 2021 at 12:47:26PM +0900, Hector Martin wrote:

> > > @@ -0,0 +1,566 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * Apple SoC SPI device driver
> > > + *

> > Please keep the entire comment a C++ one so things look more
> > intentional.

> I thought this pattern was pretty much the standard style.

It's common, especially given all the automated conversions, but ugly.

> > > +	/* We will want to poll if the time we need to wait is
> > > +	 * less than the context switching time.
> > > +	 * Let's call that threshold 5us. The operation will take:
> > > +	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
> > > +	 *    200000 * bits_per_word * fifo_threshold <= hz
> > > +	 */
> > > +	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <= t->speed_hz;

> > Some brackets or an intermediate variable wouldn't hurt here, especially
> > given the line length.

> How about this?

> return (200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2) <= t->speed_hz;

That's better but it's still a very long line which is half the issue.

> > > +static const struct of_device_id apple_spi_of_match[] = {
> > > +	{ .compatible = "apple,spi", },
> > > +	{}
> > > +};
> > > +MODULE_DEVICE_TABLE(of, apple_spi_of_match);

> > This is an awfully generic compatible.  It's common to use the SoC name
> > for the IP compatibles when they're not otherwise identified?

> Apple like to keep blocks compatible across SoC generations - I think this
> one dates, at least to some extent, to the original iPhone or thereabouts.
> We do use per-SoC compatibles in the DTs in case we need to throw in per-SoC
> quirks in the future ("apple,t8103-spi", "apple,spi"), but for drivers like
> this we prefer to use generic compatibles as long as backwards compatibility
> doesn't break. If Apple do something totally incompatible (like they did
> with AIC2 in the latest chips), we'll bump to something like apple,spi2.
> This happens quite rarely, so it makes sense to just keep things generic
> except for these instances. That way old kernels will happily bind to the
> block in newer SoCs if it is compatible.

There's currently a bit of a fashion for people with very old SPI blocks
to make incompatible new versions recently, a lot of it seems to be
driven by things like flash engine support.  Sometimes these things end
up getting instantiated together as they have different purposes and the
incompatibilties make the IPs larger.

> If we had a detailed lineage of what SoCs used what blocks and when things
> changed we could try something else, like using the first SoC where the
> specific block version was introduced, but we don't... so I think it makes
> sense to just go with generic ones where we don't think things have changed
> much since the dark ages. FWIW, Apple calls this one spi-1,spimc and claims
> "spi-version = 1" and the driver has Samsung in the name... so the history
> of this block definitely goes back quite a ways :-)

Have you done a contrast and compare with the Samsung driver?  Given
both this and your comments above about this dating back to the original
iPhone...
Hector Martin Dec. 13, 2021, 5:10 p.m. UTC | #6
On 14/12/2021 00.56, Mark Brown wrote:
> On Mon, Dec 13, 2021 at 12:50:49PM +0900, Hector Martin wrote:
>> On 13/12/2021 08.41, Mark Brown wrote:
>>> On Sun, Dec 12, 2021 at 12:47:26PM +0900, Hector Martin wrote:
> 
>>>> @@ -0,0 +1,566 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Apple SoC SPI device driver
>>>> + *
> 
>>> Please keep the entire comment a C++ one so things look more
>>> intentional.
> 
>> I thought this pattern was pretty much the standard style.
> 
> It's common, especially given all the automated conversions, but ugly.

Sure, I'll change it if you insist :)

>>> Some brackets or an intermediate variable wouldn't hurt here, especially
>>> given the line length.
> 
>> How about this?
> 
>> return (200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2) <= t->speed_hz;
> 
> That's better but it's still a very long line which is half the issue.

I think it's quite readable at this point (especially with the comment 
above explaining it anyway). Note that these days a lot of people 
consider lines up to 100 chars okay in the kernel, and checkpatch uses 
that limit. Do you have a specific change in mind?

>>>> +static const struct of_device_id apple_spi_of_match[] = {
>>>> +	{ .compatible = "apple,spi", },
>>>> +	{}
>>>> +};
>>>> +MODULE_DEVICE_TABLE(of, apple_spi_of_match);
> 
>>> This is an awfully generic compatible.  It's common to use the SoC name
>>> for the IP compatibles when they're not otherwise identified?
> 
>> Apple like to keep blocks compatible across SoC generations - I think this
>> one dates, at least to some extent, to the original iPhone or thereabouts.
>> We do use per-SoC compatibles in the DTs in case we need to throw in per-SoC
>> quirks in the future ("apple,t8103-spi", "apple,spi"), but for drivers like
>> this we prefer to use generic compatibles as long as backwards compatibility
>> doesn't break. If Apple do something totally incompatible (like they did
>> with AIC2 in the latest chips), we'll bump to something like apple,spi2.
>> This happens quite rarely, so it makes sense to just keep things generic
>> except for these instances. That way old kernels will happily bind to the
>> block in newer SoCs if it is compatible.
> 
> There's currently a bit of a fashion for people with very old SPI blocks
> to make incompatible new versions recently, a lot of it seems to be
> driven by things like flash engine support.  Sometimes these things end
> up getting instantiated together as they have different purposes and the
> incompatibilties make the IPs larger.

I think if they haven't changed it by now they probably won't; e.g. they 
tacked on DMA using a coprocessor instead of changing the block itself. 
I don't think Apple uses SPI for anything performance-critical. They 
don't even bother with QSPI for the NOR flash (which is mostly only used 
for bootloaders and variable storage).

>> If we had a detailed lineage of what SoCs used what blocks and when things
>> changed we could try something else, like using the first SoC where the
>> specific block version was introduced, but we don't... so I think it makes
>> sense to just go with generic ones where we don't think things have changed
>> much since the dark ages. FWIW, Apple calls this one spi-1,spimc and claims
>> "spi-version = 1" and the driver has Samsung in the name... so the history
>> of this block definitely goes back quite a ways :-)
> 
> Have you done a contrast and compare with the Samsung driver?  Given
> both this and your comments above about this dating back to the original
> iPhone...

You mean the *two* Samsung drivers? :-)

It seems Samsung like to keep making up incompatible SPI blocks. This 
one shares a *few* bits in a *couple* registers with spi-s3c24xx driver, 
which point to a common lineage, but those registers aren't even at the 
same addresses. Not enough in common for it to make sense to try to use 
one driver for both (unlike with UART, where it was close enough to be 
added as a new Samsung UART variant, or I2C, where we could refactor the 
pasemi driver to add a platform backend alongside the existing PCI 
support and mostly use it as-is).
Mark Brown Dec. 13, 2021, 5:54 p.m. UTC | #7
On Tue, Dec 14, 2021 at 02:10:26AM +0900, Hector Martin wrote:
> On 14/12/2021 00.56, Mark Brown wrote:
> > On Mon, Dec 13, 2021 at 12:50:49PM +0900, Hector Martin wrote:

> > > > Some brackets or an intermediate variable wouldn't hurt here, especially
> > > > given the line length.

> > > How about this?

> > > return (200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2) <= t->speed_hz;

> > That's better but it's still a very long line which is half the issue.

> I think it's quite readable at this point (especially with the comment above
> explaining it anyway). Note that these days a lot of people consider lines
> up to 100 chars okay in the kernel, and checkpatch uses that limit. Do you
> have a specific change in mind?

The 100 characters is a "don't send silly checkpatch fixes" thing not a
target to aim for (see also the ternery operator stuff).  Like I said an
intermediate variable wouldn't hurt, for example for the FIFO trigger
level into a fifo_trigger variable.

> > There's currently a bit of a fashion for people with very old SPI blocks
> > to make incompatible new versions recently, a lot of it seems to be
> > driven by things like flash engine support.  Sometimes these things end
> > up getting instantiated together as they have different purposes and the
> > incompatibilties make the IPs larger.

> I think if they haven't changed it by now they probably won't; e.g. they
> tacked on DMA using a coprocessor instead of changing the block itself. I
> don't think Apple uses SPI for anything performance-critical. They don't
> even bother with QSPI for the NOR flash (which is mostly only used for
> bootloaders and variable storage).

This feels like tempting fate but I guess...

> > Have you done a contrast and compare with the Samsung driver?  Given
> > both this and your comments above about this dating back to the original
> > iPhone...

> You mean the *two* Samsung drivers? :-)

> It seems Samsung like to keep making up incompatible SPI blocks. This one
> shares a *few* bits in a *couple* registers with spi-s3c24xx driver, which
> point to a common lineage, but those registers aren't even at the same
> addresses. Not enough in common for it to make sense to try to use one
> driver for both (unlike with UART, where it was close enough to be added as
> a new Samsung UART variant, or I2C, where we could refactor the pasemi
> driver to add a platform backend alongside the existing PCI support and
> mostly use it as-is).

Their older SPI block has quite a few issues IIRC, I think DMA was the
big difference between the two but ICBW.
Hector Martin Dec. 18, 2021, 4:35 a.m. UTC | #8
On 14/12/2021 08.42, Andy Shevchenko wrote:
> On Sunday, December 12, 2021, Hector Martin <marcan@marcan.st
> <mailto:marcan@marcan.st>> wrote:
> 
>     This SPI controller is present in Apple SoCs such as the M1 (t8103) and
>     M1 Pro/Max (t600x). It is a relatively straightforward design with two
>     16-entry FIFOs, arbitrary transfer sizes (up to 2**32 - 1) and fully
>     configurable word size up to 32 bits. It supports one hardware CS line
>     which can also be driven via the pinctrl/GPIO driver instead, if
>     desired. TX and RX can be independently enabled.
> 
>     There are a surprising number of knobs for tweaking details of the
>     transfer, most of which we do not use right now. Hardware CS control
>     is available, but we haven't found a way to make it stay low across
>     multiple logical transfers, so we just use software CS control for now.
> 
> 
> So, AFAIU there is no limitation to the software CS lines (you actually
> meant GPIO, right?). 

No, this is software control over a single built-in CS line that is part
of the SPI peripheral. You can of course use the GPIO mechanism too,
which has no limit on the number of CS lines. That said, I don't think
Apple uses more than one CS per controller on any current machine, so we
just use internal CS.

>     +struct apple_spi {
>     +       void __iomem      *regs;        /* MMIO register address */
>     +       struct clk        *clk;         /* bus clock */
>     +       struct completion done;         /* wake-up from interrupt */
> 
> 
> Making it first member may save a few cycles on pointer arithmetic 

The completion? The IRQ handler has to access regs more often than the
completion, so it sounds like the current order should be faster.

>     +static int apple_spi_wait(struct apple_spi *spi, u32 fifo_bit, u32
>     xfer_bit, int poll)
>     +{
>     +       int ret = 0;
>     +
>     +       if (poll) {
>     +               u32 fifo, xfer;
>     +               unsigned long timeout = jiffies +
>     APPLE_SPI_TIMEOUT_MS * HZ / 1000;
>     +
>     +               do {
>     +                       fifo = reg_read(spi, APPLE_SPI_IF_FIFO);
>     +                       xfer = reg_read(spi, APPLE_SPI_IF_XFER);
>     +                       if (time_after(jiffies, timeout)) {
>     +                               ret = -ETIMEDOUT;
>     +                               break;
>     +                       }
>     +               } while (!((fifo & fifo_bit) || (xfer & xfer_bit)));
> 
> 
> You may use read_poll_timeout() with a specific _read() function, but it
> ma be not worth doing that, just compare and choose the best.

Probably not worth it; I could have a function read both registers and
stuff it into a u64, but I think it'd end up being about the same amount
of code in the end with the extra scaffolding.

>     +       case 4: {
>     +               const u32 *p = *tx_ptr;
>     +
>     +               while (words--)
>     +                       reg_write(spi, APPLE_SPI_TXDATA, *p++);
>     +               break;
>     +       }
>     +       default:
>     +               WARN_ON(1);
>     +       }
>     +
>     +       *tx_ptr = ((u8 *)*tx_ptr) + bytes_per_word * wrote;
> 
> 
> Not sure if it’s good written code from endianness / alignment handling
> perspective (while it may still work), perhaps rewrite it a bit?

I'm not entirely sure what the alignment guarantees for SPI buffers are.
Some drivers use unaligned accessors (e.g. spi-uniphier.c), while others
don't (e.g. spi-xilinx.c). That makes me think they're aligned in the
general case (and they usually would be if drivers intend to use them in
16-bit or 32-bit mode; hopefully they're allocated as arrays of those
units in that case).

spi.h has this to say:

 * In-memory data values are always in native CPU byte order, translated
 * from the wire byte order (big-endian except with SPI_LSB_FIRST).  So
 * for example when bits_per_word is sixteen, buffers are 2N bytes long
 * (@len = 2N) and hold N sixteen bit words in CPU byte order.

So endianness should be correct, at least for power of two word sizes. I
also believe it should work for non-POT word sizes, and assuming packing
doesn't change in SPI_LSB_FIRST, also for that case. It's kind of hard
to properly validate this without a real device that uses these modes,
so I think at this point we can just go with the current logic and if we
run into a problem in the future, we can fix it then :)

>     +       ctlr = spi_alloc_master(&pdev->dev, sizeof(struct apple_spi));
>     +       if (!ctlr) {
>     +               dev_err(&pdev->dev, "out of memory\n");
>     +               return -ENOMEM;
> 
> 
> It’s fine to use dev_err_probe() here as well

Yeah, I switched everything to dev_err_probe()

>     +       pm_runtime_set_active(&pdev->dev);
>     +       pm_runtime_enable(&pdev->dev);
>     +
>     +       pdev->dev.dma_mask = NULL;
> 
> 
> Why do you need this? It won’t work anyway in SPI case.

This was cargo-culted from spi-sifive and I noticed it actually causes a
warning to be printed on rebinding the driver to the same device;
already got rid of it :)
Lukas Wunner Jan. 1, 2022, 7:25 a.m. UTC | #9
On Sun, Dec 12, 2021 at 12:47:26PM +0900, Hector Martin wrote:
> +	ret = devm_spi_register_controller(&pdev->dev, ctlr);
[...]
> +static int apple_spi_remove(struct platform_device *pdev)
> +{
> +	struct spi_controller *ctlr = platform_get_drvdata(pdev);
> +	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
> +
> +	pm_runtime_disable(&pdev->dev);
> +
> +	/* Disable all the interrupts just in case */
> +	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
> +	reg_write(spi, APPLE_SPI_IE_XFER, 0);
> +
> +	clk_disable_unprepare(spi->clk);

You need to use spi_register_controller() in apple_spi_probe()
(*not* the devm variant) and invoke spi_unregister_controller()
first thing in apple_spi_remove().

Otherwise you've got an ordering issue on driver unbind:
__device_release_driver() first calls the ->remove hook and only
then devres_release_all().  You're disabling the clock and interrupts
in the ->remove hook, rendering the controller unusable.  Yet the
expectation is that until spi_unregister_controller() returns,
the controller works and its slaves are accessible.

This is especially important if there are slaves attached to the
controller which perform SPI transfers in their ->remove hooks,
e.g. to quiesce interrupts on the slaves.  Those transfers won't
work the way you've structured the code now.

spi-sifive.c, from which you've derived this, is broken.  As are
a couple dozen other drivers.  I've fixed some of them,
but haven't gotten around to fixing them all.  Just trying to
prevent more brokenness from entering the codebase.

Thanks,

Lukas
Mark Brown Jan. 4, 2022, 12:58 p.m. UTC | #10
On Sat, Jan 01, 2022 at 08:25:48AM +0100, Lukas Wunner wrote:

> This is especially important if there are slaves attached to the
> controller which perform SPI transfers in their ->remove hooks,
> e.g. to quiesce interrupts on the slaves.  Those transfers won't
> work the way you've structured the code now.

The client drivers shouldn't notice - their remove callbacks will have
completed before we start removing the controller.
diff mbox series

Patch

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b2a8821971e1..d4369c73d4ea 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -79,6 +79,14 @@  config SPI_ALTERA_DFL
 	  Altera SPI master controller.  The SPI master is connected
 	  to a SPI slave to Avalon bridge in a Intel MAX BMC.
 
+config SPI_APPLE
+	tristate "Apple SoC SPI Controller platform driver"
+	depends on ARCH_APPLE || COMPILE_TEST
+	help
+	  This enables support for the SPI controller present on
+	  many Apple SoCs, including the t8103 (M1) and t600x
+	  (M1 Pro/Max).
+
 config SPI_AR934X
 	tristate "Qualcomm Atheros AR934X/QCA95XX SPI controller driver"
 	depends on ATH79 || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index dd7393a6046f..35624999d6aa 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -17,6 +17,7 @@  obj-$(CONFIG_SPI_LOOPBACK_TEST)		+= spi-loopback-test.o
 obj-$(CONFIG_SPI_ALTERA)		+= spi-altera-platform.o
 obj-$(CONFIG_SPI_ALTERA_CORE)		+= spi-altera-core.o
 obj-$(CONFIG_SPI_ALTERA_DFL)		+= spi-altera-dfl.o
+obj-$(CONFIG_SPI_APPLE)			+= spi-apple.o
 obj-$(CONFIG_SPI_AR934X)		+= spi-ar934x.o
 obj-$(CONFIG_SPI_ARMADA_3700)		+= spi-armada-3700.o
 obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
diff --git a/drivers/spi/spi-apple.c b/drivers/spi/spi-apple.c
new file mode 100644
index 000000000000..67d93048bb58
--- /dev/null
+++ b/drivers/spi/spi-apple.c
@@ -0,0 +1,566 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Apple SoC SPI device driver
+ *
+ * Copyright The Asahi Linux Contributors
+ *
+ * Based on spi-sifive.c, Copyright 2018 SiFive, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+
+#define APPLE_SPI_DRIVER_NAME           "apple_spi"
+
+#define APPLE_SPI_CTRL			0x000
+#define APPLE_SPI_CTRL_RUN		BIT(0)
+#define APPLE_SPI_CTRL_TX_RESET		BIT(2)
+#define APPLE_SPI_CTRL_RX_RESET		BIT(3)
+
+#define APPLE_SPI_CFG			0x004
+#define APPLE_SPI_CFG_CPHA		BIT(1)
+#define APPLE_SPI_CFG_CPOL		BIT(2)
+#define APPLE_SPI_CFG_MODE		GENMASK(6, 5)
+#define APPLE_SPI_CFG_MODE_POLLED	0
+#define APPLE_SPI_CFG_MODE_IRQ		1
+#define APPLE_SPI_CFG_MODE_DMA		2
+#define APPLE_SPI_CFG_IE_RXCOMPLETE	BIT(7)
+#define APPLE_SPI_CFG_IE_TXRXTHRESH	BIT(8)
+#define APPLE_SPI_CFG_LSB_FIRST		BIT(13)
+#define APPLE_SPI_CFG_WORD_SIZE		GENMASK(16, 15)
+#define APPLE_SPI_CFG_WORD_SIZE_8B	0
+#define APPLE_SPI_CFG_WORD_SIZE_16B	1
+#define APPLE_SPI_CFG_WORD_SIZE_32B	2
+#define APPLE_SPI_CFG_FIFO_THRESH	GENMASK(18, 17)
+#define APPLE_SPI_CFG_FIFO_THRESH_8B	0
+#define APPLE_SPI_CFG_FIFO_THRESH_4B	1
+#define APPLE_SPI_CFG_FIFO_THRESH_1B	2
+#define APPLE_SPI_CFG_IE_TXCOMPLETE	BIT(21)
+
+#define APPLE_SPI_STATUS		0x008
+#define APPLE_SPI_STATUS_RXCOMPLETE	BIT(0)
+#define APPLE_SPI_STATUS_TXRXTHRESH	BIT(1)
+#define APPLE_SPI_STATUS_TXCOMPLETE	BIT(2)
+
+#define APPLE_SPI_PIN			0x00c
+#define APPLE_SPI_PIN_KEEP_MOSI		BIT(0)
+#define APPLE_SPI_PIN_CS		BIT(1)
+
+#define APPLE_SPI_TXDATA		0x010
+#define APPLE_SPI_RXDATA		0x020
+#define APPLE_SPI_CLKDIV		0x030
+#define APPLE_SPI_CLKDIV_MAX		0x7ff
+#define APPLE_SPI_RXCNT			0x034
+#define APPLE_SPI_WORD_DELAY		0x038
+#define APPLE_SPI_TXCNT			0x04c
+
+#define APPLE_SPI_FIFOSTAT		0x10c
+#define APPLE_SPI_FIFOSTAT_TXFULL	BIT(4)
+#define APPLE_SPI_FIFOSTAT_LEVEL_TX	GENMASK(15, 8)
+#define APPLE_SPI_FIFOSTAT_RXEMPTY	BIT(20)
+#define APPLE_SPI_FIFOSTAT_LEVEL_RX	GENMASK(31, 24)
+
+#define APPLE_SPI_IE_XFER		0x130
+#define APPLE_SPI_IF_XFER		0x134
+#define APPLE_SPI_XFER_RXCOMPLETE	BIT(0)
+#define APPLE_SPI_XFER_TXCOMPLETE	BIT(1)
+
+#define APPLE_SPI_IE_FIFO		0x138
+#define APPLE_SPI_IF_FIFO		0x13c
+#define APPLE_SPI_FIFO_RXTHRESH		BIT(4)
+#define APPLE_SPI_FIFO_TXTHRESH		BIT(5)
+#define APPLE_SPI_FIFO_RXFULL		BIT(8)
+#define APPLE_SPI_FIFO_TXEMPTY		BIT(9)
+#define APPLE_SPI_FIFO_RXUNDERRUN	BIT(16)
+#define APPLE_SPI_FIFO_TXOVERFLOW	BIT(17)
+
+#define APPLE_SPI_SHIFTCFG		0x150
+#define APPLE_SPI_SHIFTCFG_CLK_ENABLE	BIT(0)
+#define APPLE_SPI_SHIFTCFG_CS_ENABLE	BIT(1)
+#define APPLE_SPI_SHIFTCFG_AND_CLK_DATA	BIT(8)
+#define APPLE_SPI_SHIFTCFG_CS_AS_DATA	BIT(9)
+#define APPLE_SPI_SHIFTCFG_TX_ENABLE	BIT(10)
+#define APPLE_SPI_SHIFTCFG_RX_ENABLE	BIT(11)
+#define APPLE_SPI_SHIFTCFG_BITS		GENMASK(21, 16)
+#define APPLE_SPI_SHIFTCFG_OVERRIDE_CS	BIT(24)
+
+#define APPLE_SPI_PINCFG		0x154
+#define APPLE_SPI_PINCFG_KEEP_CLK	BIT(0)
+#define APPLE_SPI_PINCFG_KEEP_CS	BIT(1)
+#define APPLE_SPI_PINCFG_KEEP_MOSI	BIT(2)
+#define APPLE_SPI_PINCFG_CLK_IDLE_VAL	BIT(8)
+#define APPLE_SPI_PINCFG_CS_IDLE_VAL	BIT(9)
+#define APPLE_SPI_PINCFG_MOSI_IDLE_VAL	BIT(10)
+
+#define APPLE_SPI_DELAY_PRE		0x160
+#define APPLE_SPI_DELAY_POST		0x168
+#define APPLE_SPI_DELAY_ENABLE		BIT(0)
+#define APPLE_SPI_DELAY_NO_INTERBYTE	BIT(1)
+#define APPLE_SPI_DELAY_SET_SCK		BIT(4)
+#define APPLE_SPI_DELAY_SET_MOSI	BIT(6)
+#define APPLE_SPI_DELAY_SCK_VAL		BIT(8)
+#define APPLE_SPI_DELAY_MOSI_VAL	BIT(12)
+
+#define APPLE_SPI_FIFO_DEPTH		16
+
+/*
+ * The slowest refclock available is 24MHz, the highest divider is 0x7ff,
+ * the largest word size is 32 bits, the FIFO depth is 16, the maximum
+ * intra-word delay is 0xffff refclocks. So the maximum time a transfer
+ * cycle can take is:
+ *
+ * (0x7ff * 32 + 0xffff) * 16 / 24e6 Hz ~= 87ms
+ *
+ * Double it and round it up to 200ms for good measure.
+ */
+#define APPLE_SPI_TIMEOUT_MS		200
+
+struct apple_spi {
+	void __iomem      *regs;        /* MMIO register address */
+	struct clk        *clk;         /* bus clock */
+	struct completion done;         /* wake-up from interrupt */
+};
+
+static inline void reg_write(struct apple_spi *spi, int offset, u32 value)
+{
+	writel_relaxed(value, spi->regs + offset);
+}
+
+static inline u32 reg_read(struct apple_spi *spi, int offset)
+{
+	return readl_relaxed(spi->regs + offset);
+}
+
+static inline void reg_mask(struct apple_spi *spi, int offset, u32 clear, u32 set)
+{
+	u32 val = reg_read(spi, offset);
+
+	val &= ~clear;
+	val |= set;
+	reg_write(spi, offset, val);
+}
+
+static void apple_spi_init(struct apple_spi *spi)
+{
+	/* Set CS high (inactive) and disable override and auto-CS */
+	reg_write(spi, APPLE_SPI_PIN, APPLE_SPI_PIN_CS);
+	reg_mask(spi, APPLE_SPI_SHIFTCFG, APPLE_SPI_SHIFTCFG_OVERRIDE_CS, 0);
+	reg_mask(spi, APPLE_SPI_PINCFG, APPLE_SPI_PINCFG_CS_IDLE_VAL, APPLE_SPI_PINCFG_KEEP_CS);
+
+	/* Reset FIFOs */
+	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RX_RESET | APPLE_SPI_CTRL_TX_RESET);
+
+	/* Configure defaults */
+	reg_write(spi, APPLE_SPI_CFG,
+		  FIELD_PREP(APPLE_SPI_CFG_FIFO_THRESH, APPLE_SPI_CFG_FIFO_THRESH_8B) |
+		  FIELD_PREP(APPLE_SPI_CFG_MODE, APPLE_SPI_CFG_MODE_IRQ) |
+		  FIELD_PREP(APPLE_SPI_CFG_WORD_SIZE, APPLE_SPI_CFG_WORD_SIZE_8B));
+
+	/* Disable IRQs */
+	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
+	reg_write(spi, APPLE_SPI_IE_XFER, 0);
+
+	/* Disable delays */
+	reg_write(spi, APPLE_SPI_DELAY_PRE, 0);
+	reg_write(spi, APPLE_SPI_DELAY_POST, 0);
+}
+
+static int apple_spi_prepare_message(struct spi_controller *ctlr, struct spi_message *msg)
+{
+	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
+	struct spi_device *device = msg->spi;
+
+	u32 cfg = ((device->mode & SPI_CPHA ? APPLE_SPI_CFG_CPHA : 0) |
+		   (device->mode & SPI_CPOL ? APPLE_SPI_CFG_CPOL : 0) |
+		   (device->mode & SPI_LSB_FIRST ? APPLE_SPI_CFG_LSB_FIRST : 0));
+
+	/* Update core config */
+	reg_mask(spi, APPLE_SPI_CFG,
+		 APPLE_SPI_CFG_CPHA | APPLE_SPI_CFG_CPOL | APPLE_SPI_CFG_LSB_FIRST, cfg);
+
+	return 0;
+}
+
+static void apple_spi_set_cs(struct spi_device *device, bool is_high)
+{
+	struct apple_spi *spi = spi_controller_get_devdata(device->controller);
+
+	reg_mask(spi, APPLE_SPI_PIN, APPLE_SPI_PIN_CS, is_high ? APPLE_SPI_PIN_CS : 0);
+}
+
+static bool apple_spi_prep_transfer(struct apple_spi *spi, struct spi_transfer *t)
+{
+	u32 cr;
+
+	/* Calculate and program the clock rate */
+	cr = DIV_ROUND_UP(clk_get_rate(spi->clk), t->speed_hz);
+	reg_write(spi, APPLE_SPI_CLKDIV, min_t(u32, cr, APPLE_SPI_CLKDIV_MAX));
+
+	/* Update bits per word */
+	reg_mask(spi, APPLE_SPI_SHIFTCFG, APPLE_SPI_SHIFTCFG_BITS,
+		 FIELD_PREP(APPLE_SPI_SHIFTCFG_BITS, t->bits_per_word));
+
+	/* We will want to poll if the time we need to wait is
+	 * less than the context switching time.
+	 * Let's call that threshold 5us. The operation will take:
+	 *    bits_per_word * fifo_threshold / hz <= 5 * 10^-6
+	 *    200000 * bits_per_word * fifo_threshold <= hz
+	 */
+	return 200000 * t->bits_per_word * APPLE_SPI_FIFO_DEPTH / 2 <= t->speed_hz;
+}
+
+static irqreturn_t apple_spi_irq(int irq, void *dev_id)
+{
+	struct apple_spi *spi = dev_id;
+	u32 fifo = reg_read(spi, APPLE_SPI_IF_FIFO) & reg_read(spi, APPLE_SPI_IE_FIFO);
+	u32 xfer = reg_read(spi, APPLE_SPI_IF_XFER) & reg_read(spi, APPLE_SPI_IE_XFER);
+
+	if (fifo || xfer) {
+		/* Disable interrupts until next transfer */
+		reg_write(spi, APPLE_SPI_IE_XFER, 0);
+		reg_write(spi, APPLE_SPI_IE_FIFO, 0);
+		complete(&spi->done);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int apple_spi_wait(struct apple_spi *spi, u32 fifo_bit, u32 xfer_bit, int poll)
+{
+	int ret = 0;
+
+	if (poll) {
+		u32 fifo, xfer;
+		unsigned long timeout = jiffies + APPLE_SPI_TIMEOUT_MS * HZ / 1000;
+
+		do {
+			fifo = reg_read(spi, APPLE_SPI_IF_FIFO);
+			xfer = reg_read(spi, APPLE_SPI_IF_XFER);
+			if (time_after(jiffies, timeout)) {
+				ret = -ETIMEDOUT;
+				break;
+			}
+		} while (!((fifo & fifo_bit) || (xfer & xfer_bit)));
+	} else {
+		reinit_completion(&spi->done);
+		reg_write(spi, APPLE_SPI_IE_XFER, xfer_bit);
+		reg_write(spi, APPLE_SPI_IE_FIFO, fifo_bit);
+
+		if (!wait_for_completion_timeout(&spi->done,
+						 msecs_to_jiffies(APPLE_SPI_TIMEOUT_MS)))
+			ret = -ETIMEDOUT;
+
+		reg_write(spi, APPLE_SPI_IE_XFER, 0);
+		reg_write(spi, APPLE_SPI_IE_FIFO, 0);
+	}
+
+	return ret;
+}
+
+static void apple_spi_tx(struct apple_spi *spi, const void **tx_ptr, u32 *left,
+			 unsigned int bytes_per_word)
+{
+	u32 inuse, words, wrote;
+
+	if (!*tx_ptr)
+		return;
+
+	inuse = FIELD_GET(APPLE_SPI_FIFOSTAT_LEVEL_TX, reg_read(spi, APPLE_SPI_FIFOSTAT));
+	words = wrote = min_t(u32, *left, APPLE_SPI_FIFO_DEPTH - inuse);
+
+	if (!words)
+		return;
+
+	*left -= words;
+
+	switch (bytes_per_word) {
+	case 1: {
+		const u8 *p = *tx_ptr;
+
+		while (words--)
+			reg_write(spi, APPLE_SPI_TXDATA, *p++);
+		break;
+	}
+	case 2: {
+		const u16 *p = *tx_ptr;
+
+		while (words--)
+			reg_write(spi, APPLE_SPI_TXDATA, *p++);
+		break;
+	}
+	case 4: {
+		const u32 *p = *tx_ptr;
+
+		while (words--)
+			reg_write(spi, APPLE_SPI_TXDATA, *p++);
+		break;
+	}
+	default:
+		WARN_ON(1);
+	}
+
+	*tx_ptr = ((u8 *)*tx_ptr) + bytes_per_word * wrote;
+}
+
+static void apple_spi_rx(struct apple_spi *spi, void **rx_ptr, u32 *left,
+			 unsigned int bytes_per_word)
+{
+	u32 words, read;
+
+	if (!*rx_ptr)
+		return;
+
+	words = read = FIELD_GET(APPLE_SPI_FIFOSTAT_LEVEL_RX, reg_read(spi, APPLE_SPI_FIFOSTAT));
+	WARN_ON(words > *left);
+
+	if (!words)
+		return;
+
+	*left -= min_t(u32, *left, words);
+
+	switch (bytes_per_word) {
+	case 1: {
+		u8 *p = *rx_ptr;
+
+		while (words--)
+			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
+		break;
+	}
+	case 2: {
+		u16 *p = *rx_ptr;
+
+		while (words--)
+			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
+		break;
+	}
+	case 4: {
+		u32 *p = *rx_ptr;
+
+		while (words--)
+			*p++ = reg_read(spi, APPLE_SPI_RXDATA);
+		break;
+	}
+	default:
+		WARN_ON(1);
+	}
+
+	*rx_ptr = ((u8 *)*rx_ptr) + bytes_per_word * read;
+}
+
+static int apple_spi_transfer_one(struct spi_controller *ctlr, struct spi_device *device,
+				  struct spi_transfer *t)
+{
+	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
+	bool poll = apple_spi_prep_transfer(spi, t);
+	const void *tx_ptr = t->tx_buf;
+	void *rx_ptr = t->rx_buf;
+	unsigned int bytes_per_word = t->bits_per_word > 16 ? 4 : t->bits_per_word > 8 ? 2 : 1;
+	u32 words = t->len / bytes_per_word;
+	u32 remaining_tx = tx_ptr ? words : 0;
+	u32 remaining_rx = rx_ptr ? words : 0;
+	u32 xfer_flags = 0;
+	u32 fifo_flags;
+	int retries = 100;
+	int ret = 0;
+
+	/* Reset FIFOs */
+	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RX_RESET | APPLE_SPI_CTRL_TX_RESET);
+
+	/* Clear IRQ flags */
+	reg_write(spi, APPLE_SPI_IF_XFER, ~0);
+	reg_write(spi, APPLE_SPI_IF_FIFO, ~0);
+
+	/* Determine transfer completion flags we wait for */
+	if (tx_ptr)
+		xfer_flags |= APPLE_SPI_XFER_TXCOMPLETE;
+	if (rx_ptr)
+		xfer_flags |= APPLE_SPI_XFER_RXCOMPLETE;
+
+	/* Set transfer length */
+	reg_write(spi, APPLE_SPI_TXCNT, remaining_tx);
+	reg_write(spi, APPLE_SPI_RXCNT, remaining_rx);
+
+	/* Prime transmit FIFO */
+	apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
+
+	/* Start transfer */
+	reg_write(spi, APPLE_SPI_CTRL, APPLE_SPI_CTRL_RUN);
+
+	/* TX again since a few words get popped off immediately */
+	apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
+
+	while (xfer_flags) {
+		u32 fifo_flags = 0;
+
+		if (remaining_tx)
+			fifo_flags |= APPLE_SPI_FIFO_TXTHRESH;
+		if (remaining_rx)
+			fifo_flags |= APPLE_SPI_FIFO_RXTHRESH;
+
+		/* Wait for anything to happen */
+		ret = apple_spi_wait(spi, fifo_flags, xfer_flags, poll);
+		if (ret) {
+			dev_err(&ctlr->dev, "transfer timed out (remaining %d tx, %d rx)\n",
+				remaining_tx, remaining_rx);
+			goto err;
+		}
+
+		/* Stop waiting on transfer halves once they complete */
+		xfer_flags &= ~reg_read(spi, APPLE_SPI_IF_XFER);
+
+		/* Transmit and receive everything we can */
+		apple_spi_tx(spi, &tx_ptr, &remaining_tx, bytes_per_word);
+		apple_spi_rx(spi, &rx_ptr, &remaining_rx, bytes_per_word);
+	}
+
+	/*
+	 * Sometimes the transfer completes before the last word is in the RX FIFO.
+	 * Normally one retry is all it takes to get the last word out.
+	 */
+	while (remaining_rx && retries--)
+		apple_spi_rx(spi, &rx_ptr, &remaining_rx, bytes_per_word);
+
+	if (remaining_tx)
+		dev_err(&ctlr->dev, "transfer completed with %d words left to transmit\n",
+			remaining_tx);
+	if (remaining_rx)
+		dev_err(&ctlr->dev, "transfer completed with %d words left to receive\n",
+			remaining_rx);
+
+err:
+	fifo_flags = reg_read(spi, APPLE_SPI_IF_FIFO);
+	WARN_ON(fifo_flags & APPLE_SPI_FIFO_TXOVERFLOW);
+	WARN_ON(fifo_flags & APPLE_SPI_FIFO_RXUNDERRUN);
+
+	/* Stop transfer */
+	reg_write(spi, APPLE_SPI_CTRL, 0);
+
+	return ret;
+}
+
+static int apple_spi_probe(struct platform_device *pdev)
+{
+	struct apple_spi *spi;
+	int ret, irq;
+	struct spi_controller *ctlr;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct apple_spi));
+	if (!ctlr) {
+		dev_err(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	spi = spi_controller_get_devdata(ctlr);
+	init_completion(&spi->done);
+	platform_set_drvdata(pdev, ctlr);
+
+	spi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(spi->regs)) {
+		ret = PTR_ERR(spi->regs);
+		goto put_ctlr;
+	}
+
+	spi->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(spi->clk)) {
+		dev_err(&pdev->dev, "Unable to find bus clock\n");
+		ret = PTR_ERR(spi->clk);
+		goto put_ctlr;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto put_ctlr;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, apple_spi_irq, 0,
+			       dev_name(&pdev->dev), spi);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to bind to interrupt\n");
+		goto put_ctlr;
+	}
+
+	ret = clk_prepare_enable(spi->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable bus clock\n");
+		goto put_ctlr;
+	}
+
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->bus_num = pdev->id;
+	ctlr->num_chipselect = 1;
+	ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
+	ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
+	ctlr->flags = 0;
+	ctlr->prepare_message = apple_spi_prepare_message;
+	ctlr->set_cs = apple_spi_set_cs;
+	ctlr->transfer_one = apple_spi_transfer_one;
+	ctlr->auto_runtime_pm = true;
+
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	pdev->dev.dma_mask = NULL;
+
+	apple_spi_init(spi);
+
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "spi_register_ctlr failed\n");
+		goto disable_pm;
+	}
+
+	return 0;
+
+disable_pm:
+	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(spi->clk);
+put_ctlr:
+	spi_controller_put(ctlr);
+
+	return ret;
+}
+
+static int apple_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct apple_spi *spi = spi_controller_get_devdata(ctlr);
+
+	pm_runtime_disable(&pdev->dev);
+
+	/* Disable all the interrupts just in case */
+	reg_write(spi, APPLE_SPI_IE_FIFO, 0);
+	reg_write(spi, APPLE_SPI_IE_XFER, 0);
+
+	clk_disable_unprepare(spi->clk);
+
+	return 0;
+}
+
+static const struct of_device_id apple_spi_of_match[] = {
+	{ .compatible = "apple,spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, apple_spi_of_match);
+
+static struct platform_driver apple_spi_driver = {
+	.probe = apple_spi_probe,
+	.remove = apple_spi_remove,
+	.driver = {
+		.name = APPLE_SPI_DRIVER_NAME,
+		.of_match_table = apple_spi_of_match,
+	},
+};
+module_platform_driver(apple_spi_driver);
+
+MODULE_AUTHOR("Hector Martin <marcan@marcan.st>");
+MODULE_DESCRIPTION("Apple SoC SPI driver");
+MODULE_LICENSE("GPL");