diff mbox series

[v3,31/37] mtd: nand: denali: fix raw and oob accessors for syndrome page layout

Message ID 1490861708-27813-1-git-send-email-yamada.masahiro@socionext.com
State New
Headers show
Series None | expand

Commit Message

Masahiro Yamada March 30, 2017, 8:15 a.m. UTC
The Denali IP adopts the syndrome page layout; payload and ECC are
interleaved, with BBM area always placed at the beginning of OOB.

The figure below shows the page organization for ecc->steps == 2:

  |----------------|    |-----------|
  |                |    |           |
  |                |    |           |
  |    Payload0    |    |           |
  |                |    |           |
  |                |    |           |
  |                |    |           |
  |----------------|    |  in-band  |
  |      ECC0      |    |   area    |
  |----------------|    |           |
  |                |    |           |
  |                |    |           |
  |    Payload1    |    |           |
  |                |    |           |
  |                |    |           |
  |----------------|    |-----------|
  |      BBM       |    |           |
  |----------------|    |           |
  |Payload1 (cont.)|    |           |
  |----------------|    |out-of-band|
  |      ECC1      |    |    area   |
  |----------------|    |           |
  |    OOB free    |    |           |
  |----------------|    |-----------|

The current raw / oob accessors do not take that into consideration,
so in-band and out-of-band data are transferred as stored in the
device.  In the case above,

  in-band:      Payload0 + ECC0 + Payload1(partial)
  out-of-band:  BBM + Payload1(cont.) + ECC1 + OOB-free

This is wrong.  As the comment block of struct nand_ecc_ctrl says,
driver callbacks must hide the specific layout used by the hardware
and always return contiguous in-band and out-of-band data.

The current implementation is completely screwed-up, so read/write
callbacks must be re-worked.

Also, PIO transfer has been supported in case DMA may not work for
some reasons.  Actually, the Data DMA may not be equipped depending
on the configuration of the RTL.  This can be checked by reading the
bit 4 of the FEATURES register.  Even if the controller has the DMA
support, dma_set_mask() and dma_map_single() could fail.  In either
case, the driver can fall back to the PIO transfer.  Slower access
would be better than giving up.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

---

Changes in v3: None
Changes in v2:
  - Newly added

 drivers/mtd/nand/denali.c | 603 +++++++++++++++++++++++++++++-----------------
 drivers/mtd/nand/denali.h |   1 +
 2 files changed, 386 insertions(+), 218 deletions(-)

-- 
2.7.4

Comments

Boris Brezillon March 30, 2017, 4:30 p.m. UTC | #1
On Thu, 30 Mar 2017 17:15:03 +0900
Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

> Recent versions of this IP support automatic erased page detection.

> If an erased page is detected on reads, the controller does not set

> INTR__ECC_UNCOR_ERR, but INTR__ERASED_PAGE.  If this feature is

> supported, the driver can use this information instead of calling

> nand_check_erased_ecc_chunk().

> 

> The detection of erased page is based on the number of zeros in the

> page; if the number of zeros is less than the value in the field

> ERASED_THRESHOLD, the page is assumed as erased.

> 

> Set the ERASED_THRESHOLD to (chip->ecc.strength + 1).  This is the

> worst case where all the bitflips come from the same ECC sector.

> This field is Reserved for older IP versions, so this commit has

> no impact on them.


Do you have a way to know the actual number of bitflips in an erased
ECC block? BTW, is the threshold a per-page information or a per ECC
block information.

If you can't know the real number of bitflips I don't think it's safe
to set the threshold to chip->ecc.strength + 1.

You can still use the feature to detect erased pages without any
bitflips (set ERASED_THRESHOLD to 1), which should be the case most of
the time, but for cases where you have bitflips I'd recommend using
nand_check_erased_ecc_chunk() if you can't know the actual number of
bitflips in the page.

> 

> One thing worth mentioning is the driver still needs to fill the

> buffer with 0xff in the case because the ECC correction engine has

> already manipulated the data in the buffer.

> 

> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

> ---

> 

> Changes in v3: None

> Changes in v2:

>   - Newly added

> 

>  drivers/mtd/nand/denali.c | 10 +++++++++-

>  drivers/mtd/nand/denali.h |  5 +++++

>  2 files changed, 14 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c

> index 51e8a9a..9ee0f30 100644

> --- a/drivers/mtd/nand/denali.c

> +++ b/drivers/mtd/nand/denali.c

> @@ -604,6 +604,9 @@ static int denali_pio_read(struct denali_nand_info *denali, void *buf,

>  	if (!(irq_status & INTR__PAGE_XFER_INC))

>  		return -EIO;

>  

> +	if (irq_status & INTR__ERASED_PAGE)

> +		memset(buf, 0xff, size);

> +

>  	return irq_status & ecc_err_mask ? -EBADMSG : 0;

>  }

>  

> @@ -676,6 +679,9 @@ static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,

>  	denali_enable_dma(denali, false);

>  	dma_sync_single_for_cpu(denali->dev, dma_addr, size, dir);

>  

> +	if (irq_status & INTR__ERASED_PAGE)

> +		memset(buf, 0xff, size);

> +

>  	return ret;

>  }

>  

> @@ -1475,7 +1481,9 @@ int denali_init(struct denali_nand_info *denali)

>  	chip->ecc.bytes = denali_calc_ecc_bytes(chip->ecc.size,

>  						chip->ecc.strength);

>  

> -	iowrite32(chip->ecc.strength, denali->flash_reg + ECC_CORRECTION);

> +	iowrite32(MAKE_ECC_CORRECTION(chip->ecc.strength,

> +				      chip->ecc.strength + 1),

> +		  denali->flash_reg + ECC_CORRECTION);

>  	iowrite32(mtd->erasesize / mtd->writesize,

>  		  denali->flash_reg + PAGES_PER_BLOCK);

>  	iowrite32(denali->nand.options & NAND_BUSWIDTH_16 ? 1 : 0,

> diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h

> index f92b9db..b5ea8d7 100644

> --- a/drivers/mtd/nand/denali.h

> +++ b/drivers/mtd/nand/denali.h

> @@ -110,6 +110,10 @@

>  

>  #define ECC_CORRECTION				0x1b0

>  #define     ECC_CORRECTION__VALUE			GENMASK(4, 0)

> +#define     ECC_CORRECTION__ERASE_THRESHOLD		GENMASK(31, 16)

> +#define     MAKE_ECC_CORRECTION(val, thresh)		\

> +			(((val) & (ECC_CORRECTION__VALUE)) | \

> +			(((thresh) << 16) & (ECC_CORRECTION__ERASE_THRESHOLD)))

>  

>  #define READ_MODE				0x1c0

>  #define     READ_MODE__VALUE				GENMASK(3, 0)

> @@ -233,6 +237,7 @@

>  #define     INTR__RST_COMP				BIT(13)

>  #define     INTR__PIPE_CMD_ERR				BIT(14)

>  #define     INTR__PAGE_XFER_INC				BIT(15)

> +#define     INTR__ERASED_PAGE				BIT(16)

>  

>  #define PAGE_CNT(bank)				(0x430 + (bank) * 0x50)

>  #define ERR_PAGE_ADDR(bank)			(0x440 + (bank) * 0x50)
Masahiro Yamada April 7, 2017, 6:49 a.m. UTC | #2
Hi Leonard,

2017-04-06 23:08 GMT+09:00 Leonard Crestez <leonard.crestez@nxp.com>:
> On Thu, Mar 30, 2017 at 11:15 AM, Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

>>

>> Some NAND controllers are using DMA engine requiring a specific

>> buffer alignment.  The core provides no guarantee on the nand_buffers

>> pointers, which forces some drivers to allocate their own buffers

>> and pass the NAND_OWN_BUFFERS flag.

>>

>> Rework the nand_buffers allocation logic to allocate each buffer

>> independently.  This should make most NAND controllers/DMA engine

>> happy, and allow us to get rid of these custom buf allocation in

>> NAND controller drivers.

>>

>> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

>

>> @@ -4914,8 +4930,12 @@ void nand_cleanup(struct nand_chip *chip)

>> >         /* Free bad block table memory */

>>         kfree(chip->bbt);

>> -       if (!(chip->options & NAND_OWN_BUFFERS))

>> +       if (!(chip->options & NAND_OWN_BUFFERS)) {

>> +               kfree(chip->buffers->databuf);

>> +               kfree(chip->buffers->ecccode);

>> +               kfree(chip->buffers->ecccalc);

>>                 kfree(chip->buffers);

>> +       }

>

> It seems that chip->buffers might not be allocated at this point, for

> example if nand_cleanup is called during a failed probe. You should

> check if (chip->buffers != NULL) before freeing stuff inside it.


You are right.

The failure path in NAND drivers is messy.   :-(
nand_cleanup() may be called before nand_scan_tail()
finishes successfully...

I will send a fixup patch.  Thanks!



-- 
Best Regards
Masahiro Yamada
Boris Brezillon April 9, 2017, 7:33 a.m. UTC | #3
On Fri, 7 Apr 2017 15:49:23 +0900
Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

> Hi Leonard,

> 

> 2017-04-06 23:08 GMT+09:00 Leonard Crestez <leonard.crestez@nxp.com>:

> > On Thu, Mar 30, 2017 at 11:15 AM, Masahiro Yamada <yamada.masahiro@socionext.com> wrote:  

> >>

> >> Some NAND controllers are using DMA engine requiring a specific

> >> buffer alignment.  The core provides no guarantee on the nand_buffers

> >> pointers, which forces some drivers to allocate their own buffers

> >> and pass the NAND_OWN_BUFFERS flag.

> >>

> >> Rework the nand_buffers allocation logic to allocate each buffer

> >> independently.  This should make most NAND controllers/DMA engine

> >> happy, and allow us to get rid of these custom buf allocation in

> >> NAND controller drivers.

> >>

> >> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>  

> >  

> >> @@ -4914,8 +4930,12 @@ void nand_cleanup(struct nand_chip *chip)  

> >> >         /* Free bad block table memory */  

> >>         kfree(chip->bbt);

> >> -       if (!(chip->options & NAND_OWN_BUFFERS))

> >> +       if (!(chip->options & NAND_OWN_BUFFERS)) {

> >> +               kfree(chip->buffers->databuf);

> >> +               kfree(chip->buffers->ecccode);

> >> +               kfree(chip->buffers->ecccalc);

> >>                 kfree(chip->buffers);

> >> +       }  

> >

> > It seems that chip->buffers might not be allocated at this point, for

> > example if nand_cleanup is called during a failed probe. You should

> > check if (chip->buffers != NULL) before freeing stuff inside it.  

> 

> You are right.

> 

> The failure path in NAND drivers is messy.   :-(


Totally agree, and that's partly because of the complex/non-trivial
NAND APIs :-/.

> nand_cleanup() may be called before nand_scan_tail()

> finishes successfully...


Actually, I think the real bug is in the GPMI driver which is not using
nand_release() appropriately. nand_release() is supposed to be called
on a registered NAND device, so it's wrong to call it before
mtd_register() has been called and returned 0.

Note that nand_cleanup() can only be called after nand_scan_tail() has
returned 0 (which unfortunately is not obvious :-/).

I still plan to take Masahiro's fixup patch because the more
precautions we take the better it is, but I still think the real bug is
in the GPMI driver.

One last comment: a bug still exists in the GPMI driver when
nand_scan_ident() fails after NAND buffers allocation because it never
sets chip->buffers back to NULL (see [1]).

[1]http://lxr.free-electrons.com/source/drivers/mtd/nand/nand_base.c#L4834
Boris Brezillon April 9, 2017, 2:17 p.m. UTC | #4
On Thu, 30 Mar 2017 17:15:04 +0900
Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

> Some NAND controllers are using DMA engine requiring a specific

> buffer alignment.  The core provides no guarantee on the nand_buffers

> pointers, which forces some drivers to allocate their own buffers

> and pass the NAND_OWN_BUFFERS flag.

> 

> Rework the nand_buffers allocation logic to allocate each buffer

> independently.  This should make most NAND controllers/DMA engine

> happy, and allow us to get rid of these custom buf allocation in

> NAND controller drivers.

> 

> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

> ---

> 

> Changes in v3:

>   - Reword git-log

> 

> Changes in v2:

>   - Newly added

> 

>  drivers/mtd/nand/nand_base.c | 34 +++++++++++++++++++++++++++-------

>  1 file changed, 27 insertions(+), 7 deletions(-)

> 

> diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c

> index f828ad7..e9d3195 100644

> --- a/drivers/mtd/nand/nand_base.c

> +++ b/drivers/mtd/nand/nand_base.c

> @@ -4613,13 +4613,25 @@ int nand_scan_tail(struct mtd_info *mtd)

>  	}

>  

>  	if (!(chip->options & NAND_OWN_BUFFERS)) {

> -		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize

> -				+ mtd->oobsize * 3, GFP_KERNEL);

> +		nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);

>  		if (!nbuf)

>  			return -ENOMEM;

> -		nbuf->ecccalc = (uint8_t *)(nbuf + 1);

> -		nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;

> -		nbuf->databuf = nbuf->ecccode + mtd->oobsize;

> +		nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);

> +		if (!nbuf->ecccalc) {

> +			ret = -EINVAL;

> +			goto err_free;


You have a memory leak here, because chip->buffers = nbuf is only done
after all allocations have succeeded.

> +		}

> +		nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL);

> +		if (!nbuf->ecccode) {

> +			ret = -EINVAL;


			ret = -ENOMEM;

I have the following fixup patch, let me know if you're okay with it
and I'll squash it in the original commit.

Thanks,

Boris

--->8---
From 7903e4c997da101bc0f15016936116c4bb9db78c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>

Date: Sun, 9 Apr 2017 16:14:36 +0200
Subject: [PATCH] fixup! mtd: nand: allocate aligned buffers if
 NAND_OWN_BUFFERS is unset

---
 drivers/mtd/nand/nand_base.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

-- 
2.7.4diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 23a415d1f124..ed49a1d634b0 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4501,7 +4501,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
-	struct nand_buffers *nbuf;
+	struct nand_buffers *nbuf = NULL;
 	int ret;
 
 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
@@ -4518,20 +4518,23 @@ int nand_scan_tail(struct mtd_info *mtd)
 		nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);
 		if (!nbuf)
 			return -ENOMEM;
+
 		nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);
 		if (!nbuf->ecccalc) {
-			ret = -EINVAL;
+			ret = -ENOMEM;
 			goto err_free;
 		}
+
 		nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL);
 		if (!nbuf->ecccode) {
-			ret = -EINVAL;
+			ret = -ENOMEM;
 			goto err_free;
 		}
+
 		nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize,
 					GFP_KERNEL);
 		if (!nbuf->databuf) {
-			ret = -EINVAL;
+			ret = -ENOMEM;
 			goto err_free;
 		}
 
@@ -4773,11 +4776,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 	/* Build bad block table */
 	return chip->scan_bbt(mtd);
 err_free:
-	if (!(chip->options & NAND_OWN_BUFFERS)) {
-		kfree(chip->buffers->databuf);
-		kfree(chip->buffers->ecccode);
-		kfree(chip->buffers->ecccalc);
-		kfree(chip->buffers);
+	if (nbuf) {
+		kfree(nbuf->databuf);
+		kfree(nbuf->ecccode);
+		kfree(nbuf->ecccalc);
+		kfree(nbuf);
 	}
 	return ret;
 }
@@ -4829,7 +4832,7 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free bad block table memory */
 	kfree(chip->bbt);
-	if (!(chip->options & NAND_OWN_BUFFERS)) {
+	if (!(chip->options & NAND_OWN_BUFFERS) && chip->buffers) {
 		kfree(chip->buffers->databuf);
 		kfree(chip->buffers->ecccode);
 		kfree(chip->buffers->ecccalc);

Masahiro Yamada April 10, 2017, 12:20 a.m. UTC | #5
Hi Boris,


2017-04-09 23:17 GMT+09:00 Boris Brezillon <boris.brezillon@free-electrons.com>:
> On Thu, 30 Mar 2017 17:15:04 +0900

> Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

>

>> Some NAND controllers are using DMA engine requiring a specific

>> buffer alignment.  The core provides no guarantee on the nand_buffers

>> pointers, which forces some drivers to allocate their own buffers

>> and pass the NAND_OWN_BUFFERS flag.

>>

>> Rework the nand_buffers allocation logic to allocate each buffer

>> independently.  This should make most NAND controllers/DMA engine

>> happy, and allow us to get rid of these custom buf allocation in

>> NAND controller drivers.

>>

>> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

>> ---

>>

>> Changes in v3:

>>   - Reword git-log

>>

>> Changes in v2:

>>   - Newly added

>>

>>  drivers/mtd/nand/nand_base.c | 34 +++++++++++++++++++++++++++-------

>>  1 file changed, 27 insertions(+), 7 deletions(-)

>>

>> diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c

>> index f828ad7..e9d3195 100644

>> --- a/drivers/mtd/nand/nand_base.c

>> +++ b/drivers/mtd/nand/nand_base.c

>> @@ -4613,13 +4613,25 @@ int nand_scan_tail(struct mtd_info *mtd)

>>       }

>>

>>       if (!(chip->options & NAND_OWN_BUFFERS)) {

>> -             nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize

>> -                             + mtd->oobsize * 3, GFP_KERNEL);

>> +             nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);

>>               if (!nbuf)

>>                       return -ENOMEM;

>> -             nbuf->ecccalc = (uint8_t *)(nbuf + 1);

>> -             nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;

>> -             nbuf->databuf = nbuf->ecccode + mtd->oobsize;

>> +             nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);

>> +             if (!nbuf->ecccalc) {

>> +                     ret = -EINVAL;

>> +                     goto err_free;

>

> You have a memory leak here, because chip->buffers = nbuf is only done

> after all allocations have succeeded.



Indeed.


>> +             }

>> +             nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL);

>> +             if (!nbuf->ecccode) {

>> +                     ret = -EINVAL;

>

>                         ret = -ENOMEM;

>

> I have the following fixup patch, let me know if you're okay with it

> and I'll squash it in the original commit.



Thank you for your fixup patch.  The code-diff looks all good.
Please squash this.

Sorry for my many mistakes.







-- 
Best Regards
Masahiro Yamada
Masahiro Yamada June 6, 2017, 2:04 a.m. UTC | #6
Hi Boris,


2017-03-31 1:30 GMT+09:00 Boris Brezillon <boris.brezillon@free-electrons.com>:
> On Thu, 30 Mar 2017 17:15:03 +0900

> Masahiro Yamada <yamada.masahiro@socionext.com> wrote:

>

>> Recent versions of this IP support automatic erased page detection.

>> If an erased page is detected on reads, the controller does not set

>> INTR__ECC_UNCOR_ERR, but INTR__ERASED_PAGE.  If this feature is

>> supported, the driver can use this information instead of calling

>> nand_check_erased_ecc_chunk().

>>

>> The detection of erased page is based on the number of zeros in the

>> page; if the number of zeros is less than the value in the field

>> ERASED_THRESHOLD, the page is assumed as erased.

>>

>> Set the ERASED_THRESHOLD to (chip->ecc.strength + 1).  This is the

>> worst case where all the bitflips come from the same ECC sector.

>> This field is Reserved for older IP versions, so this commit has

>> no impact on them.

>

> Do you have a way to know the actual number of bitflips in an erased

> ECC block?


There is no way to the actual number of bitflips
except that the driver parses the whole buffer counting bitflips.


> BTW, is the threshold a per-page information or a per ECC

> block information.


Per-page.

Honestly, this hardware feature is not nice
because per-chunk threshold makes sense .




> If you can't know the real number of bitflips I don't think it's safe

> to set the threshold to chip->ecc.strength + 1.


Right.

> You can still use the feature to detect erased pages without any

> bitflips (set ERASED_THRESHOLD to 1), which should be the case most of

> the time, but for cases where you have bitflips I'd recommend using

> nand_check_erased_ecc_chunk() if you can't know the actual number of

> bitflips in the page.

>


You are right.


Probably, this feature can work safely
only when ERASED_THRESHOLD == 1.


I decided to drop this patch from v4 for now,
but I will consider it.


-- 
Best Regards
Masahiro Yamada
diff mbox series

Patch

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 6843cfc..51e8a9a 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -246,6 +246,53 @@  static void denali_write_byte(struct mtd_info *mtd, uint8_t byte)
 	index_addr(denali, MODE_11 | BANK(denali->flash_bank) | 2, byte);
 }
 
+static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len; i++)
+		buf[i] = ioread32(denali->flash_mem + 0x10);
+}
+
+static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len; i++)
+		iowrite32(buf[i], denali->flash_mem + 0x10);
+}
+
+static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint16_t *buf16 = (uint16_t *)buf;
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len / 2; i++)
+		buf16[i] = ioread32(denali->flash_mem + 0x10);
+}
+
+static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
+			       int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	const uint16_t *buf16 = (const uint16_t *)buf;
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len / 2; i++)
+		iowrite32(buf16[i], denali->flash_mem + 0x10);
+}
+
 static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
@@ -275,44 +322,6 @@  static int denali_dev_ready(struct mtd_info *mtd)
 	return !!(denali_check_irq(denali) & INTR__INT_ACT);
 }
 
-/*
- * sends a pipeline command operation to the controller. See the Denali NAND
- * controller's user guide for more information (section 4.2.3.6).
- */
-static int denali_send_pipeline_cmd(struct denali_nand_info *denali, int page,
-				    bool ecc_en, bool transfer_spare,
-				    int access_type, int write)
-{
-	int status = PASS;
-	uint32_t addr, cmd;
-
-	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
-
-	denali_reset_irq(denali);
-
-	addr = BANK(denali->flash_bank) | page;
-
-	if (write && access_type != SPARE_ACCESS) {
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else if (write && access_type == SPARE_ACCESS) {
-		/* read spare area */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else {
-		/* setup page read request for access type */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	}
-	return status;
-}
-
 /* helper function that simply writes a buffer to the flash */
 static int write_data_to_flash_mem(struct denali_nand_info *denali,
 				   const uint8_t *buf, int len)
@@ -355,66 +364,6 @@  static int read_data_from_flash_mem(struct denali_nand_info *denali,
 	return i * 4; /* intent is to return the number of bytes read */
 }
 
-/* writes OOB data to the device */
-static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
-	int status = 0;
-
-	if (denali_send_pipeline_cmd(denali, page, false, false, SPARE_ACCESS,
-							1) == PASS) {
-		write_data_to_flash_mem(denali, buf, mtd->oobsize);
-
-		/* wait for operation to complete */
-		irq_status = denali_wait_for_irq(denali, irq_mask);
-
-		if (!(irq_status & INTR__PROGRAM_COMP)) {
-			dev_err(denali->dev, "OOB write failed\n");
-			status = -EIO;
-		}
-	} else {
-		dev_err(denali->dev, "unable to send pipeline command\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-/* reads OOB data from the device */
-static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_mask = INTR__LOAD_COMP;
-	uint32_t irq_status, addr, cmd;
-
-	if (denali_send_pipeline_cmd(denali, page, false, true, SPARE_ACCESS,
-							0) == PASS) {
-		read_data_from_flash_mem(denali, buf, mtd->oobsize);
-
-		/*
-		 * wait for command to be accepted
-		 * can always use status0 bit as the
-		 * mask is identical for each bank.
-		 */
-		irq_status = denali_wait_for_irq(denali, irq_mask);
-
-		if (!(irq_status & INTR__LOAD_COMP))
-			dev_err(denali->dev, "page on OOB timeout %d\n", page);
-
-		/*
-		 * We set the device back to MAIN_ACCESS here as I observed
-		 * instability with the controller if you do a block erase
-		 * and the last transaction was a SPARE_ACCESS. Block erase
-		 * is reliable (according to the MTD test infrastructure)
-		 * if you are in MAIN_ACCESS.
-		 */
-		addr = BANK(denali->flash_bank) | page;
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, MAIN_ACCESS);
-	}
-}
-
 static int denali_check_erased_page(struct mtd_info *mtd,
 				    struct nand_chip *chip, uint8_t *buf,
 				    unsigned long uncor_ecc_flags,
@@ -630,144 +579,292 @@  static void denali_setup_dma(struct denali_nand_info *denali,
 		denali_setup_dma32(denali, dma_addr, page, write);
 }
 
-/*
- * writes a page. user specifies type, and this function handles the
- * configuration details.
- */
-static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int page, bool raw_xfer)
+static int denali_pio_read(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
-	int ret = 0;
+	uint32_t addr = BANK(denali->flash_bank) | page;
+	uint32_t irq_status, ecc_err_mask;
 
-	/*
-	 * if it is a raw xfer, we want to disable ecc and send the spare area.
-	 * !raw_xfer - enable ecc
-	 * raw_xfer - transfer spare
-	 */
-	setup_ecc_for_xfer(denali, !raw_xfer, raw_xfer);
+	/* setup page read request for access type */
+	index_addr(denali, MODE_10 | addr,
+		   raw ? MAIN_SPARE_ACCESS : MAIN_ACCESS);
 
-	/* copy buffer into DMA buffer */
-	memcpy(denali->buf, buf, mtd->writesize);
+	iowrite32(MODE_01 | addr, denali->flash_mem);
 
-	if (raw_xfer) {
-		/* transfer the data to the spare area */
-		memcpy(denali->buf + mtd->writesize,
-			chip->oob_poi,
-			mtd->oobsize);
-	}
+	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	else
+		ecc_err_mask = INTR__ECC_ERR;
+
+	denali_reset_irq(denali);
+
+	read_data_from_flash_mem(denali, buf, size);
+
+	irq_status = denali_wait_for_irq(denali, INTR__PAGE_XFER_INC);
+	if (!(irq_status & INTR__PAGE_XFER_INC))
+		return -EIO;
+
+	return irq_status & ecc_err_mask ? -EBADMSG : 0;
+}
 
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_TO_DEVICE);
+static int denali_pio_write(struct denali_nand_info *denali,
+			    const void *buf, size_t size, int page, int raw)
+{
+	uint32_t addr = BANK(denali->flash_bank) | page;
+	uint32_t irq_status;
+
+	/* setup page read request for access type */
+	index_addr(denali, MODE_10 | addr,
+		   raw ? MAIN_SPARE_ACCESS : MAIN_ACCESS);
+
+	iowrite32(MODE_01 | addr, denali->flash_mem);
 
 	denali_reset_irq(denali);
+
+	write_data_to_flash_mem(denali, buf, size);
+
+	irq_status = denali_wait_for_irq(denali,
+				INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL);
+	if (!(irq_status & INTR__PROGRAM_COMP))
+		return -EIO;
+
+	return 0;
+}
+
+static int denali_pio_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
+{
+	if (write)
+		return denali_pio_write(denali, buf, size, page, raw);
+	else
+		return denali_pio_read(denali, buf, size, page, raw);
+}
+
+static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
+{
+	dma_addr_t dma_addr = denali->dma_addr;
+	uint32_t irq_mask, irq_status, ecc_err_mask;
+	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	int ret = 0;
+
+	dma_sync_single_for_device(denali->dev, dma_addr, size, dir);
+
+	if (write) {
+		irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
+		ecc_err_mask = 0;
+	} else if (denali->caps & DENALI_CAP_HW_ECC_FIXUP) {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	} else {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_ERR;
+	}
+
 	denali_enable_dma(denali, true);
 
-	denali_setup_dma(denali, addr, page, 1);
+	denali_reset_irq(denali);
+	denali_setup_dma(denali, dma_addr, page, write);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
-	if (!(irq_status & INTR__DMA_CMD_COMP)) {
-		dev_err(denali->dev, "timeout on write_page (type = %d)\n",
-			raw_xfer);
+	if (!(irq_status & INTR__DMA_CMD_COMP))
 		ret = -EIO;
-	}
+	else if (irq_status & ecc_err_mask)
+		ret = -EBADMSG;
 
 	denali_enable_dma(denali, false);
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(denali->dev, dma_addr, size, dir);
 
 	return ret;
 }
 
-/* NAND core entry points */
-
-/*
- * this is the callback that the NAND core calls to write a page. Since
- * writing a page with ECC or without is similar, all the work is done
- * by write_page above.
- */
-static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int denali_data_xfer(struct denali_nand_info *denali, void *buf,
+			    size_t size, int page, int raw, int write)
 {
-	/*
-	 * for regular page writes, we let HW handle all the ECC
-	 * data written to the device.
-	 */
-	return write_page(mtd, chip, buf, page, false);
+	setup_ecc_for_xfer(denali, !raw, raw);
+
+	if (denali->dma_avail)
+		return denali_dma_xfer(denali, buf, size, page, raw, write);
+	else
+		return denali_pio_xfer(denali, buf, size, page, raw, write);
 }
 
-/*
- * This is the callback that the NAND core calls to write a page without ECC.
- * raw access is similar to ECC page writes, so all the work is done in the
- * write_page() function above.
- */
-static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required,
-				 int page)
+static int denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip,
+			   int page, int write)
 {
-	/*
-	 * for raw page writes, we want to disable ECC and simply write
-	 * whatever data is in the buffer.
-	 */
-	return write_page(mtd, chip, buf, page, true);
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	uint8_t *bufpoi = chip->oob_poi;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	int bbm_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
+
+	/* BBM at the beginning of the OOB area */
+	chip->cmdfunc(mtd, write ? NAND_CMD_SEQIN : NAND_CMD_READ0,
+		      writesize, page);
+	if (write)
+		chip->write_buf(mtd, bufpoi, bbm_skip);
+	else
+		chip->read_buf(mtd, bufpoi, bbm_skip);
+	bufpoi += bbm_skip;
+
+	/* OOB ECC */
+	for (i = 0; i < ecc_steps; i++) {
+		pos = ecc_size + i * (ecc_size + ecc_bytes);
+		len = ecc_bytes;
+
+		if (pos >= writesize)
+			pos += bbm_skip;
+		else if (pos + len > writesize)
+			len = writesize - pos;
+
+		chip->cmdfunc(mtd, NAND_CMD_RNDOUT, pos, -1);
+		if (write)
+			chip->write_buf(mtd, bufpoi, len);
+		else
+			chip->read_buf(mtd, bufpoi, len);
+		bufpoi += len;
+		if (len < ecc_bytes) {
+			len = ecc_bytes - len;
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT,
+				      writesize + bbm_skip, -1);
+			if (write)
+				chip->write_buf(mtd, bufpoi, len);
+			else
+				chip->read_buf(mtd, bufpoi, len);
+			bufpoi += len;
+		}
+	}
+
+	/* OOB free */
+	len = oobsize - (bufpoi - chip->oob_poi);
+	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, size - len, -1);
+	if (write)
+		chip->write_buf(mtd, bufpoi, len);
+	else
+		chip->read_buf(mtd, bufpoi, len);
+
+	return 0;
 }
 
-static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
 {
-	return write_oob_data(mtd, chip->oob_poi, page);
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int bbm_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int ret, i, pos, len;
+
+	ret = denali_data_xfer(denali, dma_buf, size, page, 1, 0);
+	if (ret)
+		return ret;
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			if (pos >= writesize)
+				pos += bbm_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(buf, dma_buf + pos, len);
+			buf += len;
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(buf, dma_buf + writesize + bbm_skip,
+				       len);
+				buf += len;
+			}
+		}
+	}
+
+	if (oob_required) {
+		uint8_t *oob = chip->oob_poi;
+
+		/* BBM at the beginning of the OOB area */
+		memcpy(oob, dma_buf + writesize, bbm_skip);
+		oob += bbm_skip;
+
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
+
+			if (pos >= writesize)
+				pos += bbm_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(oob, dma_buf + pos, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(oob, dma_buf + writesize + bbm_skip,
+				       len);
+				oob += len;
+			}
+		}
+
+		/* OOB free */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(oob, dma_buf + size - len, len);
+	}
+
+	return 0;
 }
 
 static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page)
 {
-	read_oob_data(mtd, chip->oob_poi, page);
+	return denali_oob_xfer(mtd, chip, page, 0);
+}
 
-	return 0;
+static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
+			   int page)
+{
+	return denali_oob_xfer(mtd, chip, page, 1);
 }
 
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_status;
-	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
-				INTR__DMA_CMD_COMP | INTR__ECC_UNCOR_ERR :
-				INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
+	int ret;
 
-	setup_ecc_for_xfer(denali, true, false);
-
-	denali_enable_dma(denali, true);
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
-
-	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, 0);
-
-	/* wait for operation to complete */
-	irq_status = denali_wait_for_irq(denali, irq_mask);
-
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
+	ret = denali_data_xfer(denali, denali->buf, mtd->writesize, page, 0, 0);
+	if (ret && ret != -EBADMSG)
+		return ret;
 
 	memcpy(buf, denali->buf, mtd->writesize);
 
 	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
 		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
-	else if (irq_status & INTR__ECC_ERR)
+	else if (ret == -EBADMSG)
 		stat = denali_sw_ecc_fixup(mtd, denali, &uncor_ecc_flags, buf);
-	denali_enable_dma(denali, false);
 
 	if (stat < 0)
 		return stat;
 
 	if (uncor_ecc_flags) {
-		read_oob_data(mtd, chip->oob_poi, page);
+		ret = denali_read_oob(mtd, chip, page);
+		if (ret)
+			return ret;
 
 		stat = denali_check_erased_page(mtd, chip, buf,
 						uncor_ecc_flags, stat);
@@ -776,36 +873,93 @@  static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return stat;
 }
 
-static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP;
-	uint32_t irq_status;
-
-	setup_ecc_for_xfer(denali, false, true);
-	denali_enable_dma(denali, true);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int bbm_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
 
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
+	/*
+	 * Fill the buffer with 0xff first except the full page transfer.
+	 * This simplifies the logic.
+	 */
+	if (!buf || !oob_required)
+		memset(dma_buf, 0xff, size);
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			if (pos >= writesize)
+				pos += bbm_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(dma_buf + pos, buf, len);
+			buf += len;
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(dma_buf + writesize + bbm_skip, buf,
+				       len);
+				buf += len;
+			}
+		}
+	}
 
-	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, 0);
+	if (oob_required) {
+		const uint8_t *oob = chip->oob_poi;
+
+		/* BBM at the beginning of the OOB area */
+		memcpy(dma_buf + writesize, oob, bbm_skip);
+		oob += bbm_skip;
+
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
+
+			if (pos >= writesize)
+				pos += bbm_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(dma_buf + pos, oob, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(dma_buf + writesize + bbm_skip, oob,
+				       len);
+				oob += len;
+			}
+		}
 
-	/* wait for operation to complete */
-	irq_status = denali_wait_for_irq(denali, irq_mask);
-	if (irq_status & INTR__DMA_CMD_COMP)
-		return -ETIMEDOUT;
+		/* OOB free */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(dma_buf + size - len, oob, len);
+	}
 
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
+	return denali_data_xfer(denali, dma_buf, size, page, 1, 1);
+}
 
-	denali_enable_dma(denali, false);
+static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			     const uint8_t *buf, int oob_required, int page)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	memcpy(buf, denali->buf, mtd->writesize);
-	memcpy(chip->oob_poi, denali->buf + mtd->writesize, mtd->oobsize);
+	memcpy(denali->buf, buf, mtd->writesize);
 
-	return 0;
+	return denali_data_xfer(denali, denali->buf, mtd->writesize, page,
+				0, 1);
 }
 
 static void denali_select_chip(struct mtd_info *mtd, int chip)
@@ -1235,21 +1389,27 @@  int denali_init(struct denali_nand_info *denali)
 		goto disable_irq;
 	}
 
-	ret = dma_set_mask(denali->dev,
-			   DMA_BIT_MASK(denali->caps & DENALI_CAP_DMA_64BIT ?
-					64 : 32));
-	if (ret) {
-		dev_err(denali->dev, "No usable DMA configuration\n");
-		goto disable_irq;
+	if (ioread32(denali->flash_reg + FEATURES) & FEATURES__DMA)
+		denali->dma_avail = 1;
+
+	if (denali->dma_avail) {
+		int dma_bit = denali->caps & DENALI_CAP_DMA_64BIT ? 64 : 32;
+
+		ret = dma_set_mask(denali->dev, DMA_BIT_MASK(dma_bit));
+		if (ret) {
+			dev_info(denali->dev, "Failed to set DMA mask. Disabling DMA.\n");
+			denali->dma_avail = 0;
+		}
 	}
 
-	denali->dma_addr = dma_map_single(denali->dev, denali->buf,
-			     mtd->writesize + mtd->oobsize,
-			     DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(denali->dev, denali->dma_addr)) {
-		dev_err(denali->dev, "Failed to map DMA buffer\n");
-		ret = -EIO;
-		goto disable_irq;
+	if (denali->dma_avail) {
+		denali->dma_addr = dma_map_single(denali->dev, denali->buf,
+						  mtd->writesize + mtd->oobsize,
+						  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(denali->dev, denali->dma_addr)) {
+			dev_info(denali->dev, "Failed to map DMA buffer. Disabling DMA.\n");
+			denali->dma_avail = 0;
+		};
 	}
 
 	/*
@@ -1331,6 +1491,13 @@  int denali_init(struct denali_nand_info *denali)
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
+	if (denali->nand.options & NAND_BUSWIDTH_16) {
+		chip->read_buf = denali_read_buf16;
+		chip->write_buf = denali_write_buf16;
+	} else {
+		chip->read_buf = denali_read_buf;
+		chip->write_buf = denali_write_buf;
+	}
 	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 3ca6218..f92b9db 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -323,6 +323,7 @@  struct denali_nand_info {
 
 	void *buf;
 	dma_addr_t dma_addr;
+	int dma_avail;
 	int devnum;	/* represent how many nands connected */
 	int bbtskipbytes;
 	int max_banks;