diff mbox series

[2/2] uboot: fs/btrfs: Fix LZO false decompression error caused by pending zero

Message ID 20200319123319.37848-3-wqu@suse.com
State New
Headers show
Series uboot: fs/btrfs: Fix read error on LZO compressed extents | expand

Commit Message

Qu Wenruo March 19, 2020, 12:33 p.m. UTC
[BUG]
For certain btrfs files with compressed file extent, uboot will fail to
load it:

  btrfs_read_extent_reg: disk_bytenr=14229504 disk_len=73728 offset=0 nr_bytes=131
  072
  decompress_lzo: tot_len=70770
  decompress_lzo: in_len=1389
  decompress_lzo: in_len=2400
  decompress_lzo: in_len=3002
  decompress_lzo: in_len=1379
  decompress_lzo: in_len=88539136
  decompress_lzo: header error, in_len=88539136 clen=65534 tot_len=62580

NOTE: except the last line, all other lines are debug output.

[CAUSE]
Btrfs lzo compression uses its own format to record compressed size
(segmant header, LE32).

However to make decompression easier, we never put such segment header
across page boundary.

In above case, the xxd dump of the lzo compressed data looks like this:

00001fe0: 4cdc 02fc 0bfd 02c0 dc02 0d13 0100 0001  L...............
00001ff0: 0000 0008 0300 0000 0000 0011 0000|0000  ................
00002000: 4705 0000 0001 cc02 0000 0000 0000 1e01  G...............

'|' is the "expected" segment header start position.

But in that page, there are only 2 bytes left, can't contain the 4 bytes
segment header.

So btrfs compression will skip that 2 bytes, put the segment header in
next page directly.

Uboot doesn't have such check, and read the header with 2 bytes offset,
result 0x05470000 (88539136), other than the expected result
0x00000547 (1351), resulting above error.

[FIX]
Follow the btrfs-progs restore implementation, by introducing tot_in to
record total processed bytes (including headers), and do proper page
boundary skip to fix it.

Signed-off-by: Qu Wenruo <wqu at suse.com>
---
 fs/btrfs/compression.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Comments

Marek Behún March 25, 2020, 8:09 a.m. UTC | #1
On Thu, 19 Mar 2020 20:33:19 +0800
Qu Wenruo <wqu at suse.com> wrote:

> [BUG]

The subject line should not contain uboot keyword. If you check git log
for fs/btrfs, the commits always start with:
  fs: btrfs:

Also please don't use [BUG] [CAUSE] and [FIX] in commit messages.

You could have also added myself to CC, since I am the original author
of U-Boots btrfs implementation. I just stumbled on your patches by
chance.

Also do not add linux-btrfs mailing list for u-boot patches.

> For certain btrfs files with compressed file extent, uboot will fail to
> load it:
> 
>   btrfs_read_extent_reg: disk_bytenr=14229504 disk_len=73728 offset=0 nr_bytes=131
>   072
>   decompress_lzo: tot_len=70770
>   decompress_lzo: in_len=1389
>   decompress_lzo: in_len=2400
>   decompress_lzo: in_len=3002
>   decompress_lzo: in_len=1379
>   decompress_lzo: in_len=88539136
>   decompress_lzo: header error, in_len=88539136 clen=65534 tot_len=62580
> 
> NOTE: except the last line, all other lines are debug output.
> 
> [CAUSE]
> Btrfs lzo compression uses its own format to record compressed size
> (segmant header, LE32).
> 
> However to make decompression easier, we never put such segment header
> across page boundary.
> 
> In above case, the xxd dump of the lzo compressed data looks like this:
> 
> 00001fe0: 4cdc 02fc 0bfd 02c0 dc02 0d13 0100 0001  L...............
> 00001ff0: 0000 0008 0300 0000 0000 0011 0000|0000  ................
> 00002000: 4705 0000 0001 cc02 0000 0000 0000 1e01  G...............
> 
> '|' is the "expected" segment header start position.
> 
> But in that page, there are only 2 bytes left, can't contain the 4 bytes
> segment header.
> 
> So btrfs compression will skip that 2 bytes, put the segment header in
> next page directly.
> 
> Uboot doesn't have such check, and read the header with 2 bytes offset,
> result 0x05470000 (88539136), other than the expected result
> 0x00000547 (1351), resulting above error.
> 
> [FIX]
> Follow the btrfs-progs restore implementation, by introducing tot_in to
> record total processed bytes (including headers), and do proper page
> boundary skip to fix it.
> 
> Signed-off-by: Qu Wenruo <wqu at suse.com>
> ---
>  fs/btrfs/compression.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 4ef44ce11485..2a6ac8bb1029 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -9,6 +9,7 @@
>  #include <malloc.h>
>  #include <linux/lzo.h>
>  #include <linux/zstd.h>
> +#include <linux/compat.h>
>  #include <u-boot/zlib.h>
>  #include <asm/unaligned.h>
>  
> @@ -17,6 +18,7 @@
>  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>  {
>  	u32 tot_len, in_len, res;
> +	u32 tot_in = 0;

This function does not define local variable values in declaration,
please don't mix this. Also tot_in is of the same type as the variables
above, so use
  u32 tot_len, tot_in, in_len, res;

And put
  tot_in = 0;
after line 24:
  tot_len = le32_to_cpu(get_unaligned((u32 *)cbuf));

>  	size_t out_len;
>  	int ret;
>  
> @@ -27,6 +29,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>  	cbuf += LZO_LEN;
>  	clen -= LZO_LEN;
>  	tot_len -= LZO_LEN;
> +	tot_in += LZO_LEN;
>  
>  	if (tot_len == 0 && dlen)
>  		return -1;
> @@ -36,6 +39,9 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>  	res = 0;
>  
>  	while (tot_len > LZO_LEN) {
> +		size_t mod_page;
> +		size_t rem_page;

The rest of the function uses u32, not size_t. Please use it as well.
Also since the variables are of same type, please use one-line
declaration only, eg:
  u32 mod_page, rem_page;

> +
>  		in_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
>  		cbuf += LZO_LEN;
>  		clen -= LZO_LEN;
> @@ -44,6 +50,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>  			return -1;
>  
>  		tot_len -= (LZO_LEN + in_len);
> +		tot_in += (LZO_LEN + in_len);
>  
>  		out_len = dlen;
>  		ret = lzo1x_decompress_safe(cbuf, in_len, dbuf, &out_len);
> @@ -56,6 +63,19 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>  		dlen -= out_len;
>  
>  		res += out_len;
> +
> +		/*
> +		 * If the 4 bytes header does not fit to the rest of the page we
> +		 * have to move to next one, or we read some garbage.
> +		 */
> +		mod_page = tot_in % PAGE_SIZE;
> +		rem_page = PAGE_SIZE - mod_page;

Is there a reasof for mod_page? You could use just
  rem_page = PAGE_SIZE - (tot_in % PAGE_SIZE);

> +		if (rem_page < LZO_LEN) {
> +			cbuf += rem_page;
> +			tot_in += rem_page;
> +			clen -= rem_page;
> +			tot_len -= rem_page;
> +		}
>  	}
>  
>  	return res;

Sorry for this nitpicking, but I would like my driver to remain
consistent in coding style.

Marek
Qu Wenruo March 25, 2020, 8:27 a.m. UTC | #2
On 2020/3/25 ??4:09, Marek Behun wrote:
> On Thu, 19 Mar 2020 20:33:19 +0800
> Qu Wenruo <wqu at suse.com> wrote:
> 
>> [BUG]
> 
> The subject line should not contain uboot keyword. If you check git log
> for fs/btrfs, the commits always start with:
>   fs: btrfs:
> 
> Also please don't use [BUG] [CAUSE] and [FIX] in commit messages.

Why? I think such section makes the analyse much easier to read.

> 
> You could have also added myself to CC, since I am the original author
> of U-Boots btrfs implementation. I just stumbled on your patches by
> chance.

Since there is no maintainer name in MAINTAINERS file, there is no way
other guys would know who to CC.

> 
> Also do not add linux-btrfs mailing list for u-boot patches.

I don't see any reason why we can't include the mail list for more
experts to review.

> 
>> For certain btrfs files with compressed file extent, uboot will fail to
>> load it:
>>
>>   btrfs_read_extent_reg: disk_bytenr=14229504 disk_len=73728 offset=0 nr_bytes=131
>>   072
>>   decompress_lzo: tot_len=70770
>>   decompress_lzo: in_len=1389
>>   decompress_lzo: in_len=2400
>>   decompress_lzo: in_len=3002
>>   decompress_lzo: in_len=1379
>>   decompress_lzo: in_len=88539136
>>   decompress_lzo: header error, in_len=88539136 clen=65534 tot_len=62580
>>
>> NOTE: except the last line, all other lines are debug output.
>>
>> [CAUSE]
>> Btrfs lzo compression uses its own format to record compressed size
>> (segmant header, LE32).
>>
>> However to make decompression easier, we never put such segment header
>> across page boundary.
>>
>> In above case, the xxd dump of the lzo compressed data looks like this:
>>
>> 00001fe0: 4cdc 02fc 0bfd 02c0 dc02 0d13 0100 0001  L...............
>> 00001ff0: 0000 0008 0300 0000 0000 0011 0000|0000  ................
>> 00002000: 4705 0000 0001 cc02 0000 0000 0000 1e01  G...............
>>
>> '|' is the "expected" segment header start position.
>>
>> But in that page, there are only 2 bytes left, can't contain the 4 bytes
>> segment header.
>>
>> So btrfs compression will skip that 2 bytes, put the segment header in
>> next page directly.
>>
>> Uboot doesn't have such check, and read the header with 2 bytes offset,
>> result 0x05470000 (88539136), other than the expected result
>> 0x00000547 (1351), resulting above error.
>>
>> [FIX]
>> Follow the btrfs-progs restore implementation, by introducing tot_in to
>> record total processed bytes (including headers), and do proper page
>> boundary skip to fix it.
>>
>> Signed-off-by: Qu Wenruo <wqu at suse.com>
>> ---
>>  fs/btrfs/compression.c | 20 ++++++++++++++++++++
>>  1 file changed, 20 insertions(+)
>>
>> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
>> index 4ef44ce11485..2a6ac8bb1029 100644
>> --- a/fs/btrfs/compression.c
>> +++ b/fs/btrfs/compression.c
>> @@ -9,6 +9,7 @@
>>  #include <malloc.h>
>>  #include <linux/lzo.h>
>>  #include <linux/zstd.h>
>> +#include <linux/compat.h>
>>  #include <u-boot/zlib.h>
>>  #include <asm/unaligned.h>
>>  
>> @@ -17,6 +18,7 @@
>>  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>  {
>>  	u32 tot_len, in_len, res;
>> +	u32 tot_in = 0;
> 
> This function does not define local variable values in declaration,
> please don't mix this. Also tot_in is of the same type as the variables
> above, so use
>   u32 tot_len, tot_in, in_len, res;

Please give us the proper code style doc, I understand that each project
or even each subsystem has its own style, but without proper doc it will
be a mess to maintain.

So, please show the proper code style for us to follow.

> 
> And put
>   tot_in = 0;
> after line 24:
>   tot_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
> 
>>  	size_t out_len;
>>  	int ret;
>>  
>> @@ -27,6 +29,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>  	cbuf += LZO_LEN;
>>  	clen -= LZO_LEN;
>>  	tot_len -= LZO_LEN;
>> +	tot_in += LZO_LEN;
>>  
>>  	if (tot_len == 0 && dlen)
>>  		return -1;
>> @@ -36,6 +39,9 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>  	res = 0;
>>  
>>  	while (tot_len > LZO_LEN) {
>> +		size_t mod_page;
>> +		size_t rem_page;
> 
> The rest of the function uses u32, not size_t. Please use it as well.
> Also since the variables are of same type, please use one-line
> declaration only, eg:
>   u32 mod_page, rem_page;
> 
>> +
>>  		in_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
>>  		cbuf += LZO_LEN;
>>  		clen -= LZO_LEN;
>> @@ -44,6 +50,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>  			return -1;
>>  
>>  		tot_len -= (LZO_LEN + in_len);
>> +		tot_in += (LZO_LEN + in_len);
>>  
>>  		out_len = dlen;
>>  		ret = lzo1x_decompress_safe(cbuf, in_len, dbuf, &out_len);
>> @@ -56,6 +63,19 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>  		dlen -= out_len;
>>  
>>  		res += out_len;
>> +
>> +		/*
>> +		 * If the 4 bytes header does not fit to the rest of the page we
>> +		 * have to move to next one, or we read some garbage.
>> +		 */
>> +		mod_page = tot_in % PAGE_SIZE;
>> +		rem_page = PAGE_SIZE - mod_page;
> 
> Is there a reasof for mod_page? You could use just
>   rem_page = PAGE_SIZE - (tot_in % PAGE_SIZE);

I tend to keep the difference from btrfs-progs to minimal, as that's
what I tend to plan to backport soon.

> 
>> +		if (rem_page < LZO_LEN) {
>> +			cbuf += rem_page;
>> +			tot_in += rem_page;
>> +			clen -= rem_page;
>> +			tot_len -= rem_page;
>> +		}
>>  	}
>>  
>>  	return res;
> 
> Sorry for this nitpicking, but I would like my driver to remain
> consistent in coding style.

I know uboot btrfs is mostly backported by yourself and I respect the work.

But please add yourself to maintainers files, and also consider use
existing btrfs-progs code to make code sycn easier, which would avoid
such bug from the very beginning.

Thanks,
Qu

> 
> Marek
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <https://lists.denx.de/pipermail/u-boot/attachments/20200325/d1eaa03d/attachment.sig>
Marek Behún March 25, 2020, 11 a.m. UTC | #3
On Wed, 25 Mar 2020 16:27:16 +0800
Qu Wenruo <quwenruo.btrfs at gmx.com> wrote:

> On 2020/3/25 ??4:09, Marek Behun wrote:
> > On Thu, 19 Mar 2020 20:33:19 +0800
> > Qu Wenruo <wqu at suse.com> wrote:
> >   
> >> [BUG]  
> > 
> > The subject line should not contain uboot keyword. If you check git log
> > for fs/btrfs, the commits always start with:
> >   fs: btrfs:
> > 
> > Also please don't use [BUG] [CAUSE] and [FIX] in commit messages.  
> 
> Why? I think such section makes the analyse much easier to read.

Hi Qu,

I think your commit message without the tags is well-readable, but maybe
this is just my personal view. On the other hand such tagging is not
customary for U-Boot commit messages nor for Linux.

> > 
> > You could have also added myself to CC, since I am the original author
> > of U-Boots btrfs implementation. I just stumbled on your patches by
> > chance.  
> 
> Since there is no maintainer name in MAINTAINERS file, there is no way
> other guys would know who to CC.

You have a fair point, sorry about that.

> 
> > 
> > Also do not add linux-btrfs mailing list for u-boot patches.  
> 
> I don't see any reason why we can't include the mail list for more
> experts to review.

See below.

> >   
> >> For certain btrfs files with compressed file extent, uboot will fail to
> >> load it:
> >>
> >>   btrfs_read_extent_reg: disk_bytenr=14229504 disk_len=73728 offset=0 nr_bytes=131
> >>   072
> >>   decompress_lzo: tot_len=70770
> >>   decompress_lzo: in_len=1389
> >>   decompress_lzo: in_len=2400
> >>   decompress_lzo: in_len=3002
> >>   decompress_lzo: in_len=1379
> >>   decompress_lzo: in_len=88539136
> >>   decompress_lzo: header error, in_len=88539136 clen=65534 tot_len=62580
> >>
> >> NOTE: except the last line, all other lines are debug output.
> >>
> >> [CAUSE]
> >> Btrfs lzo compression uses its own format to record compressed size
> >> (segmant header, LE32).
> >>
> >> However to make decompression easier, we never put such segment header
> >> across page boundary.
> >>
> >> In above case, the xxd dump of the lzo compressed data looks like this:
> >>
> >> 00001fe0: 4cdc 02fc 0bfd 02c0 dc02 0d13 0100 0001  L...............
> >> 00001ff0: 0000 0008 0300 0000 0000 0011 0000|0000  ................
> >> 00002000: 4705 0000 0001 cc02 0000 0000 0000 1e01  G...............
> >>
> >> '|' is the "expected" segment header start position.
> >>
> >> But in that page, there are only 2 bytes left, can't contain the 4 bytes
> >> segment header.
> >>
> >> So btrfs compression will skip that 2 bytes, put the segment header in
> >> next page directly.
> >>
> >> Uboot doesn't have such check, and read the header with 2 bytes offset,
> >> result 0x05470000 (88539136), other than the expected result
> >> 0x00000547 (1351), resulting above error.
> >>
> >> [FIX]
> >> Follow the btrfs-progs restore implementation, by introducing tot_in to
> >> record total processed bytes (including headers), and do proper page
> >> boundary skip to fix it.
> >>
> >> Signed-off-by: Qu Wenruo <wqu at suse.com>
> >> ---
> >>  fs/btrfs/compression.c | 20 ++++++++++++++++++++
> >>  1 file changed, 20 insertions(+)
> >>
> >> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> >> index 4ef44ce11485..2a6ac8bb1029 100644
> >> --- a/fs/btrfs/compression.c
> >> +++ b/fs/btrfs/compression.c
> >> @@ -9,6 +9,7 @@
> >>  #include <malloc.h>
> >>  #include <linux/lzo.h>
> >>  #include <linux/zstd.h>
> >> +#include <linux/compat.h>
> >>  #include <u-boot/zlib.h>
> >>  #include <asm/unaligned.h>
> >>  
> >> @@ -17,6 +18,7 @@
> >>  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
> >>  {
> >>  	u32 tot_len, in_len, res;
> >> +	u32 tot_in = 0;  
> > 
> > This function does not define local variable values in declaration,
> > please don't mix this. Also tot_in is of the same type as the variables
> > above, so use
> >   u32 tot_len, tot_in, in_len, res;  
> 
> Please give us the proper code style doc, I understand that each project
> or even each subsystem has its own style, but without proper doc it will
> be a mess to maintain.
> 
> So, please show the proper code style for us to follow.

When patches are being sent for example to netdev, sometimes netdev
maintainer or reviewers nitpick about coding style and ask the author
to fix this. Not all of these coding style requests are
documented nor does the checkpatch script checks for all of them.
Sometimes the only way for to get to know the coding style is by
reading the code, and sometimes even that does not suffice. Either way
these are reasonable requests and the authors fix the patches.
I know that I always did it, for example when I sent patches for the
mv88e6xxx linux driver, and Vivien or David asked me to fix such things.

I understand that people may find this exaggeration, but we know what
happens when nobody cares about such things: look at U-Boot's ext4
driver.

> > 
> > And put
> >   tot_in = 0;
> > after line 24:
> >   tot_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
> >   
> >>  	size_t out_len;
> >>  	int ret;
> >>  
> >> @@ -27,6 +29,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
> >>  	cbuf += LZO_LEN;
> >>  	clen -= LZO_LEN;
> >>  	tot_len -= LZO_LEN;
> >> +	tot_in += LZO_LEN;
> >>  
> >>  	if (tot_len == 0 && dlen)
> >>  		return -1;
> >> @@ -36,6 +39,9 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
> >>  	res = 0;
> >>  
> >>  	while (tot_len > LZO_LEN) {
> >> +		size_t mod_page;
> >> +		size_t rem_page;  
> > 
> > The rest of the function uses u32, not size_t. Please use it as well.
> > Also since the variables are of same type, please use one-line
> > declaration only, eg:
> >   u32 mod_page, rem_page;
> >   
> >> +
> >>  		in_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
> >>  		cbuf += LZO_LEN;
> >>  		clen -= LZO_LEN;
> >> @@ -44,6 +50,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
> >>  			return -1;
> >>  
> >>  		tot_len -= (LZO_LEN + in_len);
> >> +		tot_in += (LZO_LEN + in_len);
> >>  
> >>  		out_len = dlen;
> >>  		ret = lzo1x_decompress_safe(cbuf, in_len, dbuf, &out_len);
> >> @@ -56,6 +63,19 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
> >>  		dlen -= out_len;
> >>  
> >>  		res += out_len;
> >> +
> >> +		/*
> >> +		 * If the 4 bytes header does not fit to the rest of the page we
> >> +		 * have to move to next one, or we read some garbage.
> >> +		 */
> >> +		mod_page = tot_in % PAGE_SIZE;
> >> +		rem_page = PAGE_SIZE - mod_page;  
> > 
> > Is there a reasof for mod_page? You could use just
> >   rem_page = PAGE_SIZE - (tot_in % PAGE_SIZE);  
> 
> I tend to keep the difference from btrfs-progs to minimal, as that's
> what I tend to plan to backport soon.

See below.

>
> >   
> >> +		if (rem_page < LZO_LEN) {
> >> +			cbuf += rem_page;
> >> +			tot_in += rem_page;
> >> +			clen -= rem_page;
> >> +			tot_len -= rem_page;
> >> +		}
> >>  	}
> >>  
> >>  	return res;  
> > 
> > Sorry for this nitpicking, but I would like my driver to remain
> > consistent in coding style.  
> 
> I know uboot btrfs is mostly backported by yourself and I respect the work.
> 
> But please add yourself to maintainers files, and also consider use
> existing btrfs-progs code to make code sycn easier, which would avoid
> such bug from the very beginning.

I am going to call this crossporting (porting from Linux to U-Boot),
since I understand backporting to mean porting code to a previous
version of the same program (ie. Linux 5.6 to Linux <5.6).

The U-Boot btrfs driver was not exactly crossported from Linux btrfs
driver. Sure I have many times read Linux sources when I was not sure
about something from documentation, and this reflected on the U-Boot
implementation. But code-syncing is not possible with the original, as
it was never intended to.

I also did not collaborate with Linux btrfs authors when writing this
driver. These are the reasons why I don't see much point in adding
linux-btrfs mailing list to Cc, since they may have never seen the
codebase.

Marek
Marek Behún March 25, 2020, 11:10 a.m. UTC | #4
On Wed, 25 Mar 2020 12:00:20 +0100
Marek Behun <marek.behun at nic.cz> wrote:

> I also did not collaborate with Linux btrfs authors when writing this
> driver. These are the reasons why I don't see much point in adding
> linux-btrfs mailing list to Cc, since they may have never seen the
> codebase.

On the other hand it can't hurt for Linux btrfs authors to have
a look at U-Boot's implementation, so I am withdrawing my objection. :)
Sorry.
Qu Wenruo March 25, 2020, 11:32 a.m. UTC | #5
On 2020/3/25 ??7:00, Marek Behun wrote:
> On Wed, 25 Mar 2020 16:27:16 +0800
> Qu Wenruo <quwenruo.btrfs at gmx.com> wrote:
> 
>> On 2020/3/25 ??4:09, Marek Behun wrote:
>>> On Thu, 19 Mar 2020 20:33:19 +0800
>>> Qu Wenruo <wqu at suse.com> wrote:
>>>   
>>>> [BUG]  
>>>
>>> The subject line should not contain uboot keyword. If you check git log
>>> for fs/btrfs, the commits always start with:
>>>   fs: btrfs:
>>>
>>> Also please don't use [BUG] [CAUSE] and [FIX] in commit messages.  
>>
>> Why? I think such section makes the analyse much easier to read.
> 
> Hi Qu,
> 
> I think your commit message without the tags is well-readable, but maybe
> this is just my personal view. On the other hand such tagging is not
> customary for U-Boot commit messages nor for Linux.

Got it, would try to control my eager to use such sections at least for
U-boot code.

> 
>>>
>>> You could have also added myself to CC, since I am the original author
>>> of U-Boots btrfs implementation. I just stumbled on your patches by
>>> chance.  
>>
>> Since there is no maintainer name in MAINTAINERS file, there is no way
>> other guys would know who to CC.
> 
> You have a fair point, sorry about that.
> 
>>
>>>
>>> Also do not add linux-btrfs mailing list for u-boot patches.  
>>
>> I don't see any reason why we can't include the mail list for more
>> experts to review.
> 
> See below.
> 
>>>   
>>>> For certain btrfs files with compressed file extent, uboot will fail to
>>>> load it:
>>>>
>>>>   btrfs_read_extent_reg: disk_bytenr=14229504 disk_len=73728 offset=0 nr_bytes=131
>>>>   072
>>>>   decompress_lzo: tot_len=70770
>>>>   decompress_lzo: in_len=1389
>>>>   decompress_lzo: in_len=2400
>>>>   decompress_lzo: in_len=3002
>>>>   decompress_lzo: in_len=1379
>>>>   decompress_lzo: in_len=88539136
>>>>   decompress_lzo: header error, in_len=88539136 clen=65534 tot_len=62580
>>>>
>>>> NOTE: except the last line, all other lines are debug output.
>>>>
>>>> [CAUSE]
>>>> Btrfs lzo compression uses its own format to record compressed size
>>>> (segmant header, LE32).
>>>>
>>>> However to make decompression easier, we never put such segment header
>>>> across page boundary.
>>>>
>>>> In above case, the xxd dump of the lzo compressed data looks like this:
>>>>
>>>> 00001fe0: 4cdc 02fc 0bfd 02c0 dc02 0d13 0100 0001  L...............
>>>> 00001ff0: 0000 0008 0300 0000 0000 0011 0000|0000  ................
>>>> 00002000: 4705 0000 0001 cc02 0000 0000 0000 1e01  G...............
>>>>
>>>> '|' is the "expected" segment header start position.
>>>>
>>>> But in that page, there are only 2 bytes left, can't contain the 4 bytes
>>>> segment header.
>>>>
>>>> So btrfs compression will skip that 2 bytes, put the segment header in
>>>> next page directly.
>>>>
>>>> Uboot doesn't have such check, and read the header with 2 bytes offset,
>>>> result 0x05470000 (88539136), other than the expected result
>>>> 0x00000547 (1351), resulting above error.
>>>>
>>>> [FIX]
>>>> Follow the btrfs-progs restore implementation, by introducing tot_in to
>>>> record total processed bytes (including headers), and do proper page
>>>> boundary skip to fix it.
>>>>
>>>> Signed-off-by: Qu Wenruo <wqu at suse.com>
>>>> ---
>>>>  fs/btrfs/compression.c | 20 ++++++++++++++++++++
>>>>  1 file changed, 20 insertions(+)
>>>>
>>>> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
>>>> index 4ef44ce11485..2a6ac8bb1029 100644
>>>> --- a/fs/btrfs/compression.c
>>>> +++ b/fs/btrfs/compression.c
>>>> @@ -9,6 +9,7 @@
>>>>  #include <malloc.h>
>>>>  #include <linux/lzo.h>
>>>>  #include <linux/zstd.h>
>>>> +#include <linux/compat.h>
>>>>  #include <u-boot/zlib.h>
>>>>  #include <asm/unaligned.h>
>>>>  
>>>> @@ -17,6 +18,7 @@
>>>>  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>>>  {
>>>>  	u32 tot_len, in_len, res;
>>>> +	u32 tot_in = 0;  
>>>
>>> This function does not define local variable values in declaration,
>>> please don't mix this. Also tot_in is of the same type as the variables
>>> above, so use
>>>   u32 tot_len, tot_in, in_len, res;  
>>
>> Please give us the proper code style doc, I understand that each project
>> or even each subsystem has its own style, but without proper doc it will
>> be a mess to maintain.
>>
>> So, please show the proper code style for us to follow.
> 
> When patches are being sent for example to netdev, sometimes netdev
> maintainer or reviewers nitpick about coding style and ask the author
> to fix this. Not all of these coding style requests are
> documented nor does the checkpatch script checks for all of them.
> Sometimes the only way for to get to know the coding style is by
> reading the code, and sometimes even that does not suffice. Either way
> these are reasonable requests and the authors fix the patches.
> I know that I always did it, for example when I sent patches for the
> mv88e6xxx linux driver, and Vivien or David asked me to fix such things.
> 
> I understand that people may find this exaggeration, but we know what
> happens when nobody cares about such things: look at U-Boot's ext4
> driver.

Fine.

Although it may be tricky that if we cross port btrfs-progs (not kernel,
as we don't really need to bother the full functionality from kernel),
the code style would greatly change from the current one.

> 
>>>
>>> And put
>>>   tot_in = 0;
>>> after line 24:
>>>   tot_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
>>>   
>>>>  	size_t out_len;
>>>>  	int ret;
>>>>  
>>>> @@ -27,6 +29,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>>>  	cbuf += LZO_LEN;
>>>>  	clen -= LZO_LEN;
>>>>  	tot_len -= LZO_LEN;
>>>> +	tot_in += LZO_LEN;
>>>>  
>>>>  	if (tot_len == 0 && dlen)
>>>>  		return -1;
>>>> @@ -36,6 +39,9 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>>>  	res = 0;
>>>>  
>>>>  	while (tot_len > LZO_LEN) {
>>>> +		size_t mod_page;
>>>> +		size_t rem_page;  
>>>
>>> The rest of the function uses u32, not size_t. Please use it as well.
>>> Also since the variables are of same type, please use one-line
>>> declaration only, eg:
>>>   u32 mod_page, rem_page;
>>>   
>>>> +
>>>>  		in_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
>>>>  		cbuf += LZO_LEN;
>>>>  		clen -= LZO_LEN;
>>>> @@ -44,6 +50,7 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>>>  			return -1;
>>>>  
>>>>  		tot_len -= (LZO_LEN + in_len);
>>>> +		tot_in += (LZO_LEN + in_len);
>>>>  
>>>>  		out_len = dlen;
>>>>  		ret = lzo1x_decompress_safe(cbuf, in_len, dbuf, &out_len);
>>>> @@ -56,6 +63,19 @@ static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
>>>>  		dlen -= out_len;
>>>>  
>>>>  		res += out_len;
>>>> +
>>>> +		/*
>>>> +		 * If the 4 bytes header does not fit to the rest of the page we
>>>> +		 * have to move to next one, or we read some garbage.
>>>> +		 */
>>>> +		mod_page = tot_in % PAGE_SIZE;
>>>> +		rem_page = PAGE_SIZE - mod_page;  
>>>
>>> Is there a reasof for mod_page? You could use just
>>>   rem_page = PAGE_SIZE - (tot_in % PAGE_SIZE);  
>>
>> I tend to keep the difference from btrfs-progs to minimal, as that's
>> what I tend to plan to backport soon.
> 
> See below.
> 
>>
>>>   
>>>> +		if (rem_page < LZO_LEN) {
>>>> +			cbuf += rem_page;
>>>> +			tot_in += rem_page;
>>>> +			clen -= rem_page;
>>>> +			tot_len -= rem_page;
>>>> +		}
>>>>  	}
>>>>  
>>>>  	return res;  
>>>
>>> Sorry for this nitpicking, but I would like my driver to remain
>>> consistent in coding style.  
>>
>> I know uboot btrfs is mostly backported by yourself and I respect the work.
>>
>> But please add yourself to maintainers files, and also consider use
>> existing btrfs-progs code to make code sycn easier, which would avoid
>> such bug from the very beginning.
> 
> I am going to call this crossporting (porting from Linux to U-Boot),

Indeed crossport is much appropriate.

> since I understand backporting to mean porting code to a previous
> version of the same program (ie. Linux 5.6 to Linux <5.6).
> 
> The U-Boot btrfs driver was not exactly crossported from Linux btrfs
> driver.

The kernel part is too complex, too fully functional, and too limited to
kernel infrastructure. Things like bio is never really used out of
kernel. So kernel code is seldom a good source for cross port.

> Sure I have many times read Linux sources when I was not sure
> about something from documentation,

And any complain about the doc is welcome since it helps us to improve
the doc.
(Yep, another reason why getting btrfs community involved could be helpful)

> and this reflected on the U-Boot
> implementation. But code-syncing is not possible with the original, as
> it was never intended to.
> 
> I also did not collaborate with Linux btrfs authors when writing this
> driver. These are the reasons why I don't see much point in adding
> linux-btrfs mailing list to Cc, since they may have never seen the
> codebase.

But from the git history, there are some other problem which can be
exposed earlier, like the tree search behavior.

Thus sometimes it would be nicer to get the fs guys involved, some ideas
like cross port btrfs-progs to u-boot even before trying to implement
the code base.

Anyway, I'll update the patchset to address your comment soon, and
explore the possibility to cross port btrfs-progs to make it easier to
sync code.

Thanks,
Qu

> 
> Marek
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <https://lists.denx.de/pipermail/u-boot/attachments/20200325/4a392e2a/attachment.sig>
diff mbox series

Patch

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4ef44ce11485..2a6ac8bb1029 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -9,6 +9,7 @@ 
 #include <malloc.h>
 #include <linux/lzo.h>
 #include <linux/zstd.h>
+#include <linux/compat.h>
 #include <u-boot/zlib.h>
 #include <asm/unaligned.h>
 
@@ -17,6 +18,7 @@ 
 static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
 {
 	u32 tot_len, in_len, res;
+	u32 tot_in = 0;
 	size_t out_len;
 	int ret;
 
@@ -27,6 +29,7 @@  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
 	cbuf += LZO_LEN;
 	clen -= LZO_LEN;
 	tot_len -= LZO_LEN;
+	tot_in += LZO_LEN;
 
 	if (tot_len == 0 && dlen)
 		return -1;
@@ -36,6 +39,9 @@  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
 	res = 0;
 
 	while (tot_len > LZO_LEN) {
+		size_t mod_page;
+		size_t rem_page;
+
 		in_len = le32_to_cpu(get_unaligned((u32 *)cbuf));
 		cbuf += LZO_LEN;
 		clen -= LZO_LEN;
@@ -44,6 +50,7 @@  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
 			return -1;
 
 		tot_len -= (LZO_LEN + in_len);
+		tot_in += (LZO_LEN + in_len);
 
 		out_len = dlen;
 		ret = lzo1x_decompress_safe(cbuf, in_len, dbuf, &out_len);
@@ -56,6 +63,19 @@  static u32 decompress_lzo(const u8 *cbuf, u32 clen, u8 *dbuf, u32 dlen)
 		dlen -= out_len;
 
 		res += out_len;
+
+		/*
+		 * If the 4 bytes header does not fit to the rest of the page we
+		 * have to move to next one, or we read some garbage.
+		 */
+		mod_page = tot_in % PAGE_SIZE;
+		rem_page = PAGE_SIZE - mod_page;
+		if (rem_page < LZO_LEN) {
+			cbuf += rem_page;
+			tot_in += rem_page;
+			clen -= rem_page;
+			tot_len -= rem_page;
+		}
 	}
 
 	return res;