diff mbox series

[v8,09/14] iio: afe: rescale: fix accuracy for small fractional scales

Message ID 20210820191714.69898-10-liambeguin@gmail.com
State New
Headers show
Series iio: afe: add temperature rescaling support | expand

Commit Message

Liam Beguin Aug. 20, 2021, 7:17 p.m. UTC
From: Liam Beguin <lvb@xiphos.com>

The approximation caused by integer divisions can be costly on smaller
scale values since the decimal part is significant compared to the
integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such
cases to maintain accuracy.

Signed-off-by: Liam Beguin <lvb@xiphos.com>
---
 drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

Comments

kernel test robot Aug. 21, 2021, 2 a.m. UTC | #1
Hi Liam,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on 6cbb3aa0f9d5d23221df787cf36f74d3866fdb78]

url:    https://github.com/0day-ci/linux/commits/Liam-Beguin/iio-afe-add-temperature-rescaling-support/20210821-032112
base:   6cbb3aa0f9d5d23221df787cf36f74d3866fdb78
config: openrisc-randconfig-m031-20210821 (attached as .config)
compiler: or1k-linux-gcc (GCC) 11.2.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

smatch warnings:
drivers/iio/afe/iio-rescale.c:56 rescale_process_scale() warn: inconsistent indenting

vim +56 drivers/iio/afe/iio-rescale.c

    20	
    21	int rescale_process_scale(struct rescale *rescale, int scale_type,
    22				  int *val, int *val2)
    23	{
    24		s64 tmp;
    25		s32 rem, rem2;
    26		u32 mult;
    27		u32 neg;
    28	
    29		switch (scale_type) {
    30		case IIO_VAL_INT:
    31			*val *= rescale->numerator;
    32			if (rescale->denominator == 1)
    33				return scale_type;
    34			*val2 = rescale->denominator;
    35			return IIO_VAL_FRACTIONAL;
    36		case IIO_VAL_FRACTIONAL:
    37		case IIO_VAL_FRACTIONAL_LOG2:
    38			tmp = (s64)*val * 1000000000LL;
    39			tmp = div_s64(tmp, rescale->denominator);
    40			tmp *= rescale->numerator;
    41	
    42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);
    43			*val = tmp;
    44	
    45			/*
    46			 * For small values, the approximation can be costly,
    47			 * change scale type to maintain accuracy.
    48			 *
    49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.
    50			 */
    51			if (scale_type == IIO_VAL_FRACTIONAL)
    52				tmp = *val2;
    53			else
    54				tmp = 1 << *val2;
    55	
  > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {
    57				 *val = div_s64_rem(*val, tmp, &rem2);
    58	
    59				 *val2 = div_s64(rem, tmp);
    60				 if (rem2)
    61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);
    62	
    63				 return IIO_VAL_INT_PLUS_NANO;
    64			 }
    65	
    66			return scale_type;
    67		case IIO_VAL_INT_PLUS_NANO:
    68		case IIO_VAL_INT_PLUS_MICRO:
    69			if (scale_type == IIO_VAL_INT_PLUS_NANO)
    70				mult = 1000000000LL;
    71			else
    72				mult = 1000000LL;
    73			/*
    74			 * For IIO_VAL_INT_PLUS_{MICRO,NANO} scale types if *val OR
    75			 * *val2 is negative the schan scale is negative
    76			 */
    77			neg = *val < 0 || *val2 < 0;
    78	
    79			tmp = (s64)abs(*val) * abs(rescale->numerator);
    80			*val = div_s64_rem(tmp, abs(rescale->denominator), &rem);
    81	
    82			tmp = (s64)rem * mult + (s64)abs(*val2) * abs(rescale->numerator);
    83			tmp = div_s64(tmp, abs(rescale->denominator));
    84	
    85			*val += div_s64_rem(tmp, mult, val2);
    86	
    87			/*
    88			 * If only one of the rescaler elements or the schan scale is
    89			 * negative, the combined scale is negative.
    90			 */
    91			if (neg ^ ((rescale->numerator < 0) ^ (rescale->denominator < 0))) {
    92				if (*val)
    93					*val = -*val;
    94				else
    95					*val2 = -*val2;
    96			}
    97	
    98			return scale_type;
    99		default:
   100			return -EOPNOTSUPP;
   101		}
   102	}
   103	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Aug. 21, 2021, 7:21 a.m. UTC | #2
Hi Liam,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on 6cbb3aa0f9d5d23221df787cf36f74d3866fdb78]

url:    https://github.com/0day-ci/linux/commits/Liam-Beguin/iio-afe-add-temperature-rescaling-support/20210821-032112
base:   6cbb3aa0f9d5d23221df787cf36f74d3866fdb78
config: i386-randconfig-r026-20210821 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/e5c2e1505fa3f8cf9fe6d3a21f3a5c585efc6dce
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Liam-Beguin/iio-afe-add-temperature-rescaling-support/20210821-032112
        git checkout e5c2e1505fa3f8cf9fe6d3a21f3a5c585efc6dce
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   ld: drivers/iio/afe/iio-rescale.o: in function `rescale_process_scale':
>> drivers/iio/afe/iio-rescale.c:56: undefined reference to `__divdi3'


vim +56 drivers/iio/afe/iio-rescale.c

    20	
    21	int rescale_process_scale(struct rescale *rescale, int scale_type,
    22				  int *val, int *val2)
    23	{
    24		s64 tmp;
    25		s32 rem, rem2;
    26		u32 mult;
    27		u32 neg;
    28	
    29		switch (scale_type) {
    30		case IIO_VAL_INT:
    31			*val *= rescale->numerator;
    32			if (rescale->denominator == 1)
    33				return scale_type;
    34			*val2 = rescale->denominator;
    35			return IIO_VAL_FRACTIONAL;
    36		case IIO_VAL_FRACTIONAL:
    37		case IIO_VAL_FRACTIONAL_LOG2:
    38			tmp = (s64)*val * 1000000000LL;
    39			tmp = div_s64(tmp, rescale->denominator);
    40			tmp *= rescale->numerator;
    41	
    42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);
    43			*val = tmp;
    44	
    45			/*
    46			 * For small values, the approximation can be costly,
    47			 * change scale type to maintain accuracy.
    48			 *
    49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.
    50			 */
    51			if (scale_type == IIO_VAL_FRACTIONAL)
    52				tmp = *val2;
    53			else
    54				tmp = 1 << *val2;
    55	
  > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {
    57				 *val = div_s64_rem(*val, tmp, &rem2);
    58	
    59				 *val2 = div_s64(rem, tmp);
    60				 if (rem2)
    61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);
    62	
    63				 return IIO_VAL_INT_PLUS_NANO;
    64			 }
    65	
    66			return scale_type;
    67		case IIO_VAL_INT_PLUS_NANO:
    68		case IIO_VAL_INT_PLUS_MICRO:
    69			if (scale_type == IIO_VAL_INT_PLUS_NANO)
    70				mult = 1000000000LL;
    71			else
    72				mult = 1000000LL;
    73			/*
    74			 * For IIO_VAL_INT_PLUS_{MICRO,NANO} scale types if *val OR
    75			 * *val2 is negative the schan scale is negative
    76			 */
    77			neg = *val < 0 || *val2 < 0;
    78	
    79			tmp = (s64)abs(*val) * abs(rescale->numerator);
    80			*val = div_s64_rem(tmp, abs(rescale->denominator), &rem);
    81	
    82			tmp = (s64)rem * mult + (s64)abs(*val2) * abs(rescale->numerator);
    83			tmp = div_s64(tmp, abs(rescale->denominator));
    84	
    85			*val += div_s64_rem(tmp, mult, val2);
    86	
    87			/*
    88			 * If only one of the rescaler elements or the schan scale is
    89			 * negative, the combined scale is negative.
    90			 */
    91			if (neg ^ ((rescale->numerator < 0) ^ (rescale->denominator < 0))) {
    92				if (*val)
    93					*val = -*val;
    94				else
    95					*val2 = -*val2;
    96			}
    97	
    98			return scale_type;
    99		default:
   100			return -EOPNOTSUPP;
   101		}
   102	}
   103	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Peter Rosin Aug. 22, 2021, 10:18 p.m. UTC | #3
[I started to write an answer to your plans in the v7 thread, but didn't
have time to finish before v8 appeared...]

On 2021-08-20 21:17, Liam Beguin wrote:
> From: Liam Beguin <lvb@xiphos.com>

> 

> The approximation caused by integer divisions can be costly on smaller

> scale values since the decimal part is significant compared to the

> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> cases to maintain accuracy.


The conversion to int-plus-nano may also carry a cost of accuracy.

90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,
but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more
digits). So, in this case you lose precision with the new code.

Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old
code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

I'm also wondering if it is wise to not always return the same scale type?
What happens if we want to extend this driver to scale a buffered channel?
Honest question! I don't know, but I fear that this patch may make that
step more difficult to take??

Jonathan, do you have any input on that?

Some more examples of problematic properties of this patch:

21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.
But if you reduce the input number, gcd(21837, 24041) -> 29, you have:
753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in
this case you get 0.01568154403. Which is less precise. It is unfortunate
that input that should be easier to scale may yield worse results.

760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is
returned. Which is perhaps not great accuracy, but such is life. However.
761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course
greater, but 8.6e-8 is returned. Which is less than what was returned for
the smaller 760/1373754273 value above.

Some of these objections are related to what I talked about in v7, i.e.:

    Also, changing the calculation so that you get more precision whenever that is
    possible feels dangerous. I fear linearity breaks and that bigger input cause
    smaller output due to rounding if the bigger value has to be rounded down, but
    that this isn't done carefully enough. I.e. attempting to return an exact
    fraction and only falling back to the old code when that is not possible is
    still not safe since the old code isn't careful enough about rounding. I think
    it is really important that bigger input cause bigger (or equal) output.
    Otherwise you might trigger instability in feedback loops should a rescaler be
    involved in a some regulator function.

Sadly, I see no elegant solution to your problem.

One way forward may be to somehow provide information on the expected
input range, and then determine the scaling method based on that
instead of the individual values. But, as indicated, there's no real
elegance in that. It can't be automated...

> Signed-off-by: Liam Beguin <lvb@xiphos.com>

> ---

>  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

>  1 file changed, 25 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

> index c408c4057c08..7304306c9806 100644

> --- a/drivers/iio/afe/iio-rescale.c

> +++ b/drivers/iio/afe/iio-rescale.c

> @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

>  			  int *val, int *val2)

>  {

>  	s64 tmp;

> -	s32 rem;

> +	s32 rem, rem2;

>  	u32 mult;

>  	u32 neg;

>  

> @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

>  		tmp = (s64)*val * 1000000000LL;

>  		tmp = div_s64(tmp, rescale->denominator);

>  		tmp *= rescale->numerator;

> -		tmp = div_s64(tmp, 1000000000LL);

> +

> +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

>  		*val = tmp;

> +

> +		/*

> +		 * For small values, the approximation can be costly,

> +		 * change scale type to maintain accuracy.

> +		 *

> +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> +		 */

> +		if (scale_type == IIO_VAL_FRACTIONAL)

> +			tmp = *val2;

> +		else

> +			tmp = 1 << *val2;

> +

> +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> +			 *val = div_s64_rem(*val, tmp, &rem2);

> +

> +			 *val2 = div_s64(rem, tmp);

> +			 if (rem2)

> +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);


rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine
where 64-bit arithmetic is really expensive? In that case, the above
is broken. The safe route is to do these things as in the existing
code with a cast to s64. But maybe that's just cargo cult crap?

Cheers,
Peter

> +

> +			 return IIO_VAL_INT_PLUS_NANO;

> +		 }

> +

>  		return scale_type;

>  	case IIO_VAL_INT_PLUS_NANO:

>  	case IIO_VAL_INT_PLUS_MICRO:

>
Peter Rosin Aug. 23, 2021, 6:53 a.m. UTC | #4
On 2021-08-21 03:33, Liam Beguin wrote:
> On Fri Aug 20, 2021 at 7:37 PM EDT, kernel test robot wrote:

>> Hi Liam,

>>

>> Thank you for the patch! Yet something to improve:

>>

>> [auto build test ERROR on 6cbb3aa0f9d5d23221df787cf36f74d3866fdb78]

>>

>> url:

>> https://github.com/0day-ci/linux/commits/Liam-Beguin/iio-afe-add-temperature-rescaling-support/20210821-032112

>> base: 6cbb3aa0f9d5d23221df787cf36f74d3866fdb78

>> config: nds32-buildonly-randconfig-r005-20210821 (attached as .config)

>> compiler: nds32le-linux-gcc (GCC) 11.2.0

>> reproduce (this is a W=1 build):

>> wget

>> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross

>> -O ~/bin/make.cross

>> chmod +x ~/bin/make.cross

>> #

>> https://github.com/0day-ci/linux/commit/e5c2e1505fa3f8cf9fe6d3a21f3a5c585efc6dce

>> git remote add linux-review https://github.com/0day-ci/linux

>> git fetch --no-tags linux-review

>> Liam-Beguin/iio-afe-add-temperature-rescaling-support/20210821-032112

>> git checkout e5c2e1505fa3f8cf9fe6d3a21f3a5c585efc6dce

>> # save the attached .config to linux build tree

>> mkdir build_dir

>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross

>> O=build_dir ARCH=nds32 SHELL=/bin/bash

>>

>> If you fix the issue, kindly add following tag as appropriate

>> Reported-by: kernel test robot <lkp@intel.com>

>>

>> All errors (new ones prefixed by >>):

>>

>> nds32le-linux-ld: drivers/iio/afe/iio-rescale.o: in function

>> `rescale_process_scale':

>>>> iio-rescale.c:(.text+0x5f4): undefined reference to `__divdi3'

>>>> nds32le-linux-ld: iio-rescale.c:(.text+0x5f8): undefined reference to `__divdi3'

> 

> My mistake, I'll replace the division by a div64_s64().

> 

> --- a/drivers/iio/afe/iio-rescale.c

> +++ b/drivers/iio/afe/iio-rescale.c

> @@ -53,7 +53,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

>  		else

>  			tmp = 1 << *val2;

>  

> -		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> +		 if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {

>  			*val = div_s64_rem(*val, tmp, &rem2);

>  

>  			*val2 = div_s64(rem, tmp);

> 


At this point in the code, you know that tmp is a 32-bit value. *val
and rem are also 32-bit. It feels like a waste to not exploit that fact
and spend cycles doing lots of pointless 64-bit math on weakish 32-bit
machines.

Cheers,
Peter

> 

> The if statement is also misaligned here. I'll fix that too.

> 

> Thanks,

> Liam

> 

>>

>> ---

>> 0-DAY CI Kernel Test Service, Intel Corporation

>> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

>
Liam Beguin Aug. 24, 2021, 8:28 p.m. UTC | #5
On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:
> [I started to write an answer to your plans in the v7 thread, but didn't

> have time to finish before v8 appeared...]

> 

> On 2021-08-20 21:17, Liam Beguin wrote:

> > From: Liam Beguin <lvb@xiphos.com>

> > 

> > The approximation caused by integer divisions can be costly on smaller

> > scale values since the decimal part is significant compared to the

> > integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> > cases to maintain accuracy.

> 


Hi Peter,

Thanks for taking time to look at this in detail again. I really
appreciate all the feedback you've provided.

> The conversion to int-plus-nano may also carry a cost of accuracy.

> 

> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> digits). So, in this case you lose precision with the new code.

> 

> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> 


I see what you mean here.
I added test cases with these values to see exactly what we get.

Expected rel_ppm < 0, but
    rel_ppm == 1000000

     real=0.000000000
 expected=0.000000033594
# iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509
Expected rel_ppm < 0, but
    rel_ppm == 1000000

     real=0.000000000
 expected=0.000000050318
# iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000


The main issue is that the first two examples return 0 which night be worst
that loosing a little precision.

At the same time, I wonder how "real" these values would be. Having such a
small scale would mean having a large raw value. With 16-bits of resolution,
that would still give about (1 << 16) * 3.3594e-08 = 0.002201616 mV.

We could try to get more precision out of the first division

	tmp = (s64)*val * 1000000000LL;
	tmp = div_s64(tmp, rescale->denominator);
	tmp *= rescale->numerator;
	tmp = div_s64_rem(tmp, 1000000000LL, &rem);

But then, we'd be more likely to overflow. What would be a good middle
ground?

> I'm also wondering if it is wise to not always return the same scale type?

> What happens if we want to extend this driver to scale a buffered channel?

> Honest question! I don't know, but I fear that this patch may make that

> step more difficult to take??


That's a fair point, I didn't know it could be a problem to change
scale.

> 

> Jonathan, do you have any input on that?

> 

> Some more examples of problematic properties of this patch:

> 

> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

> this case you get 0.01568154403. Which is less precise. It is unfortunate

> that input that should be easier to scale may yield worse results.


Expected rel_ppm < 0, but
    rel_ppm == 0

     real=0.015685445
 expected=0.015685446719
# iio_rescale_test_scale: not ok 44 - v8 - 21837/24041 scaled by 427/24727
Expected rel_ppm < 0, but
    rel_ppm == 0

     real=0.015685445
 expected=0.015685446719
# iio_rescale_test_scale: not ok 45 - v8 - 753/829 scaled by 427/24727

It seems like both cases are rounded and give the same result. I do get
your point though, values that could be simplified might loose more
precision because of this change in scale type.

> 

> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

> returned. Which is perhaps not great accuracy, but such is life. However.

> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

> greater, but 8.6e-8 is returned. Which is less than what was returned for

> the smaller 760/1373754273 value above.


Expected rel_ppm < 0, but
    rel_ppm == 1000000

     real=0.000000000
 expected=0.000000086626
# iio_rescale_test_scale: not ok 46 - v8 - 760/1373754273 scaled by 427/2727
Expected rel_ppm < 0, but
    rel_ppm == 1000000

     real=0.000000000
 expected=0.000000086740
# iio_rescale_test_scale: not ok 47 - v8 - 761/1373754273 scaled by 427/2727

We fall into the same case as the first two examples where the real value is
null.

Considering these null values and the possible issue of not always having the
same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO
scale?

> 

> Some of these objections are related to what I talked about in v7, i.e.:

> 

>     Also, changing the calculation so that you get more precision whenever that is

>     possible feels dangerous. I fear linearity breaks and that bigger input cause

>     smaller output due to rounding if the bigger value has to be rounded down, but

>     that this isn't done carefully enough. I.e. attempting to return an exact

>     fraction and only falling back to the old code when that is not possible is

>     still not safe since the old code isn't careful enough about rounding. I think

>     it is really important that bigger input cause bigger (or equal) output.

>     Otherwise you might trigger instability in feedback loops should a rescaler be

>     involved in a some regulator function.


I think I didn't read this closely enought the first time around. I agree that
bigger inputs should cause bigger outputs, especially with these rounding
errors. My original indention was to have all scales withing a tight margin,
that's why I ended up going with ppm for the test cases.

> 

> Sadly, I see no elegant solution to your problem.

> 

> One way forward may be to somehow provide information on the expected

> input range, and then determine the scaling method based on that

> instead of the individual values. But, as indicated, there's no real

> elegance in that. It can't be automated...


I guess the issue with that is that unless it's a user parameter, we're
always going go have these little islands you mentioned in v7...

Would it be viable to guaranty a MICRO precision instead of NANO, and
not have the range parameter?

> 

> > Signed-off-by: Liam Beguin <lvb@xiphos.com>

> > ---

> >  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

> >  1 file changed, 25 insertions(+), 2 deletions(-)

> > 

> > diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

> > index c408c4057c08..7304306c9806 100644

> > --- a/drivers/iio/afe/iio-rescale.c

> > +++ b/drivers/iio/afe/iio-rescale.c

> > @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >  			  int *val, int *val2)

> >  {

> >  	s64 tmp;

> > -	s32 rem;

> > +	s32 rem, rem2;

> >  	u32 mult;

> >  	u32 neg;

> >  

> > @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >  		tmp = (s64)*val * 1000000000LL;

> >  		tmp = div_s64(tmp, rescale->denominator);

> >  		tmp *= rescale->numerator;

> > -		tmp = div_s64(tmp, 1000000000LL);

> > +

> > +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> >  		*val = tmp;

> > +

> > +		/*

> > +		 * For small values, the approximation can be costly,

> > +		 * change scale type to maintain accuracy.

> > +		 *

> > +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> > +		 */

> > +		if (scale_type == IIO_VAL_FRACTIONAL)

> > +			tmp = *val2;

> > +		else

> > +			tmp = 1 << *val2;

> > +

> > +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> > +			 *val = div_s64_rem(*val, tmp, &rem2);

> > +

> > +			 *val2 = div_s64(rem, tmp);

> > +			 if (rem2)

> > +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);

> 

> rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine

> where 64-bit arithmetic is really expensive? In that case, the above

> is broken. The safe route is to do these things as in the existing

> code with a cast to s64. But maybe that's just cargo cult crap?


You're right, this should be

	div_s64((s64)rem2 * 1000000000LL, tmp);

I've been trying th get the kunit tests running on a 32-bit kernel image, but
I'm still having issues with that...

Thanks,
Liam

> 

> Cheers,

> Peter

> 

> > +

> > +			 return IIO_VAL_INT_PLUS_NANO;

> > +		 }

> > +

> >  		return scale_type;

> >  	case IIO_VAL_INT_PLUS_NANO:

> >  	case IIO_VAL_INT_PLUS_MICRO:

> > 

>
Peter Rosin Aug. 26, 2021, 9:53 a.m. UTC | #6
On 2021-08-24 22:28, Liam Beguin wrote:
> On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:

>> [I started to write an answer to your plans in the v7 thread, but didn't

>> have time to finish before v8 appeared...]

>>

>> On 2021-08-20 21:17, Liam Beguin wrote:

>>> From: Liam Beguin <lvb@xiphos.com>

>>>

>>> The approximation caused by integer divisions can be costly on smaller

>>> scale values since the decimal part is significant compared to the

>>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

>>> cases to maintain accuracy.

>>

> 

> Hi Peter,

> 

> Thanks for taking time to look at this in detail again. I really

> appreciate all the feedback you've provided.

> 

>> The conversion to int-plus-nano may also carry a cost of accuracy.

>>

>> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

>> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

>> digits). So, in this case you lose precision with the new code.

>>

>> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

>> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

>>

> 

> I see what you mean here.

> I added test cases with these values to see exactly what we get.


Excellent!

> 

> Expected rel_ppm < 0, but

>     rel_ppm == 1000000

> 

>      real=0.000000000

>  expected=0.000000033594

> # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

> Expected rel_ppm < 0, but

>     rel_ppm == 1000000

> 

>      real=0.000000000

>  expected=0.000000050318

> # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

> 

> 

> The main issue is that the first two examples return 0 which night be worst

> that loosing a little precision.


They shouldn't return zero?

Here's the new code quoted from the test robot (and assuming
a 64-bit machine, thus ignoring the 32-bit problem on line 56).

    36		case IIO_VAL_FRACTIONAL:
    37		case IIO_VAL_FRACTIONAL_LOG2:
    38			tmp = (s64)*val * 1000000000LL;
    39			tmp = div_s64(tmp, rescale->denominator);
    40			tmp *= rescale->numerator;
    41	
    42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);
    43			*val = tmp;
    44	
    45			/*
    46			 * For small values, the approximation can be costly,
    47			 * change scale type to maintain accuracy.
    48			 *
    49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.
    50			 */
    51			if (scale_type == IIO_VAL_FRACTIONAL)
    52				tmp = *val2;
    53			else
    54				tmp = 1 << *val2;
    55	
  > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

    57				 *val = div_s64_rem(*val, tmp, &rem2);
    58	
    59				 *val2 = div_s64(rem, tmp);
    60				 if (rem2)
    61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);
    62	
    63				 return IIO_VAL_INT_PLUS_NANO;
    64			 }
    65	
    66			return scale_type;

When I go through the above manually, I get:

line 
38: tmp = 90000000000    ; 90 * 1000000000
39: tmp = 176817288      ; 90000000000 / 509
40: tmp = 46149312168    ; 176817288 * 261
42: rem = 149312168      ; 46149312168 % 1000000000
    tmp = 46             ; 46149312168 / 1000000000
43: *val = 46
51: if (<fractional>) [yes]
52: tmp = 1373754273
56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]
57: rem2 = 46            ; 46 % 1373754273
    *val = 0             ; 46 / 1373754273
59: *val2 = 0            ; 149312168 / 1373754273
60: if 46 [yes]
61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273
63: return <int-plus-nano> [0.000000033]

and

line 
38: tmp = 100000000000   ; 100 * 1000000000
39: tmp = 14285714       ; 100000000000 / 7000
40: tmp = 54028570348    ; 176817288 * 3782
42: rem = 28570348       ; 54028570348 % 1000000000
    tmp = 54             ; 54028570348 / 1000000000
43: *val = 54
51: if (<fractional>) [no]
54: tmp = 1073741824     ; 1 << 30
56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]
57: rem2 = 54            ; 54 % 1073741824
    *val = 0             ; 54 / 1073741824
59: *val2 = 0            ; 28570348 / 1073741824
60: if 46 [yes]
61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824
63: return <int-plus-nano> [0.000000050]

Why do you get zero, what am I missing?

> At the same time, I wonder how "real" these values would be. Having such a

> small scale would mean having a large raw value. With 16-bits of resolution,

> that would still give about (1 << 16) * 3.3594e-08 = 0.002201616 mV.


If we cap at 16 bits it sounds as if we probably erase some precision
provided by 24-bit ADCs. We have drivers for those. I didn't really
dig that deep in the driver offerings, but I did find a AD7177 ADC
(but no driver) which is 32-bit. If we don't have any 32-bit ADC driver
yet, it's only a matter of time, methinks. I have personally worked
with 24-bit DACs, and needed every last bit...

> We could try to get more precision out of the first division

> 

> 	tmp = (s64)*val * 1000000000LL;

> 	tmp = div_s64(tmp, rescale->denominator);

> 	tmp *= rescale->numerator;

> 	tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> 

> But then, we'd be more likely to overflow. What would be a good middle

> ground?


I don't think we can settle for anything that makes any existing case
worse. That's a regression waiting to happen, and what to do then?

>> I'm also wondering if it is wise to not always return the same scale type?

>> What happens if we want to extend this driver to scale a buffered channel?

>> Honest question! I don't know, but I fear that this patch may make that

>> step more difficult to take??

> 

> That's a fair point, I didn't know it could be a problem to change

> scale.


I don't *know* either? But it would not be completely alien to me if
the buffered case assumes "raw" numbers, and that there is little room
for "meta-data" with each sample.

>>

>> Jonathan, do you have any input on that?

>>

>> Some more examples of problematic properties of this patch:

>>

>> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

>> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

>> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

>> this case you get 0.01568154403. Which is less precise. It is unfortunate

>> that input that should be easier to scale may yield worse results.

> 

> Expected rel_ppm < 0, but

>     rel_ppm == 0

> 

>      real=0.015685445

>  expected=0.015685446719

> # iio_rescale_test_scale: not ok 44 - v8 - 21837/24041 scaled by 427/24727

> Expected rel_ppm < 0, but

>     rel_ppm == 0

> 

>      real=0.015685445

>  expected=0.015685446719

> # iio_rescale_test_scale: not ok 45 - v8 - 753/829 scaled by 427/24727

> 

> It seems like both cases are rounded and give the same result. I do get

> your point though, values that could be simplified might loose more

> precision because of this change in scale type.


I aimed at this:

line
38: tmp = 21837000000000 ; 21837 * 1000000000
39: tmp = 883123710      ; 21837000000000 / 24727
40: tmp = 377093824170   ; 883123710 * 427
42: rem = 93824170       ; 377093824170 % 1000000000
    tmp = 377            ; 377093824170 / 1000000000
43: *val = 377
51: if (<fractional>) [yes]
52: tmp = 24041
56: if (149312168 > 10000000 && 377/24041 < 100) [yes && yes]
57: rem2 = 377           ; 377 % 24041
    *val = 0             ; 377 / 24041
59: *val2 = 3902         ; 93824170 / 24041
60: if 377 [yes]
61: *val2 = 15685446     ; 3902 + 377 * 1000000000 / 24041
63: return <int-plus-nano> [0.0015685446]

Why does the test output a 5 at the end and not a 6? It's all
integer arithmetic so there is no room for rounding issues.

and

line 
38: tmp = 753000000000   ; 753 * 1000000000
39: tmp = 30452541       ; 753000000000 / 24727
40: tmp = 13003235007    ; 30452541 * 427
42: rem = 3235007        ; 13003235007 % 1000000000
    tmp = 13             ; 13003235007 / 1000000000
43: *val = 13
51: if (<fractional>) [yes]
52: tmp = 829
56: if (3235007 > 10000000 && 13/829 < 100) [no && yes]
66: return <fractional> [13/829 ~= 0.015681544]

0.015681544 is pretty different from 0.015685446

Again, I don't understand what's going on.

>>

>> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

>> returned. Which is perhaps not great accuracy, but such is life. However.

>> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

>> greater, but 8.6e-8 is returned. Which is less than what was returned for

>> the smaller 760/1373754273 value above.

> 

> Expected rel_ppm < 0, but

>     rel_ppm == 1000000

> 

>      real=0.000000000

>  expected=0.000000086626

> # iio_rescale_test_scale: not ok 46 - v8 - 760/1373754273 scaled by 427/2727

> Expected rel_ppm < 0, but

>     rel_ppm == 1000000

> 

>      real=0.000000000

>  expected=0.000000086740

> # iio_rescale_test_scale: not ok 47 - v8 - 761/1373754273 scaled by 427/2727

> 

> We fall into the same case as the first two examples where the real value is

> null.


I aimed at

line
38: tmp = 760000000000   ; 760 * 1000000000
39: tmp = 278694536      ; 760000000000 / 2727
40: tmp = 119002566872   ; 278694536 * 427
42: rem = 2566872        ; 119002566872 % 1000000000
    tmp = 119            ; 119002566872 / 1000000000
43: *val = 119
51: if (<fractional>) [yes]
52: tmp = 1373754273
56: if (2566872 > 10000000 && 119/1373754273 < 100) [no && yes]
66: return <fractional> [119/1373754273 ~= 0.000000086624]

and

line
38: tmp = 761000000000   ; 761 * 1000000000
39: tmp = 279061239      ; 761000000000 / 2727
40: tmp = 119159149053   ; 279061239 * 427
42: rem = 159149053      ; 119159149053 % 1000000000
    tmp = 119            ; 119159149053 / 1000000000
43: *val = 119
51: if (<fractional>) [yes]
52: tmp = 1373754273
56: if (159149053 > 10000000 && 119/1373754273 < 100) [yes && yes]
57: rem2 = 119           ; 119 % 1373754273
    *val = 0             ; 119 / 1373754273
59: *val2 = 0            ; 159149053 / 1373754273
60: if 119 [yes]
61: *val2 = 86           ; 0 + 119 * 1000000000 / 1373754273
63: return <int-plus-nano> [0.000000086]

> Considering these null values and the possible issue of not always having the

> same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

> scale?


No, that absolutely kills the precision for small values that are much
better off as-is. The closer you get to zero, the more the conversion
to int-plus-nano hurts, relatively speaking.

>>

>> Some of these objections are related to what I talked about in v7, i.e.:

>>

>>     Also, changing the calculation so that you get more precision whenever that is

>>     possible feels dangerous. I fear linearity breaks and that bigger input cause

>>     smaller output due to rounding if the bigger value has to be rounded down, but

>>     that this isn't done carefully enough. I.e. attempting to return an exact

>>     fraction and only falling back to the old code when that is not possible is

>>     still not safe since the old code isn't careful enough about rounding. I think

>>     it is really important that bigger input cause bigger (or equal) output.

>>     Otherwise you might trigger instability in feedback loops should a rescaler be

>>     involved in a some regulator function.

> 

> I think I didn't read this closely enought the first time around. I agree that

> bigger inputs should cause bigger outputs, especially with these rounding

> errors. My original indention was to have all scales withing a tight margin,

> that's why I ended up going with ppm for the test cases.

> 

>>

>> Sadly, I see no elegant solution to your problem.

>>

>> One way forward may be to somehow provide information on the expected

>> input range, and then determine the scaling method based on that

>> instead of the individual values. But, as indicated, there's no real

>> elegance in that. It can't be automated...

> 

> I guess the issue with that is that unless it's a user parameter, we're

> always going go have these little islands you mentioned in v7...

> 

> Would it be viable to guaranty a MICRO precision instead of NANO, and

> not have the range parameter?


I don't get what you mean here? Returning int-plus-micro can't be it,
since that would be completely pointless and only make it easier to
trigger accuracy problems of the conversion. However, I feel that any
attempt to shift digits but still having the same general approch will
just change the size and position of the islands, and thus not fix the
fundamental problematic border between land and water.

Cheers,
Peter

>>

>>> Signed-off-by: Liam Beguin <lvb@xiphos.com>

>>> ---

>>>  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

>>>  1 file changed, 25 insertions(+), 2 deletions(-)

>>>

>>> diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

>>> index c408c4057c08..7304306c9806 100644

>>> --- a/drivers/iio/afe/iio-rescale.c

>>> +++ b/drivers/iio/afe/iio-rescale.c

>>> @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

>>>  			  int *val, int *val2)

>>>  {

>>>  	s64 tmp;

>>> -	s32 rem;

>>> +	s32 rem, rem2;

>>>  	u32 mult;

>>>  	u32 neg;

>>>  

>>> @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

>>>  		tmp = (s64)*val * 1000000000LL;

>>>  		tmp = div_s64(tmp, rescale->denominator);

>>>  		tmp *= rescale->numerator;

>>> -		tmp = div_s64(tmp, 1000000000LL);

>>> +

>>> +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

>>>  		*val = tmp;

>>> +

>>> +		/*

>>> +		 * For small values, the approximation can be costly,

>>> +		 * change scale type to maintain accuracy.

>>> +		 *

>>> +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

>>> +		 */

>>> +		if (scale_type == IIO_VAL_FRACTIONAL)

>>> +			tmp = *val2;

>>> +		else

>>> +			tmp = 1 << *val2;

>>> +

>>> +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

>>> +			 *val = div_s64_rem(*val, tmp, &rem2);

>>> +

>>> +			 *val2 = div_s64(rem, tmp);

>>> +			 if (rem2)

>>> +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);

>>

>> rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine

>> where 64-bit arithmetic is really expensive? In that case, the above

>> is broken. The safe route is to do these things as in the existing

>> code with a cast to s64. But maybe that's just cargo cult crap?

> 

> You're right, this should be

> 

> 	div_s64((s64)rem2 * 1000000000LL, tmp);

> 

> I've been trying th get the kunit tests running on a 32-bit kernel image, but

> I'm still having issues with that...

> 

> Thanks,

> Liam

> 

>>

>> Cheers,

>> Peter

>>

>>> +

>>> +			 return IIO_VAL_INT_PLUS_NANO;

>>> +		 }

>>> +

>>>  		return scale_type;

>>>  	case IIO_VAL_INT_PLUS_NANO:

>>>  	case IIO_VAL_INT_PLUS_MICRO:

>>>

>>
Liam Beguin Aug. 29, 2021, 4:41 a.m. UTC | #7
On Thu, Aug 26, 2021 at 11:53:14AM +0200, Peter Rosin wrote:
> On 2021-08-24 22:28, Liam Beguin wrote:

> > On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:

> >> [I started to write an answer to your plans in the v7 thread, but didn't

> >> have time to finish before v8 appeared...]

> >>

> >> On 2021-08-20 21:17, Liam Beguin wrote:

> >>> From: Liam Beguin <lvb@xiphos.com>

> >>>

> >>> The approximation caused by integer divisions can be costly on smaller

> >>> scale values since the decimal part is significant compared to the

> >>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> >>> cases to maintain accuracy.

> >>

> > 

> > Hi Peter,

> > 

> > Thanks for taking time to look at this in detail again. I really

> > appreciate all the feedback you've provided.

> > 

> >> The conversion to int-plus-nano may also carry a cost of accuracy.

> >>

> >> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> >> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> >> digits). So, in this case you lose precision with the new code.

> >>

> >> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> >> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> >>

> > 

> > I see what you mean here.

> > I added test cases with these values to see exactly what we get.

> 

> Excellent!

> 

> > 

> > Expected rel_ppm < 0, but

> >     rel_ppm == 1000000

> > 

> >      real=0.000000000

> >  expected=0.000000033594

> > # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

> > Expected rel_ppm < 0, but

> >     rel_ppm == 1000000

> > 

> >      real=0.000000000

> >  expected=0.000000050318

> > # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

> > 

> > 

> > The main issue is that the first two examples return 0 which night be worst

> > that loosing a little precision.

> 

> They shouldn't return zero?

> 

> Here's the new code quoted from the test robot (and assuming

> a 64-bit machine, thus ignoring the 32-bit problem on line 56).

> 

>     36		case IIO_VAL_FRACTIONAL:

>     37		case IIO_VAL_FRACTIONAL_LOG2:

>     38			tmp = (s64)*val * 1000000000LL;

>     39			tmp = div_s64(tmp, rescale->denominator);

>     40			tmp *= rescale->numerator;

>     41	

>     42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);

>     43			*val = tmp;

>     44	

>     45			/*

>     46			 * For small values, the approximation can be costly,

>     47			 * change scale type to maintain accuracy.

>     48			 *

>     49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

>     50			 */

>     51			if (scale_type == IIO_VAL_FRACTIONAL)

>     52				tmp = *val2;

>     53			else

>     54				tmp = 1 << *val2;

>     55	

>   > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

>     57				 *val = div_s64_rem(*val, tmp, &rem2);

>     58	

>     59				 *val2 = div_s64(rem, tmp);

>     60				 if (rem2)

>     61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);

>     62	

>     63				 return IIO_VAL_INT_PLUS_NANO;

>     64			 }

>     65	

>     66			return scale_type;

> 

> When I go through the above manually, I get:

> 

> line 

> 38: tmp = 90000000000    ; 90 * 1000000000

> 39: tmp = 176817288      ; 90000000000 / 509

> 40: tmp = 46149312168    ; 176817288 * 261

> 42: rem = 149312168      ; 46149312168 % 1000000000

>     tmp = 46             ; 46149312168 / 1000000000

> 43: *val = 46

> 51: if (<fractional>) [yes]

> 52: tmp = 1373754273

> 56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]

> 57: rem2 = 46            ; 46 % 1373754273

>     *val = 0             ; 46 / 1373754273

> 59: *val2 = 0            ; 149312168 / 1373754273

> 60: if 46 [yes]

> 61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273

> 63: return <int-plus-nano> [0.000000033]

> 

> and

> 

> line 

> 38: tmp = 100000000000   ; 100 * 1000000000

> 39: tmp = 14285714       ; 100000000000 / 7000

> 40: tmp = 54028570348    ; 176817288 * 3782

> 42: rem = 28570348       ; 54028570348 % 1000000000

>     tmp = 54             ; 54028570348 / 1000000000

> 43: *val = 54

> 51: if (<fractional>) [no]

> 54: tmp = 1073741824     ; 1 << 30

> 56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]

> 57: rem2 = 54            ; 54 % 1073741824

>     *val = 0             ; 54 / 1073741824

> 59: *val2 = 0            ; 28570348 / 1073741824

> 60: if 46 [yes]

> 61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824

> 63: return <int-plus-nano> [0.000000050]

> 

> Why do you get zero, what am I missing?


So... It turns out, I swapped schan and rescaler values which gives us:

numerator = 90
denominator = 1373754273
schan_val = 261
schan_val2 = 509

line
38: tmp = 261000000000   ; 261 * 1000000000
39: tmp = 189            ; 261000000000 / 1373754273
40: tmp = 17010          ; 189 * 90
42: rem = 17010          ; 17010 % 1000000000
    tmp = 0              ; 17010 / 1000000000
43: *val = 0
51: if (<fractional>) [yes]
52: tmp = 509
56: if (17010 > 10000000 && 0/509 < 100) [no && yes]
66: *val = 0
    *val2 = 509
    return <fractional> [0.000000000]

Swapping back the values, I get the same results as you!

Also, replacing line 56 from the snippet above with

	- if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {
	+ if (abs(rem)) {

Fixes these precision errors. It also prevents us from returning
different scales if we swap the two divisions (schan and rescaler
parameters).

> 

> > At the same time, I wonder how "real" these values would be. Having such a

> > small scale would mean having a large raw value. With 16-bits of resolution,

> > that would still give about (1 << 16) * 3.3594e-08 = 0.002201616 mV.

> 

> If we cap at 16 bits it sounds as if we probably erase some precision

> provided by 24-bit ADCs. We have drivers for those. I didn't really

> dig that deep in the driver offerings, but I did find a AD7177 ADC

> (but no driver) which is 32-bit. If we don't have any 32-bit ADC driver

> yet, it's only a matter of time, methinks. I have personally worked

> with 24-bit DACs, and needed every last bit...

> 


I was only using 16-bits as an example, but I guess you're right, these
values do start to make sense when you're looking at 24-bit and 32-bit
ADCs.

> > We could try to get more precision out of the first division

> > 

> > 	tmp = (s64)*val * 1000000000LL;

> > 	tmp = div_s64(tmp, rescale->denominator);

> > 	tmp *= rescale->numerator;

> > 	tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> > 

> > But then, we'd be more likely to overflow. What would be a good middle

> > ground?

> 

> I don't think we can settle for anything that makes any existing case

> worse. That's a regression waiting to happen, and what to do then?

> 


Agreed, and looking at this more, there's still ways to improve without
having to compromise.
Hopefully adding the test suite will make it easier to catch potential
regressions in the future :-)

> >> I'm also wondering if it is wise to not always return the same scale type?

> >> What happens if we want to extend this driver to scale a buffered channel?

> >> Honest question! I don't know, but I fear that this patch may make that

> >> step more difficult to take??

> > 

> > That's a fair point, I didn't know it could be a problem to change

> > scale.

> 

> I don't *know* either? But it would not be completely alien to me if

> the buffered case assumes "raw" numbers, and that there is little room

> for "meta-data" with each sample.

> 

> >>

> >> Jonathan, do you have any input on that?

> >>

> >> Some more examples of problematic properties of this patch:

> >>

> >> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

> >> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

> >> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

> >> this case you get 0.01568154403. Which is less precise. It is unfortunate

> >> that input that should be easier to scale may yield worse results.

> > 

> > Expected rel_ppm < 0, but

> >     rel_ppm == 0

> > 

> >      real=0.015685445

> >  expected=0.015685446719

> > # iio_rescale_test_scale: not ok 44 - v8 - 21837/24041 scaled by 427/24727

> > Expected rel_ppm < 0, but

> >     rel_ppm == 0

> > 

> >      real=0.015685445

> >  expected=0.015685446719

> > # iio_rescale_test_scale: not ok 45 - v8 - 753/829 scaled by 427/24727

> > 

> > It seems like both cases are rounded and give the same result. I do get

> > your point though, values that could be simplified might loose more

> > precision because of this change in scale type.

> 

> I aimed at this:

> 

> line

> 38: tmp = 21837000000000 ; 21837 * 1000000000

> 39: tmp = 883123710      ; 21837000000000 / 24727

> 40: tmp = 377093824170   ; 883123710 * 427

> 42: rem = 93824170       ; 377093824170 % 1000000000

>     tmp = 377            ; 377093824170 / 1000000000

> 43: *val = 377

> 51: if (<fractional>) [yes]

> 52: tmp = 24041

> 56: if (149312168 > 10000000 && 377/24041 < 100) [yes && yes]

> 57: rem2 = 377           ; 377 % 24041

>     *val = 0             ; 377 / 24041

> 59: *val2 = 3902         ; 93824170 / 24041

> 60: if 377 [yes]

> 61: *val2 = 15685446     ; 3902 + 377 * 1000000000 / 24041

> 63: return <int-plus-nano> [0.0015685446]

> 

> Why does the test output a 5 at the end and not a 6? It's all

> integer arithmetic so there is no room for rounding issues.

> 

> and

> 

> line 

> 38: tmp = 753000000000   ; 753 * 1000000000

> 39: tmp = 30452541       ; 753000000000 / 24727

> 40: tmp = 13003235007    ; 30452541 * 427

> 42: rem = 3235007        ; 13003235007 % 1000000000

>     tmp = 13             ; 13003235007 / 1000000000

> 43: *val = 13

> 51: if (<fractional>) [yes]

> 52: tmp = 829

> 56: if (3235007 > 10000000 && 13/829 < 100) [no && yes]

> 66: return <fractional> [13/829 ~= 0.015681544]

> 

> 0.015681544 is pretty different from 0.015685446

> 

> Again, I don't understand what's going on.

> 

> >>

> >> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

> >> returned. Which is perhaps not great accuracy, but such is life. However.

> >> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

> >> greater, but 8.6e-8 is returned. Which is less than what was returned for

> >> the smaller 760/1373754273 value above.

> > 

> > Expected rel_ppm < 0, but

> >     rel_ppm == 1000000

> > 

> >      real=0.000000000

> >  expected=0.000000086626

> > # iio_rescale_test_scale: not ok 46 - v8 - 760/1373754273 scaled by 427/2727

> > Expected rel_ppm < 0, but

> >     rel_ppm == 1000000

> > 

> >      real=0.000000000

> >  expected=0.000000086740

> > # iio_rescale_test_scale: not ok 47 - v8 - 761/1373754273 scaled by 427/2727

> > 

> > We fall into the same case as the first two examples where the real value is

> > null.

> 

> I aimed at

> 

> line

> 38: tmp = 760000000000   ; 760 * 1000000000

> 39: tmp = 278694536      ; 760000000000 / 2727

> 40: tmp = 119002566872   ; 278694536 * 427

> 42: rem = 2566872        ; 119002566872 % 1000000000

>     tmp = 119            ; 119002566872 / 1000000000

> 43: *val = 119

> 51: if (<fractional>) [yes]

> 52: tmp = 1373754273

> 56: if (2566872 > 10000000 && 119/1373754273 < 100) [no && yes]

> 66: return <fractional> [119/1373754273 ~= 0.000000086624]

> 

> and

> 

> line

> 38: tmp = 761000000000   ; 761 * 1000000000

> 39: tmp = 279061239      ; 761000000000 / 2727

> 40: tmp = 119159149053   ; 279061239 * 427

> 42: rem = 159149053      ; 119159149053 % 1000000000

>     tmp = 119            ; 119159149053 / 1000000000

> 43: *val = 119

> 51: if (<fractional>) [yes]

> 52: tmp = 1373754273

> 56: if (159149053 > 10000000 && 119/1373754273 < 100) [yes && yes]

> 57: rem2 = 119           ; 119 % 1373754273

>     *val = 0             ; 119 / 1373754273

> 59: *val2 = 0            ; 159149053 / 1373754273

> 60: if 119 [yes]

> 61: *val2 = 86           ; 0 + 119 * 1000000000 / 1373754273

> 63: return <int-plus-nano> [0.000000086]

> 

> > Considering these null values and the possible issue of not always having the

> > same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

> > scale?

> 

> No, that absolutely kills the precision for small values that are much

> better off as-is. The closer you get to zero, the more the conversion

> to int-plus-nano hurts, relatively speaking.


I'm not sure I understand what you mean. The point of switching to
IIO_VAL_INT_PLUS_NANO at the moment is to get more precision on small
values. Am I missing something?

> 

> >>

> >> Some of these objections are related to what I talked about in v7, i.e.:

> >>

> >>     Also, changing the calculation so that you get more precision whenever that is

> >>     possible feels dangerous. I fear linearity breaks and that bigger input cause

> >>     smaller output due to rounding if the bigger value has to be rounded down, but

> >>     that this isn't done carefully enough. I.e. attempting to return an exact

> >>     fraction and only falling back to the old code when that is not possible is

> >>     still not safe since the old code isn't careful enough about rounding. I think

> >>     it is really important that bigger input cause bigger (or equal) output.

> >>     Otherwise you might trigger instability in feedback loops should a rescaler be

> >>     involved in a some regulator function.

> > 

> > I think I didn't read this closely enought the first time around. I agree that

> > bigger inputs should cause bigger outputs, especially with these rounding

> > errors. My original indention was to have all scales withing a tight margin,

> > that's why I ended up going with ppm for the test cases.

> > 

> >>

> >> Sadly, I see no elegant solution to your problem.

> >>

> >> One way forward may be to somehow provide information on the expected

> >> input range, and then determine the scaling method based on that

> >> instead of the individual values. But, as indicated, there's no real

> >> elegance in that. It can't be automated...

> > 

> > I guess the issue with that is that unless it's a user parameter, we're

> > always going go have these little islands you mentioned in v7...

> > 

> > Would it be viable to guaranty a MICRO precision instead of NANO, and

> > not have the range parameter?

> 

> I don't get what you mean here? Returning int-plus-micro can't be it,

> since that would be completely pointless and only make it easier to

> trigger accuracy problems of the conversion. However, I feel that any

> attempt to shift digits but still having the same general approch will

> just change the size and position of the islands, and thus not fix the

> fundamental problematic border between land and water.


My apologies, discard this last comment. I was suggesting to guaranty
less precision, but consistent over the full range. I don't believe
that's a viable option.

Thanks again for your time,
Liam

> 

> Cheers,

> Peter

> 

> >>

> >>> Signed-off-by: Liam Beguin <lvb@xiphos.com>

> >>> ---

> >>>  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

> >>>  1 file changed, 25 insertions(+), 2 deletions(-)

> >>>

> >>> diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

> >>> index c408c4057c08..7304306c9806 100644

> >>> --- a/drivers/iio/afe/iio-rescale.c

> >>> +++ b/drivers/iio/afe/iio-rescale.c

> >>> @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >>>  			  int *val, int *val2)

> >>>  {

> >>>  	s64 tmp;

> >>> -	s32 rem;

> >>> +	s32 rem, rem2;

> >>>  	u32 mult;

> >>>  	u32 neg;

> >>>  

> >>> @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >>>  		tmp = (s64)*val * 1000000000LL;

> >>>  		tmp = div_s64(tmp, rescale->denominator);

> >>>  		tmp *= rescale->numerator;

> >>> -		tmp = div_s64(tmp, 1000000000LL);

> >>> +

> >>> +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> >>>  		*val = tmp;

> >>> +

> >>> +		/*

> >>> +		 * For small values, the approximation can be costly,

> >>> +		 * change scale type to maintain accuracy.

> >>> +		 *

> >>> +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> >>> +		 */

> >>> +		if (scale_type == IIO_VAL_FRACTIONAL)

> >>> +			tmp = *val2;

> >>> +		else

> >>> +			tmp = 1 << *val2;

> >>> +

> >>> +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> >>> +			 *val = div_s64_rem(*val, tmp, &rem2);

> >>> +

> >>> +			 *val2 = div_s64(rem, tmp);

> >>> +			 if (rem2)

> >>> +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);

> >>

> >> rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine

> >> where 64-bit arithmetic is really expensive? In that case, the above

> >> is broken. The safe route is to do these things as in the existing

> >> code with a cast to s64. But maybe that's just cargo cult crap?

> > 

> > You're right, this should be

> > 

> > 	div_s64((s64)rem2 * 1000000000LL, tmp);

> > 

> > I've been trying th get the kunit tests running on a 32-bit kernel image, but

> > I'm still having issues with that...

> > 

> > Thanks,

> > Liam

> > 

> >>

> >> Cheers,

> >> Peter

> >>

> >>> +

> >>> +			 return IIO_VAL_INT_PLUS_NANO;

> >>> +		 }

> >>> +

> >>>  		return scale_type;

> >>>  	case IIO_VAL_INT_PLUS_NANO:

> >>>  	case IIO_VAL_INT_PLUS_MICRO:

> >>>

> >>
Jonathan Cameron Aug. 30, 2021, 11:22 a.m. UTC | #8
On Mon, 23 Aug 2021 00:18:55 +0200
Peter Rosin <peda@axentia.se> wrote:

> [I started to write an answer to your plans in the v7 thread, but didn't

> have time to finish before v8 appeared...]

> 

> On 2021-08-20 21:17, Liam Beguin wrote:

> > From: Liam Beguin <lvb@xiphos.com>

> > 

> > The approximation caused by integer divisions can be costly on smaller

> > scale values since the decimal part is significant compared to the

> > integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> > cases to maintain accuracy.  

> 

> The conversion to int-plus-nano may also carry a cost of accuracy.

> 

> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> digits). So, in this case you lose precision with the new code.

> 

> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> 

> I'm also wondering if it is wise to not always return the same scale type?

> What happens if we want to extend this driver to scale a buffered channel?

> Honest question! I don't know, but I fear that this patch may make that

> step more difficult to take??

> 

> Jonathan, do you have any input on that?


I'm a bit lost.  As things currently stand IIO buffered data flows all use
_RAW.  It's either up to userspace or the inkernel user to query scale
and use that to compute the appropriate _processed values.  There have been
various discussions over the years on how to add metadata but it's tricky
without adding significant complexity and for vast majority of usecases not
necessary.  Given the rescaler copes with _raw and _processed inputs, we
would only support buffered flows if using the _raw ones.

If nothing changes in configuration of the rescaler, the scale should be
static for a given device.  What format that 'scale' takes is something
that userspace code or inkernel users should cope fine with given they
need to do that anyway for different devices.

Jonathan


> 

> Some more examples of problematic properties of this patch:

> 

> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

> this case you get 0.01568154403. Which is less precise. It is unfortunate

> that input that should be easier to scale may yield worse results.

> 

> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

> returned. Which is perhaps not great accuracy, but such is life. However.

> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

> greater, but 8.6e-8 is returned. Which is less than what was returned for

> the smaller 760/1373754273 value above.

> 

> Some of these objections are related to what I talked about in v7, i.e.:

> 

>     Also, changing the calculation so that you get more precision whenever that is

>     possible feels dangerous. I fear linearity breaks and that bigger input cause

>     smaller output due to rounding if the bigger value has to be rounded down, but

>     that this isn't done carefully enough. I.e. attempting to return an exact

>     fraction and only falling back to the old code when that is not possible is

>     still not safe since the old code isn't careful enough about rounding. I think

>     it is really important that bigger input cause bigger (or equal) output.

>     Otherwise you might trigger instability in feedback loops should a rescaler be

>     involved in a some regulator function.

> 

> Sadly, I see no elegant solution to your problem.

> 

> One way forward may be to somehow provide information on the expected

> input range, and then determine the scaling method based on that

> instead of the individual values. But, as indicated, there's no real

> elegance in that. It can't be automated...

> 

> > Signed-off-by: Liam Beguin <lvb@xiphos.com>

> > ---

> >  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

> >  1 file changed, 25 insertions(+), 2 deletions(-)

> > 

> > diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

> > index c408c4057c08..7304306c9806 100644

> > --- a/drivers/iio/afe/iio-rescale.c

> > +++ b/drivers/iio/afe/iio-rescale.c

> > @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >  			  int *val, int *val2)

> >  {

> >  	s64 tmp;

> > -	s32 rem;

> > +	s32 rem, rem2;

> >  	u32 mult;

> >  	u32 neg;

> >  

> > @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> >  		tmp = (s64)*val * 1000000000LL;

> >  		tmp = div_s64(tmp, rescale->denominator);

> >  		tmp *= rescale->numerator;

> > -		tmp = div_s64(tmp, 1000000000LL);

> > +

> > +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> >  		*val = tmp;

> > +

> > +		/*

> > +		 * For small values, the approximation can be costly,

> > +		 * change scale type to maintain accuracy.

> > +		 *

> > +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> > +		 */

> > +		if (scale_type == IIO_VAL_FRACTIONAL)

> > +			tmp = *val2;

> > +		else

> > +			tmp = 1 << *val2;

> > +

> > +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> > +			 *val = div_s64_rem(*val, tmp, &rem2);

> > +

> > +			 *val2 = div_s64(rem, tmp);

> > +			 if (rem2)

> > +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);  

> 

> rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine

> where 64-bit arithmetic is really expensive? In that case, the above

> is broken. The safe route is to do these things as in the existing

> code with a cast to s64. But maybe that's just cargo cult crap?

> 

> Cheers,

> Peter

> 

> > +

> > +			 return IIO_VAL_INT_PLUS_NANO;

> > +		 }

> > +

> >  		return scale_type;

> >  	case IIO_VAL_INT_PLUS_NANO:

> >  	case IIO_VAL_INT_PLUS_MICRO:

> >
Jonathan Cameron Aug. 30, 2021, 11:27 a.m. UTC | #9
On Sun, 29 Aug 2021 00:41:21 -0400
Liam Beguin <liambeguin@gmail.com> wrote:

> On Thu, Aug 26, 2021 at 11:53:14AM +0200, Peter Rosin wrote:

> > On 2021-08-24 22:28, Liam Beguin wrote:  

> > > On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:  

> > >> [I started to write an answer to your plans in the v7 thread, but didn't

> > >> have time to finish before v8 appeared...]

> > >>

> > >> On 2021-08-20 21:17, Liam Beguin wrote:  

> > >>> From: Liam Beguin <lvb@xiphos.com>

> > >>>

> > >>> The approximation caused by integer divisions can be costly on smaller

> > >>> scale values since the decimal part is significant compared to the

> > >>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> > >>> cases to maintain accuracy.  

> > >>  

> > > 

> > > Hi Peter,

> > > 

> > > Thanks for taking time to look at this in detail again. I really

> > > appreciate all the feedback you've provided.

> > >   

> > >> The conversion to int-plus-nano may also carry a cost of accuracy.

> > >>

> > >> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> > >> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> > >> digits). So, in this case you lose precision with the new code.

> > >>

> > >> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> > >> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> > >>  

> > > 

> > > I see what you mean here.

> > > I added test cases with these values to see exactly what we get.  

> > 

> > Excellent!

> >   

> > > 

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 1000000

> > > 

> > >      real=0.000000000

> > >  expected=0.000000033594

> > > # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 1000000

> > > 

> > >      real=0.000000000

> > >  expected=0.000000050318

> > > # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

> > > 

> > > 

> > > The main issue is that the first two examples return 0 which night be worst

> > > that loosing a little precision.  

> > 

> > They shouldn't return zero?

> > 

> > Here's the new code quoted from the test robot (and assuming

> > a 64-bit machine, thus ignoring the 32-bit problem on line 56).

> > 

> >     36		case IIO_VAL_FRACTIONAL:

> >     37		case IIO_VAL_FRACTIONAL_LOG2:

> >     38			tmp = (s64)*val * 1000000000LL;

> >     39			tmp = div_s64(tmp, rescale->denominator);

> >     40			tmp *= rescale->numerator;

> >     41	

> >     42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> >     43			*val = tmp;

> >     44	

> >     45			/*

> >     46			 * For small values, the approximation can be costly,

> >     47			 * change scale type to maintain accuracy.

> >     48			 *

> >     49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> >     50			 */

> >     51			if (scale_type == IIO_VAL_FRACTIONAL)

> >     52				tmp = *val2;

> >     53			else

> >     54				tmp = 1 << *val2;

> >     55	  

> >   > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {  

> >     57				 *val = div_s64_rem(*val, tmp, &rem2);

> >     58	

> >     59				 *val2 = div_s64(rem, tmp);

> >     60				 if (rem2)

> >     61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);

> >     62	

> >     63				 return IIO_VAL_INT_PLUS_NANO;

> >     64			 }

> >     65	

> >     66			return scale_type;

> > 

> > When I go through the above manually, I get:

> > 

> > line 

> > 38: tmp = 90000000000    ; 90 * 1000000000

> > 39: tmp = 176817288      ; 90000000000 / 509

> > 40: tmp = 46149312168    ; 176817288 * 261

> > 42: rem = 149312168      ; 46149312168 % 1000000000

> >     tmp = 46             ; 46149312168 / 1000000000

> > 43: *val = 46

> > 51: if (<fractional>) [yes]

> > 52: tmp = 1373754273

> > 56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]

> > 57: rem2 = 46            ; 46 % 1373754273

> >     *val = 0             ; 46 / 1373754273

> > 59: *val2 = 0            ; 149312168 / 1373754273

> > 60: if 46 [yes]

> > 61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273

> > 63: return <int-plus-nano> [0.000000033]

> > 

> > and

> > 

> > line 

> > 38: tmp = 100000000000   ; 100 * 1000000000

> > 39: tmp = 14285714       ; 100000000000 / 7000

> > 40: tmp = 54028570348    ; 176817288 * 3782

> > 42: rem = 28570348       ; 54028570348 % 1000000000

> >     tmp = 54             ; 54028570348 / 1000000000

> > 43: *val = 54

> > 51: if (<fractional>) [no]

> > 54: tmp = 1073741824     ; 1 << 30

> > 56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]

> > 57: rem2 = 54            ; 54 % 1073741824

> >     *val = 0             ; 54 / 1073741824

> > 59: *val2 = 0            ; 28570348 / 1073741824

> > 60: if 46 [yes]

> > 61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824

> > 63: return <int-plus-nano> [0.000000050]

> > 

> > Why do you get zero, what am I missing?  

> 

> So... It turns out, I swapped schan and rescaler values which gives us:

> 

> numerator = 90

> denominator = 1373754273

> schan_val = 261

> schan_val2 = 509

> 

> line

> 38: tmp = 261000000000   ; 261 * 1000000000

> 39: tmp = 189            ; 261000000000 / 1373754273

> 40: tmp = 17010          ; 189 * 90

> 42: rem = 17010          ; 17010 % 1000000000

>     tmp = 0              ; 17010 / 1000000000

> 43: *val = 0

> 51: if (<fractional>) [yes]

> 52: tmp = 509

> 56: if (17010 > 10000000 && 0/509 < 100) [no && yes]

> 66: *val = 0

>     *val2 = 509

>     return <fractional> [0.000000000]

> 

> Swapping back the values, I get the same results as you!

> 

> Also, replacing line 56 from the snippet above with

> 

> 	- if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {

> 	+ if (abs(rem)) {

> 

> Fixes these precision errors. It also prevents us from returning

> different scales if we swap the two divisions (schan and rescaler

> parameters).

> 

> >   

> > > At the same time, I wonder how "real" these values would be. Having such a

> > > small scale would mean having a large raw value. With 16-bits of resolution,

> > > that would still give about (1 << 16) * 3.3594e-08 = 0.002201616 mV.  

> > 

> > If we cap at 16 bits it sounds as if we probably erase some precision

> > provided by 24-bit ADCs. We have drivers for those. I didn't really

> > dig that deep in the driver offerings, but I did find a AD7177 ADC

> > (but no driver) which is 32-bit. If we don't have any 32-bit ADC driver

> > yet, it's only a matter of time, methinks. I have personally worked

> > with 24-bit DACs, and needed every last bit...

> >   

> 

> I was only using 16-bits as an example, but I guess you're right, these

> values do start to make sense when you're looking at 24-bit and 32-bit

> ADCs.


I'd be tempted to be cynical on this.  High resolution devices are rare
as frankly building a low enough noise board to take advantage is hard.
Known users of the AFE infrastructure also rare and so as long as we don't
break any 'current' users via loss of accuracy I'm not that bothered if
we aren't perfect for 32 bit devices. 

I'm guessing we can sometimes sanity check if an overflow will occur
at probe?  Perhaps do that where possible and print something obvious
to the log.  Then someone who needs it can figure out the magic maths
to do this for those high resolution devices!

> 

> > > We could try to get more precision out of the first division

> > > 

> > > 	tmp = (s64)*val * 1000000000LL;

> > > 	tmp = div_s64(tmp, rescale->denominator);

> > > 	tmp *= rescale->numerator;

> > > 	tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> > > 

> > > But then, we'd be more likely to overflow. What would be a good middle

> > > ground?  

> > 

> > I don't think we can settle for anything that makes any existing case

> > worse. That's a regression waiting to happen, and what to do then?

> >   

> 

> Agreed, and looking at this more, there's still ways to improve without

> having to compromise.

> Hopefully adding the test suite will make it easier to catch potential

> regressions in the future :-)


Absolutely.  Have that test suite is great :)

> 

> > >> I'm also wondering if it is wise to not always return the same scale type?

> > >> What happens if we want to extend this driver to scale a buffered channel?

> > >> Honest question! I don't know, but I fear that this patch may make that

> > >> step more difficult to take??  

> > > 

> > > That's a fair point, I didn't know it could be a problem to change

> > > scale.  

> > 

> > I don't *know* either? But it would not be completely alien to me if

> > the buffered case assumes "raw" numbers, and that there is little room

> > for "meta-data" with each sample.


Spot on.  Meta data is a pain so an early design decision in IIO was to
not support it in band.

> >   

> > >>

> > >> Jonathan, do you have any input on that?

> > >>

> > >> Some more examples of problematic properties of this patch:

> > >>

> > >> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

> > >> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

> > >> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

> > >> this case you get 0.01568154403. Which is less precise. It is unfortunate

> > >> that input that should be easier to scale may yield worse results.  

> > > 

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 0

> > > 

> > >      real=0.015685445

> > >  expected=0.015685446719

> > > # iio_rescale_test_scale: not ok 44 - v8 - 21837/24041 scaled by 427/24727

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 0

> > > 

> > >      real=0.015685445

> > >  expected=0.015685446719

> > > # iio_rescale_test_scale: not ok 45 - v8 - 753/829 scaled by 427/24727

> > > 

> > > It seems like both cases are rounded and give the same result. I do get

> > > your point though, values that could be simplified might loose more

> > > precision because of this change in scale type.  

> > 

> > I aimed at this:

> > 

> > line

> > 38: tmp = 21837000000000 ; 21837 * 1000000000

> > 39: tmp = 883123710      ; 21837000000000 / 24727

> > 40: tmp = 377093824170   ; 883123710 * 427

> > 42: rem = 93824170       ; 377093824170 % 1000000000

> >     tmp = 377            ; 377093824170 / 1000000000

> > 43: *val = 377

> > 51: if (<fractional>) [yes]

> > 52: tmp = 24041

> > 56: if (149312168 > 10000000 && 377/24041 < 100) [yes && yes]

> > 57: rem2 = 377           ; 377 % 24041

> >     *val = 0             ; 377 / 24041

> > 59: *val2 = 3902         ; 93824170 / 24041

> > 60: if 377 [yes]

> > 61: *val2 = 15685446     ; 3902 + 377 * 1000000000 / 24041

> > 63: return <int-plus-nano> [0.0015685446]

> > 

> > Why does the test output a 5 at the end and not a 6? It's all

> > integer arithmetic so there is no room for rounding issues.

> > 

> > and

> > 

> > line 

> > 38: tmp = 753000000000   ; 753 * 1000000000

> > 39: tmp = 30452541       ; 753000000000 / 24727

> > 40: tmp = 13003235007    ; 30452541 * 427

> > 42: rem = 3235007        ; 13003235007 % 1000000000

> >     tmp = 13             ; 13003235007 / 1000000000

> > 43: *val = 13

> > 51: if (<fractional>) [yes]

> > 52: tmp = 829

> > 56: if (3235007 > 10000000 && 13/829 < 100) [no && yes]

> > 66: return <fractional> [13/829 ~= 0.015681544]

> > 

> > 0.015681544 is pretty different from 0.015685446

> > 

> > Again, I don't understand what's going on.

> >   

> > >>

> > >> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

> > >> returned. Which is perhaps not great accuracy, but such is life. However.

> > >> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

> > >> greater, but 8.6e-8 is returned. Which is less than what was returned for

> > >> the smaller 760/1373754273 value above.  

> > > 

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 1000000

> > > 

> > >      real=0.000000000

> > >  expected=0.000000086626

> > > # iio_rescale_test_scale: not ok 46 - v8 - 760/1373754273 scaled by 427/2727

> > > Expected rel_ppm < 0, but

> > >     rel_ppm == 1000000

> > > 

> > >      real=0.000000000

> > >  expected=0.000000086740

> > > # iio_rescale_test_scale: not ok 47 - v8 - 761/1373754273 scaled by 427/2727

> > > 

> > > We fall into the same case as the first two examples where the real value is

> > > null.  

> > 

> > I aimed at

> > 

> > line

> > 38: tmp = 760000000000   ; 760 * 1000000000

> > 39: tmp = 278694536      ; 760000000000 / 2727

> > 40: tmp = 119002566872   ; 278694536 * 427

> > 42: rem = 2566872        ; 119002566872 % 1000000000

> >     tmp = 119            ; 119002566872 / 1000000000

> > 43: *val = 119

> > 51: if (<fractional>) [yes]

> > 52: tmp = 1373754273

> > 56: if (2566872 > 10000000 && 119/1373754273 < 100) [no && yes]

> > 66: return <fractional> [119/1373754273 ~= 0.000000086624]

> > 

> > and

> > 

> > line

> > 38: tmp = 761000000000   ; 761 * 1000000000

> > 39: tmp = 279061239      ; 761000000000 / 2727

> > 40: tmp = 119159149053   ; 279061239 * 427

> > 42: rem = 159149053      ; 119159149053 % 1000000000

> >     tmp = 119            ; 119159149053 / 1000000000

> > 43: *val = 119

> > 51: if (<fractional>) [yes]

> > 52: tmp = 1373754273

> > 56: if (159149053 > 10000000 && 119/1373754273 < 100) [yes && yes]

> > 57: rem2 = 119           ; 119 % 1373754273

> >     *val = 0             ; 119 / 1373754273

> > 59: *val2 = 0            ; 159149053 / 1373754273

> > 60: if 119 [yes]

> > 61: *val2 = 86           ; 0 + 119 * 1000000000 / 1373754273

> > 63: return <int-plus-nano> [0.000000086]

> >   

> > > Considering these null values and the possible issue of not always having the

> > > same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

> > > scale?  

> > 

> > No, that absolutely kills the precision for small values that are much

> > better off as-is. The closer you get to zero, the more the conversion

> > to int-plus-nano hurts, relatively speaking.  

> 

> I'm not sure I understand what you mean. The point of switching to

> IIO_VAL_INT_PLUS_NANO at the moment is to get more precision on small

> values. Am I missing something?

> 

> >   

> > >>

> > >> Some of these objections are related to what I talked about in v7, i.e.:

> > >>

> > >>     Also, changing the calculation so that you get more precision whenever that is

> > >>     possible feels dangerous. I fear linearity breaks and that bigger input cause

> > >>     smaller output due to rounding if the bigger value has to be rounded down, but

> > >>     that this isn't done carefully enough. I.e. attempting to return an exact

> > >>     fraction and only falling back to the old code when that is not possible is

> > >>     still not safe since the old code isn't careful enough about rounding. I think

> > >>     it is really important that bigger input cause bigger (or equal) output.

> > >>     Otherwise you might trigger instability in feedback loops should a rescaler be

> > >>     involved in a some regulator function.  

> > > 

> > > I think I didn't read this closely enought the first time around. I agree that

> > > bigger inputs should cause bigger outputs, especially with these rounding

> > > errors. My original indention was to have all scales withing a tight margin,

> > > that's why I ended up going with ppm for the test cases.

> > >   

> > >>

> > >> Sadly, I see no elegant solution to your problem.

> > >>

> > >> One way forward may be to somehow provide information on the expected

> > >> input range, and then determine the scaling method based on that

> > >> instead of the individual values. But, as indicated, there's no real

> > >> elegance in that. It can't be automated...  

> > > 

> > > I guess the issue with that is that unless it's a user parameter, we're

> > > always going go have these little islands you mentioned in v7...

> > > 

> > > Would it be viable to guaranty a MICRO precision instead of NANO, and

> > > not have the range parameter?  

> > 

> > I don't get what you mean here? Returning int-plus-micro can't be it,

> > since that would be completely pointless and only make it easier to

> > trigger accuracy problems of the conversion. However, I feel that any

> > attempt to shift digits but still having the same general approch will

> > just change the size and position of the islands, and thus not fix the

> > fundamental problematic border between land and water.  

> 

> My apologies, discard this last comment. I was suggesting to guaranty

> less precision, but consistent over the full range. I don't believe

> that's a viable option.


Keep up the good work!  I'm looking forward to this going in (hopefully
shortly!)

Jonathan

> 

> Thanks again for your time,

> Liam

> 

> > 

> > Cheers,

> > Peter

> >   

> > >>  

> > >>> Signed-off-by: Liam Beguin <lvb@xiphos.com>

> > >>> ---

> > >>>  drivers/iio/afe/iio-rescale.c | 27 +++++++++++++++++++++++++--

> > >>>  1 file changed, 25 insertions(+), 2 deletions(-)

> > >>>

> > >>> diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c

> > >>> index c408c4057c08..7304306c9806 100644

> > >>> --- a/drivers/iio/afe/iio-rescale.c

> > >>> +++ b/drivers/iio/afe/iio-rescale.c

> > >>> @@ -22,7 +22,7 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> > >>>  			  int *val, int *val2)

> > >>>  {

> > >>>  	s64 tmp;

> > >>> -	s32 rem;

> > >>> +	s32 rem, rem2;

> > >>>  	u32 mult;

> > >>>  	u32 neg;

> > >>>  

> > >>> @@ -38,8 +38,31 @@ int rescale_process_scale(struct rescale *rescale, int scale_type,

> > >>>  		tmp = (s64)*val * 1000000000LL;

> > >>>  		tmp = div_s64(tmp, rescale->denominator);

> > >>>  		tmp *= rescale->numerator;

> > >>> -		tmp = div_s64(tmp, 1000000000LL);

> > >>> +

> > >>> +		tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> > >>>  		*val = tmp;

> > >>> +

> > >>> +		/*

> > >>> +		 * For small values, the approximation can be costly,

> > >>> +		 * change scale type to maintain accuracy.

> > >>> +		 *

> > >>> +		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> > >>> +		 */

> > >>> +		if (scale_type == IIO_VAL_FRACTIONAL)

> > >>> +			tmp = *val2;

> > >>> +		else

> > >>> +			tmp = 1 << *val2;

> > >>> +

> > >>> +		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> > >>> +			 *val = div_s64_rem(*val, tmp, &rem2);

> > >>> +

> > >>> +			 *val2 = div_s64(rem, tmp);

> > >>> +			 if (rem2)

> > >>> +				 *val2 += div_s64(rem2 * 1000000000LL, tmp);  

> > >>

> > >> rem2 is 32-bit. Might 1000000000LL also be 32-bit on a small machine

> > >> where 64-bit arithmetic is really expensive? In that case, the above

> > >> is broken. The safe route is to do these things as in the existing

> > >> code with a cast to s64. But maybe that's just cargo cult crap?  

> > > 

> > > You're right, this should be

> > > 

> > > 	div_s64((s64)rem2 * 1000000000LL, tmp);

> > > 

> > > I've been trying th get the kunit tests running on a 32-bit kernel image, but

> > > I'm still having issues with that...

> > > 

> > > Thanks,

> > > Liam

> > >   

> > >>

> > >> Cheers,

> > >> Peter

> > >>  

> > >>> +

> > >>> +			 return IIO_VAL_INT_PLUS_NANO;

> > >>> +		 }

> > >>> +

> > >>>  		return scale_type;

> > >>>  	case IIO_VAL_INT_PLUS_NANO:

> > >>>  	case IIO_VAL_INT_PLUS_MICRO:

> > >>>  

> > >>
Peter Rosin Aug. 30, 2021, 1:03 p.m. UTC | #10
On 2021-08-29 06:41, Liam Beguin wrote:
> On Thu, Aug 26, 2021 at 11:53:14AM +0200, Peter Rosin wrote:

>> On 2021-08-24 22:28, Liam Beguin wrote:

>>> On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:

>>>> [I started to write an answer to your plans in the v7 thread, but didn't

>>>> have time to finish before v8 appeared...]

>>>>

>>>> On 2021-08-20 21:17, Liam Beguin wrote:

>>>>> From: Liam Beguin <lvb@xiphos.com>

>>>>>

>>>>> The approximation caused by integer divisions can be costly on smaller

>>>>> scale values since the decimal part is significant compared to the

>>>>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

>>>>> cases to maintain accuracy.

>>>>

>>>

>>> Hi Peter,

>>>

>>> Thanks for taking time to look at this in detail again. I really

>>> appreciate all the feedback you've provided.

>>>

>>>> The conversion to int-plus-nano may also carry a cost of accuracy.

>>>>

>>>> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

>>>> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

>>>> digits). So, in this case you lose precision with the new code.

>>>>

>>>> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

>>>> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

>>>>

>>>

>>> I see what you mean here.

>>> I added test cases with these values to see exactly what we get.

>>

>> Excellent!

>>

>>>

>>> Expected rel_ppm < 0, but

>>>     rel_ppm == 1000000

>>>

>>>      real=0.000000000

>>>  expected=0.000000033594

>>> # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

>>> Expected rel_ppm < 0, but

>>>     rel_ppm == 1000000

>>>

>>>      real=0.000000000

>>>  expected=0.000000050318

>>> # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

>>>

>>>

>>> The main issue is that the first two examples return 0 which night be worst

>>> that loosing a little precision.

>>

>> They shouldn't return zero?

>>

>> Here's the new code quoted from the test robot (and assuming

>> a 64-bit machine, thus ignoring the 32-bit problem on line 56).

>>

>>     36		case IIO_VAL_FRACTIONAL:

>>     37		case IIO_VAL_FRACTIONAL_LOG2:

>>     38			tmp = (s64)*val * 1000000000LL;

>>     39			tmp = div_s64(tmp, rescale->denominator);

>>     40			tmp *= rescale->numerator;

>>     41	

>>     42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);

>>     43			*val = tmp;

>>     44	

>>     45			/*

>>     46			 * For small values, the approximation can be costly,

>>     47			 * change scale type to maintain accuracy.

>>     48			 *

>>     49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

>>     50			 */

>>     51			if (scale_type == IIO_VAL_FRACTIONAL)

>>     52				tmp = *val2;

>>     53			else

>>     54				tmp = 1 << *val2;

>>     55	

>>   > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

>>     57				 *val = div_s64_rem(*val, tmp, &rem2);

>>     58	

>>     59				 *val2 = div_s64(rem, tmp);

>>     60				 if (rem2)

>>     61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);

>>     62	

>>     63				 return IIO_VAL_INT_PLUS_NANO;

>>     64			 }

>>     65	

>>     66			return scale_type;

>>

>> When I go through the above manually, I get:

>>

>> line 

>> 38: tmp = 90000000000    ; 90 * 1000000000

>> 39: tmp = 176817288      ; 90000000000 / 509

>> 40: tmp = 46149312168    ; 176817288 * 261

>> 42: rem = 149312168      ; 46149312168 % 1000000000

>>     tmp = 46             ; 46149312168 / 1000000000

>> 43: *val = 46

>> 51: if (<fractional>) [yes]

>> 52: tmp = 1373754273

>> 56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]

>> 57: rem2 = 46            ; 46 % 1373754273

>>     *val = 0             ; 46 / 1373754273

>> 59: *val2 = 0            ; 149312168 / 1373754273

>> 60: if 46 [yes]

>> 61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273

>> 63: return <int-plus-nano> [0.000000033]

>>

>> and

>>

>> line 

>> 38: tmp = 100000000000   ; 100 * 1000000000

>> 39: tmp = 14285714       ; 100000000000 / 7000

>> 40: tmp = 54028570348    ; 176817288 * 3782

>> 42: rem = 28570348       ; 54028570348 % 1000000000

>>     tmp = 54             ; 54028570348 / 1000000000

>> 43: *val = 54

>> 51: if (<fractional>) [no]

>> 54: tmp = 1073741824     ; 1 << 30

>> 56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]

>> 57: rem2 = 54            ; 54 % 1073741824

>>     *val = 0             ; 54 / 1073741824

>> 59: *val2 = 0            ; 28570348 / 1073741824

>> 60: if 46 [yes]

>> 61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824

>> 63: return <int-plus-nano> [0.000000050]

>>

>> Why do you get zero, what am I missing?

> 

> So... It turns out, I swapped schan and rescaler values which gives us:


Ahh, good. The explanation is simple!

> 

> numerator = 90

> denominator = 1373754273

> schan_val = 261

> schan_val2 = 509

> 

> line

> 38: tmp = 261000000000   ; 261 * 1000000000

> 39: tmp = 189            ; 261000000000 / 1373754273

> 40: tmp = 17010          ; 189 * 90

> 42: rem = 17010          ; 17010 % 1000000000

>     tmp = 0              ; 17010 / 1000000000

> 43: *val = 0

> 51: if (<fractional>) [yes]

> 52: tmp = 509

> 56: if (17010 > 10000000 && 0/509 < 100) [no && yes]

> 66: *val = 0

>     *val2 = 509

>     return <fractional> [0.000000000]

> 

> Swapping back the values, I get the same results as you!

> 

> Also, replacing line 56 from the snippet above with

> 

> 	- if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {

> 	+ if (abs(rem)) {

> 

> Fixes these precision errors. It also prevents us from returning

> different scales if we swap the two divisions (schan and rescaler

> parameters).


No, it doesn't fix the precision problems. Not really, it only reduces
them. See below.

*snip*

>>> Considering these null values and the possible issue of not always having the

>>> same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

>>> scale?

>>

>> No, that absolutely kills the precision for small values that are much

>> better off as-is. The closer you get to zero, the more the conversion

>> to int-plus-nano hurts, relatively speaking.

> 

> I'm not sure I understand what you mean. The point of switching to

> IIO_VAL_INT_PLUS_NANO at the moment is to get more precision on small

> values. Am I missing something?


Yes, apparently :-) We always sacrifice accuracy by going to
IIO_VAL_INT_PLUS_NANO. More is lost with smaller numbers, relatively.
That is an inherent property of fix-point style representations such
as IIO_VAL_INT_PLUS_NANO. Unless we get lucky and just happen to be
able to represent the desired number exactly of course, but that tends
to be both non-interesting and the exception.

Let's go back to the example from my response to the 8/14 patch, i.e.
5/32768 scaled by 3/10000. With the current code this yields

15 / 327680000 (0.0000000457763671875)

Note, the above is /exact/. With IIO_VAL_INT_PLUS_NANO we instead get
the truncated 0.000000045

The relative error introduced by the IIO_VAL_INT_PLUS_NANO conversion
is almost 1.7% in this case. Sure, rounding instead of truncating
would reduce that to 0.5%, but that's not really a significant
improvement if you compare to /no/ error. Besides, there are many
smaller numbers with even bigger relative conversion "noise".

And remember, this function is used to rescale the scale of the
raw values. We are going to multiply the scale and the raw values
at some point. If we have rounding errors in the scale, they will
multiply with the raw values. It wouldn't look too good if something
that should be able to reach 3V with a lot of accuracy (ca 26 bits)
instead caps out at 2.94912V (or hits 3.014656V) because of accuracy
issues with the scaling (1.7% too low or 0.5% too high).

It's impossible to do better than exact, which is what we have now for
IIO_VAL_FRACTIONAL and IIO_VAL_INT (for IIO_VAL_FRACTIONAL_LOG2, not
so much...). At least as long as there's no overflow.

Cheers,
Peter
Peter Rosin Aug. 30, 2021, 2:30 p.m. UTC | #11
On 2021-08-30 13:22, Jonathan Cameron wrote:
> On Mon, 23 Aug 2021 00:18:55 +0200

> Peter Rosin <peda@axentia.se> wrote:

> 

>> [I started to write an answer to your plans in the v7 thread, but didn't

>> have time to finish before v8 appeared...]

>>

>> On 2021-08-20 21:17, Liam Beguin wrote:

>>> From: Liam Beguin <lvb@xiphos.com>

>>>

>>> The approximation caused by integer divisions can be costly on smaller

>>> scale values since the decimal part is significant compared to the

>>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

>>> cases to maintain accuracy.  

>>

>> The conversion to int-plus-nano may also carry a cost of accuracy.

>>

>> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

>> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

>> digits). So, in this case you lose precision with the new code.

>>

>> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

>> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

>>

>> I'm also wondering if it is wise to not always return the same scale type?

>> What happens if we want to extend this driver to scale a buffered channel?

>> Honest question! I don't know, but I fear that this patch may make that

>> step more difficult to take??

>>

>> Jonathan, do you have any input on that?

> 

> I'm a bit lost.  As things currently stand IIO buffered data flows all use

> _RAW.  It's either up to userspace or the inkernel user to query scale

> and use that to compute the appropriate _processed values.  There have been

> various discussions over the years on how to add metadata but it's tricky

> without adding significant complexity and for vast majority of usecases not

> necessary.  Given the rescaler copes with _raw and _processed inputs, we

> would only support buffered flows if using the _raw ones.

> 

> If nothing changes in configuration of the rescaler, the scale should be

> static for a given device.  What format that 'scale' takes is something

> that userspace code or inkernel users should cope fine with given they

> need to do that anyway for different devices.


Ok, if 'scale' (and 'offset') of the source channel is to be considered
static, then it is much safer to ignore the "island problem" and rescale
each value independently on a case-by-case basis. We should add an
explicit comment somewhere that we make this assumption.

Sorry for wasting time and effort by not realizing by myself (and earlier).

Maybe something like this?

/*
 * The rescaler assumes that the 'scale' and 'offset' properties of
 * the source channel are static. If they are not, there exist some
 * corner cases where rounding/truncating might cause confusing
 * mathematical properties (non-linearity).
 */

I then propose that we rescale IIO_VAL_FRACTIONAL as before if that works,
thus preserving any previous exact rescaling, but if there is an overflow
while doing that, then we fall back to new code that rescales to a
IIO_VAL_INT_PLUS_NANO value. Trying the gcd-thing as it ended up in v7 still
seems expensive to me, but maybe I overestimate the cost of gcd? Anyway, my
guts vote for completely skipping gcd and that we aim directly for
IIO_VAL_INT_PLUS_NANO in case of overflow while doing the old thing.

Having said that, if 'scale' and 'offset' indeed are static, then the gcd
cost can be mitigated by caching the result. Exact rescaling is always
nice...

If IIO_VAL_INT overflows while rescaling, we are SOL whichever way we turn,
so ignore doing anything about that.

Liam, was it IIO_VAL_FRACTIONAL or IIO_VAL_FRACTIONAL_LOG2 that was your
real use case? Anyway, your 100 ppm threshold is probably as good a
compromise as any for this case. However, that threshold does nothing for
the case where the conversion to IIO_VAL_INT_PLUS_NANO throws away way
more precision than the existing operations. It would probably be good
to somehow stay with the old method for the really tiny values, if there
is a viable test/threshold for when to do what.

Cheers,
Peter
Jonathan Cameron Aug. 30, 2021, 5:03 p.m. UTC | #12
On Mon, 30 Aug 2021 16:30:54 +0200
Peter Rosin <peda@axentia.se> wrote:

> On 2021-08-30 13:22, Jonathan Cameron wrote:

> > On Mon, 23 Aug 2021 00:18:55 +0200

> > Peter Rosin <peda@axentia.se> wrote:

> >   

> >> [I started to write an answer to your plans in the v7 thread, but didn't

> >> have time to finish before v8 appeared...]

> >>

> >> On 2021-08-20 21:17, Liam Beguin wrote:  

> >>> From: Liam Beguin <lvb@xiphos.com>

> >>>

> >>> The approximation caused by integer divisions can be costly on smaller

> >>> scale values since the decimal part is significant compared to the

> >>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> >>> cases to maintain accuracy.    

> >>

> >> The conversion to int-plus-nano may also carry a cost of accuracy.

> >>

> >> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> >> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> >> digits). So, in this case you lose precision with the new code.

> >>

> >> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> >> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> >>

> >> I'm also wondering if it is wise to not always return the same scale type?

> >> What happens if we want to extend this driver to scale a buffered channel?

> >> Honest question! I don't know, but I fear that this patch may make that

> >> step more difficult to take??

> >>

> >> Jonathan, do you have any input on that?  

> > 

> > I'm a bit lost.  As things currently stand IIO buffered data flows all use

> > _RAW.  It's either up to userspace or the inkernel user to query scale

> > and use that to compute the appropriate _processed values.  There have been

> > various discussions over the years on how to add metadata but it's tricky

> > without adding significant complexity and for vast majority of usecases not

> > necessary.  Given the rescaler copes with _raw and _processed inputs, we

> > would only support buffered flows if using the _raw ones.

> > 

> > If nothing changes in configuration of the rescaler, the scale should be

> > static for a given device.  What format that 'scale' takes is something

> > that userspace code or inkernel users should cope fine with given they

> > need to do that anyway for different devices.  

> 

> Ok, if 'scale' (and 'offset') of the source channel is to be considered

> static, then it is much safer to ignore the "island problem" and rescale

> each value independently on a case-by-case basis. We should add an

> explicit comment somewhere that we make this assumption.

> 

> Sorry for wasting time and effort by not realizing by myself (and earlier).

> 

> Maybe something like this?

> 

> /*

>  * The rescaler assumes that the 'scale' and 'offset' properties of

>  * the source channel are static. If they are not, there exist some

>  * corner cases where rounding/truncating might cause confusing

>  * mathematical properties (non-linearity).

>  */

Looks good to me.

There are some really obscure corner cases in which it is theoretically possible
to have scale change autonomously. Reality is they mostly don't happen in reality
and are on strange sensors you couldn't stick an AFE on anyway ;)
IIRC hid-sensors in theory allow this, but as far as I know, no device actually does
it...  I'm not sure the driver takes any notice if they do!

Jonathan

> 

> I then propose that we rescale IIO_VAL_FRACTIONAL as before if that works,

> thus preserving any previous exact rescaling, but if there is an overflow

> while doing that, then we fall back to new code that rescales to a

> IIO_VAL_INT_PLUS_NANO value. Trying the gcd-thing as it ended up in v7 still

> seems expensive to me, but maybe I overestimate the cost of gcd? Anyway, my

> guts vote for completely skipping gcd and that we aim directly for

> IIO_VAL_INT_PLUS_NANO in case of overflow while doing the old thing.

> 

> Having said that, if 'scale' and 'offset' indeed are static, then the gcd

> cost can be mitigated by caching the result. Exact rescaling is always

> nice...

> 

> If IIO_VAL_INT overflows while rescaling, we are SOL whichever way we turn,

> so ignore doing anything about that.

> 

> Liam, was it IIO_VAL_FRACTIONAL or IIO_VAL_FRACTIONAL_LOG2 that was your

> real use case? Anyway, your 100 ppm threshold is probably as good a

> compromise as any for this case. However, that threshold does nothing for

> the case where the conversion to IIO_VAL_INT_PLUS_NANO throws away way

> more precision than the existing operations. It would probably be good

> to somehow stay with the old method for the really tiny values, if there

> is a viable test/threshold for when to do what.

> 

> Cheers,

> Peter
Liam Beguin Sept. 2, 2021, 2:27 a.m. UTC | #13
On Mon, Aug 30, 2021 at 04:30:54PM +0200, Peter Rosin wrote:
> On 2021-08-30 13:22, Jonathan Cameron wrote:

> > On Mon, 23 Aug 2021 00:18:55 +0200

> > Peter Rosin <peda@axentia.se> wrote:

> > 

> >> [I started to write an answer to your plans in the v7 thread, but didn't

> >> have time to finish before v8 appeared...]

> >>

> >> On 2021-08-20 21:17, Liam Beguin wrote:

> >>> From: Liam Beguin <lvb@xiphos.com>

> >>>

> >>> The approximation caused by integer divisions can be costly on smaller

> >>> scale values since the decimal part is significant compared to the

> >>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> >>> cases to maintain accuracy.  

> >>

> >> The conversion to int-plus-nano may also carry a cost of accuracy.

> >>

> >> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> >> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> >> digits). So, in this case you lose precision with the new code.

> >>

> >> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> >> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> >>

> >> I'm also wondering if it is wise to not always return the same scale type?

> >> What happens if we want to extend this driver to scale a buffered channel?

> >> Honest question! I don't know, but I fear that this patch may make that

> >> step more difficult to take??

> >>

> >> Jonathan, do you have any input on that?

> > 

> > I'm a bit lost.  As things currently stand IIO buffered data flows all use

> > _RAW.  It's either up to userspace or the inkernel user to query scale

> > and use that to compute the appropriate _processed values.  There have been

> > various discussions over the years on how to add metadata but it's tricky

> > without adding significant complexity and for vast majority of usecases not

> > necessary.  Given the rescaler copes with _raw and _processed inputs, we

> > would only support buffered flows if using the _raw ones.

> > 

> > If nothing changes in configuration of the rescaler, the scale should be

> > static for a given device.  What format that 'scale' takes is something

> > that userspace code or inkernel users should cope fine with given they

> > need to do that anyway for different devices.

> 

> Ok, if 'scale' (and 'offset') of the source channel is to be considered

> static, then it is much safer to ignore the "island problem" and rescale

> each value independently on a case-by-case basis. We should add an

> explicit comment somewhere that we make this assumption.

> 

> Sorry for wasting time and effort by not realizing by myself (and earlier).


No worries, I was also under the impression that the source channel
scale (and offset) could change.

> 

> Maybe something like this?

> 

> /*

>  * The rescaler assumes that the 'scale' and 'offset' properties of

>  * the source channel are static. If they are not, there exist some

>  * corner cases where rounding/truncating might cause confusing

>  * mathematical properties (non-linearity).

>  */

> 


If this is more of a general assumption, is there a place in the
Documentation/ that we could put this comment?

If not, I'll add it here.

> I then propose that we rescale IIO_VAL_FRACTIONAL as before if that works,

> thus preserving any previous exact rescaling, but if there is an overflow

> while doing that, then we fall back to new code that rescales to a

> IIO_VAL_INT_PLUS_NANO value. Trying the gcd-thing as it ended up in v7 still

> seems expensive to me, but maybe I overestimate the cost of gcd? Anyway, my

> guts vote for completely skipping gcd and that we aim directly for

> IIO_VAL_INT_PLUS_NANO in case of overflow while doing the old thing.


I agree with you, I'd much rather drop gcd() from rescale_process_scale()
altogether.

I was planning on keeping IIO_VAL_FRACTIONAL and IIO_VAL_FRACTIONAL_LOG2
combined, but getting rid of the 100ppm condition in favor of a simple
if (rem).

> 

> Having said that, if 'scale' and 'offset' indeed are static, then the gcd

> cost can be mitigated by caching the result. Exact rescaling is always

> nice...

> 

> If IIO_VAL_INT overflows while rescaling, we are SOL whichever way we turn,

> so ignore doing anything about that.


I was thinking of using check_mul_overflow() to do something about the
overflow, but I'm happy to leave it out for now.

> 

> Liam, was it IIO_VAL_FRACTIONAL or IIO_VAL_FRACTIONAL_LOG2 that was your

> real use case? Anyway, your 100 ppm threshold is probably as good a

> compromise as any for this case. However, that threshold does nothing for

> the case where the conversion to IIO_VAL_INT_PLUS_NANO throws away way

> more precision than the existing operations. It would probably be good

> to somehow stay with the old method for the really tiny values, if there

> is a viable test/threshold for when to do what.

> 


My original issue was with IIO_VAL_FRACTIONAL.
I'll look into what we can do for small IIO_VAL_FRACTIONAL_LOG2 values,
and will add more tests for that too.

Thanks,
Liam

> Cheers,

> Peter
Peter Rosin Sept. 2, 2021, 9:52 p.m. UTC | #14
On 2021-09-02 04:27, Liam Beguin wrote:
> On Mon, Aug 30, 2021 at 04:30:54PM +0200, Peter Rosin wrote:

>>

>> Having said that, if 'scale' and 'offset' indeed are static, then the gcd

>> cost can be mitigated by caching the result. Exact rescaling is always

>> nice...

>>

>> If IIO_VAL_INT overflows while rescaling, we are SOL whichever way we turn,

>> so ignore doing anything about that.

> 

> I was thinking of using check_mul_overflow() to do something about the

> overflow, but I'm happy to leave it out for now.


My mistake, you are right. A sufficiently large denominator can of course
be used to dodge overflow in the numerator by sacrificing some accuracy
even if our maximum scale is still limited to 32 bits.

I apparently didn't have my brains about when I wrote the above...

Cheers,
Peter
Liam Beguin Sept. 11, 2021, 11:20 p.m. UTC | #15
On Mon, Aug 30, 2021 at 03:03:52PM +0200, Peter Rosin wrote:
> On 2021-08-29 06:41, Liam Beguin wrote:

> > On Thu, Aug 26, 2021 at 11:53:14AM +0200, Peter Rosin wrote:

> >> On 2021-08-24 22:28, Liam Beguin wrote:

> >>> On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:

> >>>> [I started to write an answer to your plans in the v7 thread, but didn't

> >>>> have time to finish before v8 appeared...]

> >>>>

> >>>> On 2021-08-20 21:17, Liam Beguin wrote:

> >>>>> From: Liam Beguin <lvb@xiphos.com>

> >>>>>

> >>>>> The approximation caused by integer divisions can be costly on smaller

> >>>>> scale values since the decimal part is significant compared to the

> >>>>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> >>>>> cases to maintain accuracy.

> >>>>

> >>>

> >>> Hi Peter,

> >>>

> >>> Thanks for taking time to look at this in detail again. I really

> >>> appreciate all the feedback you've provided.

> >>>

> >>>> The conversion to int-plus-nano may also carry a cost of accuracy.

> >>>>

> >>>> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> >>>> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> >>>> digits). So, in this case you lose precision with the new code.

> >>>>

> >>>> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> >>>> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> >>>>

> >>>

> >>> I see what you mean here.

> >>> I added test cases with these values to see exactly what we get.

> >>

> >> Excellent!

> >>

> >>>

> >>> Expected rel_ppm < 0, but

> >>>     rel_ppm == 1000000

> >>>

> >>>      real=0.000000000

> >>>  expected=0.000000033594

> >>> # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

> >>> Expected rel_ppm < 0, but

> >>>     rel_ppm == 1000000

> >>>

> >>>      real=0.000000000

> >>>  expected=0.000000050318

> >>> # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

> >>>

> >>>

> >>> The main issue is that the first two examples return 0 which night be worst

> >>> that loosing a little precision.

> >>

> >> They shouldn't return zero?

> >>

> >> Here's the new code quoted from the test robot (and assuming

> >> a 64-bit machine, thus ignoring the 32-bit problem on line 56).

> >>

> >>     36		case IIO_VAL_FRACTIONAL:

> >>     37		case IIO_VAL_FRACTIONAL_LOG2:

> >>     38			tmp = (s64)*val * 1000000000LL;

> >>     39			tmp = div_s64(tmp, rescale->denominator);

> >>     40			tmp *= rescale->numerator;

> >>     41	

> >>     42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> >>     43			*val = tmp;

> >>     44	

> >>     45			/*

> >>     46			 * For small values, the approximation can be costly,

> >>     47			 * change scale type to maintain accuracy.

> >>     48			 *

> >>     49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> >>     50			 */

> >>     51			if (scale_type == IIO_VAL_FRACTIONAL)

> >>     52				tmp = *val2;

> >>     53			else

> >>     54				tmp = 1 << *val2;

> >>     55	

> >>   > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {

> >>     57				 *val = div_s64_rem(*val, tmp, &rem2);

> >>     58	

> >>     59				 *val2 = div_s64(rem, tmp);

> >>     60				 if (rem2)

> >>     61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);

> >>     62	

> >>     63				 return IIO_VAL_INT_PLUS_NANO;

> >>     64			 }

> >>     65	

> >>     66			return scale_type;

> >>

> >> When I go through the above manually, I get:

> >>

> >> line 

> >> 38: tmp = 90000000000    ; 90 * 1000000000

> >> 39: tmp = 176817288      ; 90000000000 / 509

> >> 40: tmp = 46149312168    ; 176817288 * 261

> >> 42: rem = 149312168      ; 46149312168 % 1000000000

> >>     tmp = 46             ; 46149312168 / 1000000000

> >> 43: *val = 46

> >> 51: if (<fractional>) [yes]

> >> 52: tmp = 1373754273

> >> 56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]

> >> 57: rem2 = 46            ; 46 % 1373754273

> >>     *val = 0             ; 46 / 1373754273

> >> 59: *val2 = 0            ; 149312168 / 1373754273

> >> 60: if 46 [yes]

> >> 61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273

> >> 63: return <int-plus-nano> [0.000000033]

> >>

> >> and

> >>

> >> line 

> >> 38: tmp = 100000000000   ; 100 * 1000000000

> >> 39: tmp = 14285714       ; 100000000000 / 7000

> >> 40: tmp = 54028570348    ; 176817288 * 3782

> >> 42: rem = 28570348       ; 54028570348 % 1000000000

> >>     tmp = 54             ; 54028570348 / 1000000000

> >> 43: *val = 54

> >> 51: if (<fractional>) [no]

> >> 54: tmp = 1073741824     ; 1 << 30

> >> 56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]

> >> 57: rem2 = 54            ; 54 % 1073741824

> >>     *val = 0             ; 54 / 1073741824

> >> 59: *val2 = 0            ; 28570348 / 1073741824

> >> 60: if 46 [yes]

> >> 61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824

> >> 63: return <int-plus-nano> [0.000000050]

> >>

> >> Why do you get zero, what am I missing?

> > 

> > So... It turns out, I swapped schan and rescaler values which gives us:

> 

> Ahh, good. The explanation is simple!

> 

> > 

> > numerator = 90

> > denominator = 1373754273

> > schan_val = 261

> > schan_val2 = 509

> > 

> > line

> > 38: tmp = 261000000000   ; 261 * 1000000000

> > 39: tmp = 189            ; 261000000000 / 1373754273

> > 40: tmp = 17010          ; 189 * 90

> > 42: rem = 17010          ; 17010 % 1000000000

> >     tmp = 0              ; 17010 / 1000000000

> > 43: *val = 0

> > 51: if (<fractional>) [yes]

> > 52: tmp = 509

> > 56: if (17010 > 10000000 && 0/509 < 100) [no && yes]

> > 66: *val = 0

> >     *val2 = 509

> >     return <fractional> [0.000000000]

> > 

> > Swapping back the values, I get the same results as you!

> > 

> > Also, replacing line 56 from the snippet above with

> > 

> > 	- if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {

> > 	+ if (abs(rem)) {

> > 

> > Fixes these precision errors. It also prevents us from returning

> > different scales if we swap the two divisions (schan and rescaler

> > parameters).

> 

> No, it doesn't fix the precision problems. Not really, it only reduces

> them. See below.

> 

> *snip*

> 

> >>> Considering these null values and the possible issue of not always having the

> >>> same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

> >>> scale?

> >>

> >> No, that absolutely kills the precision for small values that are much

> >> better off as-is. The closer you get to zero, the more the conversion

> >> to int-plus-nano hurts, relatively speaking.

> > 

> > I'm not sure I understand what you mean. The point of switching to

> > IIO_VAL_INT_PLUS_NANO at the moment is to get more precision on small

> > values. Am I missing something?


Hi Peter,

Apologies for the late reply.

> Yes, apparently :-) We always sacrifice accuracy by going to

> IIO_VAL_INT_PLUS_NANO. More is lost with smaller numbers, relatively.

> That is an inherent property of fix-point style representations such

> as IIO_VAL_INT_PLUS_NANO. Unless we get lucky and just happen to be

> able to represent the desired number exactly of course, but that tends

> to be both non-interesting and the exception.


I think I see where our misunderstanding comes from :-)

I understand that mathematically, IIO_VAL_FRACTIONAL is more accurate
than IIO_VAL_INT_PLUS_NANO for rational numbers given that it provides
an exact value to the IIO consumer.

My point is that the IIO core will represent IIO_VAL_FRACTIONAL scales
as fixed point when using iio_format_value().

Also, my current setup, uses drivers/hwmon/iio_hwmon.c which is worst
since it relies on iio_read_channel_processed() to get an integer scale.

(I wonder if it would make sense at some point to update iio_hwmon.c to
use iio_format_value() instead).

> Let's go back to the example from my response to the 8/14 patch, i.e.

> 5/32768 scaled by 3/10000. With the current code this yields

> 

> 15 / 327680000 (0.0000000457763671875)

> 

> Note, the above is /exact/. With IIO_VAL_INT_PLUS_NANO we instead get

> the truncated 0.000000045

> 

> The relative error introduced by the IIO_VAL_INT_PLUS_NANO conversion

> is almost 1.7% in this case. Sure, rounding instead of truncating

> would reduce that to 0.5%, but that's not really a significant

> improvement if you compare to /no/ error. Besides, there are many

> smaller numbers with even bigger relative conversion "noise".

> 

> And remember, this function is used to rescale the scale of the

> raw values. We are going to multiply the scale and the raw values

> at some point. If we have rounding errors in the scale, they will

> multiply with the raw values. It wouldn't look too good if something

> that should be able to reach 3V with a lot of accuracy (ca 26 bits)

> instead caps out at 2.94912V (or hits 3.014656V) because of accuracy

> issues with the scaling (1.7% too low or 0.5% too high).


I understand your point, but a device that has an IIO_VAL_FRACTIONAL
scale with *val=15 and *val2=327680000 will also show a scale of
0.000000045 in the sysfs interface.

Since other parts of the core already behave like this, I'm inclined to
say that this is a more general "issue", and that this kind of precision
loss would only affect consumers making direct use of the scaling
values. With all this, I wonder how careful we really have to be with
these extra digits.

> It's impossible to do better than exact, which is what we have now for

> IIO_VAL_FRACTIONAL and IIO_VAL_INT (for IIO_VAL_FRACTIONAL_LOG2, not

> so much...). At least as long as there's no overflow.


Right, but like I said above, depending on which path you take, that
value might not be exact in the end.

Thanks,
Liam

> 

> Cheers,

> Peter
Liam Beguin Sept. 11, 2021, 11:31 p.m. UTC | #16
On Mon, Aug 30, 2021 at 12:27:24PM +0100, Jonathan Cameron wrote:
> On Sun, 29 Aug 2021 00:41:21 -0400

> Liam Beguin <liambeguin@gmail.com> wrote:

> 

> > On Thu, Aug 26, 2021 at 11:53:14AM +0200, Peter Rosin wrote:

> > > On 2021-08-24 22:28, Liam Beguin wrote:  

> > > > On Mon Aug 23, 2021 at 00:18:55 +0200, Peter Rosin wrote:  

> > > >> [I started to write an answer to your plans in the v7 thread, but didn't

> > > >> have time to finish before v8 appeared...]

> > > >>

> > > >> On 2021-08-20 21:17, Liam Beguin wrote:  

> > > >>> From: Liam Beguin <lvb@xiphos.com>

> > > >>>

> > > >>> The approximation caused by integer divisions can be costly on smaller

> > > >>> scale values since the decimal part is significant compared to the

> > > >>> integer part. Switch to an IIO_VAL_INT_PLUS_NANO scale type in such

> > > >>> cases to maintain accuracy.  

> > > >>  

> > > > 

> > > > Hi Peter,

> > > > 

> > > > Thanks for taking time to look at this in detail again. I really

> > > > appreciate all the feedback you've provided.

> > > >   

> > > >> The conversion to int-plus-nano may also carry a cost of accuracy.

> > > >>

> > > >> 90/1373754273 scaled by 261/509 is 3.359e-8, the old code returns 3.348e-8,

> > > >> but the new one gets you 3.3e-8 (0.000000033, it simply cannot provide more

> > > >> digits). So, in this case you lose precision with the new code.

> > > >>

> > > >> Similar problem with 100 / 2^30 scaled by 3782/7000. It is 5.032e-8, the old

> > > >> code returns 5.029e-8, but the new one gets you the inferior 5.0e-8.

> > > >>  

> > > > 

> > > > I see what you mean here.

> > > > I added test cases with these values to see exactly what we get.  

> > > 

> > > Excellent!

> > >   

> > > > 

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 1000000

> > > > 

> > > >      real=0.000000000

> > > >  expected=0.000000033594

> > > > # iio_rescale_test_scale: not ok 42 - v8 - 90/1373754273 scaled by 261/509

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 1000000

> > > > 

> > > >      real=0.000000000

> > > >  expected=0.000000050318

> > > > # iio_rescale_test_scale: not ok 43 - v8 - 100/1073741824 scaled by 3782/7000

> > > > 

> > > > 

> > > > The main issue is that the first two examples return 0 which night be worst

> > > > that loosing a little precision.  

> > > 

> > > They shouldn't return zero?

> > > 

> > > Here's the new code quoted from the test robot (and assuming

> > > a 64-bit machine, thus ignoring the 32-bit problem on line 56).

> > > 

> > >     36		case IIO_VAL_FRACTIONAL:

> > >     37		case IIO_VAL_FRACTIONAL_LOG2:

> > >     38			tmp = (s64)*val * 1000000000LL;

> > >     39			tmp = div_s64(tmp, rescale->denominator);

> > >     40			tmp *= rescale->numerator;

> > >     41	

> > >     42			tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> > >     43			*val = tmp;

> > >     44	

> > >     45			/*

> > >     46			 * For small values, the approximation can be costly,

> > >     47			 * change scale type to maintain accuracy.

> > >     48			 *

> > >     49			 * 100 vs. 10000000 NANO caps the error to about 100 ppm.

> > >     50			 */

> > >     51			if (scale_type == IIO_VAL_FRACTIONAL)

> > >     52				tmp = *val2;

> > >     53			else

> > >     54				tmp = 1 << *val2;

> > >     55	  

> > >   > 56			 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {  

> > >     57				 *val = div_s64_rem(*val, tmp, &rem2);

> > >     58	

> > >     59				 *val2 = div_s64(rem, tmp);

> > >     60				 if (rem2)

> > >     61					 *val2 += div_s64(rem2 * 1000000000LL, tmp);

> > >     62	

> > >     63				 return IIO_VAL_INT_PLUS_NANO;

> > >     64			 }

> > >     65	

> > >     66			return scale_type;

> > > 

> > > When I go through the above manually, I get:

> > > 

> > > line 

> > > 38: tmp = 90000000000    ; 90 * 1000000000

> > > 39: tmp = 176817288      ; 90000000000 / 509

> > > 40: tmp = 46149312168    ; 176817288 * 261

> > > 42: rem = 149312168      ; 46149312168 % 1000000000

> > >     tmp = 46             ; 46149312168 / 1000000000

> > > 43: *val = 46

> > > 51: if (<fractional>) [yes]

> > > 52: tmp = 1373754273

> > > 56: if (149312168 > 10000000 && 46/1373754273 < 100) [yes && yes]

> > > 57: rem2 = 46            ; 46 % 1373754273

> > >     *val = 0             ; 46 / 1373754273

> > > 59: *val2 = 0            ; 149312168 / 1373754273

> > > 60: if 46 [yes]

> > > 61: *val2 = 33           ; 0 + 46 * 1000000000 / 1373754273

> > > 63: return <int-plus-nano> [0.000000033]

> > > 

> > > and

> > > 

> > > line 

> > > 38: tmp = 100000000000   ; 100 * 1000000000

> > > 39: tmp = 14285714       ; 100000000000 / 7000

> > > 40: tmp = 54028570348    ; 176817288 * 3782

> > > 42: rem = 28570348       ; 54028570348 % 1000000000

> > >     tmp = 54             ; 54028570348 / 1000000000

> > > 43: *val = 54

> > > 51: if (<fractional>) [no]

> > > 54: tmp = 1073741824     ; 1 << 30

> > > 56: if (28570348 > 10000000 && 54/1073741824 < 100) [yes && yes]

> > > 57: rem2 = 54            ; 54 % 1073741824

> > >     *val = 0             ; 54 / 1073741824

> > > 59: *val2 = 0            ; 28570348 / 1073741824

> > > 60: if 46 [yes]

> > > 61: *val2 = 50           ; 0 + 54 * 1000000000 / 1073741824

> > > 63: return <int-plus-nano> [0.000000050]

> > > 

> > > Why do you get zero, what am I missing?  

> > 

> > So... It turns out, I swapped schan and rescaler values which gives us:

> > 

> > numerator = 90

> > denominator = 1373754273

> > schan_val = 261

> > schan_val2 = 509

> > 

> > line

> > 38: tmp = 261000000000   ; 261 * 1000000000

> > 39: tmp = 189            ; 261000000000 / 1373754273

> > 40: tmp = 17010          ; 189 * 90

> > 42: rem = 17010          ; 17010 % 1000000000

> >     tmp = 0              ; 17010 / 1000000000

> > 43: *val = 0

> > 51: if (<fractional>) [yes]

> > 52: tmp = 509

> > 56: if (17010 > 10000000 && 0/509 < 100) [no && yes]

> > 66: *val = 0

> >     *val2 = 509

> >     return <fractional> [0.000000000]

> > 

> > Swapping back the values, I get the same results as you!

> > 

> > Also, replacing line 56 from the snippet above with

> > 

> > 	- if (abs(rem) > 10000000 && abs(div64_s64(*val, tmp)) < 100) {

> > 	+ if (abs(rem)) {

> > 

> > Fixes these precision errors. It also prevents us from returning

> > different scales if we swap the two divisions (schan and rescaler

> > parameters).

> > 

> > >   

> > > > At the same time, I wonder how "real" these values would be. Having such a

> > > > small scale would mean having a large raw value. With 16-bits of resolution,

> > > > that would still give about (1 << 16) * 3.3594e-08 = 0.002201616 mV.  

> > > 

> > > If we cap at 16 bits it sounds as if we probably erase some precision

> > > provided by 24-bit ADCs. We have drivers for those. I didn't really

> > > dig that deep in the driver offerings, but I did find a AD7177 ADC

> > > (but no driver) which is 32-bit. If we don't have any 32-bit ADC driver

> > > yet, it's only a matter of time, methinks. I have personally worked

> > > with 24-bit DACs, and needed every last bit...

> > >   

> > 

> > I was only using 16-bits as an example, but I guess you're right, these

> > values do start to make sense when you're looking at 24-bit and 32-bit

> > ADCs.

> 

> I'd be tempted to be cynical on this.  High resolution devices are rare

> as frankly building a low enough noise board to take advantage is hard.

> Known users of the AFE infrastructure also rare and so as long as we don't

> break any 'current' users via loss of accuracy I'm not that bothered if

> we aren't perfect for 32 bit devices. 


Hi Jonathan,

> I'm guessing we can sometimes sanity check if an overflow will occur

> at probe?  Perhaps do that where possible and print something obvious

> to the log.  Then someone who needs it can figure out the magic maths

> to do this for those high resolution devices!


Good point, I'll see if I can add a check that could help for this.

> > > > We could try to get more precision out of the first division

> > > > 

> > > > 	tmp = (s64)*val * 1000000000LL;

> > > > 	tmp = div_s64(tmp, rescale->denominator);

> > > > 	tmp *= rescale->numerator;

> > > > 	tmp = div_s64_rem(tmp, 1000000000LL, &rem);

> > > > 

> > > > But then, we'd be more likely to overflow. What would be a good middle

> > > > ground?  

> > > 

> > > I don't think we can settle for anything that makes any existing case

> > > worse. That's a regression waiting to happen, and what to do then?

> > >   

> > 

> > Agreed, and looking at this more, there's still ways to improve without

> > having to compromise.

> > Hopefully adding the test suite will make it easier to catch potential

> > regressions in the future :-)

> 

> Absolutely.  Have that test suite is great :)

> 

> > 

> > > >> I'm also wondering if it is wise to not always return the same scale type?

> > > >> What happens if we want to extend this driver to scale a buffered channel?

> > > >> Honest question! I don't know, but I fear that this patch may make that

> > > >> step more difficult to take??  

> > > > 

> > > > That's a fair point, I didn't know it could be a problem to change

> > > > scale.  

> > > 

> > > I don't *know* either? But it would not be completely alien to me if

> > > the buffered case assumes "raw" numbers, and that there is little room

> > > for "meta-data" with each sample.

> 

> Spot on.  Meta data is a pain so an early design decision in IIO was to

> not support it in band.

> 

> > >   

> > > >>

> > > >> Jonathan, do you have any input on that?

> > > >>

> > > >> Some more examples of problematic properties of this patch:

> > > >>

> > > >> 21837/24041 scaled by 427/24727 is 0.01568544672, you get 0.015685446. Ok.

> > > >> But if you reduce the input number, gcd(21837, 24041) -> 29, you have:

> > > >> 753/829 scaled by 427/24727 which still is 0.01568544672 of course, but in

> > > >> this case you get 0.01568154403. Which is less precise. It is unfortunate

> > > >> that input that should be easier to scale may yield worse results.  

> > > > 

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 0

> > > > 

> > > >      real=0.015685445

> > > >  expected=0.015685446719

> > > > # iio_rescale_test_scale: not ok 44 - v8 - 21837/24041 scaled by 427/24727

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 0

> > > > 

> > > >      real=0.015685445

> > > >  expected=0.015685446719

> > > > # iio_rescale_test_scale: not ok 45 - v8 - 753/829 scaled by 427/24727

> > > > 

> > > > It seems like both cases are rounded and give the same result. I do get

> > > > your point though, values that could be simplified might loose more

> > > > precision because of this change in scale type.  

> > > 

> > > I aimed at this:

> > > 

> > > line

> > > 38: tmp = 21837000000000 ; 21837 * 1000000000

> > > 39: tmp = 883123710      ; 21837000000000 / 24727

> > > 40: tmp = 377093824170   ; 883123710 * 427

> > > 42: rem = 93824170       ; 377093824170 % 1000000000

> > >     tmp = 377            ; 377093824170 / 1000000000

> > > 43: *val = 377

> > > 51: if (<fractional>) [yes]

> > > 52: tmp = 24041

> > > 56: if (149312168 > 10000000 && 377/24041 < 100) [yes && yes]

> > > 57: rem2 = 377           ; 377 % 24041

> > >     *val = 0             ; 377 / 24041

> > > 59: *val2 = 3902         ; 93824170 / 24041

> > > 60: if 377 [yes]

> > > 61: *val2 = 15685446     ; 3902 + 377 * 1000000000 / 24041

> > > 63: return <int-plus-nano> [0.0015685446]

> > > 

> > > Why does the test output a 5 at the end and not a 6? It's all

> > > integer arithmetic so there is no room for rounding issues.

> > > 

> > > and

> > > 

> > > line 

> > > 38: tmp = 753000000000   ; 753 * 1000000000

> > > 39: tmp = 30452541       ; 753000000000 / 24727

> > > 40: tmp = 13003235007    ; 30452541 * 427

> > > 42: rem = 3235007        ; 13003235007 % 1000000000

> > >     tmp = 13             ; 13003235007 / 1000000000

> > > 43: *val = 13

> > > 51: if (<fractional>) [yes]

> > > 52: tmp = 829

> > > 56: if (3235007 > 10000000 && 13/829 < 100) [no && yes]

> > > 66: return <fractional> [13/829 ~= 0.015681544]

> > > 

> > > 0.015681544 is pretty different from 0.015685446

> > > 

> > > Again, I don't understand what's going on.

> > >   

> > > >>

> > > >> 760/1373754273 scaled by 427/2727 is 8.662580e-8, and 8.662393e-8 is

> > > >> returned. Which is perhaps not great accuracy, but such is life. However.

> > > >> 761/1373754273 scaled by 427/2727 is 8.673978e-8, which is of course

> > > >> greater, but 8.6e-8 is returned. Which is less than what was returned for

> > > >> the smaller 760/1373754273 value above.  

> > > > 

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 1000000

> > > > 

> > > >      real=0.000000000

> > > >  expected=0.000000086626

> > > > # iio_rescale_test_scale: not ok 46 - v8 - 760/1373754273 scaled by 427/2727

> > > > Expected rel_ppm < 0, but

> > > >     rel_ppm == 1000000

> > > > 

> > > >      real=0.000000000

> > > >  expected=0.000000086740

> > > > # iio_rescale_test_scale: not ok 47 - v8 - 761/1373754273 scaled by 427/2727

> > > > 

> > > > We fall into the same case as the first two examples where the real value is

> > > > null.  

> > > 

> > > I aimed at

> > > 

> > > line

> > > 38: tmp = 760000000000   ; 760 * 1000000000

> > > 39: tmp = 278694536      ; 760000000000 / 2727

> > > 40: tmp = 119002566872   ; 278694536 * 427

> > > 42: rem = 2566872        ; 119002566872 % 1000000000

> > >     tmp = 119            ; 119002566872 / 1000000000

> > > 43: *val = 119

> > > 51: if (<fractional>) [yes]

> > > 52: tmp = 1373754273

> > > 56: if (2566872 > 10000000 && 119/1373754273 < 100) [no && yes]

> > > 66: return <fractional> [119/1373754273 ~= 0.000000086624]

> > > 

> > > and

> > > 

> > > line

> > > 38: tmp = 761000000000   ; 761 * 1000000000

> > > 39: tmp = 279061239      ; 761000000000 / 2727

> > > 40: tmp = 119159149053   ; 279061239 * 427

> > > 42: rem = 159149053      ; 119159149053 % 1000000000

> > >     tmp = 119            ; 119159149053 / 1000000000

> > > 43: *val = 119

> > > 51: if (<fractional>) [yes]

> > > 52: tmp = 1373754273

> > > 56: if (159149053 > 10000000 && 119/1373754273 < 100) [yes && yes]

> > > 57: rem2 = 119           ; 119 % 1373754273

> > >     *val = 0             ; 119 / 1373754273

> > > 59: *val2 = 0            ; 159149053 / 1373754273

> > > 60: if 119 [yes]

> > > 61: *val2 = 86           ; 0 + 119 * 1000000000 / 1373754273

> > > 63: return <int-plus-nano> [0.000000086]

> > >   

> > > > Considering these null values and the possible issue of not always having the

> > > > same scale type, would it be better to always return an IIO_VAL_INT_PLUS_NANO

> > > > scale?  

> > > 

> > > No, that absolutely kills the precision for small values that are much

> > > better off as-is. The closer you get to zero, the more the conversion

> > > to int-plus-nano hurts, relatively speaking.  

> > 

> > I'm not sure I understand what you mean. The point of switching to

> > IIO_VAL_INT_PLUS_NANO at the moment is to get more precision on small

> > values. Am I missing something?

> > 

> > >   

> > > >>

> > > >> Some of these objections are related to what I talked about in v7, i.e.:

> > > >>

> > > >>     Also, changing the calculation so that you get more precision whenever that is

> > > >>     possible feels dangerous. I fear linearity breaks and that bigger input cause

> > > >>     smaller output due to rounding if the bigger value has to be rounded down, but

> > > >>     that this isn't done carefully enough. I.e. attempting to return an exact

> > > >>     fraction and only falling back to the old code when that is not possible is

> > > >>     still not safe since the old code isn't careful enough about rounding. I think

> > > >>     it is really important that bigger input cause bigger (or equal) output.

> > > >>     Otherwise you might trigger instability in feedback loops should a rescaler be

> > > >>     involved in a some regulator function.  

> > > > 

> > > > I think I didn't read this closely enought the first time around. I agree that

> > > > bigger inputs should cause bigger outputs, especially with these rounding

> > > > errors. My original indention was to have all scales withing a tight margin,

> > > > that's why I ended up going with ppm for the test cases.

> > > >   

> > > >>

> > > >> Sadly, I see no elegant solution to your problem.

> > > >>

> > > >> One way forward may be to somehow provide information on the expected

> > > >> input range, and then determine the scaling method based on that

> > > >> instead of the individual values. But, as indicated, there's no real

> > > >> elegance in that. It can't be automated...  

> > > > 

> > > > I guess the issue with that is that unless it's a user parameter, we're

> > > > always going go have these little islands you mentioned in v7...

> > > > 

> > > > Would it be viable to guaranty a MICRO precision instead of NANO, and

> > > > not have the range parameter?  

> > > 

> > > I don't get what you mean here? Returning int-plus-micro can't be it,

> > > since that would be completely pointless and only make it easier to

> > > trigger accuracy problems of the conversion. However, I feel that any

> > > attempt to shift digits but still having the same general approch will

> > > just change the size and position of the islands, and thus not fix the

> > > fundamental problematic border between land and water.  

> > 

> > My apologies, discard this last comment. I was suggesting to guaranty

> > less precision, but consistent over the full range. I don't believe

> > that's a viable option.

> 

> Keep up the good work!  I'm looking forward to this going in (hopefully

> shortly!)


Thanks again for the encouragement, it's been really nice working with
all of you!

Liam

> Jonathan

> 

> > 

> > Thanks again for your time,

> > Liam

> > 

> > > 

> > > Cheers,

> > > Peter


*snip*
diff mbox series

Patch

diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c
index c408c4057c08..7304306c9806 100644
--- a/drivers/iio/afe/iio-rescale.c
+++ b/drivers/iio/afe/iio-rescale.c
@@ -22,7 +22,7 @@  int rescale_process_scale(struct rescale *rescale, int scale_type,
 			  int *val, int *val2)
 {
 	s64 tmp;
-	s32 rem;
+	s32 rem, rem2;
 	u32 mult;
 	u32 neg;
 
@@ -38,8 +38,31 @@  int rescale_process_scale(struct rescale *rescale, int scale_type,
 		tmp = (s64)*val * 1000000000LL;
 		tmp = div_s64(tmp, rescale->denominator);
 		tmp *= rescale->numerator;
-		tmp = div_s64(tmp, 1000000000LL);
+
+		tmp = div_s64_rem(tmp, 1000000000LL, &rem);
 		*val = tmp;
+
+		/*
+		 * For small values, the approximation can be costly,
+		 * change scale type to maintain accuracy.
+		 *
+		 * 100 vs. 10000000 NANO caps the error to about 100 ppm.
+		 */
+		if (scale_type == IIO_VAL_FRACTIONAL)
+			tmp = *val2;
+		else
+			tmp = 1 << *val2;
+
+		 if (abs(rem) > 10000000 && abs(*val / tmp) < 100) {
+			 *val = div_s64_rem(*val, tmp, &rem2);
+
+			 *val2 = div_s64(rem, tmp);
+			 if (rem2)
+				 *val2 += div_s64(rem2 * 1000000000LL, tmp);
+
+			 return IIO_VAL_INT_PLUS_NANO;
+		 }
+
 		return scale_type;
 	case IIO_VAL_INT_PLUS_NANO:
 	case IIO_VAL_INT_PLUS_MICRO: