diff mbox series

tty: serial: qcom-geni-serial: Fix get_clk_div_rate() which otherwise could return a sub-optimal clock rate.

Message ID 1655834239-20812-1-git-send-email-quic_vnivarth@quicinc.com
State New
Headers show
Series tty: serial: qcom-geni-serial: Fix get_clk_div_rate() which otherwise could return a sub-optimal clock rate. | expand

Commit Message

Vijaya Krishna Nivarthi June 21, 2022, 5:57 p.m. UTC
In the logic around call to clk_round_rate, for some corner conditions,
get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
exact clock rate was not found lowest clock was being returned.

Search for suitable clock rate in 2 steps
a) exact match or within 2% tolerance
b) within 5% tolerance
This also takes care of corner conditions.

Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart frequency table. Instead, find suitable frequency with call to clk_round_rate")
Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
---
 drivers/tty/serial/qcom_geni_serial.c | 134 ++++++++++++++++++++++++++--------
 1 file changed, 102 insertions(+), 32 deletions(-)

Comments

kernel test robot June 22, 2022, 9:51 p.m. UTC | #1
Hi Vijaya,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tty/tty-testing]
[also build test ERROR on linus/master v5.19-rc3 next-20220622]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Vijaya-Krishna-Nivarthi/tty-serial-qcom-geni-serial-Fix-get_clk_div_rate-which-otherwise-could-return-a-sub-optimal-clock-rate/20220622-015826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty-testing
config: arm-randconfig-r036-20220622 (https://download.01.org/0day-ci/archive/20220623/202206230511.W02MMaf8-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 8b8d126598ce7bd5243da7f94f69fa1104288bee)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/intel-lab-lkp/linux/commit/668659f1481053090a9dbe9c83bd769de527a5c2
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vijaya-Krishna-Nivarthi/tty-serial-qcom-geni-serial-Fix-get_clk_div_rate-which-otherwise-could-return-a-sub-optimal-clock-rate/20220622-015826
        git checkout 668659f1481053090a9dbe9c83bd769de527a5c2
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> ld.lld: error: undefined symbol: __aeabi_uldivmod
   >>> referenced by qcom_geni_serial.c
   >>>               tty/serial/qcom_geni_serial.o:(find_clk_rate_in_tol) in archive drivers/built-in.a
   >>> did you mean: __aeabi_uidivmod
   >>> defined in: arch/arm/lib/lib.a(lib1funcs.o)
Stephen Boyd June 22, 2022, 11:06 p.m. UTC | #2
Quoting Vijaya Krishna Nivarthi (2022-06-21 10:57:19)
> In the logic around call to clk_round_rate, for some corner conditions,

clk_round_rate(), not the parethesis to indicate it's a function.

> get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
> exact clock rate was not found lowest clock was being returned.
>
> Search for suitable clock rate in 2 steps
> a) exact match or within 2% tolerance
> b) within 5% tolerance
> This also takes care of corner conditions.
>
> Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart frequency table. Instead, find suitable frequency with call to clk_round_rate")
> Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
> ---
>  drivers/tty/serial/qcom_geni_serial.c | 134 ++++++++++++++++++++++++++--------
>  1 file changed, 102 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 2e23b65..8d247c1 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -943,52 +943,123 @@ static int qcom_geni_serial_startup(struct uart_port *uport)
>         return 0;
>  }
>
> -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> -                       unsigned int sampling_rate, unsigned int *clk_div)
> +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk,
> +                       unsigned int *clk_div, unsigned int percent_tol, bool *exact_match)

Do we really need to pass in a bool pointer here for 'exact_match'?
Can't we calculate the exact match value in the callsite and simply pass
a bool (not pointer) to constrain the logic in this function?

>  {
> +       unsigned long freq;
> +       unsigned long div, maxdiv, new_div;
> +       unsigned long long mult;

I think u64 is used more often than unsigned long long.

>         unsigned long ser_clk;
> -       unsigned long desired_clk;
> -       unsigned long freq, prev;
> -       unsigned long div, maxdiv;
> -       int64_t mult;
> -
> -       desired_clk = baud * sampling_rate;
> -       if (!desired_clk) {
> -               pr_err("%s: Invalid frequency\n", __func__);
> -               return 0;
> -       }
> +       unsigned long test_freq, offset, new_freq;
>
> +       ser_clk = 0;
>         maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
> -       prev = 0;
> +       div = 1;
>
> -       for (div = 1; div <= maxdiv; div++) {
> -               mult = div * desired_clk;
> -               if (mult > ULONG_MAX)
> +       while (div <= maxdiv) {
> +               mult = (unsigned long long)div * desired_clk;

Cast to u64?

> +               if (mult != (unsigned long)mult)

What is this checking for? Do we expect the rate to be larger than
32-bits on 32-bit machines?

>                         break;
>
> -               freq = clk_round_rate(clk, (unsigned long)mult);
> -               if (!(freq % desired_clk)) {
> -                       ser_clk = freq;
> -                       break;
> +               /*
> +                * Loop requesting a freq within tolerance and possibly exact freq.
> +                *
> +                * We'll keep track of the lowest freq inexact match we found
> +                * but always try to find a perfect match. NOTE: this algorithm
> +                * could miss a slightly better freq if there's more than one
> +                * freq between (freq - offset) and (freq) but (freq) can't be made
> +                * exactly, but that's OK.
> +                *
> +                * This absolutely relies on the fact that the Qualcomm clock
> +                * driver always rounds up.
> +                * We make use of exact_match as an I/O param.
> +                */
> +
> +               /* look only for exact match if within tolerance is already found */
> +               if (ser_clk)
> +                       offset = 0;
> +               else
> +                       offset = (mult * percent_tol) / 100;

This needs to use div_u64() to be compatible with 32-bit machines.

> +
> +               test_freq = mult - offset;
> +               freq = clk_round_rate(clk, test_freq);
> +
> +               /*
> +                * A dead-on freq is an insta-win, look for it only in 1st run
> +                */
> +               if (*exact_match) {
> +                       if (!(freq % desired_clk)) {
> +                               ser_clk = freq;
> +                               *clk_div = freq / desired_clk;
> +                               return ser_clk;
> +                       }
> +               }
> +
> +               if (!ser_clk) {
> +                       new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
> +                       new_freq = new_div * desired_clk;
> +                       offset = (new_freq * percent_tol) / 100;
> +
> +                       if (new_freq - offset <= freq && freq <= new_freq + offset) {
> +                               /* Save the first (lowest freq) within tolerance */
> +                               ser_clk = freq;
> +                               *clk_div = new_div;
> +                               /* no more search for exact match required in 2nd run */
> +                               if (!(*exact_match))
> +                                       break;
> +                       }
>                 }
>
> -               if (!prev)
> -                       ser_clk = freq;
> -               else if (prev == freq)
> +               div = freq / desired_clk + 1;
> +
> +               /*
> +                * Only time clock framework doesn't round up is if
> +                * we're past the max clock rate. We're done searching
> +                * if that's the case.
> +                */
> +               if (freq < test_freq)
>                         break;
> +       }
> +
> +       *exact_match = false;
> +       return ser_clk;
> +}
> +
> +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> +                       unsigned int sampling_rate, unsigned int *clk_div)
> +{
> +       unsigned long ser_clk;
> +       unsigned long desired_clk;
> +       unsigned long desired_tol;
> +       bool exact_match;
>
> -               prev = freq;
> +       desired_clk = baud * sampling_rate;
> +       if (!desired_clk) {
> +               pr_err("%s: Invalid frequency\n", __func__);
> +               return 0;
>         }
>
> -       if (!ser_clk) {
> -               pr_err("%s: Can't find matching DFS entry for baud %d\n",
> -                                                               __func__, baud);
> +       /* try to find exact clock rate or within 2% tolerance */
> +       ser_clk = 0;
> +       exact_match = true;
> +       desired_tol = 2;
> +
> +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +       if (ser_clk) {
> +               if (!exact_match)
> +                       pr_warn("Cannot find exact match clk_rate, using one within 2 percent tolerance\n");

Should this be a pr_warn_once()? Because otherwise users are going to
see this error potentially quite often if tolerances can't be achieved.

>                 return ser_clk;
>         }
>
> -       *clk_div = ser_clk / desired_clk;
> -       if (!(*clk_div))
> -               *clk_div = 1;
> +       /* try within 5% tolerance now, no need to look for exact match */
> +       exact_match = false;
> +       desired_tol = 5;
> +
> +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +       if (ser_clk)
> +               pr_warn("Cannot find exact match clk_rate, using one within 5 percent tolerance\n");

This is a debug print?

> +       else
> +               pr_err("Cannot find suitable clk_rate, giving up\n");
>
>         return ser_clk;
>  }
kernel test robot June 23, 2022, 12:25 a.m. UTC | #3
Hi Vijaya,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tty/tty-testing]
[also build test ERROR on linus/master v5.19-rc3 next-20220622]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Vijaya-Krishna-Nivarthi/tty-serial-qcom-geni-serial-Fix-get_clk_div_rate-which-otherwise-could-return-a-sub-optimal-clock-rate/20220622-015826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty-testing
config: arm-randconfig-r016-20220622 (https://download.01.org/0day-ci/archive/20220623/202206230849.dxzRCvaU-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/668659f1481053090a9dbe9c83bd769de527a5c2
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vijaya-Krishna-Nivarthi/tty-serial-qcom-geni-serial-Fix-get_clk_div_rate-which-otherwise-could-return-a-sub-optimal-clock-rate/20220622-015826
        git checkout 668659f1481053090a9dbe9c83bd769de527a5c2
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arm-linux-gnueabi-ld: arm-linux-gnueabi-ld: DWARF error: could not find abbrev number 121
   drivers/tty/serial/qcom_geni_serial.o: in function `find_clk_rate_in_tol':
>> qcom_geni_serial.c:(.text+0x764): undefined reference to `__aeabi_uldivmod'
Jiri Slaby (SUSE) June 23, 2022, 6:07 a.m. UTC | #4
On 21. 06. 22, 19:57, Vijaya Krishna Nivarthi wrote:
> In the logic around call to clk_round_rate, for some corner conditions,
> get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
> exact clock rate was not found lowest clock was being returned.
> 
> Search for suitable clock rate in 2 steps
> a) exact match or within 2% tolerance
> b) within 5% tolerance
> This also takes care of corner conditions.
> 
> Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart frequency table. Instead, find suitable frequency with call to clk_round_rate")

Hmm, provided the complexity, was this worth it -- how many typos/bugs 
can be in such complex and twisted functions?

The original intention was not to touch the driver when new HW arrives. 
Now it looks like you'd be chasing corner cases like these for quite 
some releases.

So going back in time, reconsidering the whole thing: how often do you 
expect the original rate table would need to be updated?

NACK

in any way -- see my comment below -- if you really want to go this 
path, you'd need to split this.

> Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
> ---
>   drivers/tty/serial/qcom_geni_serial.c | 134 ++++++++++++++++++++++++++--------
>   1 file changed, 102 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 2e23b65..8d247c1 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -943,52 +943,123 @@ static int qcom_geni_serial_startup(struct uart_port *uport)
>   	return 0;
>   }
>   
> -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> -			unsigned int sampling_rate, unsigned int *clk_div)
> +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk,
> +			unsigned int *clk_div, unsigned int percent_tol, bool *exact_match)
>   {
> +	unsigned long freq;
> +	unsigned long div, maxdiv, new_div;
> +	unsigned long long mult;
>   	unsigned long ser_clk;
> -	unsigned long desired_clk;
> -	unsigned long freq, prev;
> -	unsigned long div, maxdiv;
> -	int64_t mult;
> -
> -	desired_clk = baud * sampling_rate;
> -	if (!desired_clk) {
> -		pr_err("%s: Invalid frequency\n", __func__);
> -		return 0;
> -	}
> +	unsigned long test_freq, offset, new_freq;
>   
> +	ser_clk = 0;
>   	maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
> -	prev = 0;
> +	div = 1;
>   
> -	for (div = 1; div <= maxdiv; div++) {
> -		mult = div * desired_clk;
> -		if (mult > ULONG_MAX)
> +	while (div <= maxdiv) {
> +		mult = (unsigned long long)div * desired_clk;
> +		if (mult != (unsigned long)mult)
>   			break;
>   
> -		freq = clk_round_rate(clk, (unsigned long)mult);
> -		if (!(freq % desired_clk)) {
> -			ser_clk = freq;
> -			break;
> +		/*
> +		 * Loop requesting a freq within tolerance and possibly exact freq.
> +		 *
> +		 * We'll keep track of the lowest freq inexact match we found
> +		 * but always try to find a perfect match. NOTE: this algorithm
> +		 * could miss a slightly better freq if there's more than one
> +		 * freq between (freq - offset) and (freq) but (freq) can't be made
> +		 * exactly, but that's OK.
> +		 *
> +		 * This absolutely relies on the fact that the Qualcomm clock
> +		 * driver always rounds up.
> +		 * We make use of exact_match as an I/O param.
> +		 */
> +
> +		/* look only for exact match if within tolerance is already found */
> +		if (ser_clk)
> +			offset = 0;
> +		else
> +			offset = (mult * percent_tol) / 100;
> +
> +		test_freq = mult - offset;
> +		freq = clk_round_rate(clk, test_freq);
> +
> +		/*
> +		 * A dead-on freq is an insta-win, look for it only in 1st run
> +		 */
> +		if (*exact_match) {
> +			if (!(freq % desired_clk)) {
> +				ser_clk = freq;
> +				*clk_div = freq / desired_clk;
> +				return ser_clk;
> +			}
> +		}
> +
> +		if (!ser_clk) {
> +			new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
> +			new_freq = new_div * desired_clk;
> +			offset = (new_freq * percent_tol) / 100;
> +
> +			if (new_freq - offset <= freq && freq <= new_freq + offset) {
> +				/* Save the first (lowest freq) within tolerance */
> +				ser_clk = freq;
> +				*clk_div = new_div;
> +				/* no more search for exact match required in 2nd run */
> +				if (!(*exact_match))
> +					break;
> +			}
>   		}
>   
> -		if (!prev)
> -			ser_clk = freq;
> -		else if (prev == freq)
> +		div = freq / desired_clk + 1;
> +
> +		/*
> +		 * Only time clock framework doesn't round up is if
> +		 * we're past the max clock rate. We're done searching
> +		 * if that's the case.
> +		 */
> +		if (freq < test_freq)
>   			break;
> +	}
> +
> +	*exact_match = false;
> +	return ser_clk;
> +}
> +
> +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> +			unsigned int sampling_rate, unsigned int *clk_div)

This cannot be reviewed properly. Care to split this into 2-3 patches? 
Looks at the nesting and the complexity, it also looks like you need 
more helper functions.

> +{
> +	unsigned long ser_clk;
> +	unsigned long desired_clk;
> +	unsigned long desired_tol;
> +	bool exact_match;
>   
> -		prev = freq;
> +	desired_clk = baud * sampling_rate;
> +	if (!desired_clk) {
> +		pr_err("%s: Invalid frequency\n", __func__);
> +		return 0;
>   	}
>   
> -	if (!ser_clk) {
> -		pr_err("%s: Can't find matching DFS entry for baud %d\n",
> -								__func__, baud);
> +	/* try to find exact clock rate or within 2% tolerance */
> +	ser_clk = 0;
> +	exact_match = true;
> +	desired_tol = 2;
> +
> +	ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +	if (ser_clk) {
> +		if (!exact_match)
> +			pr_warn("Cannot find exact match clk_rate, using one within 2 percent tolerance\n");
>   		return ser_clk;
>   	}
>   
> -	*clk_div = ser_clk / desired_clk;
> -	if (!(*clk_div))
> -		*clk_div = 1;
> +	/* try within 5% tolerance now, no need to look for exact match */
> +	exact_match = false;
> +	desired_tol = 5;
> +
> +	ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +	if (ser_clk)
> +		pr_warn("Cannot find exact match clk_rate, using one within 5 percent tolerance\n");
> +	else
> +		pr_err("Cannot find suitable clk_rate, giving up\n");
>   
>   	return ser_clk;
>   }
> @@ -1021,8 +1092,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
>   	if (ver >= QUP_SE_VERSION_2_5)
>   		sampling_rate /= 2;
>   
> -	clk_rate = get_clk_div_rate(port->se.clk, baud,
> -		sampling_rate, &clk_div);
> +	clk_rate = get_clk_div_rate(port->se.clk, baud, sampling_rate, &clk_div);
>   	if (!clk_rate)
>   		goto out_restart_rx;
>   

thanks,
Doug Anderson June 23, 2022, 11:21 p.m. UTC | #5
Hi,

On Tue, Jun 21, 2022 at 10:57 AM Vijaya Krishna Nivarthi
<quic_vnivarth@quicinc.com> wrote:
>
> In the logic around call to clk_round_rate, for some corner conditions,
> get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
> exact clock rate was not found lowest clock was being returned.
>
> Search for suitable clock rate in 2 steps
> a) exact match or within 2% tolerance
> b) within 5% tolerance
> This also takes care of corner conditions.
>
> Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart frequency table. Instead, find suitable frequency with call to clk_round_rate")
> Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
> ---
>  drivers/tty/serial/qcom_geni_serial.c | 134 ++++++++++++++++++++++++++--------
>  1 file changed, 102 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 2e23b65..8d247c1 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -943,52 +943,123 @@ static int qcom_geni_serial_startup(struct uart_port *uport)
>         return 0;
>  }
>
> -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> -                       unsigned int sampling_rate, unsigned int *clk_div)
> +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk,
> +                       unsigned int *clk_div, unsigned int percent_tol, bool *exact_match)
>  {
> +       unsigned long freq;
> +       unsigned long div, maxdiv, new_div;
> +       unsigned long long mult;
>         unsigned long ser_clk;
> -       unsigned long desired_clk;
> -       unsigned long freq, prev;
> -       unsigned long div, maxdiv;
> -       int64_t mult;
> -
> -       desired_clk = baud * sampling_rate;
> -       if (!desired_clk) {
> -               pr_err("%s: Invalid frequency\n", __func__);
> -               return 0;
> -       }
> +       unsigned long test_freq, offset, new_freq;
>
> +       ser_clk = 0;
>         maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
> -       prev = 0;
> +       div = 1;
>
> -       for (div = 1; div <= maxdiv; div++) {
> -               mult = div * desired_clk;
> -               if (mult > ULONG_MAX)
> +       while (div <= maxdiv) {
> +               mult = (unsigned long long)div * desired_clk;
> +               if (mult != (unsigned long)mult)
>                         break;
>
> -               freq = clk_round_rate(clk, (unsigned long)mult);
> -               if (!(freq % desired_clk)) {
> -                       ser_clk = freq;
> -                       break;
> +               /*
> +                * Loop requesting a freq within tolerance and possibly exact freq.
> +                *
> +                * We'll keep track of the lowest freq inexact match we found
> +                * but always try to find a perfect match. NOTE: this algorithm
> +                * could miss a slightly better freq if there's more than one
> +                * freq between (freq - offset) and (freq) but (freq) can't be made
> +                * exactly, but that's OK.
> +                *
> +                * This absolutely relies on the fact that the Qualcomm clock
> +                * driver always rounds up.
> +                * We make use of exact_match as an I/O param.
> +                */
> +
> +               /* look only for exact match if within tolerance is already found */
> +               if (ser_clk)
> +                       offset = 0;
> +               else
> +                       offset = (mult * percent_tol) / 100;
> +
> +               test_freq = mult - offset;
> +               freq = clk_round_rate(clk, test_freq);
> +
> +               /*
> +                * A dead-on freq is an insta-win, look for it only in 1st run
> +                */
> +               if (*exact_match) {
> +                       if (!(freq % desired_clk)) {
> +                               ser_clk = freq;
> +                               *clk_div = freq / desired_clk;
> +                               return ser_clk;
> +                       }
> +               }

The "*exact_match" if test isn't needed here. It's not saving you any
significant amount of time. You're still doing an "if" test, right?
...so you're basically saving a mod operation by adding a pointer
dereference and complexity? I don't think that's the right tradeoff.


> +               if (!ser_clk) {
> +                       new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
> +                       new_freq = new_div * desired_clk;
> +                       offset = (new_freq * percent_tol) / 100;
> +
> +                       if (new_freq - offset <= freq && freq <= new_freq + offset) {
> +                               /* Save the first (lowest freq) within tolerance */
> +                               ser_clk = freq;
> +                               *clk_div = new_div;
> +                               /* no more search for exact match required in 2nd run */
> +                               if (!(*exact_match))
> +                                       break;
> +                       }
>                 }
>
> -               if (!prev)
> -                       ser_clk = freq;
> -               else if (prev == freq)
> +               div = freq / desired_clk + 1;
> +
> +               /*
> +                * Only time clock framework doesn't round up is if
> +                * we're past the max clock rate. We're done searching
> +                * if that's the case.
> +                */
> +               if (freq < test_freq)
>                         break;
> +       }
> +
> +       *exact_match = false;
> +       return ser_clk;
> +}
> +
> +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> +                       unsigned int sampling_rate, unsigned int *clk_div)
> +{
> +       unsigned long ser_clk;
> +       unsigned long desired_clk;
> +       unsigned long desired_tol;
> +       bool exact_match;
>
> -               prev = freq;
> +       desired_clk = baud * sampling_rate;
> +       if (!desired_clk) {
> +               pr_err("%s: Invalid frequency\n", __func__);
> +               return 0;
>         }
>
> -       if (!ser_clk) {
> -               pr_err("%s: Can't find matching DFS entry for baud %d\n",
> -                                                               __func__, baud);
> +       /* try to find exact clock rate or within 2% tolerance */
> +       ser_clk = 0;
> +       exact_match = true;
> +       desired_tol = 2;

Don't need a "desired_tol" variable. Just pass 2 into the function.


> +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +       if (ser_clk) {
> +               if (!exact_match)
> +                       pr_warn("Cannot find exact match clk_rate, using one within 2 percent tolerance\n");

IMO get rid of this printout. Just return what you found if it's not
0. It's perfectly fine. ...that means you can fully get rid of the
"exact_match" variable.


>                 return ser_clk;
>         }
>
> -       *clk_div = ser_clk / desired_clk;
> -       if (!(*clk_div))
> -               *clk_div = 1;
> +       /* try within 5% tolerance now, no need to look for exact match */
> +       exact_match = false;
> +       desired_tol = 5;
> +
> +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
> +       if (ser_clk)
> +               pr_warn("Cannot find exact match clk_rate, using one within 5 percent tolerance\n");
> +       else
> +               pr_err("Cannot find suitable clk_rate, giving up\n");

Just keep the error message but not the warning. ...and ideally use
"dev_err" and print out the clock you were trying to achieve.
Vijaya Krishna Nivarthi June 29, 2022, 10:15 a.m. UTC | #6
Hi,


> > +
> > +               /*
> > +                * A dead-on freq is an insta-win, look for it only in 1st run
> > +                */
> > +               if (*exact_match) {
> > +                       if (!(freq % desired_clk)) {
> > +                               ser_clk = freq;
> > +                               *clk_div = freq / desired_clk;
> > +                               return ser_clk;
> > +                       }
> > +               }
> 
> The "*exact_match" if test isn't needed here. It's not saving you any
> significant amount of time. You're still doing an "if" test, right?
> ...so you're basically saving a mod operation by adding a pointer dereference
> and complexity? I don't think that's the right tradeoff.
> 
> 

Removed exact_match check from here.


> >
> > -       if (!ser_clk) {
> > -               pr_err("%s: Can't find matching DFS entry for baud %d\n",
> > -                                                               __func__, baud);
> > +       /* try to find exact clock rate or within 2% tolerance */
> > +       ser_clk = 0;
> > +       exact_match = true;
> > +       desired_tol = 2;
> 
> Don't need a "desired_tol" variable. Just pass 2 into the function.
> 
>

Done

 
> > +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +       if (ser_clk) {
> > +               if (!exact_match)
> > +                       pr_warn("Cannot find exact match clk_rate,
> > + using one within 2 percent tolerance\n");
> 
> IMO get rid of this printout. Just return what you found if it's not 0. It's
> perfectly fine. ...that means you can fully get rid of the "exact_match"
> variable.
> 
>

Done.
But retained exact_match as bool instead of pointer to help early out in 2nd call.
 
> >                 return ser_clk;
> >         }
> >
> > -       *clk_div = ser_clk / desired_clk;
> > -       if (!(*clk_div))
> > -               *clk_div = 1;
> > +       /* try within 5% tolerance now, no need to look for exact match */
> > +       exact_match = false;
> > +       desired_tol = 5;
> > +
> > +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +       if (ser_clk)
> > +               pr_warn("Cannot find exact match clk_rate, using one within 5
> percent tolerance\n");
> > +       else
> > +               pr_err("Cannot find suitable clk_rate, giving up\n");
> 
> Just keep the error message but not the warning. ...and ideally use "dev_err"
> and print out the clock you were trying to achieve.

Done. Retained pr_err since dev wasn’t readily available here.

Thank you.
Vijaya Krishna Nivarthi June 29, 2022, 10:25 a.m. UTC | #7
Hi,


> -----Original Message-----
> From: Stephen Boyd <swboyd@chromium.org>
> Sent: Thursday, June 23, 2022 4:36 AM
> To: Vijaya Krishna Nivarthi (Temp) (QUIC) <quic_vnivarth@quicinc.com>;
> agross@kernel.org; bjorn.andersson@linaro.org;
> gregkh@linuxfoundation.org; jirislaby@kernel.org; linux-arm-
> msm@vger.kernel.org; linux-kernel@vger.kernel.org; linux-
> serial@vger.kernel.org
> Cc: Mukesh Savaliya (QUIC) <quic_msavaliy@quicinc.com>;
> dianders@chromium.org; mka@chromium.org
> Subject: Re: [PATCH] tty: serial: qcom-geni-serial: Fix get_clk_div_rate()
> which otherwise could return a sub-optimal clock rate.
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> Quoting Vijaya Krishna Nivarthi (2022-06-21 10:57:19)
> > In the logic around call to clk_round_rate, for some corner
> > conditions,
> 
> clk_round_rate(), not the parethesis to indicate it's a function.

Done.

> 
> > get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
> > exact clock rate was not found lowest clock was being returned.
> >
> > Search for suitable clock rate in 2 steps
> > a) exact match or within 2% tolerance
> > b) within 5% tolerance
> > This also takes care of corner conditions.
> >
> > Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart
> > frequency table. Instead, find suitable frequency with call to
> > clk_round_rate")
> > Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
> > ---
> >  drivers/tty/serial/qcom_geni_serial.c | 134
> > ++++++++++++++++++++++++++--------
> >  1 file changed, 102 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/tty/serial/qcom_geni_serial.c
> > b/drivers/tty/serial/qcom_geni_serial.c
> > index 2e23b65..8d247c1 100644
> > --- a/drivers/tty/serial/qcom_geni_serial.c
> > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > @@ -943,52 +943,123 @@ static int qcom_geni_serial_startup(struct
> uart_port *uport)
> >         return 0;
> >  }
> >
> > -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> > -                       unsigned int sampling_rate, unsigned int *clk_div)
> > +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int
> desired_clk,
> > +                       unsigned int *clk_div, unsigned int
> > +percent_tol, bool *exact_match)
> 
> Do we really need to pass in a bool pointer here for 'exact_match'?
> Can't we calculate the exact match value in the callsite and simply pass a bool
> (not pointer) to constrain the logic in this function?
>

Passing exact_match as pointer.

 
> >  {
> > +       unsigned long freq;
> > +       unsigned long div, maxdiv, new_div;
> > +       unsigned long long mult;
> 
> I think u64 is used more often than unsigned long long.

Done.

> 
> >         unsigned long ser_clk;
> > -       unsigned long desired_clk;
> > -       unsigned long freq, prev;
> > -       unsigned long div, maxdiv;
> > -       int64_t mult;
> > -
> > -       desired_clk = baud * sampling_rate;
> > -       if (!desired_clk) {
> > -               pr_err("%s: Invalid frequency\n", __func__);
> > -               return 0;
> > -       }
> > +       unsigned long test_freq, offset, new_freq;
> >
> > +       ser_clk = 0;
> >         maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
> > -       prev = 0;
> > +       div = 1;
> >
> > -       for (div = 1; div <= maxdiv; div++) {
> > -               mult = div * desired_clk;
> > -               if (mult > ULONG_MAX)
> > +       while (div <= maxdiv) {
> > +               mult = (unsigned long long)div * desired_clk;
> 
> Cast to u64?

Done.


> 
> > +               if (mult != (unsigned long)mult)
> 
> What is this checking for? Do we expect the rate to be larger than 32-bits on
> 32-bit machines?
>

Since we are multiplying rate with divider this is safety check?

 
> >                         break;
> >
> > -               freq = clk_round_rate(clk, (unsigned long)mult);
> > -               if (!(freq % desired_clk)) {
> > -                       ser_clk = freq;
> > -                       break;
> > +               /*
> > +                * Loop requesting a freq within tolerance and possibly exact freq.
> > +                *
> > +                * We'll keep track of the lowest freq inexact match we found
> > +                * but always try to find a perfect match. NOTE: this algorithm
> > +                * could miss a slightly better freq if there's more than one
> > +                * freq between (freq - offset) and (freq) but (freq) can't be made
> > +                * exactly, but that's OK.
> > +                *
> > +                * This absolutely relies on the fact that the Qualcomm clock
> > +                * driver always rounds up.
> > +                * We make use of exact_match as an I/O param.
> > +                */
> > +
> > +               /* look only for exact match if within tolerance is already found */
> > +               if (ser_clk)
> > +                       offset = 0;
> > +               else
> > +                       offset = (mult * percent_tol) / 100;
> 
> This needs to use div_u64() to be compatible with 32-bit machines.
>

Done. Thank you.

 
> > +
> > +               test_freq = mult - offset;
> > +               freq = clk_round_rate(clk, test_freq);
> > +
> > +               /*
> > +                * A dead-on freq is an insta-win, look for it only in 1st run
> > +                */
> > +               if (*exact_match) {
> > +                       if (!(freq % desired_clk)) {
> > +                               ser_clk = freq;
> > +                               *clk_div = freq / desired_clk;
> > +                               return ser_clk;
> > +                       }
> > +               }
> > +
> > +               if (!ser_clk) {
> > +                       new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
> > +                       new_freq = new_div * desired_clk;
> > +                       offset = (new_freq * percent_tol) / 100;
> > +
> > +                       if (new_freq - offset <= freq && freq <= new_freq + offset) {
> > +                               /* Save the first (lowest freq) within tolerance */
> > +                               ser_clk = freq;
> > +                               *clk_div = new_div;
> > +                               /* no more search for exact match required in 2nd run */
> > +                               if (!(*exact_match))
> > +                                       break;
> > +                       }
> >                 }
> >
> > -               if (!prev)
> > -                       ser_clk = freq;
> > -               else if (prev == freq)
> > +               div = freq / desired_clk + 1;
> > +
> > +               /*
> > +                * Only time clock framework doesn't round up is if
> > +                * we're past the max clock rate. We're done searching
> > +                * if that's the case.
> > +                */
> > +               if (freq < test_freq)
> >                         break;
> > +       }
> > +
> > +       *exact_match = false;
> > +       return ser_clk;
> > +}
> > +
> > +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> > +                       unsigned int sampling_rate, unsigned int
> > +*clk_div) {
> > +       unsigned long ser_clk;
> > +       unsigned long desired_clk;
> > +       unsigned long desired_tol;
> > +       bool exact_match;
> >
> > -               prev = freq;
> > +       desired_clk = baud * sampling_rate;
> > +       if (!desired_clk) {
> > +               pr_err("%s: Invalid frequency\n", __func__);
> > +               return 0;
> >         }
> >
> > -       if (!ser_clk) {
> > -               pr_err("%s: Can't find matching DFS entry for baud %d\n",
> > -                                                               __func__, baud);
> > +       /* try to find exact clock rate or within 2% tolerance */
> > +       ser_clk = 0;
> > +       exact_match = true;
> > +       desired_tol = 2;
> > +
> > +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +       if (ser_clk) {
> > +               if (!exact_match)
> > +                       pr_warn("Cannot find exact match clk_rate,
> > + using one within 2 percent tolerance\n");
> 
> Should this be a pr_warn_once()? Because otherwise users are going to see
> this error potentially quite often if tolerances can't be achieved.
> 

Removed the message and implemented as per Doug's suggestion.


> >                 return ser_clk;
> >         }
> >
> > -       *clk_div = ser_clk / desired_clk;
> > -       if (!(*clk_div))
> > -               *clk_div = 1;
> > +       /* try within 5% tolerance now, no need to look for exact match */
> > +       exact_match = false;
> > +       desired_tol = 5;
> > +
> > +       ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +       if (ser_clk)
> > +               pr_warn("Cannot find exact match clk_rate, using one
> > + within 5 percent tolerance\n");
> 
> This is a debug print?
>

Removed the message and implemented as per Doug's suggestion.


> > +       else
> > +               pr_err("Cannot find suitable clk_rate, giving up\n");
> >
> >         return ser_clk;
> >  }

Thank you.
Vijaya Krishna Nivarthi June 29, 2022, 10:29 a.m. UTC | #8
Hi

> -----Original Message-----
> From: Jiri Slaby <jirislaby@kernel.org>
> Sent: Thursday, June 23, 2022 11:38 AM
> To: Vijaya Krishna Nivarthi (Temp) (QUIC) <quic_vnivarth@quicinc.com>;
> agross@kernel.org; bjorn.andersson@linaro.org;
> gregkh@linuxfoundation.org; linux-arm-msm@vger.kernel.org; linux-
> serial@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: Mukesh Savaliya (QUIC) <quic_msavaliy@quicinc.com>;
> dianders@chromium.org; mka@chromium.org; swboyd@chromium.org
> Subject: Re: [PATCH] tty: serial: qcom-geni-serial: Fix get_clk_div_rate()
> which otherwise could return a sub-optimal clock rate.
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> On 21. 06. 22, 19:57, Vijaya Krishna Nivarthi wrote:
> > In the logic around call to clk_round_rate, for some corner
> > conditions,
> > get_clk_div_rate() could return an sub-optimal clock rate. Also, if an
> > exact clock rate was not found lowest clock was being returned.
> >
> > Search for suitable clock rate in 2 steps
> > a) exact match or within 2% tolerance
> > b) within 5% tolerance
> > This also takes care of corner conditions.
> >
> > Fixes: c2194bc999d4 ("tty: serial: qcom-geni-serial: Remove uart
> > frequency table. Instead, find suitable frequency with call to
> > clk_round_rate")
> 
> Hmm, provided the complexity, was this worth it -- how many typos/bugs
> can be in such complex and twisted functions?
> 
> The original intention was not to touch the driver when new HW arrives.
> Now it looks like you'd be chasing corner cases like these for quite some
> releases.
> 
> So going back in time, reconsidering the whole thing: how often do you
> expect the original rate table would need to be updated?
> 
> NACK
> 
> in any way -- see my comment below -- if you really want to go this path,
> you'd need to split this.
> 

I simplified the patch to address other feedbacks too and I believe its less complex and more readable now.
The function are about 50 LOC and 20 LOC (without comments) and splitting them further is possibly difficult?
Thank you.


> > Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
> > ---
> >   drivers/tty/serial/qcom_geni_serial.c | 134
> ++++++++++++++++++++++++++--------
> >   1 file changed, 102 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/tty/serial/qcom_geni_serial.c
> > b/drivers/tty/serial/qcom_geni_serial.c
> > index 2e23b65..8d247c1 100644
> > --- a/drivers/tty/serial/qcom_geni_serial.c
> > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > @@ -943,52 +943,123 @@ static int qcom_geni_serial_startup(struct
> uart_port *uport)
> >       return 0;
> >   }
> >
> > -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> > -                     unsigned int sampling_rate, unsigned int *clk_div)
> > +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int
> desired_clk,
> > +                     unsigned int *clk_div, unsigned int percent_tol,
> > +bool *exact_match)
> >   {
> > +     unsigned long freq;
> > +     unsigned long div, maxdiv, new_div;
> > +     unsigned long long mult;
> >       unsigned long ser_clk;
> > -     unsigned long desired_clk;
> > -     unsigned long freq, prev;
> > -     unsigned long div, maxdiv;
> > -     int64_t mult;
> > -
> > -     desired_clk = baud * sampling_rate;
> > -     if (!desired_clk) {
> > -             pr_err("%s: Invalid frequency\n", __func__);
> > -             return 0;
> > -     }
> > +     unsigned long test_freq, offset, new_freq;
> >
> > +     ser_clk = 0;
> >       maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
> > -     prev = 0;
> > +     div = 1;
> >
> > -     for (div = 1; div <= maxdiv; div++) {
> > -             mult = div * desired_clk;
> > -             if (mult > ULONG_MAX)
> > +     while (div <= maxdiv) {
> > +             mult = (unsigned long long)div * desired_clk;
> > +             if (mult != (unsigned long)mult)
> >                       break;
> >
> > -             freq = clk_round_rate(clk, (unsigned long)mult);
> > -             if (!(freq % desired_clk)) {
> > -                     ser_clk = freq;
> > -                     break;
> > +             /*
> > +              * Loop requesting a freq within tolerance and possibly exact freq.
> > +              *
> > +              * We'll keep track of the lowest freq inexact match we found
> > +              * but always try to find a perfect match. NOTE: this algorithm
> > +              * could miss a slightly better freq if there's more than one
> > +              * freq between (freq - offset) and (freq) but (freq) can't be made
> > +              * exactly, but that's OK.
> > +              *
> > +              * This absolutely relies on the fact that the Qualcomm clock
> > +              * driver always rounds up.
> > +              * We make use of exact_match as an I/O param.
> > +              */
> > +
> > +             /* look only for exact match if within tolerance is already found */
> > +             if (ser_clk)
> > +                     offset = 0;
> > +             else
> > +                     offset = (mult * percent_tol) / 100;
> > +
> > +             test_freq = mult - offset;
> > +             freq = clk_round_rate(clk, test_freq);
> > +
> > +             /*
> > +              * A dead-on freq is an insta-win, look for it only in 1st run
> > +              */
> > +             if (*exact_match) {
> > +                     if (!(freq % desired_clk)) {
> > +                             ser_clk = freq;
> > +                             *clk_div = freq / desired_clk;
> > +                             return ser_clk;
> > +                     }
> > +             }
> > +
> > +             if (!ser_clk) {
> > +                     new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
> > +                     new_freq = new_div * desired_clk;
> > +                     offset = (new_freq * percent_tol) / 100;
> > +
> > +                     if (new_freq - offset <= freq && freq <= new_freq + offset) {
> > +                             /* Save the first (lowest freq) within tolerance */
> > +                             ser_clk = freq;
> > +                             *clk_div = new_div;
> > +                             /* no more search for exact match required in 2nd run */
> > +                             if (!(*exact_match))
> > +                                     break;
> > +                     }
> >               }
> >
> > -             if (!prev)
> > -                     ser_clk = freq;
> > -             else if (prev == freq)
> > +             div = freq / desired_clk + 1;
> > +
> > +             /*
> > +              * Only time clock framework doesn't round up is if
> > +              * we're past the max clock rate. We're done searching
> > +              * if that's the case.
> > +              */
> > +             if (freq < test_freq)
> >                       break;
> > +     }
> > +
> > +     *exact_match = false;
> > +     return ser_clk;
> > +}
> > +
> > +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
> > +                     unsigned int sampling_rate, unsigned int
> > +*clk_div)
> 
> This cannot be reviewed properly. Care to split this into 2-3 patches?
> Looks at the nesting and the complexity, it also looks like you need more
> helper functions.
> 
> > +{
> > +     unsigned long ser_clk;
> > +     unsigned long desired_clk;
> > +     unsigned long desired_tol;
> > +     bool exact_match;
> >
> > -             prev = freq;
> > +     desired_clk = baud * sampling_rate;
> > +     if (!desired_clk) {
> > +             pr_err("%s: Invalid frequency\n", __func__);
> > +             return 0;
> >       }
> >
> > -     if (!ser_clk) {
> > -             pr_err("%s: Can't find matching DFS entry for baud %d\n",
> > -                                                             __func__, baud);
> > +     /* try to find exact clock rate or within 2% tolerance */
> > +     ser_clk = 0;
> > +     exact_match = true;
> > +     desired_tol = 2;
> > +
> > +     ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +     if (ser_clk) {
> > +             if (!exact_match)
> > +                     pr_warn("Cannot find exact match clk_rate, using
> > + one within 2 percent tolerance\n");
> >               return ser_clk;
> >       }
> >
> > -     *clk_div = ser_clk / desired_clk;
> > -     if (!(*clk_div))
> > -             *clk_div = 1;
> > +     /* try within 5% tolerance now, no need to look for exact match */
> > +     exact_match = false;
> > +     desired_tol = 5;
> > +
> > +     ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol,
> &exact_match);
> > +     if (ser_clk)
> > +             pr_warn("Cannot find exact match clk_rate, using one within 5
> percent tolerance\n");
> > +     else
> > +             pr_err("Cannot find suitable clk_rate, giving up\n");
> >
> >       return ser_clk;
> >   }
> > @@ -1021,8 +1092,7 @@ static void qcom_geni_serial_set_termios(struct
> uart_port *uport,
> >       if (ver >= QUP_SE_VERSION_2_5)
> >               sampling_rate /= 2;
> >
> > -     clk_rate = get_clk_div_rate(port->se.clk, baud,
> > -             sampling_rate, &clk_div);
> > +     clk_rate = get_clk_div_rate(port->se.clk, baud, sampling_rate,
> > + &clk_div);
> >       if (!clk_rate)
> >               goto out_restart_rx;
> >
> 
> thanks,
> --
> js
> suse labs
diff mbox series

Patch

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 2e23b65..8d247c1 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -943,52 +943,123 @@  static int qcom_geni_serial_startup(struct uart_port *uport)
 	return 0;
 }
 
-static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
-			unsigned int sampling_rate, unsigned int *clk_div)
+static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk,
+			unsigned int *clk_div, unsigned int percent_tol, bool *exact_match)
 {
+	unsigned long freq;
+	unsigned long div, maxdiv, new_div;
+	unsigned long long mult;
 	unsigned long ser_clk;
-	unsigned long desired_clk;
-	unsigned long freq, prev;
-	unsigned long div, maxdiv;
-	int64_t mult;
-
-	desired_clk = baud * sampling_rate;
-	if (!desired_clk) {
-		pr_err("%s: Invalid frequency\n", __func__);
-		return 0;
-	}
+	unsigned long test_freq, offset, new_freq;
 
+	ser_clk = 0;
 	maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT;
-	prev = 0;
+	div = 1;
 
-	for (div = 1; div <= maxdiv; div++) {
-		mult = div * desired_clk;
-		if (mult > ULONG_MAX)
+	while (div <= maxdiv) {
+		mult = (unsigned long long)div * desired_clk;
+		if (mult != (unsigned long)mult)
 			break;
 
-		freq = clk_round_rate(clk, (unsigned long)mult);
-		if (!(freq % desired_clk)) {
-			ser_clk = freq;
-			break;
+		/*
+		 * Loop requesting a freq within tolerance and possibly exact freq.
+		 *
+		 * We'll keep track of the lowest freq inexact match we found
+		 * but always try to find a perfect match. NOTE: this algorithm
+		 * could miss a slightly better freq if there's more than one
+		 * freq between (freq - offset) and (freq) but (freq) can't be made
+		 * exactly, but that's OK.
+		 *
+		 * This absolutely relies on the fact that the Qualcomm clock
+		 * driver always rounds up.
+		 * We make use of exact_match as an I/O param.
+		 */
+
+		/* look only for exact match if within tolerance is already found */
+		if (ser_clk)
+			offset = 0;
+		else
+			offset = (mult * percent_tol) / 100;
+
+		test_freq = mult - offset;
+		freq = clk_round_rate(clk, test_freq);
+
+		/*
+		 * A dead-on freq is an insta-win, look for it only in 1st run
+		 */
+		if (*exact_match) {
+			if (!(freq % desired_clk)) {
+				ser_clk = freq;
+				*clk_div = freq / desired_clk;
+				return ser_clk;
+			}
+		}
+
+		if (!ser_clk) {
+			new_div = DIV_ROUND_CLOSEST(freq, desired_clk);
+			new_freq = new_div * desired_clk;
+			offset = (new_freq * percent_tol) / 100;
+
+			if (new_freq - offset <= freq && freq <= new_freq + offset) {
+				/* Save the first (lowest freq) within tolerance */
+				ser_clk = freq;
+				*clk_div = new_div;
+				/* no more search for exact match required in 2nd run */
+				if (!(*exact_match))
+					break;
+			}
 		}
 
-		if (!prev)
-			ser_clk = freq;
-		else if (prev == freq)
+		div = freq / desired_clk + 1;
+
+		/*
+		 * Only time clock framework doesn't round up is if
+		 * we're past the max clock rate. We're done searching
+		 * if that's the case.
+		 */
+		if (freq < test_freq)
 			break;
+	}
+
+	*exact_match = false;
+	return ser_clk;
+}
+
+static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud,
+			unsigned int sampling_rate, unsigned int *clk_div)
+{
+	unsigned long ser_clk;
+	unsigned long desired_clk;
+	unsigned long desired_tol;
+	bool exact_match;
 
-		prev = freq;
+	desired_clk = baud * sampling_rate;
+	if (!desired_clk) {
+		pr_err("%s: Invalid frequency\n", __func__);
+		return 0;
 	}
 
-	if (!ser_clk) {
-		pr_err("%s: Can't find matching DFS entry for baud %d\n",
-								__func__, baud);
+	/* try to find exact clock rate or within 2% tolerance */
+	ser_clk = 0;
+	exact_match = true;
+	desired_tol = 2;
+
+	ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
+	if (ser_clk) {
+		if (!exact_match)
+			pr_warn("Cannot find exact match clk_rate, using one within 2 percent tolerance\n");
 		return ser_clk;
 	}
 
-	*clk_div = ser_clk / desired_clk;
-	if (!(*clk_div))
-		*clk_div = 1;
+	/* try within 5% tolerance now, no need to look for exact match */
+	exact_match = false;
+	desired_tol = 5;
+
+	ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, desired_tol, &exact_match);
+	if (ser_clk)
+		pr_warn("Cannot find exact match clk_rate, using one within 5 percent tolerance\n");
+	else
+		pr_err("Cannot find suitable clk_rate, giving up\n");
 
 	return ser_clk;
 }
@@ -1021,8 +1092,7 @@  static void qcom_geni_serial_set_termios(struct uart_port *uport,
 	if (ver >= QUP_SE_VERSION_2_5)
 		sampling_rate /= 2;
 
-	clk_rate = get_clk_div_rate(port->se.clk, baud,
-		sampling_rate, &clk_div);
+	clk_rate = get_clk_div_rate(port->se.clk, baud, sampling_rate, &clk_div);
 	if (!clk_rate)
 		goto out_restart_rx;