[RFC] timekeeping: Rework frequency adjustments to work better w/ nohz

Message ID	1389067023-13541-1-git-send-email-john.stultz@linaro.org
State	Superseded
Headers	show Return-Path: <patchwork-forward+bncBCJ7RPMX4EMBBGXWVWLAKGQEQ3EKHBQ@linaro.org> MIME-Version: 1.0 Received-SPF: neutral (google.com: 209.85.212.47 is neither permitted nor denied by best guess record for domain of patch+caf_=patchwork-forward=linaro.org@linaro.org) client-ip=209.85.212.47; Received-SPF: neutral (google.com: 209.85.192.181 is neither permitted nor denied by best guess record for domain of john.stultz@linaro.org) client-ip=209.85.192.181; From: John Stultz <john.stultz@linaro.org> To: LKML <linux-kernel@vger.kernel.org> Cc: John Stultz <john.stultz@linaro.org>, Miroslav Lichvar <mlichvar@redhat.com>, Richard Cochran <richardcochran@gmail.com>, Prarit Bhargava <prarit@redhat.com> Subject: [PATCH] [RFC] timekeeping: Rework frequency adjustments to work better w/ nohz Date: Mon, 6 Jan 2014 19:57:03 -0800 Message-Id: <1389067023-13541-1-git-send-email-john.stultz@linaro.org> Precedence: list Mailing-list: list patchwork-forward@linaro.org; contact patchwork-forward+owners@linaro.org

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 87b4f00..15354d4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1048,54 +1048,50 @@ static int __init timekeeping_init_ops(void) device_initcall(timekeeping_init_ops); /* - * If the error is already larger, we look ahead even further - * to compensate for late or lost adjustments. + * Calculate the future error caused by incorrect freq value + * and adjust the timekeeper to correct that. */ -static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, - s64 error, s64 *interval, - s64 *offset) +static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, + s64 interval, + s64 offset) { s64 tick_error, i; - u32 look_ahead, adj; - s32 error2, mult; + u32 adj; + s32 mult = 1; - /* - * Use the current error value to determine how much to look ahead. - * The larger the error the slower we adjust for it to avoid problems - * with losing too many ticks, otherwise we would overadjust and - * produce an even larger error. The smaller the adjustment the - * faster we try to adjust for it, as lost ticks can do less harm - * here. This is tuned so that an error of about 1 msec is adjusted - * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). - */ - error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); - error2 = abs(error2); - for (look_ahead = 0; error2 > 0; look_ahead++) - error2 >>= 2; + /* Calculate current error per tick */ + tick_error = ntp_tick_length() >> tk->ntp_error_shift; + tick_error -= tk->xtime_interval; - /* - * Now calculate the error in (1 << look_ahead) ticks, but first - * remove the single look ahead already included in the error. 
- */ - tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); - tick_error -= tk->xtime_interval >> 1; - error = ((error - tick_error) >> look_ahead) + tick_error; + /* Don't worry about correcting it if it's small */ + if (likely(abs(tick_error) < 2*interval)) + return; - /* Finally calculate the adjustment shift value. */ - i = *interval; - mult = 1; - if (error < 0) { - error = -error; - *interval = -*interval; - *offset = -*offset; - mult = -1; + if (tick_error < 0) { + interval = -interval; + offset = -offset; + mult = -mult; } - for (adj = 0; error > i; adj++) - error >>= 1; - *interval <<= adj; - *offset <<= adj; - return mult << adj; + /* Sort out the magnitude of the correction */ + tick_error = abs(tick_error); + i = abs(interval); + for (adj = 0; tick_error > i; adj++) + tick_error >>= 1; + + /* scale the corrections */ + interval <<= adj; + offset <<= adj; + mult <<= adj; + + /* + * Apply the correction to the timekeeper. + * See long comment in timekeeping_adjust to explain the math. + */ + tk->mult += mult; + tk->xtime_interval += interval; + tk->xtime_nsec -= offset; + } /* @@ -1108,65 +1104,23 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) s64 error, interval = tk->cycle_interval; int adj; - /* - * The point of this is to check if the error is greater than half - * an interval. - * - * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. - * - * Note we subtract one in the shift, so that error is really error*2. - * This "saves" dividing(shifting) interval twice, but keeps the - * (error > interval) comparison as still measuring if error is - * larger than half an interval. - * - * Note: It does not "save" on aggravation when reading the code. - */ - error = tk->ntp_error >> (tk->ntp_error_shift - 1); - if (error > interval) { - /* - * We now divide error by 4(via shift), which checks if - * the error is greater than twice the interval. 
- * If it is greater, we need a bigadjust, if its smaller, - * we can adjust by 1. - */ - error >>= 2; - /* - * XXX - In update_wall_time, we round up to the next - * nanosecond, and store the amount rounded up into - * the error. This causes the likely below to be unlikely. - * - * The proper fix is to avoid rounding up by using - * the high precision tk->xtime_nsec instead of - * xtime.tv_nsec everywhere. Fixing this will take some - * time. - */ - if (likely(error <= interval)) - adj = 1; - else - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } else { - if (error < -interval) { - /* See comment above, this is just switched for the negative */ - error >>= 2; - if (likely(error >= -interval)) { - adj = -1; - interval = -interval; - offset = -offset; - } else { - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } - } else { - goto out_adjust; - } - } + /* First correct for the current frequency error */ + timekeeping_freqadjust(tk, interval, offset); + + + /* Next make a small adjustment to fix any cumulative error */ + error = tk->ntp_error >> tk->ntp_error_shift; + if (likely(abs(error) <= interval/2)) + goto out_adjust; + + if (error < 0) { + adj = -1; + interval = -interval; + offset = -offset; + } else + adj = 1; + - if (unlikely(tk->clock->maxadj && - (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { - printk_once(KERN_WARNING - "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->clock->name, (long)tk->mult + adj, - (long)tk->clock->mult + tk->clock->maxadj); - } /* * So the following can be confusing. * @@ -1213,15 +1167,21 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * xtime_nsec_2 = xtime_nsec_1 - offset * Which simplfies to: * xtime_nsec -= offset - * - * XXX - TODO: Doc ntp_error calculation. 
*/ tk->mult += adj; tk->xtime_interval += interval; tk->xtime_nsec -= offset; - tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; out_adjust: + + if (unlikely(tk->clock->maxadj && + (tk->mult > tk->clock->mult + tk->clock->maxadj))) { + printk_once(KERN_WARNING + "Adjusting %s more than 11%% (%ld vs %ld)\n", + tk->clock->name, (long)tk->mult, + (long)tk->clock->mult + tk->clock->maxadj); + } + /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource @@ -1241,7 +1201,6 @@ out_adjust: tk->xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } - } /**

[RFC] timekeeping: Rework frequency adjustments to work better w/ nohz

Commit Message

Comments

Patch