From patchwork Fri Apr 15 19:19:50 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 562119 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 14E0FC433FE for ; Fri, 15 Apr 2022 19:19:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1347781AbiDOTWW (ORCPT ); Fri, 15 Apr 2022 15:22:22 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37524 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S240222AbiDOTWV (ORCPT ); Fri, 15 Apr 2022 15:22:21 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [IPv6:2a0a:51c0:0:12e:550::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 19E193E5F2; Fri, 15 Apr 2022 12:19:52 -0700 (PDT) Message-ID: <20220415161206.419880163@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050390; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=7jYVnFbvS1wwhh/xx/6Po12K0RSM7iGrhAKv7OZOS5Y=; b=ibn5CEoItB13fo1TcH5WhfU42+e2Hv5azziNbxi3BJLqgRMgXTUfaxkrIDyi7VxobycJ95 yhY+NjHwSBhn0nbKSN7UqLsM2UPHKcN12SZEGbPGrPBaZkflkaBx1jEHxOuTcE9EcZg6k5 9yEFPSr+4hZns3aIghYzqDXDK3j2jCDfnHWxVa8Hn9BnArKxdX4agbF/RtDtkrSkxskjU0 U7O6KqclwS/PJg6lEIDEB5iyKgzAb+gEEVXgoiSMBNnBApYvKW8wZ1vMNRFTcyp83Wxvt0 DqX6lrrCnMT1DPW6+SpHjfGBJonoWci049BZHKLYL6XS082qhKrLuUNphH2wUg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050390; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=7jYVnFbvS1wwhh/xx/6Po12K0RSM7iGrhAKv7OZOS5Y=; b=J+OlnM5CPKWmBhZNPQx401r0dC9MlMfnI44nxQy0clF2y+zEZeX+iWc0sDj82hsDNw4g1u Gl8cYQfpED9qenBQ== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 01/10] x86/aperfmperf: Dont wake idle CPUs in arch_freq_get_on_cpu() References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:50 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org aperfmperf_get_khz() already excludes idle CPUs from APERF/MPERF sampling and that's a reasonable decision. There is no point in sending up to two IPIs to an idle CPU just because someone reads a sysfs file. Signed-off-by: Thomas Gleixner Acked-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/aperfmperf.c | 3 +++ 1 file changed, 3 insertions(+) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -139,6 +139,9 @@ unsigned int arch_freq_get_on_cpu(int cp if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) return 0; + if (rcu_is_idle_cpu(cpu)) + return 0; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) return per_cpu(samples.khz, cpu); From patchwork Fri Apr 15 19:19:51 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 562118 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 450D4C433FE for ; Fri, 15 Apr 2022 19:20:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1348966AbiDOTW1 (ORCPT ); Fri, 15 Apr 2022 15:22:27 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37548 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348806AbiDOTWY (ORCPT ); Fri, 15 Apr 2022 15:22:24 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EC0C53EA8B; Fri, 15 Apr 2022 12:19:53 -0700 (PDT) Message-ID: <20220415161206.478362457@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050392; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=GSSdo5ztLjb2tpVUVQ6uXiZn/hHCCOaaME2098EotYY=; b=W8uqFEG7ZeiOLNrZx4ReJo9mPL2dLlP/r7OOSMzaIeQHDOPh2vx89+Bk9cO8Yn0UIhMjQe /NNncvqky1q8HaYYLIaluonWYQ+ZmqKT+BsFq98p0RBuCKcRIGPWvsUSqCqKDWcI+aNpjR 4HSgzDIWmF14EV6DMOoybOKAyQqt+V52nOA74g1WN52qwBqjSc/jjucDNFsD5avnbfzB2n GTBo/15UKM1sY+W4AJGp0ihBzw745oldg3OFufT3JWa+OfuS1azOCaX2C7uQst7cNq2QxE Tfndo4cKOl5lfJdF6BF1eft2W4ecR33XBvD7Z/jXy1zkV6PXhIIxUJtK3pd6WA== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050392; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=GSSdo5ztLjb2tpVUVQ6uXiZn/hHCCOaaME2098EotYY=; b=PabvyLdNHOaFSvNOG9jSOKBPLzrcQZORWX5zc8xXJKXlTLmJRjuumuEmD8T5ptABP/Sr7r RooY0hCAi73SyJBQ== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 02/10] x86/smp: Move APERF/MPERF code where it belongs References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:51 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org as this can share code with the preexisting APERF/MPERF code. No functional change. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 366 ++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/smpboot.c | 355 ------------------------------------- 2 files changed, 362 insertions(+), 359 deletions(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -6,15 +6,19 @@ * Copyright (C) 2017 Intel Corp. * Author: Len Brown */ - +#include #include #include #include #include -#include -#include -#include #include +#include +#include +#include +#include + +#include +#include #include "cpu.h" @@ -152,3 +156,357 @@ unsigned int arch_freq_get_on_cpu(int cp return per_cpu(samples.khz, cpu); } + +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) +/* + * APERF/MPERF frequency ratio computation. + * + * The scheduler wants to do frequency invariant accounting and needs a <1 + * ratio to account for the 'current' frequency, corresponding to + * freq_curr / freq_max. + * + * Since the frequency freq_curr on x86 is controlled by micro-controller and + * our P-state setting is little more than a request/hint, we need to observe + * the effective frequency 'BusyMHz', i.e. the average frequency over a time + * interval after discarding idle time. This is given by: + * + * BusyMHz = delta_APERF / delta_MPERF * freq_base + * + * where freq_base is the max non-turbo P-state. + * + * The freq_max term has to be set to a somewhat arbitrary value, because we + * can't know which turbo states will be available at a given point in time: + * it all depends on the thermal headroom of the entire package. We set it to + * the turbo level with 4 cores active. + * + * Benchmarks show that's a good compromise between the 1C turbo ratio + * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, + * which would ignore the entire turbo range (a conspicuous part, making + * freq_curr/freq_max always maxed out). + * + * An exception to the heuristic above is the Atom uarch, where we choose the + * highest turbo level for freq_max since Atom's are generally oriented towards + * power efficiency. + * + * Setting freq_max to anything less than the 1C turbo ratio makes the ratio + * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. + */ + +DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); + +static DEFINE_PER_CPU(u64, arch_prev_aperf); +static DEFINE_PER_CPU(u64, arch_prev_mperf); +static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; +static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; + +void arch_set_max_freq_ratio(bool turbo_disabled) +{ + arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : + arch_turbo_freq_ratio; +} +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); + +static bool turbo_disabled(void) +{ + u64 misc_en; + int err; + + err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); + if (err) + return false; + + return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); +} + +static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +{ + int err; + + err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); + if (err) + return false; + + err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ + *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ + + return true; +} + +#define X86_MATCH(model) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) + +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { + X86_MATCH(XEON_PHI_KNL), + X86_MATCH(XEON_PHI_KNM), + {} +}; + +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { + X86_MATCH(SKYLAKE_X), + {} +}; + +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { + X86_MATCH(ATOM_GOLDMONT), + X86_MATCH(ATOM_GOLDMONT_D), + X86_MATCH(ATOM_GOLDMONT_PLUS), + {} +}; + +static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, + int num_delta_fratio) +{ + int fratio, delta_fratio, found; + int err, i; + u64 msr; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); + if (err) + return false; + + fratio = (msr >> 8) & 0xFF; + i = 16; + found = 0; + do { + if (found >= num_delta_fratio) { + *turbo_freq = fratio; + return true; + } + + delta_fratio = (msr >> (i + 5)) & 0x7; + + if (delta_fratio) { + found += 1; + fratio -= delta_fratio; + } + + i += 8; + } while (i < 64); + + return true; +} + +static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) +{ + u64 ratios, counts; + u32 group_size; + int err, i; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); + if (err) + return false; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); + if (err) + return false; + + for (i = 0; i < 64; i += 8) { + group_size = (counts >> i) & 0xFF; + if (group_size >= size) { + *turbo_freq = (ratios >> i) & 0xFF; + return true; + } + } + + return false; +} + +static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +{ + u64 msr; + int err; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ + + /* The CPU may have less than 4 cores */ + if (!*turbo_freq) + *turbo_freq = msr & 0xFF; /* 1C turbo */ + + return true; +} + +static bool intel_set_max_freq_ratio(void) +{ + u64 base_freq, turbo_freq; + u64 turbo_ratio; + + if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) + goto out; + + if (x86_match_cpu(has_glm_turbo_ratio_limits) && + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) + goto out; + + if (x86_match_cpu(has_knl_turbo_ratio_limits) && + knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) + goto out; + + if (x86_match_cpu(has_skx_turbo_ratio_limits) && + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) + goto out; + + if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) + goto out; + + return false; + +out: + /* + * Some hypervisors advertise X86_FEATURE_APERFMPERF + * but then fill all MSR's with zeroes. + * Some CPUs have turbo boost but don't declare any turbo ratio + * in MSR_TURBO_RATIO_LIMIT. + */ + if (!base_freq || !turbo_freq) { + pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); + return false; + } + + turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); + if (!turbo_ratio) { + pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); + return false; + } + + arch_turbo_freq_ratio = turbo_ratio; + arch_set_max_freq_ratio(turbo_disabled()); + + return true; +} + +static void init_counter_refs(void) +{ + u64 aperf, mperf; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + this_cpu_write(arch_prev_aperf, aperf); + this_cpu_write(arch_prev_mperf, mperf); +} + +#ifdef CONFIG_PM_SLEEP +static struct syscore_ops freq_invariance_syscore_ops = { + .resume = init_counter_refs, +}; + +static void register_freq_invariance_syscore_ops(void) +{ + /* Bail out if registered already. */ + if (freq_invariance_syscore_ops.node.prev) + return; + + register_syscore_ops(&freq_invariance_syscore_ops); +} +#else +static inline void register_freq_invariance_syscore_ops(void) {} +#endif + +void init_freq_invariance(bool secondary, bool cppc_ready) +{ + bool ret = false; + + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) + return; + + if (secondary) { + if (static_branch_likely(&arch_scale_freq_key)) { + init_counter_refs(); + } + return; + } + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + ret = intel_set_max_freq_ratio(); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (!cppc_ready) { + return; + } + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); + } + + if (ret) { + init_counter_refs(); + static_branch_enable(&arch_scale_freq_key); + register_freq_invariance_syscore_ops(); + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); + } else { + pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); + } +} + +static void disable_freq_invariance_workfn(struct work_struct *work) +{ + static_branch_disable(&arch_scale_freq_key); +} + +static DECLARE_WORK(disable_freq_invariance_work, + disable_freq_invariance_workfn); + +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; + +void arch_scale_freq_tick(void) +{ + u64 freq_scale; + u64 aperf, mperf; + u64 acnt, mcnt; + + if (!arch_scale_freq_invariant()) + return; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + acnt = aperf - this_cpu_read(arch_prev_aperf); + mcnt = mperf - this_cpu_read(arch_prev_mperf); + + this_cpu_write(arch_prev_aperf, aperf); + this_cpu_write(arch_prev_mperf, mperf); + + if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) + goto error; + + if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) + goto error; + + freq_scale = div64_u64(acnt, mcnt); + if (!freq_scale) + goto error; + + if (freq_scale > SCHED_CAPACITY_SCALE) + freq_scale = SCHED_CAPACITY_SCALE; + + this_cpu_write(arch_freq_scale, freq_scale); + return; + +error: + pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); + schedule_work(&disable_freq_invariance_work); +} +#endif /* CONFIG_X86_64 && CONFIG_SMP */ --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include @@ -1847,357 +1846,3 @@ void native_play_dead(void) } #endif - -#ifdef CONFIG_X86_64 -/* - * APERF/MPERF frequency ratio computation. - * - * The scheduler wants to do frequency invariant accounting and needs a <1 - * ratio to account for the 'current' frequency, corresponding to - * freq_curr / freq_max. - * - * Since the frequency freq_curr on x86 is controlled by micro-controller and - * our P-state setting is little more than a request/hint, we need to observe - * the effective frequency 'BusyMHz', i.e. the average frequency over a time - * interval after discarding idle time. This is given by: - * - * BusyMHz = delta_APERF / delta_MPERF * freq_base - * - * where freq_base is the max non-turbo P-state. - * - * The freq_max term has to be set to a somewhat arbitrary value, because we - * can't know which turbo states will be available at a given point in time: - * it all depends on the thermal headroom of the entire package. We set it to - * the turbo level with 4 cores active. - * - * Benchmarks show that's a good compromise between the 1C turbo ratio - * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, - * which would ignore the entire turbo range (a conspicuous part, making - * freq_curr/freq_max always maxed out). - * - * An exception to the heuristic above is the Atom uarch, where we choose the - * highest turbo level for freq_max since Atom's are generally oriented towards - * power efficiency. - * - * Setting freq_max to anything less than the 1C turbo ratio makes the ratio - * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. - */ - -DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); - -static DEFINE_PER_CPU(u64, arch_prev_aperf); -static DEFINE_PER_CPU(u64, arch_prev_mperf); -static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; -static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; - -void arch_set_max_freq_ratio(bool turbo_disabled) -{ - arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : - arch_turbo_freq_ratio; -} -EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); - -static bool turbo_disabled(void) -{ - u64 misc_en; - int err; - - err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); - if (err) - return false; - - return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); -} - -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) -{ - int err; - - err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); - if (err) - return false; - - err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ - *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ - - return true; -} - -#define X86_MATCH(model) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ - INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) - -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { - X86_MATCH(XEON_PHI_KNL), - X86_MATCH(XEON_PHI_KNM), - {} -}; - -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { - X86_MATCH(SKYLAKE_X), - {} -}; - -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { - X86_MATCH(ATOM_GOLDMONT), - X86_MATCH(ATOM_GOLDMONT_D), - X86_MATCH(ATOM_GOLDMONT_PLUS), - {} -}; - -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, - int num_delta_fratio) -{ - int fratio, delta_fratio, found; - int err, i; - u64 msr; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); - if (err) - return false; - - fratio = (msr >> 8) & 0xFF; - i = 16; - found = 0; - do { - if (found >= num_delta_fratio) { - *turbo_freq = fratio; - return true; - } - - delta_fratio = (msr >> (i + 5)) & 0x7; - - if (delta_fratio) { - found += 1; - fratio -= delta_fratio; - } - - i += 8; - } while (i < 64); - - return true; -} - -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) -{ - u64 ratios, counts; - u32 group_size; - int err, i; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); - if (err) - return false; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); - if (err) - return false; - - for (i = 0; i < 64; i += 8) { - group_size = (counts >> i) & 0xFF; - if (group_size >= size) { - *turbo_freq = (ratios >> i) & 0xFF; - return true; - } - } - - return false; -} - -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) -{ - u64 msr; - int err; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ - - /* The CPU may have less than 4 cores */ - if (!*turbo_freq) - *turbo_freq = msr & 0xFF; /* 1C turbo */ - - return true; -} - -static bool intel_set_max_freq_ratio(void) -{ - u64 base_freq, turbo_freq; - u64 turbo_ratio; - - if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) - goto out; - - if (x86_match_cpu(has_glm_turbo_ratio_limits) && - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) - goto out; - - if (x86_match_cpu(has_knl_turbo_ratio_limits) && - knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) - goto out; - - if (x86_match_cpu(has_skx_turbo_ratio_limits) && - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) - goto out; - - if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) - goto out; - - return false; - -out: - /* - * Some hypervisors advertise X86_FEATURE_APERFMPERF - * but then fill all MSR's with zeroes. - * Some CPUs have turbo boost but don't declare any turbo ratio - * in MSR_TURBO_RATIO_LIMIT. - */ - if (!base_freq || !turbo_freq) { - pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); - return false; - } - - turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); - if (!turbo_ratio) { - pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); - return false; - } - - arch_turbo_freq_ratio = turbo_ratio; - arch_set_max_freq_ratio(turbo_disabled()); - - return true; -} - -static void init_counter_refs(void) -{ - u64 aperf, mperf; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); -} - -#ifdef CONFIG_PM_SLEEP -static struct syscore_ops freq_invariance_syscore_ops = { - .resume = init_counter_refs, -}; - -static void register_freq_invariance_syscore_ops(void) -{ - /* Bail out if registered already. */ - if (freq_invariance_syscore_ops.node.prev) - return; - - register_syscore_ops(&freq_invariance_syscore_ops); -} -#else -static inline void register_freq_invariance_syscore_ops(void) {} -#endif - -void init_freq_invariance(bool secondary, bool cppc_ready) -{ - bool ret = false; - - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) - return; - - if (secondary) { - if (static_branch_likely(&arch_scale_freq_key)) { - init_counter_refs(); - } - return; - } - - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = intel_set_max_freq_ratio(); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (!cppc_ready) { - return; - } - ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); - } - - if (ret) { - init_counter_refs(); - static_branch_enable(&arch_scale_freq_key); - register_freq_invariance_syscore_ops(); - pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); - } else { - pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); - } -} - -static void disable_freq_invariance_workfn(struct work_struct *work) -{ - static_branch_disable(&arch_scale_freq_key); -} - -static DECLARE_WORK(disable_freq_invariance_work, - disable_freq_invariance_workfn); - -DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; - -void arch_scale_freq_tick(void) -{ - u64 freq_scale; - u64 aperf, mperf; - u64 acnt, mcnt; - - if (!arch_scale_freq_invariant()) - return; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - acnt = aperf - this_cpu_read(arch_prev_aperf); - mcnt = mperf - this_cpu_read(arch_prev_mperf); - - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); - - if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) - goto error; - - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) - goto error; - - freq_scale = div64_u64(acnt, mcnt); - if (!freq_scale) - goto error; - - if (freq_scale > SCHED_CAPACITY_SCALE) - freq_scale = SCHED_CAPACITY_SCALE; - - this_cpu_write(arch_freq_scale, freq_scale); - return; - -error: - pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); - schedule_work(&disable_freq_invariance_work); -} -#endif /* CONFIG_X86_64 */ From patchwork Fri Apr 15 19:19:53 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 563816 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 42F78C433EF for ; Fri, 15 Apr 2022 19:20:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1348825AbiDOTW0 (ORCPT ); Fri, 15 Apr 2022 15:22:26 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37550 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348799AbiDOTWY (ORCPT ); Fri, 15 Apr 2022 15:22:24 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ED6A648E77; Fri, 15 Apr 2022 12:19:54 -0700 (PDT) Message-ID: <20220415161206.536733494@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050393; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=ykE17w4lSXk/0eDFAm6MD4uH2sdqxkH5OoCX5fvgous=; b=PLfiZ9jXMCez8OUGdHF55LRBLjKU/RPxmbl89CDp+DYiQBqNWd1NRtosQ9MbmK3my7nfER tyEB2Cqkf86JeCDB1T1E4kOCl3lmIlj6G8h/UYfe9shl7zKMpG54K2L89Om2HrGPyN+oDk NXVL7BP6ifQYhFKdEX3wdVydy5HqJK+m7iagoVnbn4rlzuNl8fsfoEI4F4YLWYcMiGw3kW 5sCgrdfP1pntlNinBzrBe6QSVmwq4PLuMnWnt2O6T8t7vOn+yUOcFxB5EHIM0qj7KRK9p4 nrOdDyea1njMnwAhe3yRifeGtMKtliuZdd+o9HOimn8/jGZIJb2Y8Y4V1Sh+zw== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050393; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=ykE17w4lSXk/0eDFAm6MD4uH2sdqxkH5OoCX5fvgous=; b=k+U1hmyNo7XL4KEqo1PlhF/QxCF51ampKFcw/N7IGHqhgtpSOpZPxe0zQ3Bi9aluQgsiqX nZjcMIjd/Hc69uDg== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 03/10] x86/aperfmperf: Separate AP/BP frequency invariance init References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:53 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org This code is convoluted and because it can be invoked post init via the ACPI/CPPC code, all of the initialization functionality is built in instead of being part of init text and init data. As a first step create separate calls for the boot and the application processors. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/topology.h | 12 +++++------- arch/x86/kernel/acpi/cppc.c | 3 ++- arch/x86/kernel/cpu/aperfmperf.c | 23 +++++++++++------------ arch/x86/kernel/smpboot.c | 4 ++-- 4 files changed, 20 insertions(+), 22 deletions(-) --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -216,14 +216,12 @@ extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick extern void arch_set_max_freq_ratio(bool turbo_disabled); -void init_freq_invariance(bool secondary, bool cppc_ready); +extern void bp_init_freq_invariance(bool cppc_ready); +extern void ap_init_freq_invariance(void); #else -static inline void arch_set_max_freq_ratio(bool turbo_disabled) -{ -} -static inline void init_freq_invariance(bool secondary, bool cppc_ready) -{ -} +static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void bp_init_freq_invariance(bool cppc_ready) { } +static inline void ap_init_freq_invariance(void) { } #endif #ifdef CONFIG_ACPI_CPPC_LIB --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -96,7 +96,8 @@ void init_freq_invariance_cppc(void) mutex_lock(&freq_invariance_lock); - init_freq_invariance(secondary, true); + if (!secondary) + bp_init_freq_invariance(true); secondary = true; mutex_unlock(&freq_invariance_lock); --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -428,31 +428,24 @@ static void register_freq_invariance_sys static inline void register_freq_invariance_syscore_ops(void) {} #endif -void init_freq_invariance(bool secondary, bool cppc_ready) +void bp_init_freq_invariance(bool cppc_ready) { - bool ret = false; + bool ret; - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; - if (secondary) { - if (static_branch_likely(&arch_scale_freq_key)) { - init_counter_refs(); - } - return; - } + init_counter_refs(); if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ret = intel_set_max_freq_ratio(); else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (!cppc_ready) { + if (!cppc_ready) return; - } ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); } if (ret) { - init_counter_refs(); static_branch_enable(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); @@ -461,6 +454,12 @@ void init_freq_invariance(bool secondary } } +void ap_init_freq_invariance(void) +{ + if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) + init_counter_refs(); +} + static void disable_freq_invariance_workfn(struct work_struct *work) { static_branch_disable(&arch_scale_freq_key); --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -186,7 +186,7 @@ static void smp_callin(void) */ set_cpu_sibling_map(raw_smp_processor_id()); - init_freq_invariance(true, false); + ap_init_freq_invariance(); /* * Get our bogomips. @@ -1396,7 +1396,7 @@ void __init native_smp_prepare_cpus(unsi { smp_prepare_cpus_common(); - init_freq_invariance(false, false); + bp_init_freq_invariance(false); smp_sanity_check(); switch (apic_intr_mode) { From patchwork Fri Apr 15 19:19:54 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 563815 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0B6FEC433EF for ; Fri, 15 Apr 2022 19:20:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349073AbiDOTW2 (ORCPT ); Fri, 15 Apr 2022 15:22:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37580 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348785AbiDOTW0 (ORCPT ); Fri, 15 Apr 2022 15:22:26 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B29E63467D; Fri, 15 Apr 2022 12:19:56 -0700 (PDT) Message-ID: <20220415161206.592465719@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050395; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=Vbg+gQ8Rxj+zh2dlsK2MvI2YGFj6irjpOgNZhjAS9Pk=; b=3mdSgZHopcXMcpNGI/NA35kkALwjF6jDUs3UKfpeT28exkDTndSlutzy0vO+pJem7DD/SN CO5jWLT2/u2BW0hq/d5Xd2tYRAlpNe5ca8E1Klar2QRVHLl/Z8CDh1JMPwZHXkrkTozcJ9 nLuu/D5xU7eBX0o4iyv35D5RDOuK6F7TdFQ7gCRoAuX2shLc9EH+vj1SkjNwrAQBUT3XOl cG6PP/VPvKtGHqoRRfaL7wodtnx+hXeAQRE9AZ1MZ62/UmPTyEKB74+KJ0FCR+MgW59Tt1 t+tqDoI8IvAapl2ufvs/qsQiypV6o/SftpsFzrtBwmPuNJc9E3Rld90L2pCLEw== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050395; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=Vbg+gQ8Rxj+zh2dlsK2MvI2YGFj6irjpOgNZhjAS9Pk=; b=p7jYvFKMWytjpvilv77Uf1pDvLUcr3aa1hwkMa1myMH0veXK0lYXC9hgUh9C71VOTiDMU1 cjUWWTcFqRdgIFBg== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 04/10] x86/aperfmperf: Untangle Intel and AMD frequency invariance init References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:54 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org AMD boot CPU initialization happens late via ACPI/CPPC which prevents the Intel parts from being marked __init. Split out the common code and provide a dedicated interface for the AMD initialization and mark the Intel specific code and data __init. The remaining text size is almost cut in half: text: 2614 -> 1350 init.text: 0 -> 786 Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/topology.h | 13 ++------ arch/x86/kernel/acpi/cppc.c | 29 +++++++----------- arch/x86/kernel/cpu/aperfmperf.c | 62 ++++++++++++++++++++------------------- arch/x86/kernel/smpboot.c | 2 - 4 files changed, 49 insertions(+), 57 deletions(-) --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -216,24 +216,19 @@ extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick extern void arch_set_max_freq_ratio(bool turbo_disabled); -extern void bp_init_freq_invariance(bool cppc_ready); +extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); +extern void bp_init_freq_invariance(void); extern void ap_init_freq_invariance(void); #else static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } -static inline void bp_init_freq_invariance(bool cppc_ready) { } +static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } +static inline void bp_init_freq_invariance(void) { } static inline void ap_init_freq_invariance(void) { } #endif #ifdef CONFIG_ACPI_CPPC_LIB void init_freq_invariance_cppc(void); #define arch_init_invariance_cppc init_freq_invariance_cppc - -bool amd_set_max_freq_ratio(u64 *ratio); -#else -static inline bool amd_set_max_freq_ratio(u64 *ratio) -{ - return false; -} #endif #endif /* _ASM_X86_TOPOLOGY_H */ --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -50,20 +50,17 @@ int cpc_write_ffh(int cpunum, struct cpc return err; } -bool amd_set_max_freq_ratio(u64 *ratio) +static void amd_set_max_freq_ratio(void) { struct cppc_perf_caps perf_caps; u64 highest_perf, nominal_perf; u64 perf_ratio; int rc; - if (!ratio) - return false; - rc = cppc_get_perf_caps(0, &perf_caps); if (rc) { pr_debug("Could not retrieve perf counters (%d)\n", rc); - return false; + return; } highest_perf = amd_get_highest_perf(); @@ -71,7 +68,7 @@ bool amd_set_max_freq_ratio(u64 *ratio) if (!highest_perf || !nominal_perf) { pr_debug("Could not retrieve highest or nominal performance\n"); - return false; + return; } perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); @@ -79,26 +76,24 @@ bool amd_set_max_freq_ratio(u64 *ratio) perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; if (!perf_ratio) { pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); - return false; + return; } - *ratio = perf_ratio; - arch_set_max_freq_ratio(false); - - return true; + freq_invariance_set_perf_ratio(perf_ratio, false); } static DEFINE_MUTEX(freq_invariance_lock); void init_freq_invariance_cppc(void) { - static bool secondary; + static bool init_done; - mutex_lock(&freq_invariance_lock); - - if (!secondary) - bp_init_freq_invariance(true); - secondary = true; + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) + return; + mutex_lock(&freq_invariance_lock); + if (!init_done) + amd_set_max_freq_ratio(); + init_done = true; mutex_unlock(&freq_invariance_lock); } --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -206,7 +206,7 @@ void arch_set_max_freq_ratio(bool turbo_ } EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); -static bool turbo_disabled(void) +static bool __init turbo_disabled(void) { u64 misc_en; int err; @@ -218,7 +218,7 @@ static bool turbo_disabled(void) return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { int err; @@ -240,26 +240,26 @@ static bool slv_set_max_freq_ratio(u64 * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = { X86_MATCH(XEON_PHI_KNL), X86_MATCH(XEON_PHI_KNM), {} }; -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = { X86_MATCH(SKYLAKE_X), {} }; -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = { X86_MATCH(ATOM_GOLDMONT), X86_MATCH(ATOM_GOLDMONT_D), X86_MATCH(ATOM_GOLDMONT_PLUS), {} }; -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, - int num_delta_fratio) +static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, + int num_delta_fratio) { int fratio, delta_fratio, found; int err, i; @@ -297,7 +297,7 @@ static bool knl_set_max_freq_ratio(u64 * return true; } -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) +static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) { u64 ratios, counts; u32 group_size; @@ -328,7 +328,7 @@ static bool skx_set_max_freq_ratio(u64 * return false; } -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { u64 msr; int err; @@ -351,7 +351,7 @@ static bool core_set_max_freq_ratio(u64 return true; } -static bool intel_set_max_freq_ratio(void) +static bool __init intel_set_max_freq_ratio(void) { u64 base_freq, turbo_freq; u64 turbo_ratio; @@ -418,40 +418,42 @@ static struct syscore_ops freq_invarianc static void register_freq_invariance_syscore_ops(void) { - /* Bail out if registered already. */ - if (freq_invariance_syscore_ops.node.prev) - return; - register_syscore_ops(&freq_invariance_syscore_ops); } #else static inline void register_freq_invariance_syscore_ops(void) {} #endif -void bp_init_freq_invariance(bool cppc_ready) +static void freq_invariance_enable(void) +{ + if (static_branch_unlikely(&arch_scale_freq_key)) { + WARN_ON_ONCE(1); + return; + } + static_branch_enable(&arch_scale_freq_key); + register_freq_invariance_syscore_ops(); + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); +} + +void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { - bool ret; + arch_turbo_freq_ratio = ratio; + arch_set_max_freq_ratio(turbo_disabled); + freq_invariance_enable(); +} +void __init bp_init_freq_invariance(void) +{ if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; init_counter_refs(); - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = intel_set_max_freq_ratio(); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (!cppc_ready) - return; - ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); - } + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; - if (ret) { - static_branch_enable(&arch_scale_freq_key); - register_freq_invariance_syscore_ops(); - pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); - } else { - pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); - } + if (intel_set_max_freq_ratio()) + freq_invariance_enable(); } void ap_init_freq_invariance(void) --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1396,7 +1396,7 @@ void __init native_smp_prepare_cpus(unsi { smp_prepare_cpus_common(); - bp_init_freq_invariance(false); + bp_init_freq_invariance(); smp_sanity_check(); switch (apic_intr_mode) { From patchwork Fri Apr 15 19:19:56 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 562116 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7BA8FC433F5 for ; Fri, 15 Apr 2022 19:20:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349037AbiDOTWq (ORCPT ); Fri, 15 Apr 2022 15:22:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37620 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349044AbiDOTW1 (ORCPT ); Fri, 15 Apr 2022 15:22:27 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 15FE63EA8B; Fri, 15 Apr 2022 12:19:58 -0700 (PDT) Message-ID: <20220415161206.648485667@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050396; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=MF+qC2b0dJXg+oWZw84Sqe0wCeDxratrkAn7t5wMy58=; b=lN/Ir/t5CAtSnnNArbT4UHK+I31zNocAKQbzU6/XaqOweCDkAjninvFHeB7bqF09HE/T0W OaDAHd/bIYPu7GNcb4OD+k61SxNgwDtJAbMdpbQDbxWNRAekNkEskPQFs9Ln/EIU9A9RDl 5F8Rb7nSmdQrowXLg+3N6wD5FyB8BQaPwTMw1rXjno2yVO7itltOnv9wwtqYj5TCPLIlvU EeSZ8iJOCncNDUO61Pcge6Y6upbt48DrEwzPymS44LUe3DnxbmowfsF6T4Gbw6EJIW00mn iqFoikvHHePXdoeGMjmwh+NnQALfUtZUa+7KqWOzRhHq6OMRUgF7MmC0kEmxTQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050396; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=MF+qC2b0dJXg+oWZw84Sqe0wCeDxratrkAn7t5wMy58=; b=erZk8fiVQlqE6cG6sr4GDcQgl/rSmofTc9yhYqX4/jlB0u/eLJN6chvug1bvG89nCXAMfW QXreSiOxq4uFB4Bg== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 05/10] x86/aperfmperf: Put frequency invariance aperf/mperf data into a struct References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:56 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org Preparation for sharing code with the CPU frequency portion of the aperf/mperf code. No functional change. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -22,6 +22,13 @@ #include "cpu.h" +struct aperfmperf { + u64 aperf; + u64 mperf; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples); + struct aperfmperf_sample { unsigned int khz; atomic_t scfpending; @@ -194,8 +201,6 @@ unsigned int arch_freq_get_on_cpu(int cp DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); -static DEFINE_PER_CPU(u64, arch_prev_aperf); -static DEFINE_PER_CPU(u64, arch_prev_mperf); static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; @@ -407,8 +412,8 @@ static void init_counter_refs(void) rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); + this_cpu_write(cpu_samples.aperf, aperf); + this_cpu_write(cpu_samples.mperf, mperf); } #ifdef CONFIG_PM_SLEEP @@ -474,9 +479,8 @@ DEFINE_PER_CPU(unsigned long, arch_freq_ void arch_scale_freq_tick(void) { - u64 freq_scale; - u64 aperf, mperf; - u64 acnt, mcnt; + struct aperfmperf *s = this_cpu_ptr(&cpu_samples); + u64 aperf, mperf, acnt, mcnt, freq_scale; if (!arch_scale_freq_invariant()) return; @@ -484,11 +488,11 @@ void arch_scale_freq_tick(void) rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - acnt = aperf - this_cpu_read(arch_prev_aperf); - mcnt = mperf - this_cpu_read(arch_prev_mperf); + acnt = aperf - s->aperf; + mcnt = mperf - s->mperf; - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); + s->aperf = aperf; + s->mperf = mperf; if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; From patchwork Fri Apr 15 19:19:57 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 563814 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0DDB2C433EF for ; Fri, 15 Apr 2022 19:20:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S241384AbiDOTWp (ORCPT ); Fri, 15 Apr 2022 15:22:45 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37650 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349078AbiDOTW2 (ORCPT ); Fri, 15 Apr 2022 15:22:28 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A749F3E5F2; Fri, 15 Apr 2022 12:19:59 -0700 (PDT) Message-ID: <20220415161206.706185092@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050398; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=FTV8lnVO85bSeErs2BRV8ovYyYYdf2QvdWY7WdpkMVY=; b=QM69v4F9Rs7cCn1x18m1a4OxD9Wo9tW8sYaLQpsfNJhJvEsCvtaLjIf5wzAxmtOiOs/z29 be1ejBwTW5za5cEagfS/MuWS107TDCk+dD33oL8ZJ1XRejRxFNyf50ex3dh10ZDLa15V2t uj96a8RxvryGzp69MlN9pLDIdCzMLleQdoll5JS2qfV2Eeh7QkWPIyc6CLAaMw0KE2QnST NJ0ieovXJEuI21z12g1JXJTfI59dBXPpJ0AFLH3jFh8qU55Sf30JQdcYCc33b3YIb2qXt6 r1zS6b/oZK6rsqCw7n71TdURWVLJVzylhpzydkN4Ua9tBZkfLAJd8fa2biePhA== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050398; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=FTV8lnVO85bSeErs2BRV8ovYyYYdf2QvdWY7WdpkMVY=; b=F+kg3SLCvyNvc7RHgfJ3DzCpfJqGJFaGISAoYOFiYoLzj4TyGiKBd75Ir9QVn1X2RYGjEp T4pRaaZcFoxqKXCg== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 06/10] x86/aperfmperf: Restructure arch_scale_freq_tick() References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:57 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org Preparation for sharing code with the CPU frequency portion of the aperf/mperf code. No functional change. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -477,22 +477,9 @@ static DECLARE_WORK(disable_freq_invaria DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; -void arch_scale_freq_tick(void) +static void scale_freq_tick(u64 acnt, u64 mcnt) { - struct aperfmperf *s = this_cpu_ptr(&cpu_samples); - u64 aperf, mperf, acnt, mcnt, freq_scale; - - if (!arch_scale_freq_invariant()) - return; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - acnt = aperf - s->aperf; - mcnt = mperf - s->mperf; - - s->aperf = aperf; - s->mperf = mperf; + u64 freq_scale; if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; @@ -514,4 +501,23 @@ void arch_scale_freq_tick(void) pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } + +void arch_scale_freq_tick(void) +{ + struct aperfmperf *s = this_cpu_ptr(&cpu_samples); + u64 acnt, mcnt, aperf, mperf; + + if (!arch_scale_freq_invariant()) + return; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + acnt = aperf - s->aperf; + mcnt = mperf - s->mperf; + + s->aperf = aperf; + s->mperf = mperf; + + scale_freq_tick(acnt, mcnt); +} #endif /* CONFIG_X86_64 && CONFIG_SMP */ From patchwork Fri Apr 15 19:19:59 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 562117 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 440DAC4332F for ; Fri, 15 Apr 2022 19:20:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1350103AbiDOTWn (ORCPT ); Fri, 15 Apr 2022 15:22:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37702 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349163AbiDOTWb (ORCPT ); Fri, 15 Apr 2022 15:22:31 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6070949CB7; Fri, 15 Apr 2022 12:20:01 -0700 (PDT) Message-ID: <20220415161206.761988704@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050400; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=IvI5nbb3fvM/PCuIhyl3Bqh32RXyrehLlCthCBqe5jg=; b=SeV447QGZ7IDW8XqxiW5WQlCVVYONOAt8FTX7IYXePLcTnmkGW000uWPVEkewWI0cAABY8 UolXkGrE0a0LmgF3dwNC4tCCChfSTQ67GkS+qYtANwHtka+Yhr0mNorM8qZIfSNGeYhs1m NVIutzS6S4ZOK6uhu6fOqrOO7kirZVO13OYklLexzBg6zzYLyqQUBD+pD7IVgerSAXLsBs QcJ+BEesQCT1b4BY9ogck3jhT1j9TkRSrNG9HQT0gFa6bprKX0q309GIEde6Uk83XGxcgr urIrDnRjls8Z355TJkvI4+Az3RLQ//6ojHI6Y9WzfxiqQ2q/S/cUcUPpj2PGZg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050400; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=IvI5nbb3fvM/PCuIhyl3Bqh32RXyrehLlCthCBqe5jg=; b=GiAar3/6U8/VpXFHcDm3gBmXdzxteX8ugGBowYgCfKnuusFqaD1FqwPvB9iOj/T2oUOXMg 7E4ku/vHPnrj7YCA== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 07/10] x86/aperfmperf: Make parts of the frequency invariance code unconditional References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:19:59 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org The frequency invariance support is currently limited to x86/64 and SMP, which is the vast majority of machines. arch_scale_freq_tick() is called every tick on all CPUs and reads the APERF and MPERF MSRs. The CPU frequency getters function do the same via dedicated IPIs. While it could be argued that on systems where frequency invariance support is disabled (32bit, !SMP) the per tick read of the APERF and MPERF MSRs can be avoided, it does not make sense to keep the extra code and the resulting runtime issues of mass IPIs around. As a first step split out the non frequency invariance specific initialization code and the read MSR portion of arch_scale_freq_tick(). The rest of the code is still conditional and guarded with a static key. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpu.h | 2 + arch/x86/include/asm/topology.h | 4 -- arch/x86/kernel/cpu/aperfmperf.c | 63 +++++++++++++++++++++++---------------- arch/x86/kernel/smpboot.c | 3 - 4 files changed, 41 insertions(+), 31 deletions(-) --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -36,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, i #endif #endif +extern void ap_init_aperfmperf(void); + int mwait_usable(const struct cpuinfo_x86 *); unsigned int x86_family(unsigned int sig); --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -217,13 +217,9 @@ extern void arch_scale_freq_tick(void); extern void arch_set_max_freq_ratio(bool turbo_disabled); extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); -extern void bp_init_freq_invariance(void); -extern void ap_init_freq_invariance(void); #else static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } -static inline void bp_init_freq_invariance(void) { } -static inline void ap_init_freq_invariance(void) { } #endif #ifdef CONFIG_ACPI_CPPC_LIB --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -164,6 +165,17 @@ unsigned int arch_freq_get_on_cpu(int cp return per_cpu(samples.khz, cpu); } +static void init_counter_refs(void) +{ + u64 aperf, mperf; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + this_cpu_write(cpu_samples.aperf, aperf); + this_cpu_write(cpu_samples.mperf, mperf); +} + #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) /* * APERF/MPERF frequency ratio computation. @@ -405,17 +417,6 @@ static bool __init intel_set_max_freq_ra return true; } -static void init_counter_refs(void) -{ - u64 aperf, mperf; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - this_cpu_write(cpu_samples.aperf, aperf); - this_cpu_write(cpu_samples.mperf, mperf); -} - #ifdef CONFIG_PM_SLEEP static struct syscore_ops freq_invariance_syscore_ops = { .resume = init_counter_refs, @@ -447,13 +448,8 @@ void freq_invariance_set_perf_ratio(u64 freq_invariance_enable(); } -void __init bp_init_freq_invariance(void) +static void __init bp_init_freq_invariance(void) { - if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) - return; - - init_counter_refs(); - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; @@ -461,12 +457,6 @@ void __init bp_init_freq_invariance(void freq_invariance_enable(); } -void ap_init_freq_invariance(void) -{ - if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) - init_counter_refs(); -} - static void disable_freq_invariance_workfn(struct work_struct *work) { static_branch_disable(&arch_scale_freq_key); @@ -481,6 +471,9 @@ static void scale_freq_tick(u64 acnt, u6 { u64 freq_scale; + if (!arch_scale_freq_invariant()) + return; + if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; @@ -501,13 +494,17 @@ static void scale_freq_tick(u64 acnt, u6 pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } +#else +static inline void bp_init_freq_invariance(void) { } +static inline void scale_freq_tick(u64 acnt, u64 mcnt) { } +#endif /* CONFIG_X86_64 && CONFIG_SMP */ void arch_scale_freq_tick(void) { struct aperfmperf *s = this_cpu_ptr(&cpu_samples); u64 acnt, mcnt, aperf, mperf; - if (!arch_scale_freq_invariant()) + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; rdmsrl(MSR_IA32_APERF, aperf); @@ -520,4 +517,20 @@ void arch_scale_freq_tick(void) scale_freq_tick(acnt, mcnt); } -#endif /* CONFIG_X86_64 && CONFIG_SMP */ + +static int __init bp_init_aperfmperf(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) + return 0; + + init_counter_refs(); + bp_init_freq_invariance(); + return 0; +} +early_initcall(bp_init_aperfmperf); + +void ap_init_aperfmperf(void) +{ + if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) + init_counter_refs(); +} --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -186,7 +186,7 @@ static void smp_callin(void) */ set_cpu_sibling_map(raw_smp_processor_id()); - ap_init_freq_invariance(); + ap_init_aperfmperf(); /* * Get our bogomips. @@ -1396,7 +1396,6 @@ void __init native_smp_prepare_cpus(unsi { smp_prepare_cpus_common(); - bp_init_freq_invariance(); smp_sanity_check(); switch (apic_intr_mode) { From patchwork Fri Apr 15 19:20:01 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 563813 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E443AC433EF for ; Fri, 15 Apr 2022 19:20:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349137AbiDOTWr (ORCPT ); Fri, 15 Apr 2022 15:22:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37728 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349213AbiDOTWc (ORCPT ); Fri, 15 Apr 2022 15:22:32 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [IPv6:2a0a:51c0:0:12e:550::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E759C4ECDF; Fri, 15 Apr 2022 12:20:02 -0700 (PDT) Message-ID: <20220415161206.817702355@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050401; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=qYlJQfHkHU3OcbS1sZiLCAQudGl6ztMYHcAA/fZFZtk=; b=LGrEszF/kAbRdvl567m34oAJJwz6dSvwrSujKNtfqxO5EjZbvxWHXoa7KE4CADQ81/OLF+ RW5qo5WMfL3b5HhfId8wAF/jGnftRn0wRUX1Ua4lA9yUqDXMYMlEcjJy1LKezycjjGGSgv ZetZKg6MxikTsS2pZrIkn/z86aqmMDTPIz1He9z6zajGmttD2jaWoNifRxOIl6brLYYBH2 ws9U79kWuK4WGwDDRJNQdL0QJQFFV21//V9+fTxdT1C9JEZW+1l8Msv1tFAOv71/7UhgXN 4x2hI2Rj5H/9Bb/D+kJdUTU04zYVIqolE0sMvLohIvfaMmx3puYP5rythOfPRQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050401; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=qYlJQfHkHU3OcbS1sZiLCAQudGl6ztMYHcAA/fZFZtk=; b=xY8Yx/ZD3ZaZx9KpDsiElK70nuaf4eZgpdAo75DVNrviEEvSAVT7PUlS2wbwCiCLjZ4pwl cYiTgYGA3aULu3Dw== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 08/10] x86/aperfmperf: Store aperf/mperf data for cpu frequency reads References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:20:01 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org Now that the MSR readout is unconditional, store the results in the per CPU data structure along with a jiffies timestamp for the CPU frequency readout code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -24,11 +24,17 @@ #include "cpu.h" struct aperfmperf { + seqcount_t seq; + unsigned long last_update; + u64 acnt; + u64 mcnt; u64 aperf; u64 mperf; }; -static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = { + .seq = SEQCNT_ZERO(cpu_samples.seq) +}; struct aperfmperf_sample { unsigned int khz; @@ -515,6 +521,12 @@ void arch_scale_freq_tick(void) s->aperf = aperf; s->mperf = mperf; + raw_write_seqcount_begin(&s->seq); + s->last_update = jiffies; + s->acnt = acnt; + s->mcnt = mcnt; + raw_write_seqcount_end(&s->seq); + scale_freq_tick(acnt, mcnt); } From patchwork Fri Apr 15 19:20:02 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 562115 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C435AC433EF for ; Fri, 15 Apr 2022 19:20:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349226AbiDOTWt (ORCPT ); Fri, 15 Apr 2022 15:22:49 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37800 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349335AbiDOTWe (ORCPT ); Fri, 15 Apr 2022 15:22:34 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 981F4496BC; Fri, 15 Apr 2022 12:20:04 -0700 (PDT) Message-ID: <20220415161206.875029458@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050403; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=n2wSeLMIGPJqIrOHCJy5cyY5D0QNAlM7n2T6MTd7xwc=; b=b92wp7O0MUGwNsU9mIHqekKePY2y+Z5Svz757bDoLoT2kfhzp1BaT2CpKrZyUTP3Ze5SK6 3pfo6KVA+RtT/+uCcqwadflc7T3n+mJ5dJ7Epg1tPgnUBZp9Gss9vdFqVuouUtcZBM0Huk 99qZgNjvs0cGWt7tPuqS2wb8Ciw3g7i3Ogrq296n0vodf5zi6614yxQ6Sf7rNWk0mdg9qJ iTTF1OyXEMwfGq3fdVCaIFHRPBgmp5q1FF/W0QMU4tgfH4HR82qNwUNGINXKL5T8Kx3ou9 SQ6TdZD+Q8r1CpIlDLbv4iETaSAFKWuduPLt5dxEgCxAILTM8PhkjocMtNb9Zg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050403; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=n2wSeLMIGPJqIrOHCJy5cyY5D0QNAlM7n2T6MTd7xwc=; b=Q5e+J4epcR3IihMAKz+MJov65aDoE016IQxpDVcVGrmEqMWy4ffEgQ9f5gmNTR24hRf1QE mlsEn3LZwo4py0CQ== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" , Eric Dumazet Subject: [patch 09/10] x86/aperfmperf: Replace aperfmperf_get_khz() References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:20:02 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org The frequency invariance infrastructure provides the APERF/MPERF samples already. Utilize them for the cpu frequency display in /proc/cpuinfo. The sample is considered valid for 20ms. So for idle or isolated NOHZ full CPUs the function returns 0, which is matching the previous behaviour. This gets rid of the mass IPIs and a delay of 20ms for stabilizing observed by Eric when reading /proc/cpuinfo. Reported-by: Eric Dumazet Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 77 +++++++++++++++++---------------------- fs/proc/cpuinfo.c | 6 --- include/linux/cpufreq.h | 1 3 files changed, 35 insertions(+), 49 deletions(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -100,49 +100,6 @@ static bool aperfmperf_snapshot_cpu(int return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; } -unsigned int aperfmperf_get_khz(int cpu) -{ - if (!cpu_khz) - return 0; - - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) - return 0; - - if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) - return 0; - - if (rcu_is_idle_cpu(cpu)) - return 0; /* Idle CPUs are completely uninteresting. */ - - aperfmperf_snapshot_cpu(cpu, ktime_get(), true); - return per_cpu(samples.khz, cpu); -} - -void arch_freq_prepare_all(void) -{ - ktime_t now = ktime_get(); - bool wait = false; - int cpu; - - if (!cpu_khz) - return; - - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) - return; - - for_each_online_cpu(cpu) { - if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) - continue; - if (rcu_is_idle_cpu(cpu)) - continue; /* Idle CPUs are completely uninteresting. */ - if (!aperfmperf_snapshot_cpu(cpu, now, false)) - wait = true; - } - - if (wait) - msleep(APERFMPERF_REFRESH_DELAY_MS); -} - unsigned int arch_freq_get_on_cpu(int cpu) { struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); @@ -529,6 +486,40 @@ void arch_scale_freq_tick(void) scale_freq_tick(acnt, mcnt); } +/* + * Discard samples older than the define maximum sample age of 20ms. There + * is no point in sending IPIs in such a case. If the scheduler tick was + * not running then the CPU is either idle or isolated. + */ +#define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) + +unsigned int aperfmperf_get_khz(int cpu) +{ + struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); + unsigned long last; + unsigned int seq; + u64 acnt, mcnt; + + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) + return 0; + + do { + seq = raw_read_seqcount_begin(&s->seq); + last = s->last_update; + acnt = s->acnt; + mcnt = s->mcnt; + } while (read_seqcount_retry(&s->seq, seq)); + + /* + * Bail on invalid count and when the last update was too long ago, + * which covers idle and NOHZ full CPUs. + */ + if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) + return 0; + + return div64_u64((cpu_khz * acnt), mcnt); +} + static int __init bp_init_aperfmperf(void) { if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) --- a/fs/proc/cpuinfo.c +++ b/fs/proc/cpuinfo.c @@ -5,14 +5,10 @@ #include #include -__weak void arch_freq_prepare_all(void) -{ -} - extern const struct seq_operations cpuinfo_op; + static int cpuinfo_open(struct inode *inode, struct file *file) { - arch_freq_prepare_all(); return seq_open(file, &cpuinfo_op); } --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1199,7 +1199,6 @@ static inline void sched_cpufreq_governo struct cpufreq_governor *old_gov) { } #endif -extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale From patchwork Fri Apr 15 19:20:04 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 563812 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8A5ACC433F5 for ; Fri, 15 Apr 2022 19:20:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349276AbiDOTWu (ORCPT ); Fri, 15 Apr 2022 15:22:50 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37702 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349450AbiDOTWf (ORCPT ); Fri, 15 Apr 2022 15:22:35 -0400 Received: from galois.linutronix.de (Galois.linutronix.de [IPv6:2a0a:51c0:0:12e:550::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2128449CB7; Fri, 15 Apr 2022 12:20:06 -0700 (PDT) Message-ID: <20220415161206.934040006@linutronix.de> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1650050404; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=/7LOTx5hx+68ftEvKF2Kc2mzFZcFNFGa3+VDAnNipgs=; b=GaUoxDIsPqmzxlXffXRK+ealpE55XymsmTSL/XJsrOjViIeYvJ37JvNgZgVlK7gv+dOIW7 qWQEMSwdB/NaheS/wdMHBDJpnKAjNKYw80HWKv1DcD3LrAm2npH7ZHbzd/m/wuvf/vUTjW vIXCCav3r6fzk2s/JA5R9hlKbpSqV+OSfAG5PL5Bbt+Hsdf6rGtfsedPGrc6PEr+uctYLQ 3Bf2i0wdffu/Xn9oOzvRL+chJZUYrlZ0G8hx33AynsTSVUMGb2QyVl2tt4McIxiOHRH9Hz tCABN3rlmuV9yjmzKatydBi5Oiff9Z1P/wse2hNahQu6Snj9HtLpy3KvmpBlrA== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1650050404; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: references:references; bh=/7LOTx5hx+68ftEvKF2Kc2mzFZcFNFGa3+VDAnNipgs=; b=dAza+VAiQGEv5S45kUuVVsNj3+xxkX4pyoGiKm1qRfoTV/b9TE6V0A9YEQK8Tm6uvfYIoT AglGamnDJTRveqBA== From: Thomas Gleixner To: LKML Cc: x86@kernel.org, "Rafael J. Wysocki" , linux-pm@vger.kernel.org, Eric Dumazet , "Paul E. McKenney" Subject: [patch 10/10] x86/aperfmperf: Replace arch_freq_get_on_cpu() References: <20220415133356.179706384@linutronix.de> MIME-Version: 1.0 Date: Fri, 15 Apr 2022 21:20:04 +0200 (CEST) Precedence: bulk List-ID: X-Mailing-List: linux-pm@vger.kernel.org Reading the current CPU frequency from /sys/..../scaling_cur_freq involves in the worst case two IPIs due to the ad hoc sampling. The frequency invariance infrastructure provides the APERF/MPERF samples already. Utilize them and consolidate this with the /proc/cpuinfo readout. The sample is considered valid for 20ms. So for idle or isolated NOHZ full CPUs the function returns 0, which is matching the previous behaviour. The resulting text size vs. the original APERF/MPERF plus the separate frequency invariance code: text: 2411 -> 723 init.text: 0 -> 767 Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/aperfmperf.c | 94 --------------------------------------- arch/x86/kernel/cpu/proc.c | 2 2 files changed, 2 insertions(+), 94 deletions(-) --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -35,98 +35,6 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(str .seq = SEQCNT_ZERO(cpu_samples.seq) }; -struct aperfmperf_sample { - unsigned int khz; - atomic_t scfpending; - ktime_t time; - u64 aperf; - u64 mperf; -}; - -static DEFINE_PER_CPU(struct aperfmperf_sample, samples); - -#define APERFMPERF_CACHE_THRESHOLD_MS 10 -#define APERFMPERF_REFRESH_DELAY_MS 10 -#define APERFMPERF_STALE_THRESHOLD_MS 1000 - -/* - * aperfmperf_snapshot_khz() - * On the current CPU, snapshot APERF, MPERF, and jiffies - * unless we already did it within 10ms - * calculate kHz, save snapshot - */ -static void aperfmperf_snapshot_khz(void *dummy) -{ - u64 aperf, aperf_delta; - u64 mperf, mperf_delta; - struct aperfmperf_sample *s = this_cpu_ptr(&samples); - unsigned long flags; - - local_irq_save(flags); - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - local_irq_restore(flags); - - aperf_delta = aperf - s->aperf; - mperf_delta = mperf - s->mperf; - - /* - * There is no architectural guarantee that MPERF - * increments faster than we can read it. - */ - if (mperf_delta == 0) - return; - - s->time = ktime_get(); - s->aperf = aperf; - s->mperf = mperf; - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); - atomic_set_release(&s->scfpending, 0); -} - -static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait) -{ - s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu)); - struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); - - /* Don't bother re-computing within the cache threshold time. */ - if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) - return true; - - if (!atomic_xchg(&s->scfpending, 1) || wait) - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait); - - /* Return false if the previous iteration was too long ago. */ - return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; -} - -unsigned int arch_freq_get_on_cpu(int cpu) -{ - struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); - - if (!cpu_khz) - return 0; - - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) - return 0; - - if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) - return 0; - - if (rcu_is_idle_cpu(cpu)) - return 0; - - if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) - return per_cpu(samples.khz, cpu); - - msleep(APERFMPERF_REFRESH_DELAY_MS); - atomic_set(&s->scfpending, 1); - smp_mb(); /* ->scfpending before smp_call_function_single(). */ - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); - - return per_cpu(samples.khz, cpu); -} - static void init_counter_refs(void) { u64 aperf, mperf; @@ -493,7 +401,7 @@ void arch_scale_freq_tick(void) */ #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) -unsigned int aperfmperf_get_khz(int cpu) +unsigned int arch_freq_get_on_cpu(int cpu) { struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); unsigned long last; --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -84,7 +84,7 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = aperfmperf_get_khz(cpu); + unsigned int freq = arch_freq_get_on_cpu(cpu); if (!freq) freq = cpufreq_quick_get(cpu);