Message ID | 20241022034608.32396-1-mario.limonciello@amd.com |
---|---|
Headers | show |
Series | x86 Heterogeneous design identification | expand |
On Mon, Oct 21, 2024 at 10:46:07PM -0500, Mario Limonciello wrote: > * Take this patch from Pawan's series and fix up for feedback left on it. > * Add in AMD case as well Yah, this one is adding way more boilerplate code than it should. IOW, I'm thinking of something simpler like this: --- diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 98eced5084ca..44122b077932 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -53,6 +53,7 @@ static inline void bus_lock_init(void) {} #ifdef CONFIG_CPU_SUP_INTEL u8 get_this_hybrid_cpu_type(void); u32 get_this_hybrid_cpu_native_id(void); +enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c); #else static inline u8 get_this_hybrid_cpu_type(void) { @@ -63,6 +64,18 @@ static inline u32 get_this_hybrid_cpu_native_id(void) { return 0; } +static enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c) +{ + return TOPO_CPU_TYPE_UNKNOWN; +} +#endif +#ifdef CONFIG_CPU_SUP_AMD +enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c); +#else +static inline enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c) +{ + return TOPO_CPU_TYPE_UNKNOWN; +} #endif #ifdef CONFIG_IA32_FEAT_CTL void init_ia32_feat_ctl(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4a686f0e5dbf..c0975815980c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -105,6 +105,24 @@ struct cpuinfo_topology { // Cache level topology IDs u32 llc_id; u32 l2c_id; + + // Hardware defined CPU-type + union { + u32 cpu_type; + struct { + // CPUID.1A.EAX[23-0] + u32 intel_native_model_id :24; + // CPUID.1A.EAX[31-24] + u32 intel_type :8; + }; + struct { + // CPUID 0x80000026.EBX + u32 amd_num_processors :16, + amd_power_eff_ranking :8, + amd_native_model_id :4, + amd_type :4; + }; + }; }; struct cpuinfo_x86 { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index aef70336d624..c43d4b3324bc 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -114,6 +114,12 @@ enum x86_topology_domains { TOPO_MAX_DOMAIN, }; +enum x86_topology_cpu_type { + TOPO_CPU_TYPE_PERFORMANCE, + TOPO_CPU_TYPE_EFFICIENCY, + TOPO_CPU_TYPE_UNKNOWN, +}; + struct x86_topology_system { unsigned int dom_shifts[TOPO_MAX_DOMAIN]; unsigned int dom_size[TOPO_MAX_DOMAIN]; @@ -149,6 +155,8 @@ extern unsigned int __max_threads_per_core; extern unsigned int __num_threads_per_package; extern unsigned int __num_cores_per_package; +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); + static inline unsigned int topology_max_packages(void) { return __max_logical_packages; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fab5caec0b72..7d8d62fdc112 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1205,3 +1205,12 @@ void amd_check_microcode(void) if (cpu_feature_enabled(X86_FEATURE_ZEN2)) on_each_cpu(zenbleed_check_cpu, NULL, 1); } + +enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c) +{ + switch (c->topo.amd_type) { + case 0: return TOPO_CPU_TYPE_PERFORMANCE; + case 1: return TOPO_CPU_TYPE_EFFICIENCY; + } + return TOPO_CPU_TYPE_UNKNOWN; +} diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 3baf3e435834..10719aba6276 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "die_id: %u\n", c->topo.die_id); seq_printf(m, "cu_id: %u\n", c->topo.cu_id); seq_printf(m, "core_id: %u\n", c->topo.core_id); + seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c)); seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); seq_printf(m, "llc_id: %u\n", c->topo.llc_id); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d1de300af173..7b1011d0b369 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -907,3 +907,12 @@ u32 get_this_hybrid_cpu_native_id(void) return cpuid_eax(0x0000001a) & (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1); } + +enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c) +{ + switch (c->topo.intel_type) { + case 0x20: return TOPO_CPU_TYPE_EFFICIENCY; + case 0x40: return TOPO_CPU_TYPE_PERFORMANCE; + } + return TOPO_CPU_TYPE_UNKNOWN; +} diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 7d476fa697ca..03b3c9c3a45e 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan) if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) has_topoext = cpu_parse_topology_ext(tscan); + if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) + tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); + if (!has_topoext && !parse_8000_0008(tscan)) return; diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 9a6069e7133c..38220b64c6b3 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -27,6 +27,23 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, } } +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c) +{ + enum x86_topology_cpu_type type; + + if (c->x86_vendor == X86_VENDOR_INTEL) + type = get_intel_cpu_type(c); + if (c->x86_vendor == X86_VENDOR_AMD) + type = get_amd_cpu_type(c); + + if (type == TOPO_CPU_TYPE_PERFORMANCE) + return "performance"; + else if (type == TOPO_CPU_TYPE_EFFICIENCY) + return "efficiency"; + else + return "unknown"; +} + static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) { struct { @@ -87,6 +104,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) .cu_id = 0xff, .llc_id = BAD_APICID, .l2c_id = BAD_APICID, + .cpu_type = TOPO_CPU_TYPE_UNKNOWN, }; struct cpuinfo_x86 *c = tscan->c; struct { @@ -132,6 +150,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) case X86_VENDOR_INTEL: if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) parse_legacy(tscan); + if (c->cpuid_level >= 0x1a) + c->topo.cpu_type = cpuid_eax(0x1a); break; case X86_VENDOR_HYGON: if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
On 10/22/2024 07:03, Borislav Petkov wrote: > On Tue, Oct 22, 2024 at 01:57:20PM +0200, Borislav Petkov wrote: >> diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c >> index 9a6069e7133c..38220b64c6b3 100644 >> --- a/arch/x86/kernel/cpu/topology_common.c >> +++ b/arch/x86/kernel/cpu/topology_common.c >> @@ -27,6 +27,23 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, >> } >> } >> >> +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c) >> +{ >> + enum x86_topology_cpu_type type; >> + >> + if (c->x86_vendor == X86_VENDOR_INTEL) >> + type = get_intel_cpu_type(c); >> + if (c->x86_vendor == X86_VENDOR_AMD) >> + type = get_amd_cpu_type(c); >> + >> + if (type == TOPO_CPU_TYPE_PERFORMANCE) >> + return "performance"; >> + else if (type == TOPO_CPU_TYPE_EFFICIENCY) >> + return "efficiency"; >> + else >> + return "unknown"; >> +} > > I guess you still need topology_cpu_type() in your next patch but that's easy > - you simply call it in get_topology_cpu_type_name(). > > The point being debugfs will dump the name of the core type and not some magic > number which no one knows. Yeah; makes sense. I'll pull your suggestions in.
On 10/22/2024 11:03, Dave Hansen wrote: > On 10/22/24 04:57, Borislav Petkov wrote: >> +enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c) >> +{ >> + switch (c->topo.intel_type) { >> + case 0x20: return TOPO_CPU_TYPE_EFFICIENCY; >> + case 0x40: return TOPO_CPU_TYPE_PERFORMANCE; >> + } >> + return TOPO_CPU_TYPE_UNKNOWN; >> +} > > This makes me feel a _bit_ uneasy. 0x20 here really does mean "Atom > microarchitecture" and 0x40 means "Core microarchitecture". > > We want to encourage folks to use this new ABI when they want to find > the fastest core to run on. But we don't want them to use it to bind to > a CPU and then deploy Atom-specific optimizations. > > We *also* don't want the in-kernel code to do be doing things like: > > if (get_intel_cpu_type() == TOPO_CPU_TYPE_EFFICIENCY) > setup_force_cpu_bug(FOO); > > That would fall over if Intel ever mixed fast and slow core types with > the same microarchitecture, which is what AMD is doing today. > > Having: > > TOPO_CPU_TYPE_EFFICIENCY, and > TOPO_CPU_TYPE_PERFORMANCE > > is totally fine in generic code. But we also need to preserve the: > > TOPO_HW_CPU_TYPE_INTEL_ATOM > TOPO_HW_CPU_TYPE_INTEL_CORE > > values also for use in vendor-specific code. What you're suggesting is to keep an enum in the intel.c code and any code that needs to match atom vs core can directly use c->topo.intel_type == TOPO_HW_CPU_TYPE_INTEL_ATOM Right? > > In the ABI, I think we should probably make this an explicit > power/performance interface rather than "cpu_type". As much as I like > the human readable "performance" and "efficiency", I'm worried it won't > be flexible enough for future maniacal hardware designers. To be 100% > clear, all the hardware designs that I know of would fit in a two-bucket > ("performance" and "efficiency") scheme. But we've got to decide > whether to commit to that forever. As it stands today none of this is exported anywhere but debugfs; so I wouldn't say we have ABI concerns (yet). Could we wait until the one that breaks the mold shows up?
On 10/22/24 09:13, Mario Limonciello wrote: > On 10/22/2024 11:03, Dave Hansen wrote: ... >> Having: >> >> TOPO_CPU_TYPE_EFFICIENCY, and >> TOPO_CPU_TYPE_PERFORMANCE >> >> is totally fine in generic code. But we also need to preserve the: >> >> TOPO_HW_CPU_TYPE_INTEL_ATOM >> TOPO_HW_CPU_TYPE_INTEL_CORE >> >> values also for use in vendor-specific code. > > What you're suggesting is to keep an enum in the intel.c code and any > code that needs to match atom vs core can directly use > > c->topo.intel_type == TOPO_HW_CPU_TYPE_INTEL_ATOM > > Right? Yep, exactly. > As it stands today none of this is exported anywhere but debugfs; so > I wouldn't say we have ABI concerns (yet). Could we wait until the > one that breaks the mold shows up? Oh, debugfs is fine. I was, for some reason, assuming that the strings were getting spit out in sysfs proper somewhere, not debugfs. Sorry for the noise.
On Tue, Oct 22, 2024 at 11:13:00AM -0500, Mario Limonciello wrote: > > This makes me feel a _bit_ uneasy. 0x20 here really does mean "Atom > > microarchitecture" and 0x40 means "Core microarchitecture". > > > > We want to encourage folks to use this new ABI when they want to find > > the fastest core to run on. But we don't want them to use it to bind to > > a CPU and then deploy Atom-specific optimizations. > > > > We *also* don't want the in-kernel code to do be doing things like: > > > > if (get_intel_cpu_type() == TOPO_CPU_TYPE_EFFICIENCY) > > setup_force_cpu_bug(FOO); > > > > That would fall over if Intel ever mixed fast and slow core types with > > the same microarchitecture, which is what AMD is doing today. > > > > Having: > > > > TOPO_CPU_TYPE_EFFICIENCY, and > > TOPO_CPU_TYPE_PERFORMANCE > > > > is totally fine in generic code. But we also need to preserve the: > > > > TOPO_HW_CPU_TYPE_INTEL_ATOM > > TOPO_HW_CPU_TYPE_INTEL_CORE > > > > values also for use in vendor-specific code. > > What you're suggesting is to keep an enum in the intel.c code and any code > that needs to match atom vs core can directly use > > c->topo.intel_type == TOPO_HW_CPU_TYPE_INTEL_ATOM To be able to match ATOM and CORE in the affected processor table, the enums need to be defined in a way that they can be used in the common code. Specially for !CONFIG_CPU_SUP_INTEL case.
On Tue, Oct 22, 2024 at 01:57:20PM +0200, Borislav Petkov wrote: > On Mon, Oct 21, 2024 at 10:46:07PM -0500, Mario Limonciello wrote: > > * Take this patch from Pawan's series and fix up for feedback left on it. > > * Add in AMD case as well > > Yah, this one is adding way more boilerplate code than it should. IOW, I'm > thinking of something simpler like this: > > --- > > diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h > index 98eced5084ca..44122b077932 100644 > --- a/arch/x86/include/asm/cpu.h > +++ b/arch/x86/include/asm/cpu.h > @@ -53,6 +53,7 @@ static inline void bus_lock_init(void) {} > #ifdef CONFIG_CPU_SUP_INTEL > u8 get_this_hybrid_cpu_type(void); > u32 get_this_hybrid_cpu_native_id(void); > +enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c); > #else > static inline u8 get_this_hybrid_cpu_type(void) > { > @@ -63,6 +64,18 @@ static inline u32 get_this_hybrid_cpu_native_id(void) > { > return 0; > } > +static enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c) > +{ > + return TOPO_CPU_TYPE_UNKNOWN; > +} > +#endif > +#ifdef CONFIG_CPU_SUP_AMD > +enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c); > +#else > +static inline enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c) > +{ > + return TOPO_CPU_TYPE_UNKNOWN; > +} > #endif > #ifdef CONFIG_IA32_FEAT_CTL > void init_ia32_feat_ctl(struct cpuinfo_x86 *c); > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h > index 4a686f0e5dbf..c0975815980c 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -105,6 +105,24 @@ struct cpuinfo_topology { > // Cache level topology IDs > u32 llc_id; > u32 l2c_id; > + > + // Hardware defined CPU-type > + union { > + u32 cpu_type; > + struct { > + // CPUID.1A.EAX[23-0] > + u32 intel_native_model_id :24; > + // CPUID.1A.EAX[31-24] > + u32 intel_type :8; > + }; > + struct { > + // CPUID 0x80000026.EBX > + u32 amd_num_processors :16, > + amd_power_eff_ranking :8, > + amd_native_model_id :4, > + amd_type :4; > + }; > + }; > }; > > struct cpuinfo_x86 { > diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h > index aef70336d624..c43d4b3324bc 100644 > --- a/arch/x86/include/asm/topology.h > +++ b/arch/x86/include/asm/topology.h > @@ -114,6 +114,12 @@ enum x86_topology_domains { > TOPO_MAX_DOMAIN, > }; > > +enum x86_topology_cpu_type { > + TOPO_CPU_TYPE_PERFORMANCE, > + TOPO_CPU_TYPE_EFFICIENCY, > + TOPO_CPU_TYPE_UNKNOWN, > +}; > + > struct x86_topology_system { > unsigned int dom_shifts[TOPO_MAX_DOMAIN]; > unsigned int dom_size[TOPO_MAX_DOMAIN]; > @@ -149,6 +155,8 @@ extern unsigned int __max_threads_per_core; > extern unsigned int __num_threads_per_package; > extern unsigned int __num_cores_per_package; > > +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); > + > static inline unsigned int topology_max_packages(void) > { > return __max_logical_packages; > diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c > index fab5caec0b72..7d8d62fdc112 100644 > --- a/arch/x86/kernel/cpu/amd.c > +++ b/arch/x86/kernel/cpu/amd.c > @@ -1205,3 +1205,12 @@ void amd_check_microcode(void) > if (cpu_feature_enabled(X86_FEATURE_ZEN2)) > on_each_cpu(zenbleed_check_cpu, NULL, 1); > } > + > +enum x86_topology_cpu_type get_amd_cpu_type(struct cpuinfo_x86 *c) > +{ > + switch (c->topo.amd_type) { > + case 0: return TOPO_CPU_TYPE_PERFORMANCE; > + case 1: return TOPO_CPU_TYPE_EFFICIENCY; > + } > + return TOPO_CPU_TYPE_UNKNOWN; > +} > diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c > index 3baf3e435834..10719aba6276 100644 > --- a/arch/x86/kernel/cpu/debugfs.c > +++ b/arch/x86/kernel/cpu/debugfs.c > @@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) > seq_printf(m, "die_id: %u\n", c->topo.die_id); > seq_printf(m, "cu_id: %u\n", c->topo.cu_id); > seq_printf(m, "core_id: %u\n", c->topo.core_id); > + seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c)); > seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); > seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); > seq_printf(m, "llc_id: %u\n", c->topo.llc_id); > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c > index d1de300af173..7b1011d0b369 100644 > --- a/arch/x86/kernel/cpu/intel.c > +++ b/arch/x86/kernel/cpu/intel.c > @@ -907,3 +907,12 @@ u32 get_this_hybrid_cpu_native_id(void) > return cpuid_eax(0x0000001a) & > (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1); > } > + > +enum x86_topology_cpu_type get_intel_cpu_type(struct cpuinfo_x86 *c) > +{ > + switch (c->topo.intel_type) { > + case 0x20: return TOPO_CPU_TYPE_EFFICIENCY; > + case 0x40: return TOPO_CPU_TYPE_PERFORMANCE; > + } > + return TOPO_CPU_TYPE_UNKNOWN; > +} The name of the function is a bit misleading, as it suggests that it returns the Intel specific CPU type(ATOM/CORE), but it actually returns the performance/efficiency generic types. I would suggest to name the function based on what it returns, like: get_generic_cpu_type(struct cpuinfo_x86 *c) { enum x86_topology_cpu_type type; if (c->x86_vendor == X86_VENDOR_INTEL) { switch (c->topo.intel_type) { case 0x20: return TOPO_CPU_TYPE_EFFICIENCY; case 0x40: return TOPO_CPU_TYPE_PERFORMANCE; } if (c->x86_vendor == X86_VENDOR_AMD) { switch (c->topo.amd_type) { case 0: return TOPO_CPU_TYPE_PERFORMANCE; case 1: return TOPO_CPU_TYPE_EFFICIENCY; } return TOPO_CPU_TYPE_UNKNOWN; } get_intel_cpu_type(struct cpuinfo_x86 *c) { return c->topo.intel_type; } get_amd_cpu_type(struct cpuinfo_x86 *c) { return c->topo.amd_type; }
On Tue, Oct 22, 2024 at 09:40:15PM -0700, Pawan Gupta wrote: > get_generic_cpu_type(struct cpuinfo_x86 *c) > { > enum x86_topology_cpu_type type; > > if (c->x86_vendor == X86_VENDOR_INTEL) { > switch (c->topo.intel_type) { > case 0x20: return TOPO_CPU_TYPE_EFFICIENCY; > case 0x40: return TOPO_CPU_TYPE_PERFORMANCE; > } > if (c->x86_vendor == X86_VENDOR_AMD) { > switch (c->topo.amd_type) { > case 0: return TOPO_CPU_TYPE_PERFORMANCE; > case 1: return TOPO_CPU_TYPE_EFFICIENCY; > } > > return TOPO_CPU_TYPE_UNKNOWN; > } Ok... > get_intel_cpu_type(struct cpuinfo_x86 *c) > { > return c->topo.intel_type; > } > > get_amd_cpu_type(struct cpuinfo_x86 *c) > { > return c->topo.amd_type; ... except those silly wrappers can go. There's a reason cpuinfo_x86 has well-defined members which can be used directly.