Message ID | 20240117173458.2312669-5-quic_sibis@quicinc.com |
---|---|
State | New |
Headers | show |
Series | firmware: arm_scmi: Qualcomm Vendor Protocol | expand |
On 1/18/24 02:11, Dmitry Baryshkov wrote: > On Wed, 17 Jan 2024 at 19:36, Sibi Sankar <quic_sibis@quicinc.com> wrote: >> >> From: Shivnandan Kumar <quic_kshivnan@quicinc.com> >> >> This patch introduces a client driver that interacts with the SCMI QCOM > > git grep This.patch Documentation/process/ > >> vendor protocol and passes on the required tuneables to start various >> features running on the SCMI controller. > > Is there any word about the 'memlat'? No. Unless one reads into the > patch, one can not come up with the idea of what is being introduced. ack, will fix it in the re-spin. > >> >> Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> >> Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> >> Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> >> Co-developed-by: Amir Vajid <avajid@quicinc.com> >> Signed-off-by: Amir Vajid <avajid@quicinc.com> >> Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> >> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> >> --- >> drivers/soc/qcom/Kconfig | 10 + >> drivers/soc/qcom/Makefile | 1 + >> drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > > Should it go to drivers/firmware/arm_scmi instead? Or maybe to drivers/devfreq? I don't think it should go into arm_scmi unless Sudeep wants it there. As for drivers/devfreq, I would have moved it there if this driver benfitted being classified as a devfreq device. We can't use any of the available governors on it and the tuneables appear way too custom. These are the reasons why I placed it in drivers/soc/qcom instead. > >> 3 files changed, 497 insertions(+) >> create mode 100644 drivers/soc/qcom/qcom_scmi_client.c >> >> diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig >> index c6ca4de42586..1530558aebfb 100644 >> --- a/drivers/soc/qcom/Kconfig >> +++ b/drivers/soc/qcom/Kconfig >> @@ -264,6 +264,16 @@ config QCOM_ICC_BWMON >> the fixed bandwidth votes from cpufreq (CPU nodes) thus achieve high >> memory throughput even with lower CPU frequencies. >> >> +config QCOM_SCMI_CLIENT >> + tristate "Qualcomm Technologies Inc. SCMI client driver" >> + depends on QCOM_SCMI_VENDOR_PROTOCOL || COMPILE_TEST >> + default n >> + help >> + SCMI client driver registers for SCMI QCOM vendor protocol. >> + >> + This driver interacts with the vendor protocol and passes on the required >> + tuneables to start various features running on the SCMI controller. >> + >> config QCOM_INLINE_CRYPTO_ENGINE >> tristate >> select QCOM_SCM >> diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile >> index 05b3d54e8dc9..c2a51293c886 100644 >> --- a/drivers/soc/qcom/Makefile >> +++ b/drivers/soc/qcom/Makefile >> @@ -32,5 +32,6 @@ obj-$(CONFIG_QCOM_APR) += apr.o >> obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o >> obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o >> obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o >> +obj-$(CONFIG_QCOM_SCMI_CLIENT) += qcom_scmi_client.o >> qcom_ice-objs += ice.o >> obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o >> diff --git a/drivers/soc/qcom/qcom_scmi_client.c b/drivers/soc/qcom/qcom_scmi_client.c >> new file mode 100644 >> index 000000000000..418aa7900496 >> --- /dev/null >> +++ b/drivers/soc/qcom/qcom_scmi_client.c >> @@ -0,0 +1,486 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Copyright (c) 2024, The Linux Foundation. All rights reserved. >> + */ >> + >> +#include <linux/cpu.h> >> +#include <linux/err.h> >> +#include <linux/errno.h> >> +#include <linux/init.h> >> +#include <linux/kernel.h> >> +#include <linux/module.h> >> +#include <linux/of.h> >> +#include <linux/platform_device.h> >> +#include <linux/qcom_scmi_vendor.h> >> +#include <linux/scmi_protocol.h> >> + >> +#define MAX_MEMORY_TYPES 3 >> +#define MEMLAT_ALGO_STR 0x74616C6D656D /* "memlat" */ >> +#define INVALID_IDX 0xFF >> +#define MAX_NAME_LEN 20 >> +#define MAX_MAP_ENTRIES 6 >> +#define MAX_MONITOR_CNT 4 >> +#define SCMI_VENDOR_MSG_START 3 >> +#define SCMI_VENDOR_MSG_MODULE_START 16 >> + >> +enum scmi_memlat_protocol_cmd { >> + MEMLAT_SET_LOG_LEVEL = SCMI_VENDOR_MSG_START, >> + MEMLAT_FLUSH_LOGBUF, >> + MEMLAT_SET_MEM_GROUP = SCMI_VENDOR_MSG_MODULE_START, >> + MEMLAT_SET_MONITOR, >> + MEMLAT_SET_COMMON_EV_MAP, >> + MEMLAT_SET_GRP_EV_MAP, >> + MEMLAT_ADAPTIVE_LOW_FREQ, >> + MEMLAT_ADAPTIVE_HIGH_FREQ, >> + MEMLAT_GET_ADAPTIVE_CUR_FREQ, >> + MEMLAT_IPM_CEIL, >> + MEMLAT_FE_STALL_FLOOR, >> + MEMLAT_BE_STALL_FLOOR, >> + MEMLAT_WB_PCT, >> + MEMLAT_IPM_FILTER, >> + MEMLAT_FREQ_SCALE_PCT, >> + MEMLAT_FREQ_SCALE_CEIL_MHZ, >> + MEMLAT_FREQ_SCALE_FLOOR_MHZ, >> + MEMLAT_SAMPLE_MS, >> + MEMLAT_MON_FREQ_MAP, >> + MEMLAT_SET_MIN_FREQ, >> + MEMLAT_SET_MAX_FREQ, >> + MEMLAT_GET_CUR_FREQ, >> + MEMLAT_START_TIMER, >> + MEMLAT_STOP_TIMER, >> + MEMLAT_GET_TIMESTAMP, >> + MEMLAT_MAX_MSG >> +}; >> + >> +struct map_table { >> + u16 v1; >> + u16 v2; >> +}; >> + > > Any documentation for these structures? It won't bite you, but it will > help reviewers and other developers. ack > >> +struct map_param_msg { >> + u32 hw_type; >> + u32 mon_idx; >> + u32 nr_rows; >> + struct map_table tbl[MAX_MAP_ENTRIES]; >> +} __packed; >> + >> +struct node_msg { >> + u32 cpumask; >> + u32 hw_type; >> + u32 mon_type; >> + u32 mon_idx; >> + char mon_name[MAX_NAME_LEN]; >> +}; >> + >> +struct scalar_param_msg { >> + u32 hw_type; >> + u32 mon_idx; >> + u32 val; >> +}; >> + >> +enum common_ev_idx { >> + INST_IDX, >> + CYC_IDX, >> + FE_STALL_IDX, >> + BE_STALL_IDX, >> + NUM_COMMON_EVS >> +}; >> + >> +enum grp_ev_idx { >> + MISS_IDX, >> + WB_IDX, >> + ACC_IDX, >> + NUM_GRP_EVS >> +}; >> + >> +#define EV_CPU_CYCLES 0 >> +#define EV_INST_RETIRED 2 >> +#define EV_L2_D_RFILL 5 >> + >> +struct ev_map_msg { >> + u32 num_evs; >> + u32 hw_type; >> + u32 cid[NUM_COMMON_EVS]; >> +}; >> + >> +struct cpufreq_memfreq_map { >> + unsigned int cpufreq_mhz; >> + unsigned int memfreq_khz; >> +}; >> + >> +struct scmi_monitor_info { >> + struct cpufreq_memfreq_map *freq_map; >> + char mon_name[MAX_NAME_LEN]; >> + u32 mon_idx; >> + u32 mon_type; >> + u32 ipm_ceil; >> + u32 mask; >> + u32 freq_map_len; >> +}; >> + >> +struct scmi_memory_info { >> + struct scmi_monitor_info *monitor[MAX_MONITOR_CNT]; >> + u32 hw_type; >> + int monitor_cnt; >> + u32 min_freq; >> + u32 max_freq; >> +}; >> + >> +struct scmi_memlat_info { >> + struct scmi_protocol_handle *ph; >> + const struct qcom_scmi_vendor_ops *ops; >> + struct scmi_memory_info *memory[MAX_MEMORY_TYPES]; >> + int memory_cnt; >> +}; >> + >> +static int get_mask(struct device_node *np, u32 *mask) >> +{ >> + struct device_node *dev_phandle; >> + struct device *cpu_dev; >> + int cpu, i = 0; >> + int ret = -ENODEV; >> + >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); >> + while (dev_phandle) { >> + for_each_possible_cpu(cpu) { >> + cpu_dev = get_cpu_device(cpu); >> + if (cpu_dev && cpu_dev->of_node == dev_phandle) { >> + *mask |= BIT(cpu); >> + ret = 0; >> + break; >> + } >> + } > > There is of_cpu_node_to_id(). No need to reinvent it. ack > >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); >> + } >> + >> + return ret; >> +} >> + >> +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev, >> + struct device_node *of_node, >> + u32 *cnt) >> +{ >> + int len, nf, i, j; >> + u32 data; >> + struct cpufreq_memfreq_map *tbl; >> + int ret; >> + >> + if (!of_find_property(of_node, "qcom,cpufreq-memfreq-tbl", &len)) >> + return NULL; >> + len /= sizeof(data); >> + >> + if (len % 2 || len == 0) >> + return NULL; >> + nf = len / 2; >> + >> + tbl = devm_kzalloc(dev, (nf + 1) * sizeof(struct cpufreq_memfreq_map), >> + GFP_KERNEL); >> + if (!tbl) >> + return NULL; >> + >> + for (i = 0, j = 0; i < nf; i++, j += 2) { >> + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", >> + j, &data); >> + if (ret < 0) >> + return NULL; >> + tbl[i].cpufreq_mhz = data / 1000; >> + >> + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", >> + j + 1, &data); >> + if (ret < 0) >> + return NULL; >> + >> + tbl[i].memfreq_khz = data; >> + pr_debug("Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, >> + tbl[i].memfreq_khz); >> + } >> + *cnt = nf; >> + tbl[i].cpufreq_mhz = 0; > > This looks like a lame version of the OPP table. I didn't know listing multiple frequencies in a opp was allowed. We can probably get away with it here since we just parse the data here and not populate data in the opp core. > >> + >> + return tbl; >> +} >> + >> +static int process_scmi_memlat_of_node(struct scmi_device *sdev, struct scmi_memlat_info *info) >> +{ >> + struct device_node *memlat_np, *memory_np, *monitor_np; >> + struct scmi_memory_info *memory; >> + struct scmi_monitor_info *monitor; >> + int ret = 0, i = 0, j; >> + u32 memfreq[2]; >> + >> + of_node_get(sdev->handle->dev->of_node); >> + memlat_np = of_find_node_by_name(sdev->handle->dev->of_node, "memlat"); >> + >> + info->memory_cnt = of_get_child_count(memlat_np); >> + if (info->memory_cnt <= 0) >> + pr_err("No memory nodes present\n"); >> + >> + for_each_child_of_node(memlat_np, memory_np) { >> + memory = devm_kzalloc(&sdev->dev, sizeof(*memory), GFP_KERNEL); >> + if (!memory) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + >> + ret = of_property_read_u32(memory_np, "reg", &memory->hw_type); >> + if (ret) { >> + pr_err("Failed to read memory type\n"); >> + goto err; >> + } >> + >> + memory->monitor_cnt = of_get_child_count(memory_np); >> + if (memory->monitor_cnt <= 0) { >> + pr_err("No monitor nodes present\n"); >> + ret = -EINVAL; >> + goto err; >> + } >> + >> + ret = of_property_read_u32_array(memory_np, "freq-table-khz", memfreq, 2); >> + if (ret && (ret != -EINVAL)) { >> + pr_err("Failed to read min/max freq %d\n", ret); >> + goto err; >> + } > > foo-min-frequency and foo-max-frequency please. ack > >> + >> + memory->min_freq = memfreq[0]; >> + memory->max_freq = memfreq[1]; >> + info->memory[i] = memory; >> + j = 0; >> + i++; >> + >> + for_each_child_of_node(memory_np, monitor_np) { >> + monitor = devm_kzalloc(&sdev->dev, sizeof(*monitor), GFP_KERNEL); >> + if (!monitor) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + >> + monitor->mon_type = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 1 : 0; >> + monitor->ipm_ceil = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 0 : 20000000; > > Huh? lol, will add more comments. > >> + >> + if (get_mask(monitor_np, &monitor->mask)) { >> + pr_err("Failed to populate cpu mask %d\n", ret); >> + goto err; >> + } >> + >> + monitor->freq_map = init_cpufreq_memfreq_map(&sdev->dev, monitor_np, >> + &monitor->freq_map_len); >> + snprintf(monitor->mon_name, MAX_NAME_LEN, "monitor-%d", j); >> + monitor->mon_idx = j; >> + >> + memory->monitor[j] = monitor; >> + j++; >> + } >> + } >> + >> + return 0; >> + >> +err: >> + of_node_put(memlat_np); >> + >> + return ret; >> +} >> + >> +static int configure_cpucp_common_events(struct scmi_memlat_info *info) >> +{ >> + const struct qcom_scmi_vendor_ops *ops = info->ops; >> + u8 ev_map[NUM_COMMON_EVS]; >> + struct ev_map_msg msg; >> + int ret; >> + >> + memset(ev_map, 0xFF, NUM_COMMON_EVS); >> + >> + msg.num_evs = NUM_COMMON_EVS; >> + msg.hw_type = INVALID_IDX; >> + msg.cid[INST_IDX] = EV_INST_RETIRED; >> + msg.cid[CYC_IDX] = EV_CPU_CYCLES; >> + msg.cid[FE_STALL_IDX] = INVALID_IDX; >> + msg.cid[BE_STALL_IDX] = INVALID_IDX; >> + >> + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_COMMON_EV_MAP, >> + sizeof(msg)); >> + return ret; >> +} >> + >> +static int configure_cpucp_grp(struct scmi_memlat_info *info, int memory_index) >> +{ >> + const struct qcom_scmi_vendor_ops *ops = info->ops; >> + struct scmi_memory_info *memory = info->memory[memory_index]; >> + struct ev_map_msg ev_msg; >> + u8 ev_map[NUM_GRP_EVS]; >> + struct node_msg msg; >> + int ret; >> + >> + msg.cpumask = 0; >> + msg.hw_type = memory->hw_type; >> + msg.mon_type = 0; >> + msg.mon_idx = 0; >> + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MEM_GROUP, sizeof(msg)); >> + if (ret < 0) { >> + pr_err("Failed to configure mem type %d\n", memory->hw_type); >> + return ret; >> + } >> + >> + memset(ev_map, 0xFF, NUM_GRP_EVS); >> + ev_msg.num_evs = NUM_GRP_EVS; >> + ev_msg.hw_type = memory->hw_type; >> + ev_msg.cid[MISS_IDX] = EV_L2_D_RFILL; >> + ev_msg.cid[WB_IDX] = INVALID_IDX; >> + ev_msg.cid[ACC_IDX] = INVALID_IDX; >> + ret = ops->set_param(info->ph, &ev_msg, MEMLAT_ALGO_STR, MEMLAT_SET_GRP_EV_MAP, >> + sizeof(ev_msg)); >> + if (ret < 0) { >> + pr_err("Failed to configure event map for mem type %d\n", memory->hw_type); >> + return ret; >> + } >> + >> + return ret; >> +} >> + >> +static int configure_cpucp_mon(struct scmi_memlat_info *info, int memory_index, int monitor_index) >> +{ >> + const struct qcom_scmi_vendor_ops *ops = info->ops; >> + struct scmi_memory_info *memory = info->memory[memory_index]; >> + struct scmi_monitor_info *monitor = memory->monitor[monitor_index]; >> + struct scalar_param_msg scalar_msg; >> + struct map_param_msg map_msg; >> + struct node_msg msg; >> + int ret; >> + int i; >> + >> + msg.cpumask = monitor->mask; >> + msg.hw_type = memory->hw_type; >> + msg.mon_type = monitor->mon_type; >> + msg.mon_idx = monitor->mon_idx; >> + strscpy(msg.mon_name, monitor->mon_name, sizeof(msg.mon_name)); >> + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MONITOR, sizeof(msg)); >> + if (ret < 0) { >> + pr_err("Failed to configure monitor %s\n", monitor->mon_name); >> + return ret; >> + } >> + >> + scalar_msg.hw_type = memory->hw_type; >> + scalar_msg.mon_idx = monitor->mon_idx; >> + scalar_msg.val = monitor->ipm_ceil; >> + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_IPM_CEIL, >> + sizeof(scalar_msg)); >> + if (ret < 0) { >> + pr_err("Failed to set ipm ceil for %s\n", monitor->mon_name); >> + return ret; >> + } >> + >> + map_msg.hw_type = memory->hw_type; >> + map_msg.mon_idx = monitor->mon_idx; >> + map_msg.nr_rows = monitor->freq_map_len; >> + for (i = 0; i < monitor->freq_map_len; i++) { >> + map_msg.tbl[i].v1 = monitor->freq_map[i].cpufreq_mhz; >> + map_msg.tbl[i].v2 = monitor->freq_map[i].memfreq_khz / 1000; >> + } > > So this table goes 1:1 to the firmware? Is it going to be the same for > all versions of the SoC? If so, it might be better to turn it into the > static data inside the driver. If it doesn't change, there is no need > to put it to DT. The table does go directly to the firmware but obviously varies across SoCs. Also since it's a SCMI client driver we don't have a way to distinguish between SoCs based on compatibles. So it made more sense to move it to the device tree instead. -Sibi > >> + ret = ops->set_param(info->ph, &map_msg, MEMLAT_ALGO_STR, MEMLAT_MON_FREQ_MAP, >> + sizeof(map_msg)); >> + if (ret < 0) { >> + pr_err("Failed to configure freq_map for %s\n", monitor->mon_name); >> + return ret; >> + } >> + >> + scalar_msg.hw_type = memory->hw_type; >> + scalar_msg.mon_idx = monitor->mon_idx; >> + scalar_msg.val = memory->min_freq; >> + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MIN_FREQ, >> + sizeof(scalar_msg)); >> + if (ret < 0) { >> + pr_err("Failed to set min_freq for %s\n", monitor->mon_name); >> + return ret; >> + } >> + >> + scalar_msg.hw_type = memory->hw_type; >> + scalar_msg.mon_idx = monitor->mon_idx; >> + scalar_msg.val = memory->max_freq; >> + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MAX_FREQ, >> + sizeof(scalar_msg)); >> + if (ret < 0) >> + pr_err("Failed to set max_freq for %s\n", monitor->mon_name); >> + >> + return ret; >> +} >> + >> +static int cpucp_memlat_init(struct scmi_device *sdev) >> +{ >> + const struct scmi_handle *handle = sdev->handle; >> + const struct qcom_scmi_vendor_ops *ops; >> + struct scmi_protocol_handle *ph; >> + struct scmi_memlat_info *info; >> + u32 cpucp_sample_ms = 8; >> + int ret, i, j; >> + >> + if (!handle) >> + return -ENODEV; >> + >> + ops = handle->devm_protocol_get(sdev, QCOM_SCMI_VENDOR_PROTOCOL, &ph); >> + if (IS_ERR(ops)) >> + return PTR_ERR(ops); >> + >> + info = devm_kzalloc(&sdev->dev, sizeof(*info), GFP_KERNEL); >> + if (!info) >> + return -ENOMEM; >> + >> + ret = process_scmi_memlat_of_node(sdev, info); >> + if (ret) >> + pr_err("Failed to configure common events: %d\n", ret); >> + >> + info->ph = ph; >> + info->ops = ops; >> + >> + ret = configure_cpucp_common_events(info); >> + if (ret < 0) >> + pr_err("Failed to configure common events: %d\n", ret); >> + >> + for (i = 0; i < info->memory_cnt; i++) { >> + ret = configure_cpucp_grp(info, i); >> + if (ret < 0) >> + pr_err("Failed to configure mem group: %d\n", ret); >> + >> + for (j = 0; j < info->memory[i]->monitor_cnt; j++) { >> + /* Configure per monitor parameters */ >> + ret = configure_cpucp_mon(info, i, j); >> + if (ret < 0) >> + pr_err("Failed to configure monitor: %d\n", ret); >> + } >> + } >> + >> + ret = ops->set_param(ph, &cpucp_sample_ms, MEMLAT_ALGO_STR, MEMLAT_SAMPLE_MS, >> + sizeof(cpucp_sample_ms)); >> + if (ret < 0) >> + pr_err("Failed to set cpucp sample_ms ret = %d\n", ret); >> + >> + /* Start sampling and voting timer */ >> + ret = ops->start_activity(ph, NULL, MEMLAT_ALGO_STR, MEMLAT_START_TIMER, 0); >> + if (ret < 0) >> + pr_err("Error in starting the mem group timer %d\n", ret); >> + >> + dev_set_drvdata(&sdev->dev, info); >> + >> + return ret; >> +} >> + >> +static int scmi_client_probe(struct scmi_device *sdev) >> +{ >> + cpucp_memlat_init(sdev); >> + >> + return 0; >> +} >> + >> +static const struct scmi_device_id scmi_id_table[] = { >> + { .protocol_id = QCOM_SCMI_VENDOR_PROTOCOL, .name = "qcom_scmi_vendor_protocol" }, >> + { }, >> +}; >> +MODULE_DEVICE_TABLE(scmi, scmi_id_table); >> + >> +static struct scmi_driver qcom_scmi_client_drv = { >> + .name = "qcom-scmi-driver", >> + .probe = scmi_client_probe, >> + .id_table = scmi_id_table, >> +}; >> +module_scmi_driver(qcom_scmi_client_drv); >> + >> +MODULE_DESCRIPTION("QTI SCMI client driver"); >> +MODULE_LICENSE("GPL"); >> -- >> 2.34.1 >> >> > >
On 1/18/24 01:58, Konrad Dybcio wrote: > > > On 1/17/24 18:34, Sibi Sankar wrote: >> From: Shivnandan Kumar <quic_kshivnan@quicinc.com> >> >> This patch introduces a client driver that interacts with the SCMI QCOM >> vendor protocol and passes on the required tuneables to start various >> features running on the SCMI controller. >> >> Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> >> Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> >> Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> >> Co-developed-by: Amir Vajid <avajid@quicinc.com> >> Signed-off-by: Amir Vajid <avajid@quicinc.com> >> Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> >> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> >> --- > > [...] > > >> + >> +struct cpufreq_memfreq_map { >> + unsigned int cpufreq_mhz; >> + unsigned int memfreq_khz; >> +}; > > Weird use of tabs will fix it in the next re-spin. > > [...] > >> +static int get_mask(struct device_node *np, u32 *mask) >> +{ >> + struct device_node *dev_phandle; >> + struct device *cpu_dev; >> + int cpu, i = 0; >> + int ret = -ENODEV; > > Don't initialize ret here, return 0 instead of breaking and return > enodev otherwise. ack > >> + >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); >> + while (dev_phandle) { >> + for_each_possible_cpu(cpu) { >> + cpu_dev = get_cpu_device(cpu); >> + if (cpu_dev && cpu_dev->of_node == dev_phandle) { >> + *mask |= BIT(cpu); >> + ret = 0; >> + break; >> + } >> + } > > of_cpu_node_to_id() ack > >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); >> + } >> + >> + return ret; >> +} > > >> + >> +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct >> device *dev, >> + struct device_node *of_node, >> + u32 *cnt) > > I really feel like this is trying to reinvent OPP.. > > if you structure your entries like so: > > opp-0 { > opp-hz = /bits/ 64 <12341234 43214321>; > }; > > you'll be able to use all the fantastic APIs that have been > created over the years! I didn't know listing multiple frequencies in a opp was allowed. We can probably get away with it here since we just parse the data here and not populate data in the opp core. > > [...] > >> + monitor->mon_type = (of_property_read_bool(monitor_np, >> "qcom,compute-mon")) ? 1 : 0; >> + monitor->ipm_ceil = (of_property_read_bool(monitor_np, >> "qcom,compute-mon")) ? 0 : 20000000; > > What does it even mean for a monitor to be a compute mon? > When a monitor is marked compute-mon it means that the table is followed religiously irrespective whether the instruction per miss count threshold (ipm) is exceeded or not. Equivalent to having a cpufreq map -> l3/DDR bw mapping upstream. > There seem to be no dt-bindings for properties referenced in this > driver, neither in the series nor in the dependencies. This is > strictly required. Ack Thanks again for reviewing the series. :) -Sibi > > Konrad
On Mon, 12 Feb 2024 at 12:24, Sibi Sankar <quic_sibis@quicinc.com> wrote: > > > > On 1/18/24 02:11, Dmitry Baryshkov wrote: > > On Wed, 17 Jan 2024 at 19:36, Sibi Sankar <quic_sibis@quicinc.com> wrote: > >> > >> From: Shivnandan Kumar <quic_kshivnan@quicinc.com> > >> > >> This patch introduces a client driver that interacts with the SCMI QCOM > > > > git grep This.patch Documentation/process/ > > > >> vendor protocol and passes on the required tuneables to start various > >> features running on the SCMI controller. > > > > Is there any word about the 'memlat'? No. Unless one reads into the > > patch, one can not come up with the idea of what is being introduced. > > ack, will fix it in the re-spin. > > > > >> > >> Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> > >> Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > >> Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > >> Co-developed-by: Amir Vajid <avajid@quicinc.com> > >> Signed-off-by: Amir Vajid <avajid@quicinc.com> > >> Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> > >> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> > >> --- > >> drivers/soc/qcom/Kconfig | 10 + > >> drivers/soc/qcom/Makefile | 1 + > >> drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > > > > Should it go to drivers/firmware/arm_scmi instead? Or maybe to drivers/devfreq? > > I don't think it should go into arm_scmi unless Sudeep wants it there. > As for drivers/devfreq, I would have moved it there if this driver > benfitted being classified as a devfreq device. We can't use any of > the available governors on it and the tuneables appear way too custom. > These are the reasons why I placed it in drivers/soc/qcom instead. > > > > >> 3 files changed, 497 insertions(+) > >> create mode 100644 drivers/soc/qcom/qcom_scmi_client.c > >> > >> diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig > >> index c6ca4de42586..1530558aebfb 100644 > >> --- a/drivers/soc/qcom/Kconfig > >> +++ b/drivers/soc/qcom/Kconfig > >> @@ -264,6 +264,16 @@ config QCOM_ICC_BWMON > >> the fixed bandwidth votes from cpufreq (CPU nodes) thus achieve high > >> memory throughput even with lower CPU frequencies. > >> > >> +config QCOM_SCMI_CLIENT > >> + tristate "Qualcomm Technologies Inc. SCMI client driver" > >> + depends on QCOM_SCMI_VENDOR_PROTOCOL || COMPILE_TEST > >> + default n > >> + help > >> + SCMI client driver registers for SCMI QCOM vendor protocol. > >> + > >> + This driver interacts with the vendor protocol and passes on the required > >> + tuneables to start various features running on the SCMI controller. > >> + > >> config QCOM_INLINE_CRYPTO_ENGINE > >> tristate > >> select QCOM_SCM > >> diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile > >> index 05b3d54e8dc9..c2a51293c886 100644 > >> --- a/drivers/soc/qcom/Makefile > >> +++ b/drivers/soc/qcom/Makefile > >> @@ -32,5 +32,6 @@ obj-$(CONFIG_QCOM_APR) += apr.o > >> obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o > >> obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o > >> obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o > >> +obj-$(CONFIG_QCOM_SCMI_CLIENT) += qcom_scmi_client.o > >> qcom_ice-objs += ice.o > >> obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o > >> diff --git a/drivers/soc/qcom/qcom_scmi_client.c b/drivers/soc/qcom/qcom_scmi_client.c > >> new file mode 100644 > >> index 000000000000..418aa7900496 > >> --- /dev/null > >> +++ b/drivers/soc/qcom/qcom_scmi_client.c > >> @@ -0,0 +1,486 @@ > >> +// SPDX-License-Identifier: GPL-2.0-only > >> +/* > >> + * Copyright (c) 2024, The Linux Foundation. All rights reserved. > >> + */ > >> + > >> +#include <linux/cpu.h> > >> +#include <linux/err.h> > >> +#include <linux/errno.h> > >> +#include <linux/init.h> > >> +#include <linux/kernel.h> > >> +#include <linux/module.h> > >> +#include <linux/of.h> > >> +#include <linux/platform_device.h> > >> +#include <linux/qcom_scmi_vendor.h> > >> +#include <linux/scmi_protocol.h> > >> + > >> +#define MAX_MEMORY_TYPES 3 > >> +#define MEMLAT_ALGO_STR 0x74616C6D656D /* "memlat" */ > >> +#define INVALID_IDX 0xFF > >> +#define MAX_NAME_LEN 20 > >> +#define MAX_MAP_ENTRIES 6 > >> +#define MAX_MONITOR_CNT 4 > >> +#define SCMI_VENDOR_MSG_START 3 > >> +#define SCMI_VENDOR_MSG_MODULE_START 16 > >> + > >> +enum scmi_memlat_protocol_cmd { > >> + MEMLAT_SET_LOG_LEVEL = SCMI_VENDOR_MSG_START, > >> + MEMLAT_FLUSH_LOGBUF, > >> + MEMLAT_SET_MEM_GROUP = SCMI_VENDOR_MSG_MODULE_START, > >> + MEMLAT_SET_MONITOR, > >> + MEMLAT_SET_COMMON_EV_MAP, > >> + MEMLAT_SET_GRP_EV_MAP, > >> + MEMLAT_ADAPTIVE_LOW_FREQ, > >> + MEMLAT_ADAPTIVE_HIGH_FREQ, > >> + MEMLAT_GET_ADAPTIVE_CUR_FREQ, > >> + MEMLAT_IPM_CEIL, > >> + MEMLAT_FE_STALL_FLOOR, > >> + MEMLAT_BE_STALL_FLOOR, > >> + MEMLAT_WB_PCT, > >> + MEMLAT_IPM_FILTER, > >> + MEMLAT_FREQ_SCALE_PCT, > >> + MEMLAT_FREQ_SCALE_CEIL_MHZ, > >> + MEMLAT_FREQ_SCALE_FLOOR_MHZ, > >> + MEMLAT_SAMPLE_MS, > >> + MEMLAT_MON_FREQ_MAP, > >> + MEMLAT_SET_MIN_FREQ, > >> + MEMLAT_SET_MAX_FREQ, > >> + MEMLAT_GET_CUR_FREQ, > >> + MEMLAT_START_TIMER, > >> + MEMLAT_STOP_TIMER, > >> + MEMLAT_GET_TIMESTAMP, > >> + MEMLAT_MAX_MSG > >> +}; > >> + > >> +struct map_table { > >> + u16 v1; > >> + u16 v2; > >> +}; > >> + > > > > Any documentation for these structures? It won't bite you, but it will > > help reviewers and other developers. > > ack > > > > >> +struct map_param_msg { > >> + u32 hw_type; > >> + u32 mon_idx; > >> + u32 nr_rows; > >> + struct map_table tbl[MAX_MAP_ENTRIES]; > >> +} __packed; > >> + > >> +struct node_msg { > >> + u32 cpumask; > >> + u32 hw_type; > >> + u32 mon_type; > >> + u32 mon_idx; > >> + char mon_name[MAX_NAME_LEN]; > >> +}; > >> + > >> +struct scalar_param_msg { > >> + u32 hw_type; > >> + u32 mon_idx; > >> + u32 val; > >> +}; > >> + > >> +enum common_ev_idx { > >> + INST_IDX, > >> + CYC_IDX, > >> + FE_STALL_IDX, > >> + BE_STALL_IDX, > >> + NUM_COMMON_EVS > >> +}; > >> + > >> +enum grp_ev_idx { > >> + MISS_IDX, > >> + WB_IDX, > >> + ACC_IDX, > >> + NUM_GRP_EVS > >> +}; > >> + > >> +#define EV_CPU_CYCLES 0 > >> +#define EV_INST_RETIRED 2 > >> +#define EV_L2_D_RFILL 5 > >> + > >> +struct ev_map_msg { > >> + u32 num_evs; > >> + u32 hw_type; > >> + u32 cid[NUM_COMMON_EVS]; > >> +}; > >> + > >> +struct cpufreq_memfreq_map { > >> + unsigned int cpufreq_mhz; > >> + unsigned int memfreq_khz; > >> +}; > >> + > >> +struct scmi_monitor_info { > >> + struct cpufreq_memfreq_map *freq_map; > >> + char mon_name[MAX_NAME_LEN]; > >> + u32 mon_idx; > >> + u32 mon_type; > >> + u32 ipm_ceil; > >> + u32 mask; > >> + u32 freq_map_len; > >> +}; > >> + > >> +struct scmi_memory_info { > >> + struct scmi_monitor_info *monitor[MAX_MONITOR_CNT]; > >> + u32 hw_type; > >> + int monitor_cnt; > >> + u32 min_freq; > >> + u32 max_freq; > >> +}; > >> + > >> +struct scmi_memlat_info { > >> + struct scmi_protocol_handle *ph; > >> + const struct qcom_scmi_vendor_ops *ops; > >> + struct scmi_memory_info *memory[MAX_MEMORY_TYPES]; > >> + int memory_cnt; > >> +}; > >> + > >> +static int get_mask(struct device_node *np, u32 *mask) > >> +{ > >> + struct device_node *dev_phandle; > >> + struct device *cpu_dev; > >> + int cpu, i = 0; > >> + int ret = -ENODEV; > >> + > >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); > >> + while (dev_phandle) { > >> + for_each_possible_cpu(cpu) { > >> + cpu_dev = get_cpu_device(cpu); > >> + if (cpu_dev && cpu_dev->of_node == dev_phandle) { > >> + *mask |= BIT(cpu); > >> + ret = 0; > >> + break; > >> + } > >> + } > > > > There is of_cpu_node_to_id(). No need to reinvent it. > > ack > > > > >> + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); > >> + } > >> + > >> + return ret; > >> +} > >> + > >> +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev, > >> + struct device_node *of_node, > >> + u32 *cnt) > >> +{ > >> + int len, nf, i, j; > >> + u32 data; > >> + struct cpufreq_memfreq_map *tbl; > >> + int ret; > >> + > >> + if (!of_find_property(of_node, "qcom,cpufreq-memfreq-tbl", &len)) > >> + return NULL; > >> + len /= sizeof(data); > >> + > >> + if (len % 2 || len == 0) > >> + return NULL; > >> + nf = len / 2; > >> + > >> + tbl = devm_kzalloc(dev, (nf + 1) * sizeof(struct cpufreq_memfreq_map), > >> + GFP_KERNEL); > >> + if (!tbl) > >> + return NULL; > >> + > >> + for (i = 0, j = 0; i < nf; i++, j += 2) { > >> + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", > >> + j, &data); > >> + if (ret < 0) > >> + return NULL; > >> + tbl[i].cpufreq_mhz = data / 1000; > >> + > >> + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", > >> + j + 1, &data); > >> + if (ret < 0) > >> + return NULL; > >> + > >> + tbl[i].memfreq_khz = data; > >> + pr_debug("Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, > >> + tbl[i].memfreq_khz); > >> + } > >> + *cnt = nf; > >> + tbl[i].cpufreq_mhz = 0; > > > > This looks like a lame version of the OPP table. > > I didn't know listing multiple frequencies in a opp was allowed. We can > probably get away with it here since we just parse the data here and not > populate data in the opp core. You are describing driver behaviour. DT describes hardware. So, the proper way to describe this kind of data is the OPP table.
On Mon, Feb 12, 2024 at 03:54:27PM +0530, Sibi Sankar wrote: > > > On 1/18/24 02:11, Dmitry Baryshkov wrote: > > On Wed, 17 Jan 2024 at 19:36, Sibi Sankar <quic_sibis@quicinc.com> wrote: Hi, I'll comment this patch fully, just a remark down below about this mail-thread. > > > > > > From: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > > > > > This patch introduces a client driver that interacts with the SCMI QCOM > > > > git grep This.patch Documentation/process/ > > > > > vendor protocol and passes on the required tuneables to start various > > > features running on the SCMI controller. > > > > Is there any word about the 'memlat'? No. Unless one reads into the > > patch, one can not come up with the idea of what is being introduced. > > ack, will fix it in the re-spin. > > > > > > > > > Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > > Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > > Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > > Co-developed-by: Amir Vajid <avajid@quicinc.com> > > > Signed-off-by: Amir Vajid <avajid@quicinc.com> > > > Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> > > > Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> > > > --- > > > drivers/soc/qcom/Kconfig | 10 + > > > drivers/soc/qcom/Makefile | 1 + > > > drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > > > > Should it go to drivers/firmware/arm_scmi instead? Or maybe to drivers/devfreq? > > I don't think it should go into arm_scmi unless Sudeep wants it there. > As for drivers/devfreq, I would have moved it there if this driver > benfitted being classified as a devfreq device. We can't use any of > the available governors on it and the tuneables appear way too custom. > These are the reasons why I placed it in drivers/soc/qcom instead. > I think we used to host a couple of generic SCMI driver related to standard protocols but they have been moved out of driver/firmware/arm_scmi into the related subsystem...not sure if Sudeep thinks otherwise but I suppose we want to host only SCMI drivers that are clearly lacking a place where to stay... > > > > > 3 files changed, 497 insertions(+) > > > create mode 100644 drivers/soc/qcom/qcom_scmi_client.c [snip] > > > +static int configure_cpucp_mon(struct scmi_memlat_info *info, int memory_index, int monitor_index) > > > +{ > > > + const struct qcom_scmi_vendor_ops *ops = info->ops; > > > + struct scmi_memory_info *memory = info->memory[memory_index]; > > > + struct scmi_monitor_info *monitor = memory->monitor[monitor_index]; > > > + struct scalar_param_msg scalar_msg; > > > + struct map_param_msg map_msg; > > > + struct node_msg msg; > > > + int ret; > > > + int i; > > > + > > > + msg.cpumask = monitor->mask; > > > + msg.hw_type = memory->hw_type; > > > + msg.mon_type = monitor->mon_type; > > > + msg.mon_idx = monitor->mon_idx; > > > + strscpy(msg.mon_name, monitor->mon_name, sizeof(msg.mon_name)); > > > + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MONITOR, sizeof(msg)); > > > + if (ret < 0) { > > > + pr_err("Failed to configure monitor %s\n", monitor->mon_name); > > > + return ret; > > > + } > > > + > > > + scalar_msg.hw_type = memory->hw_type; > > > + scalar_msg.mon_idx = monitor->mon_idx; > > > + scalar_msg.val = monitor->ipm_ceil; > > > + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_IPM_CEIL, > > > + sizeof(scalar_msg)); > > > + if (ret < 0) { > > > + pr_err("Failed to set ipm ceil for %s\n", monitor->mon_name); > > > + return ret; > > > + } > > > + > > > + map_msg.hw_type = memory->hw_type; > > > + map_msg.mon_idx = monitor->mon_idx; > > > + map_msg.nr_rows = monitor->freq_map_len; > > > + for (i = 0; i < monitor->freq_map_len; i++) { > > > + map_msg.tbl[i].v1 = monitor->freq_map[i].cpufreq_mhz; > > > + map_msg.tbl[i].v2 = monitor->freq_map[i].memfreq_khz / 1000; > > > + } > > > > So this table goes 1:1 to the firmware? Is it going to be the same for > > all versions of the SoC? If so, it might be better to turn it into the > > static data inside the driver. If it doesn't change, there is no need > > to put it to DT. > > The table does go directly to the firmware but obviously varies across > SoCs. Also since it's a SCMI client driver we don't have a way to > distinguish between SoCs based on compatibles. So it made more sense to > move it to the device tree instead. > Well, the SCMI fw running the server DOES know where it is running right ? So, if you have multiple fixed config tables to feed into the firmware that vary based on the SoC you are running on, you could add an SCMI command to your QCOM SCMI vendor protocol and expose a related operation in ops to get the actual SoC model, so that you can embed the tableS in the driver here (as suggested) and then choose at runtime which one to use based on the reported platform...this is clearly config stuff (sa said by others) so it just does not belong to DT descriptions. Thanks, Cristian
On Wed, Jan 17, 2024 at 11:04:55PM +0530, Sibi Sankar wrote: > From: Shivnandan Kumar <quic_kshivnan@quicinc.com> Hi > > This patch introduces a client driver that interacts with the SCMI QCOM > vendor protocol and passes on the required tuneables to start various > features running on the SCMI controller. > > Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> > Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > Co-developed-by: Amir Vajid <avajid@quicinc.com> > Signed-off-by: Amir Vajid <avajid@quicinc.com> > Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> > Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> > --- > drivers/soc/qcom/Kconfig | 10 + > drivers/soc/qcom/Makefile | 1 + > drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > 3 files changed, 497 insertions(+) > create mode 100644 drivers/soc/qcom/qcom_scmi_client.c > > diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig > index c6ca4de42586..1530558aebfb 100644 > --- a/drivers/soc/qcom/Kconfig > +++ b/drivers/soc/qcom/Kconfig > @@ -264,6 +264,16 @@ config QCOM_ICC_BWMON > the fixed bandwidth votes from cpufreq (CPU nodes) thus achieve high > memory throughput even with lower CPU frequencies. > > +config QCOM_SCMI_CLIENT > + tristate "Qualcomm Technologies Inc. SCMI client driver" > + depends on QCOM_SCMI_VENDOR_PROTOCOL || COMPILE_TEST > + default n > + help > + SCMI client driver registers for SCMI QCOM vendor protocol. > + > + This driver interacts with the vendor protocol and passes on the required > + tuneables to start various features running on the SCMI controller. > + > config QCOM_INLINE_CRYPTO_ENGINE > tristate > select QCOM_SCM > diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile > index 05b3d54e8dc9..c2a51293c886 100644 > --- a/drivers/soc/qcom/Makefile > +++ b/drivers/soc/qcom/Makefile > @@ -32,5 +32,6 @@ obj-$(CONFIG_QCOM_APR) += apr.o > obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o > obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o > obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o > +obj-$(CONFIG_QCOM_SCMI_CLIENT) += qcom_scmi_client.o > qcom_ice-objs += ice.o > obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o > diff --git a/drivers/soc/qcom/qcom_scmi_client.c b/drivers/soc/qcom/qcom_scmi_client.c > new file mode 100644 > index 000000000000..418aa7900496 > --- /dev/null > +++ b/drivers/soc/qcom/qcom_scmi_client.c > @@ -0,0 +1,486 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (c) 2024, The Linux Foundation. All rights reserved. > + */ > + > +#include <linux/cpu.h> > +#include <linux/err.h> > +#include <linux/errno.h> > +#include <linux/init.h> > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/of.h> > +#include <linux/platform_device.h> > +#include <linux/qcom_scmi_vendor.h> > +#include <linux/scmi_protocol.h> > + > +#define MAX_MEMORY_TYPES 3 > +#define MEMLAT_ALGO_STR 0x74616C6D656D /* "memlat" */ > +#define INVALID_IDX 0xFF > +#define MAX_NAME_LEN 20 > +#define MAX_MAP_ENTRIES 6 > +#define MAX_MONITOR_CNT 4 > +#define SCMI_VENDOR_MSG_START 3 > +#define SCMI_VENDOR_MSG_MODULE_START 16 > + > +enum scmi_memlat_protocol_cmd { > + MEMLAT_SET_LOG_LEVEL = SCMI_VENDOR_MSG_START, > + MEMLAT_FLUSH_LOGBUF, > + MEMLAT_SET_MEM_GROUP = SCMI_VENDOR_MSG_MODULE_START, > + MEMLAT_SET_MONITOR, > + MEMLAT_SET_COMMON_EV_MAP, > + MEMLAT_SET_GRP_EV_MAP, > + MEMLAT_ADAPTIVE_LOW_FREQ, > + MEMLAT_ADAPTIVE_HIGH_FREQ, > + MEMLAT_GET_ADAPTIVE_CUR_FREQ, > + MEMLAT_IPM_CEIL, > + MEMLAT_FE_STALL_FLOOR, > + MEMLAT_BE_STALL_FLOOR, > + MEMLAT_WB_PCT, > + MEMLAT_IPM_FILTER, > + MEMLAT_FREQ_SCALE_PCT, > + MEMLAT_FREQ_SCALE_CEIL_MHZ, > + MEMLAT_FREQ_SCALE_FLOOR_MHZ, > + MEMLAT_SAMPLE_MS, > + MEMLAT_MON_FREQ_MAP, > + MEMLAT_SET_MIN_FREQ, > + MEMLAT_SET_MAX_FREQ, > + MEMLAT_GET_CUR_FREQ, > + MEMLAT_START_TIMER, > + MEMLAT_STOP_TIMER, > + MEMLAT_GET_TIMESTAMP, > + MEMLAT_MAX_MSG > +}; So the reason why you can have just a single qualcomm vendor SCMI protocol is that it really implements and expose just a couple of set/get generic commands and exposes a few related ops so that you can piggyback any kind of real messages into this new sort of transport layer and at the end the full final message payloads are built here in the client driver...and any future further QC SCMI client driver will implement its own set of payloads for different protocols. Seems a bit odd to me (but certainly a creative way to abuse the SCMI stack), anyway I have personally nothing against it, if you are happy with this design, but the spec says that the protocol messages have to be little endian-encoded so I suppose that you should, down below, define your smuggled (:P) payloads with __le32 & friends and use the proper helpers to be sure that the values tranferred are properly interpreted, from the endianess point-of-view, on both sides of the channel. ...but Sudeep could think differently...I would wait for his feedback... > + > +struct map_table { > + u16 v1; > + u16 v2; > +}; > + > +struct map_param_msg { > + u32 hw_type; > + u32 mon_idx; > + u32 nr_rows; > + struct map_table tbl[MAX_MAP_ENTRIES]; > +} __packed; > + > +struct node_msg { > + u32 cpumask; > + u32 hw_type; > + u32 mon_type; > + u32 mon_idx; > + char mon_name[MAX_NAME_LEN]; > +}; > + > +struct scalar_param_msg { > + u32 hw_type; > + u32 mon_idx; > + u32 val; > +}; > + > +enum common_ev_idx { > + INST_IDX, > + CYC_IDX, > + FE_STALL_IDX, > + BE_STALL_IDX, > + NUM_COMMON_EVS > +}; > + > +enum grp_ev_idx { > + MISS_IDX, > + WB_IDX, > + ACC_IDX, > + NUM_GRP_EVS > +}; > + > +#define EV_CPU_CYCLES 0 > +#define EV_INST_RETIRED 2 > +#define EV_L2_D_RFILL 5 > + > +struct ev_map_msg { > + u32 num_evs; > + u32 hw_type; > + u32 cid[NUM_COMMON_EVS]; > +}; > + > +struct cpufreq_memfreq_map { > + unsigned int cpufreq_mhz; > + unsigned int memfreq_khz; > +}; > + > +struct scmi_monitor_info { > + struct cpufreq_memfreq_map *freq_map; > + char mon_name[MAX_NAME_LEN]; > + u32 mon_idx; > + u32 mon_type; > + u32 ipm_ceil; > + u32 mask; > + u32 freq_map_len; > +}; > + > +struct scmi_memory_info { > + struct scmi_monitor_info *monitor[MAX_MONITOR_CNT]; > + u32 hw_type; > + int monitor_cnt; > + u32 min_freq; > + u32 max_freq; > +}; > + > +struct scmi_memlat_info { > + struct scmi_protocol_handle *ph; > + const struct qcom_scmi_vendor_ops *ops; > + struct scmi_memory_info *memory[MAX_MEMORY_TYPES]; > + int memory_cnt; > +}; > + > +static int get_mask(struct device_node *np, u32 *mask) > +{ > + struct device_node *dev_phandle; > + struct device *cpu_dev; > + int cpu, i = 0; > + int ret = -ENODEV; > + > + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); > + while (dev_phandle) { > + for_each_possible_cpu(cpu) { > + cpu_dev = get_cpu_device(cpu); > + if (cpu_dev && cpu_dev->of_node == dev_phandle) { > + *mask |= BIT(cpu); > + ret = 0; > + break; > + } > + } > + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); > + } > + > + return ret; > +} > + > +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev, > + struct device_node *of_node, > + u32 *cnt) > +{ > + int len, nf, i, j; > + u32 data; > + struct cpufreq_memfreq_map *tbl; > + int ret; > + > + if (!of_find_property(of_node, "qcom,cpufreq-memfreq-tbl", &len)) > + return NULL; > + len /= sizeof(data); > + > + if (len % 2 || len == 0) > + return NULL; > + nf = len / 2; > + > + tbl = devm_kzalloc(dev, (nf + 1) * sizeof(struct cpufreq_memfreq_map), > + GFP_KERNEL); > + if (!tbl) > + return NULL; > + > + for (i = 0, j = 0; i < nf; i++, j += 2) { > + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", > + j, &data); > + if (ret < 0) > + return NULL; Bailing out here and down below you are not releasing the devm_ allocated memory AND what is worst is that the caller of this function does not check either for a NULL return value (see below) > + tbl[i].cpufreq_mhz = data / 1000; > + > + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", > + j + 1, &data); > + if (ret < 0) > + return NULL; > + Ditto. > + tbl[i].memfreq_khz = data; > + pr_debug("Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, > + tbl[i].memfreq_khz); > + } > + *cnt = nf; > + tbl[i].cpufreq_mhz = 0; > + > + return tbl; > +} > + > +static int process_scmi_memlat_of_node(struct scmi_device *sdev, struct scmi_memlat_info *info) > +{ > + struct device_node *memlat_np, *memory_np, *monitor_np; > + struct scmi_memory_info *memory; > + struct scmi_monitor_info *monitor; > + int ret = 0, i = 0, j; > + u32 memfreq[2]; > + > + of_node_get(sdev->handle->dev->of_node); > + memlat_np = of_find_node_by_name(sdev->handle->dev->of_node, "memlat"); > + > + info->memory_cnt = of_get_child_count(memlat_np); > + if (info->memory_cnt <= 0) > + pr_err("No memory nodes present\n"); > + > + for_each_child_of_node(memlat_np, memory_np) { > + memory = devm_kzalloc(&sdev->dev, sizeof(*memory), GFP_KERNEL); > + if (!memory) { > + ret = -ENOMEM; > + goto err; > + } > + > + ret = of_property_read_u32(memory_np, "reg", &memory->hw_type); > + if (ret) { No devm_free see below. > + pr_err("Failed to read memory type\n"); > + goto err; > + } > + > + memory->monitor_cnt = of_get_child_count(memory_np); > + if (memory->monitor_cnt <= 0) { > + pr_err("No monitor nodes present\n"); > + ret = -EINVAL; No devm_free see below. > + goto err; > + } > + > + ret = of_property_read_u32_array(memory_np, "freq-table-khz", memfreq, 2); > + if (ret && (ret != -EINVAL)) { > + pr_err("Failed to read min/max freq %d\n", ret); No devm_free see below. > + goto err; > + } > + > + memory->min_freq = memfreq[0]; > + memory->max_freq = memfreq[1]; > + info->memory[i] = memory; > + j = 0; > + i++; > + > + for_each_child_of_node(memory_np, monitor_np) { > + monitor = devm_kzalloc(&sdev->dev, sizeof(*monitor), GFP_KERNEL); > + if (!monitor) { > + ret = -ENOMEM; No devm_free see below. > + goto err; > + } > + > + monitor->mon_type = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 1 : 0; > + monitor->ipm_ceil = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 0 : 20000000; > + > + if (get_mask(monitor_np, &monitor->mask)) { > + pr_err("Failed to populate cpu mask %d\n", ret); No devm_free see below. > + goto err; > + } > + > + monitor->freq_map = init_cpufreq_memfreq_map(&sdev->dev, monitor_np, > + &monitor->freq_map_len); Return can be NULL here and it is not checked, but seems to me that is then accessed without any NULL-check.... > + snprintf(monitor->mon_name, MAX_NAME_LEN, "monitor-%d", j); > + monitor->mon_idx = j; > + > + memory->monitor[j] = monitor; > + j++; > + } > + } > + > + return 0; > + > +err: > + of_node_put(memlat_np); > + Here you bailout with an -ENOMEM or other errors but without releasing devm_ allocated stuff AND in the caller of this you carry on despite any error and you neither did release this memory in the caller, so this unused mem-areas will be eventually freed only on driver removal. > + return ret; > +} > + > +static int configure_cpucp_common_events(struct scmi_memlat_info *info) > +{ > + const struct qcom_scmi_vendor_ops *ops = info->ops; > + u8 ev_map[NUM_COMMON_EVS]; > + struct ev_map_msg msg; > + int ret; > + > + memset(ev_map, 0xFF, NUM_COMMON_EVS); > + > + msg.num_evs = NUM_COMMON_EVS; > + msg.hw_type = INVALID_IDX; > + msg.cid[INST_IDX] = EV_INST_RETIRED; > + msg.cid[CYC_IDX] = EV_CPU_CYCLES; > + msg.cid[FE_STALL_IDX] = INVALID_IDX; > + msg.cid[BE_STALL_IDX] = INVALID_IDX; > + > + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_COMMON_EV_MAP, > + sizeof(msg)); > + return ret; > +} > + > +static int configure_cpucp_grp(struct scmi_memlat_info *info, int memory_index) > +{ > + const struct qcom_scmi_vendor_ops *ops = info->ops; > + struct scmi_memory_info *memory = info->memory[memory_index]; > + struct ev_map_msg ev_msg; > + u8 ev_map[NUM_GRP_EVS]; > + struct node_msg msg; > + int ret; > + > + msg.cpumask = 0; > + msg.hw_type = memory->hw_type; > + msg.mon_type = 0; > + msg.mon_idx = 0; > + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MEM_GROUP, sizeof(msg)); > + if (ret < 0) { > + pr_err("Failed to configure mem type %d\n", memory->hw_type); > + return ret; > + } > + > + memset(ev_map, 0xFF, NUM_GRP_EVS); > + ev_msg.num_evs = NUM_GRP_EVS; > + ev_msg.hw_type = memory->hw_type; > + ev_msg.cid[MISS_IDX] = EV_L2_D_RFILL; > + ev_msg.cid[WB_IDX] = INVALID_IDX; > + ev_msg.cid[ACC_IDX] = INVALID_IDX; > + ret = ops->set_param(info->ph, &ev_msg, MEMLAT_ALGO_STR, MEMLAT_SET_GRP_EV_MAP, > + sizeof(ev_msg)); > + if (ret < 0) { > + pr_err("Failed to configure event map for mem type %d\n", memory->hw_type); > + return ret; > + } > + > + return ret; > +} > + > +static int configure_cpucp_mon(struct scmi_memlat_info *info, int memory_index, int monitor_index) > +{ > + const struct qcom_scmi_vendor_ops *ops = info->ops; > + struct scmi_memory_info *memory = info->memory[memory_index]; > + struct scmi_monitor_info *monitor = memory->monitor[monitor_index]; > + struct scalar_param_msg scalar_msg; > + struct map_param_msg map_msg; > + struct node_msg msg; > + int ret; > + int i; > + > + msg.cpumask = monitor->mask; > + msg.hw_type = memory->hw_type; > + msg.mon_type = monitor->mon_type; > + msg.mon_idx = monitor->mon_idx; > + strscpy(msg.mon_name, monitor->mon_name, sizeof(msg.mon_name)); > + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MONITOR, sizeof(msg)); > + if (ret < 0) { > + pr_err("Failed to configure monitor %s\n", monitor->mon_name); > + return ret; > + } > + > + scalar_msg.hw_type = memory->hw_type; > + scalar_msg.mon_idx = monitor->mon_idx; > + scalar_msg.val = monitor->ipm_ceil; > + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_IPM_CEIL, > + sizeof(scalar_msg)); > + if (ret < 0) { > + pr_err("Failed to set ipm ceil for %s\n", monitor->mon_name); > + return ret; > + } > + > + map_msg.hw_type = memory->hw_type; > + map_msg.mon_idx = monitor->mon_idx; > + map_msg.nr_rows = monitor->freq_map_len; > + for (i = 0; i < monitor->freq_map_len; i++) { > + map_msg.tbl[i].v1 = monitor->freq_map[i].cpufreq_mhz; > + map_msg.tbl[i].v2 = monitor->freq_map[i].memfreq_khz / 1000; > + } > + ret = ops->set_param(info->ph, &map_msg, MEMLAT_ALGO_STR, MEMLAT_MON_FREQ_MAP, > + sizeof(map_msg)); > + if (ret < 0) { > + pr_err("Failed to configure freq_map for %s\n", monitor->mon_name); > + return ret; > + } > + > + scalar_msg.hw_type = memory->hw_type; > + scalar_msg.mon_idx = monitor->mon_idx; > + scalar_msg.val = memory->min_freq; > + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MIN_FREQ, > + sizeof(scalar_msg)); > + if (ret < 0) { > + pr_err("Failed to set min_freq for %s\n", monitor->mon_name); > + return ret; > + } > + > + scalar_msg.hw_type = memory->hw_type; > + scalar_msg.mon_idx = monitor->mon_idx; > + scalar_msg.val = memory->max_freq; > + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MAX_FREQ, > + sizeof(scalar_msg)); > + if (ret < 0) > + pr_err("Failed to set max_freq for %s\n", monitor->mon_name); > + > + return ret; > +} > + > +static int cpucp_memlat_init(struct scmi_device *sdev) > +{ > + const struct scmi_handle *handle = sdev->handle; > + const struct qcom_scmi_vendor_ops *ops; > + struct scmi_protocol_handle *ph; > + struct scmi_memlat_info *info; > + u32 cpucp_sample_ms = 8; > + int ret, i, j; > + > + if (!handle) > + return -ENODEV; > + > + ops = handle->devm_protocol_get(sdev, QCOM_SCMI_VENDOR_PROTOCOL, &ph); > + if (IS_ERR(ops)) > + return PTR_ERR(ops); > + > + info = devm_kzalloc(&sdev->dev, sizeof(*info), GFP_KERNEL); > + if (!info) > + return -ENOMEM; > + > + ret = process_scmi_memlat_of_node(sdev, info); > + if (ret) > + pr_err("Failed to configure common events: %d\n", ret); Carry on without cleaning up stuff allocated by process_scmi_memlat_of_node() Thanks, Cristian
On Mon, Feb 12, 2024 at 03:54:27PM +0530, Sibi Sankar wrote: > > > On 1/18/24 02:11, Dmitry Baryshkov wrote: > > On Wed, 17 Jan 2024 at 19:36, Sibi Sankar <quic_sibis@quicinc.com> wrote: > > > > > > From: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > > > > > This patch introduces a client driver that interacts with the SCMI QCOM > > > > git grep This.patch Documentation/process/ > > > > > vendor protocol and passes on the required tuneables to start various > > > features running on the SCMI controller. > > > > Is there any word about the 'memlat'? No. Unless one reads into the > > patch, one can not come up with the idea of what is being introduced. > > ack, will fix it in the re-spin. > > > > > > > > > Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > > Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > > Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > > Co-developed-by: Amir Vajid <avajid@quicinc.com> > > > Signed-off-by: Amir Vajid <avajid@quicinc.com> > > > Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> > > > Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> > > > --- > > > drivers/soc/qcom/Kconfig | 10 + > > > drivers/soc/qcom/Makefile | 1 + > > > drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > > > > Should it go to drivers/firmware/arm_scmi instead? Or maybe to drivers/devfreq? > > I don't think it should go into arm_scmi unless Sudeep wants it there. I won't comment or worry about those silly details yet. I would like to understand the design better first and all these can be sorted when we get closer to getting this merged.
On Tue, Feb 20, 2024 at 04:19:02PM +0000, Cristian Marussi wrote: > On Wed, Jan 17, 2024 at 11:04:55PM +0530, Sibi Sankar wrote: > > From: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > > > This patch introduces a client driver that interacts with the SCMI QCOM > > vendor protocol and passes on the required tuneables to start various > > features running on the SCMI controller. > > > > Signed-off-by: Shivnandan Kumar <quic_kshivnan@quicinc.com> > > Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@quicinc.com> > > Co-developed-by: Amir Vajid <avajid@quicinc.com> > > Signed-off-by: Amir Vajid <avajid@quicinc.com> > > Co-developed-by: Sibi Sankar <quic_sibis@quicinc.com> > > Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com> > > --- > > drivers/soc/qcom/Kconfig | 10 + > > drivers/soc/qcom/Makefile | 1 + > > drivers/soc/qcom/qcom_scmi_client.c | 486 ++++++++++++++++++++++++++++ > > 3 files changed, 497 insertions(+) > > create mode 100644 drivers/soc/qcom/qcom_scmi_client.c > > > > diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig > > index c6ca4de42586..1530558aebfb 100644 > > --- a/drivers/soc/qcom/Kconfig > > +++ b/drivers/soc/qcom/Kconfig > > @@ -264,6 +264,16 @@ config QCOM_ICC_BWMON > > the fixed bandwidth votes from cpufreq (CPU nodes) thus achieve high > > memory throughput even with lower CPU frequencies. > > > > +config QCOM_SCMI_CLIENT > > + tristate "Qualcomm Technologies Inc. SCMI client driver" > > + depends on QCOM_SCMI_VENDOR_PROTOCOL || COMPILE_TEST > > + default n > > + help > > + SCMI client driver registers for SCMI QCOM vendor protocol. > > + > > + This driver interacts with the vendor protocol and passes on the required > > + tuneables to start various features running on the SCMI controller. > > + > > config QCOM_INLINE_CRYPTO_ENGINE > > tristate > > select QCOM_SCM > > diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile > > index 05b3d54e8dc9..c2a51293c886 100644 > > --- a/drivers/soc/qcom/Makefile > > +++ b/drivers/soc/qcom/Makefile > > @@ -32,5 +32,6 @@ obj-$(CONFIG_QCOM_APR) += apr.o > > obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o > > obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o > > obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o > > +obj-$(CONFIG_QCOM_SCMI_CLIENT) += qcom_scmi_client.o > > qcom_ice-objs += ice.o > > obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o > > diff --git a/drivers/soc/qcom/qcom_scmi_client.c b/drivers/soc/qcom/qcom_scmi_client.c > > new file mode 100644 > > index 000000000000..418aa7900496 > > --- /dev/null > > +++ b/drivers/soc/qcom/qcom_scmi_client.c > > @@ -0,0 +1,486 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (c) 2024, The Linux Foundation. All rights reserved. > > + */ > > + > > +#include <linux/cpu.h> > > +#include <linux/err.h> > > +#include <linux/errno.h> > > +#include <linux/init.h> > > +#include <linux/kernel.h> > > +#include <linux/module.h> > > +#include <linux/of.h> > > +#include <linux/platform_device.h> > > +#include <linux/qcom_scmi_vendor.h> > > +#include <linux/scmi_protocol.h> > > + > > +#define MAX_MEMORY_TYPES 3 > > +#define MEMLAT_ALGO_STR 0x74616C6D656D /* "memlat" */ > > +#define INVALID_IDX 0xFF > > +#define MAX_NAME_LEN 20 > > +#define MAX_MAP_ENTRIES 6 > > +#define MAX_MONITOR_CNT 4 > > +#define SCMI_VENDOR_MSG_START 3 > > +#define SCMI_VENDOR_MSG_MODULE_START 16 > > + > > +enum scmi_memlat_protocol_cmd { > > + MEMLAT_SET_LOG_LEVEL = SCMI_VENDOR_MSG_START, > > + MEMLAT_FLUSH_LOGBUF, > > + MEMLAT_SET_MEM_GROUP = SCMI_VENDOR_MSG_MODULE_START, > > + MEMLAT_SET_MONITOR, > > + MEMLAT_SET_COMMON_EV_MAP, > > + MEMLAT_SET_GRP_EV_MAP, > > + MEMLAT_ADAPTIVE_LOW_FREQ, > > + MEMLAT_ADAPTIVE_HIGH_FREQ, > > + MEMLAT_GET_ADAPTIVE_CUR_FREQ, > > + MEMLAT_IPM_CEIL, > > + MEMLAT_FE_STALL_FLOOR, > > + MEMLAT_BE_STALL_FLOOR, > > + MEMLAT_WB_PCT, > > + MEMLAT_IPM_FILTER, > > + MEMLAT_FREQ_SCALE_PCT, > > + MEMLAT_FREQ_SCALE_CEIL_MHZ, > > + MEMLAT_FREQ_SCALE_FLOOR_MHZ, > > + MEMLAT_SAMPLE_MS, > > + MEMLAT_MON_FREQ_MAP, > > + MEMLAT_SET_MIN_FREQ, > > + MEMLAT_SET_MAX_FREQ, > > + MEMLAT_GET_CUR_FREQ, > > + MEMLAT_START_TIMER, > > + MEMLAT_STOP_TIMER, > > + MEMLAT_GET_TIMESTAMP, > > + MEMLAT_MAX_MSG > > +}; > > So the reason why you can have just a single qualcomm vendor SCMI protocol is > that it really implements and expose just a couple of set/get generic commands > and exposes a few related ops so that you can piggyback any kind of real messages > into this new sort of transport layer and at the end the full final message payloads > are built here in the client driver...and any future further QC SCMI client driver > will implement its own set of payloads for different protocols. > > Seems a bit odd to me (but certainly a creative way to abuse the SCMI stack), anyway > I have personally nothing against it, if you are happy with this design, but the spec > says that the protocol messages have to be little endian-encoded so I suppose that you > should, down below, define your smuggled (:P) payloads with __le32 & friends and use > the proper helpers to be sure that the values tranferred are properly interpreted, from > the endianess point-of-view, on both sides of the channel. > > ...but Sudeep could think differently...I would wait for his feedback... No not really. I agree. I have more fundamental question which I expected to find some reasoning somewhere. It could be me not having followed the patches properly, but 1. Why this bus/memlat dvfs can't be done using perf protocol ? 2. What std perf protocol lacks that is needed here ? 3. Why it needs to be vendor specific ? 4. If so, why MEMLAT alone doesn't deserve a separate vendor protocol ? IOW why is vendor protocol is implemented as more generic one for all qcom needs(I am assuming here) and MEMLAT is just command extension of that protocol(as Cristian said protocol on top of the generic qcom vendor protocol) These are few details that helps to understand and review the patches. So please add as much as documentation. If it is too much, we can remove it (but I am sure we may not reach that point).
On 2/12/24 11:33, Sibi Sankar wrote: [...] >> >>> + monitor->mon_type = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 1 : 0; >>> + monitor->ipm_ceil = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 0 : 20000000; >> >> What does it even mean for a monitor to be a compute mon? >> > > When a monitor is marked compute-mon it means that the table is > followed religiously irrespective whether the instruction per miss > count threshold (ipm) is exceeded or not. Equivalent to having > a cpufreq map -> l3/DDR bw mapping upstream. I'm sorta puzzled why the OS would even be required to program this, since L3/DDR/CPU frequencies are known by various stages of boot and secure firmware too. What happens if we omit this? Is the default configuration identical to this? Or does it need explicit enabling? Konrad
On 6/19/24 01:07, Konrad Dybcio wrote: > > > On 2/12/24 11:33, Sibi Sankar wrote: > > [...] > > >>> >>>> + monitor->mon_type = (of_property_read_bool(monitor_np, >>>> "qcom,compute-mon")) ? 1 : 0; >>>> + monitor->ipm_ceil = (of_property_read_bool(monitor_np, >>>> "qcom,compute-mon")) ? 0 : 20000000; >>> >>> What does it even mean for a monitor to be a compute mon? >>> >> >> When a monitor is marked compute-mon it means that the table is >> followed religiously irrespective whether the instruction per miss >> count threshold (ipm) is exceeded or not. Equivalent to having >> a cpufreq map -> l3/DDR bw mapping upstream. > > I'm sorta puzzled why the OS would even be required to program this, since > L3/DDR/CPU frequencies are known by various stages of boot and secure > firmware > too. > > What happens if we omit this? Is the default configuration identical to > this? > Or does it need explicit enabling? CPUCP isn't expected to know the various ranges supported by the memory buses it can vote on and from a sandboxing perspective one would want to control what CPUCP has access to as well. It also can't arrive at the exact values just from the OPP tables we pass on as well. So it doesn't have any default values to start off with. For all these reasons, they need explicit setting up and without it, the algorithm wouldn't function as expected. -Sibi > > Konrad
On 1.07.2024 10:44 AM, Sibi Sankar wrote: > > > On 6/19/24 01:07, Konrad Dybcio wrote: >> >> >> On 2/12/24 11:33, Sibi Sankar wrote: >> >> [...] >> >> >>>> >>>>> + monitor->mon_type = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 1 : 0; BTW: the ternary operator here is unnecessary, but to make it readable, please make an enum / #define describing the two, as magic values are discouraged >>>>> + monitor->ipm_ceil = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 0 : 20000000; Given that you check the same condition here, an if-else block may be more readable, perhaps some comment like: ipm_ceil = 0; /* Always keep a vote, no matter the bus traffic */ >>>> >>>> What does it even mean for a monitor to be a compute mon? >>>> >>> >>> When a monitor is marked compute-mon it means that the table is >>> followed religiously irrespective whether the instruction per miss >>> count threshold (ipm) is exceeded or not. Equivalent to having >>> a cpufreq map -> l3/DDR bw mapping upstream. I.. don't really like that this exists as something that requires OS intervention, but since it does, I suppose it takes a couple lines of code less than adding OPP entries for each and every PSTATE and NUM_SKUs.. >> >> I'm sorta puzzled why the OS would even be required to program this, since >> L3/DDR/CPU frequencies are known by various stages of boot and secure firmware >> too. >> >> What happens if we omit this? Is the default configuration identical to this? >> Or does it need explicit enabling? > > CPUCP isn't expected to know the various ranges supported by the memory > buses it can vote on and from a sandboxing perspective one would want to > control what CPUCP has access to as well. It also can't arrive at the > exact values just from the OPP tables we pass on as well. So it doesn't > have any default values to start off with. For all these reasons, they > need explicit setting up and without it, the algorithm wouldn't function > as expected. Ok, I was thinking more of a scenario where XBL/GH would take care of this.. Throwing in my 5 cents, this could perhaps be moved there in future FW designs (the earlier in the chain the better, especially to keep kicking out gunyah a viable option), as I don't think Linux is the greatest place for storing one-shot configuration data, especially for blocks that already run their own firmware.. I would imagine this could speed up booting as well, if DRAM was appropriately scaled during the boot splash stage (unless it already is either scaled or pinned to FMAX) Konrad
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index c6ca4de42586..1530558aebfb 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -264,6 +264,16 @@ config QCOM_ICC_BWMON the fixed bandwidth votes from cpufreq (CPU nodes) thus achieve high memory throughput even with lower CPU frequencies. +config QCOM_SCMI_CLIENT + tristate "Qualcomm Technologies Inc. SCMI client driver" + depends on QCOM_SCMI_VENDOR_PROTOCOL || COMPILE_TEST + default n + help + SCMI client driver registers for SCMI QCOM vendor protocol. + + This driver interacts with the vendor protocol and passes on the required + tuneables to start various features running on the SCMI controller. + config QCOM_INLINE_CRYPTO_ENGINE tristate select QCOM_SCM diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 05b3d54e8dc9..c2a51293c886 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_QCOM_APR) += apr.o obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o +obj-$(CONFIG_QCOM_SCMI_CLIENT) += qcom_scmi_client.o qcom_ice-objs += ice.o obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o diff --git a/drivers/soc/qcom/qcom_scmi_client.c b/drivers/soc/qcom/qcom_scmi_client.c new file mode 100644 index 000000000000..418aa7900496 --- /dev/null +++ b/drivers/soc/qcom/qcom_scmi_client.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ + +#include <linux/cpu.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/qcom_scmi_vendor.h> +#include <linux/scmi_protocol.h> + +#define MAX_MEMORY_TYPES 3 +#define MEMLAT_ALGO_STR 0x74616C6D656D /* "memlat" */ +#define INVALID_IDX 0xFF +#define MAX_NAME_LEN 20 +#define MAX_MAP_ENTRIES 6 +#define MAX_MONITOR_CNT 4 +#define SCMI_VENDOR_MSG_START 3 +#define SCMI_VENDOR_MSG_MODULE_START 16 + +enum scmi_memlat_protocol_cmd { + MEMLAT_SET_LOG_LEVEL = SCMI_VENDOR_MSG_START, + MEMLAT_FLUSH_LOGBUF, + MEMLAT_SET_MEM_GROUP = SCMI_VENDOR_MSG_MODULE_START, + MEMLAT_SET_MONITOR, + MEMLAT_SET_COMMON_EV_MAP, + MEMLAT_SET_GRP_EV_MAP, + MEMLAT_ADAPTIVE_LOW_FREQ, + MEMLAT_ADAPTIVE_HIGH_FREQ, + MEMLAT_GET_ADAPTIVE_CUR_FREQ, + MEMLAT_IPM_CEIL, + MEMLAT_FE_STALL_FLOOR, + MEMLAT_BE_STALL_FLOOR, + MEMLAT_WB_PCT, + MEMLAT_IPM_FILTER, + MEMLAT_FREQ_SCALE_PCT, + MEMLAT_FREQ_SCALE_CEIL_MHZ, + MEMLAT_FREQ_SCALE_FLOOR_MHZ, + MEMLAT_SAMPLE_MS, + MEMLAT_MON_FREQ_MAP, + MEMLAT_SET_MIN_FREQ, + MEMLAT_SET_MAX_FREQ, + MEMLAT_GET_CUR_FREQ, + MEMLAT_START_TIMER, + MEMLAT_STOP_TIMER, + MEMLAT_GET_TIMESTAMP, + MEMLAT_MAX_MSG +}; + +struct map_table { + u16 v1; + u16 v2; +}; + +struct map_param_msg { + u32 hw_type; + u32 mon_idx; + u32 nr_rows; + struct map_table tbl[MAX_MAP_ENTRIES]; +} __packed; + +struct node_msg { + u32 cpumask; + u32 hw_type; + u32 mon_type; + u32 mon_idx; + char mon_name[MAX_NAME_LEN]; +}; + +struct scalar_param_msg { + u32 hw_type; + u32 mon_idx; + u32 val; +}; + +enum common_ev_idx { + INST_IDX, + CYC_IDX, + FE_STALL_IDX, + BE_STALL_IDX, + NUM_COMMON_EVS +}; + +enum grp_ev_idx { + MISS_IDX, + WB_IDX, + ACC_IDX, + NUM_GRP_EVS +}; + +#define EV_CPU_CYCLES 0 +#define EV_INST_RETIRED 2 +#define EV_L2_D_RFILL 5 + +struct ev_map_msg { + u32 num_evs; + u32 hw_type; + u32 cid[NUM_COMMON_EVS]; +}; + +struct cpufreq_memfreq_map { + unsigned int cpufreq_mhz; + unsigned int memfreq_khz; +}; + +struct scmi_monitor_info { + struct cpufreq_memfreq_map *freq_map; + char mon_name[MAX_NAME_LEN]; + u32 mon_idx; + u32 mon_type; + u32 ipm_ceil; + u32 mask; + u32 freq_map_len; +}; + +struct scmi_memory_info { + struct scmi_monitor_info *monitor[MAX_MONITOR_CNT]; + u32 hw_type; + int monitor_cnt; + u32 min_freq; + u32 max_freq; +}; + +struct scmi_memlat_info { + struct scmi_protocol_handle *ph; + const struct qcom_scmi_vendor_ops *ops; + struct scmi_memory_info *memory[MAX_MEMORY_TYPES]; + int memory_cnt; +}; + +static int get_mask(struct device_node *np, u32 *mask) +{ + struct device_node *dev_phandle; + struct device *cpu_dev; + int cpu, i = 0; + int ret = -ENODEV; + + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); + while (dev_phandle) { + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (cpu_dev && cpu_dev->of_node == dev_phandle) { + *mask |= BIT(cpu); + ret = 0; + break; + } + } + dev_phandle = of_parse_phandle(np, "qcom,cpulist", i++); + } + + return ret; +} + +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev, + struct device_node *of_node, + u32 *cnt) +{ + int len, nf, i, j; + u32 data; + struct cpufreq_memfreq_map *tbl; + int ret; + + if (!of_find_property(of_node, "qcom,cpufreq-memfreq-tbl", &len)) + return NULL; + len /= sizeof(data); + + if (len % 2 || len == 0) + return NULL; + nf = len / 2; + + tbl = devm_kzalloc(dev, (nf + 1) * sizeof(struct cpufreq_memfreq_map), + GFP_KERNEL); + if (!tbl) + return NULL; + + for (i = 0, j = 0; i < nf; i++, j += 2) { + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", + j, &data); + if (ret < 0) + return NULL; + tbl[i].cpufreq_mhz = data / 1000; + + ret = of_property_read_u32_index(of_node, "qcom,cpufreq-memfreq-tbl", + j + 1, &data); + if (ret < 0) + return NULL; + + tbl[i].memfreq_khz = data; + pr_debug("Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, + tbl[i].memfreq_khz); + } + *cnt = nf; + tbl[i].cpufreq_mhz = 0; + + return tbl; +} + +static int process_scmi_memlat_of_node(struct scmi_device *sdev, struct scmi_memlat_info *info) +{ + struct device_node *memlat_np, *memory_np, *monitor_np; + struct scmi_memory_info *memory; + struct scmi_monitor_info *monitor; + int ret = 0, i = 0, j; + u32 memfreq[2]; + + of_node_get(sdev->handle->dev->of_node); + memlat_np = of_find_node_by_name(sdev->handle->dev->of_node, "memlat"); + + info->memory_cnt = of_get_child_count(memlat_np); + if (info->memory_cnt <= 0) + pr_err("No memory nodes present\n"); + + for_each_child_of_node(memlat_np, memory_np) { + memory = devm_kzalloc(&sdev->dev, sizeof(*memory), GFP_KERNEL); + if (!memory) { + ret = -ENOMEM; + goto err; + } + + ret = of_property_read_u32(memory_np, "reg", &memory->hw_type); + if (ret) { + pr_err("Failed to read memory type\n"); + goto err; + } + + memory->monitor_cnt = of_get_child_count(memory_np); + if (memory->monitor_cnt <= 0) { + pr_err("No monitor nodes present\n"); + ret = -EINVAL; + goto err; + } + + ret = of_property_read_u32_array(memory_np, "freq-table-khz", memfreq, 2); + if (ret && (ret != -EINVAL)) { + pr_err("Failed to read min/max freq %d\n", ret); + goto err; + } + + memory->min_freq = memfreq[0]; + memory->max_freq = memfreq[1]; + info->memory[i] = memory; + j = 0; + i++; + + for_each_child_of_node(memory_np, monitor_np) { + monitor = devm_kzalloc(&sdev->dev, sizeof(*monitor), GFP_KERNEL); + if (!monitor) { + ret = -ENOMEM; + goto err; + } + + monitor->mon_type = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 1 : 0; + monitor->ipm_ceil = (of_property_read_bool(monitor_np, "qcom,compute-mon")) ? 0 : 20000000; + + if (get_mask(monitor_np, &monitor->mask)) { + pr_err("Failed to populate cpu mask %d\n", ret); + goto err; + } + + monitor->freq_map = init_cpufreq_memfreq_map(&sdev->dev, monitor_np, + &monitor->freq_map_len); + snprintf(monitor->mon_name, MAX_NAME_LEN, "monitor-%d", j); + monitor->mon_idx = j; + + memory->monitor[j] = monitor; + j++; + } + } + + return 0; + +err: + of_node_put(memlat_np); + + return ret; +} + +static int configure_cpucp_common_events(struct scmi_memlat_info *info) +{ + const struct qcom_scmi_vendor_ops *ops = info->ops; + u8 ev_map[NUM_COMMON_EVS]; + struct ev_map_msg msg; + int ret; + + memset(ev_map, 0xFF, NUM_COMMON_EVS); + + msg.num_evs = NUM_COMMON_EVS; + msg.hw_type = INVALID_IDX; + msg.cid[INST_IDX] = EV_INST_RETIRED; + msg.cid[CYC_IDX] = EV_CPU_CYCLES; + msg.cid[FE_STALL_IDX] = INVALID_IDX; + msg.cid[BE_STALL_IDX] = INVALID_IDX; + + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_COMMON_EV_MAP, + sizeof(msg)); + return ret; +} + +static int configure_cpucp_grp(struct scmi_memlat_info *info, int memory_index) +{ + const struct qcom_scmi_vendor_ops *ops = info->ops; + struct scmi_memory_info *memory = info->memory[memory_index]; + struct ev_map_msg ev_msg; + u8 ev_map[NUM_GRP_EVS]; + struct node_msg msg; + int ret; + + msg.cpumask = 0; + msg.hw_type = memory->hw_type; + msg.mon_type = 0; + msg.mon_idx = 0; + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MEM_GROUP, sizeof(msg)); + if (ret < 0) { + pr_err("Failed to configure mem type %d\n", memory->hw_type); + return ret; + } + + memset(ev_map, 0xFF, NUM_GRP_EVS); + ev_msg.num_evs = NUM_GRP_EVS; + ev_msg.hw_type = memory->hw_type; + ev_msg.cid[MISS_IDX] = EV_L2_D_RFILL; + ev_msg.cid[WB_IDX] = INVALID_IDX; + ev_msg.cid[ACC_IDX] = INVALID_IDX; + ret = ops->set_param(info->ph, &ev_msg, MEMLAT_ALGO_STR, MEMLAT_SET_GRP_EV_MAP, + sizeof(ev_msg)); + if (ret < 0) { + pr_err("Failed to configure event map for mem type %d\n", memory->hw_type); + return ret; + } + + return ret; +} + +static int configure_cpucp_mon(struct scmi_memlat_info *info, int memory_index, int monitor_index) +{ + const struct qcom_scmi_vendor_ops *ops = info->ops; + struct scmi_memory_info *memory = info->memory[memory_index]; + struct scmi_monitor_info *monitor = memory->monitor[monitor_index]; + struct scalar_param_msg scalar_msg; + struct map_param_msg map_msg; + struct node_msg msg; + int ret; + int i; + + msg.cpumask = monitor->mask; + msg.hw_type = memory->hw_type; + msg.mon_type = monitor->mon_type; + msg.mon_idx = monitor->mon_idx; + strscpy(msg.mon_name, monitor->mon_name, sizeof(msg.mon_name)); + ret = ops->set_param(info->ph, &msg, MEMLAT_ALGO_STR, MEMLAT_SET_MONITOR, sizeof(msg)); + if (ret < 0) { + pr_err("Failed to configure monitor %s\n", monitor->mon_name); + return ret; + } + + scalar_msg.hw_type = memory->hw_type; + scalar_msg.mon_idx = monitor->mon_idx; + scalar_msg.val = monitor->ipm_ceil; + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_IPM_CEIL, + sizeof(scalar_msg)); + if (ret < 0) { + pr_err("Failed to set ipm ceil for %s\n", monitor->mon_name); + return ret; + } + + map_msg.hw_type = memory->hw_type; + map_msg.mon_idx = monitor->mon_idx; + map_msg.nr_rows = monitor->freq_map_len; + for (i = 0; i < monitor->freq_map_len; i++) { + map_msg.tbl[i].v1 = monitor->freq_map[i].cpufreq_mhz; + map_msg.tbl[i].v2 = monitor->freq_map[i].memfreq_khz / 1000; + } + ret = ops->set_param(info->ph, &map_msg, MEMLAT_ALGO_STR, MEMLAT_MON_FREQ_MAP, + sizeof(map_msg)); + if (ret < 0) { + pr_err("Failed to configure freq_map for %s\n", monitor->mon_name); + return ret; + } + + scalar_msg.hw_type = memory->hw_type; + scalar_msg.mon_idx = monitor->mon_idx; + scalar_msg.val = memory->min_freq; + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MIN_FREQ, + sizeof(scalar_msg)); + if (ret < 0) { + pr_err("Failed to set min_freq for %s\n", monitor->mon_name); + return ret; + } + + scalar_msg.hw_type = memory->hw_type; + scalar_msg.mon_idx = monitor->mon_idx; + scalar_msg.val = memory->max_freq; + ret = ops->set_param(info->ph, &scalar_msg, MEMLAT_ALGO_STR, MEMLAT_SET_MAX_FREQ, + sizeof(scalar_msg)); + if (ret < 0) + pr_err("Failed to set max_freq for %s\n", monitor->mon_name); + + return ret; +} + +static int cpucp_memlat_init(struct scmi_device *sdev) +{ + const struct scmi_handle *handle = sdev->handle; + const struct qcom_scmi_vendor_ops *ops; + struct scmi_protocol_handle *ph; + struct scmi_memlat_info *info; + u32 cpucp_sample_ms = 8; + int ret, i, j; + + if (!handle) + return -ENODEV; + + ops = handle->devm_protocol_get(sdev, QCOM_SCMI_VENDOR_PROTOCOL, &ph); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + info = devm_kzalloc(&sdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + ret = process_scmi_memlat_of_node(sdev, info); + if (ret) + pr_err("Failed to configure common events: %d\n", ret); + + info->ph = ph; + info->ops = ops; + + ret = configure_cpucp_common_events(info); + if (ret < 0) + pr_err("Failed to configure common events: %d\n", ret); + + for (i = 0; i < info->memory_cnt; i++) { + ret = configure_cpucp_grp(info, i); + if (ret < 0) + pr_err("Failed to configure mem group: %d\n", ret); + + for (j = 0; j < info->memory[i]->monitor_cnt; j++) { + /* Configure per monitor parameters */ + ret = configure_cpucp_mon(info, i, j); + if (ret < 0) + pr_err("Failed to configure monitor: %d\n", ret); + } + } + + ret = ops->set_param(ph, &cpucp_sample_ms, MEMLAT_ALGO_STR, MEMLAT_SAMPLE_MS, + sizeof(cpucp_sample_ms)); + if (ret < 0) + pr_err("Failed to set cpucp sample_ms ret = %d\n", ret); + + /* Start sampling and voting timer */ + ret = ops->start_activity(ph, NULL, MEMLAT_ALGO_STR, MEMLAT_START_TIMER, 0); + if (ret < 0) + pr_err("Error in starting the mem group timer %d\n", ret); + + dev_set_drvdata(&sdev->dev, info); + + return ret; +} + +static int scmi_client_probe(struct scmi_device *sdev) +{ + cpucp_memlat_init(sdev); + + return 0; +} + +static const struct scmi_device_id scmi_id_table[] = { + { .protocol_id = QCOM_SCMI_VENDOR_PROTOCOL, .name = "qcom_scmi_vendor_protocol" }, + { }, +}; +MODULE_DEVICE_TABLE(scmi, scmi_id_table); + +static struct scmi_driver qcom_scmi_client_drv = { + .name = "qcom-scmi-driver", + .probe = scmi_client_probe, + .id_table = scmi_id_table, +}; +module_scmi_driver(qcom_scmi_client_drv); + +MODULE_DESCRIPTION("QTI SCMI client driver"); +MODULE_LICENSE("GPL");