[PATCH V4 4/5] soc: qcom: Introduce SCMI based Memlat (Memory Latency) governor
Cristian Marussi
cristian.marussi at arm.com
Tue Oct 22 05:00:18 PDT 2024
On Mon, Oct 07, 2024 at 11:40:22AM +0530, Sibi Sankar wrote:
> Introduce a client driver that uses the memlat algorithm string
> hosted on QCOM SCMI Generic Extension Protocol to detect memory
> latency workloads and control frequency/level of the various
> memory buses (DDR/LLCC/DDR_QOS).
>
Hi,
a few small remarks, down below.
> Co-developed-by: Shivnandan Kumar <quic_kshivnan at quicinc.com>
> Signed-off-by: Shivnandan Kumar <quic_kshivnan at quicinc.com>
> Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu at quicinc.com>
> Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu at quicinc.com>
> Co-developed-by: Amir Vajid <avajid at quicinc.com>
> Signed-off-by: Amir Vajid <avajid at quicinc.com>
> Signed-off-by: Sibi Sankar <quic_sibis at quicinc.com>
> ---
[snip]
> +static int populate_cluster_info(u32 *cluster_info)
> +{
> + char name[MAX_NAME_LEN];
> + int i = 0;
> +
> + struct device_node *cn __free(device_node) = of_find_node_by_path("/cpus");
> + if (!cn)
> + return -ENODEV;
Not sure if this is some new coding style accepted for the new cleanup.h
fancy stuff (sincere question/doubt...so please take this with a grain of salt),
BUT, if not, you should consider grouping these definitions/initializations at
the start of the block whose scope they are in...like:
struct device_node *cn __free(device_node) = of_find_node_by_path("/cpus");
struct device_node *map __free(device_node) = NULL;
char name[MAX_NAME_LEN];
int i = 0;
if (!cn)
return -ENODEV;
map = of_get_child_by_name(cn, "cpu-map");
if (!map)
return -ENODEV;
> +
> + struct device_node *map __free(device_node) = of_get_child_by_name(cn, "cpu-map");
> + if (!map)
> + return -ENODEV;
> +
As said...
> + do {
> + snprintf(name, sizeof(name), "cluster%d", i);
> + struct device_node *c __free(device_node) = of_get_child_by_name(map, name);
> + if (!c)
> + break;
> +
> + *(cluster_info + i) = of_get_child_count(c);
> + i++;
> + } while (1);
> +
> + return 0;
> +}
> +
> +static void populate_physical_mask(struct device_node *np, u32 *mask, u32 *cluster_info)
> +{
> + struct device_node *dev_phandle __free(device_node);
...so this gets cleaned up on return....
> + int cpu, i = 0, physical_id;
> +
> + do {
> + dev_phandle = of_parse_phandle(np, "cpus", i++);
BUT won't this need an of_node_put() between calls to of_parse_phandle()
inside this loop? ... so can't this be done like
int cpu, i = 0, physical_id;
while (1) {
struct device_node *dev_phandle __free(device_node) = of_parse_phandle(np, "cpus", i++);
if (!dev_phandle)
break;
cpu = of_cpu_node_to_id(dev_phandle);
if (cpu != -ENODEV) {
....
}
...not even build tested ... ah... :P
> + cpu = of_cpu_node_to_id(dev_phandle);
> + if (cpu != -ENODEV) {
> + physical_id = topology_core_id(cpu);
> + for (int j = 0; j < topology_cluster_id(cpu); j++)
> + physical_id += *(cluster_info + j);
> + *mask |= BIT(physical_id);
> + }
> + } while (dev_phandle);
> +}
> +
> +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev,
> + struct scmi_memory_info *memory,
> + struct device_node *of_node,
> + u32 *cnt)
> +{
> + struct device_node *tbl_np __free(device_node), *opp_np __free(device_node);
> + struct cpufreq_memfreq_map *tbl;
> + int ret, i = 0;
> + u32 level, len;
> + u64 rate;
> +
> + tbl_np = of_parse_phandle(of_node, "operating-points-v2", 0);
> + if (!tbl_np)
> + return ERR_PTR(-ENODEV);
> +
> + len = min(of_get_available_child_count(tbl_np), MAX_MAP_ENTRIES);
> + if (len == 0)
> + return ERR_PTR(-ENODEV);
> +
> + tbl = devm_kzalloc(dev, (len + 1) * sizeof(struct cpufreq_memfreq_map),
> + GFP_KERNEL);
> + if (!tbl)
> + return ERR_PTR(-ENOMEM);
> +
> + for_each_available_child_of_node(tbl_np, opp_np) {
This seems to lack an of_node_put() at the end, but possibly the scoped
version for_each_available_child_of_node_scoped() will do it for you...
> + ret = of_property_read_u64_index(opp_np, "opp-hz", 0, &rate);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].cpufreq_mhz = rate / HZ_PER_MHZ;
> +
> + if (memory->hw_type != QCOM_MEM_TYPE_DDR_QOS) {
> + ret = of_property_read_u64_index(opp_np, "opp-hz", 1, &rate);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].memfreq_khz = rate / HZ_PER_KHZ;
> + } else {
> + ret = of_property_read_u32(opp_np, "opp-level", &level);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].memfreq_khz = level;
> + }
> +
> + dev_dbg(dev, "Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, tbl[i].memfreq_khz);
> + i++;
> + }
> + *cnt = len;
> +
> + return tbl;
> +}
> +
> +static int process_scmi_memlat_of_node(struct scmi_device *sdev, struct scmi_memlat_info *info)
> +{
> + struct scmi_monitor_info *monitor;
> + struct scmi_memory_info *memory;
> + char name[MAX_NAME_LEN];
> + u64 memfreq[2];
> + int ret;
> +
> + ret = populate_cluster_info(info->cluster_info);
> + if (ret < 0) {
> + dev_err_probe(&sdev->dev, ret, "failed to populate cluster info\n");
> + goto err;
> + }
> +
> + of_node_get(sdev->dev.of_node);
Can't you use the cleanup.h magic for this as well and get rid of a few gotos down below?
...this function seems the ideal case for that...
> + do {
> + snprintf(name, sizeof(name), "memory-%d", info->memory_cnt);
> + struct device_node *memory_np __free(device_node) =
> + of_find_node_by_name(sdev->dev.of_node, name);
> +
> + if (!memory_np)
> + break;
> +
> + if (info->memory_cnt >= MAX_MEMORY_TYPES)
Shouldn't MAX_MEMORY_TYPES be something discoverable at runtime through
some command of your vendor protocol? For better future scalability, I
mean...maybe I am overthinking...
> + return dev_err_probe(&sdev->dev, -EINVAL,
> + "failed to parse unsupported memory type\n");
> +
> + memory = devm_kzalloc(&sdev->dev, sizeof(*memory), GFP_KERNEL);
> + if (!memory) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
Thanks,
Cristian
More information about the linux-arm-kernel
mailing list