[PATCH v3 1/7] ACPI/PPTT: Add Processor Properties Topology Table parsing

Mon Oct 16 07:24:18 PDT 2017

On 12/10/2017 20:48, Jeremy Linton wrote:
> ACPI 6.2 adds a new table, which describes how processing units
> are related to each other in tree like fashion. Caches are
> also sprinkled throughout the tree and describe the properties
> of the caches in relation to other caches and processing units.
>
> Add the code to parse the cache hierarchy and report the total
> number of levels of cache for a given core using
> acpi_find_last_cache_level() as well as fill out the individual
> cores cache information with cache_setup_acpi() once the
> cpu_cacheinfo structure has been populated by the arch specific
> code.
>
> Further, report peers in the topology using setup_acpi_cpu_topology()
> to report a unique ID for each processing unit at a given level
> in the tree. These unique id's can then be used to match related
> processing units which exist as threads, COD (clusters
> on die), within a given package, etc.
>

As already commented, there are many lines over 80 characters.

And so far I only really looked at cpu topology part.

> Signed-off-by: Jeremy Linton <jeremy.linton at arm.com>
> ---
>  drivers/acpi/pptt.c | 485 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 485 insertions(+)
>  create mode 100644 drivers/acpi/pptt.c
>
> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
> new file mode 100644
> index 000000000000..c86715fed4a7
> --- /dev/null
> +++ b/drivers/acpi/pptt.c
> @@ -0,1 +1,485 @@
> +/*
> + * Copyright (C) 2017, ARM
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * This file implements parsing of Processor Properties Topology Table (PPTT)
> + * which is optionally used to describe the processor and cache topology.
> + * Due to the relative pointers used throughout the table, this doesn't
> + * leverage the existing subtable parsing in the kernel.
> + */
> +#define pr_fmt(fmt) "ACPI PPTT: " fmt
> +
> +#include <linux/acpi.h>
> +#include <linux/cacheinfo.h>
> +#include <acpi/processor.h>
> +
> +/*
> + * Given the PPTT table, find and verify that the subtable entry
> + * is located within the table
> + */
> +static struct acpi_subtable_header *fetch_pptt_subtable(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	struct acpi_subtable_header *entry;
> +
> +	/* there isn't a subtable at reference 0 */
> +	if (!pptt_ref)
> +		return NULL;
> +
> +	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
> +		return NULL;
> +
> +	entry = (struct acpi_subtable_header *)((u8 *)table_hdr + pptt_ref);
> +
> +	if (pptt_ref + entry->length > table_hdr->length)
> +		return NULL;
> +
> +	return entry;
> +}
> +
> +static struct acpi_pptt_processor *fetch_pptt_node(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, pptt_ref);
> +}
> +
> +static struct acpi_pptt_cache *fetch_pptt_cache(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref);
> +}
> +
> +static struct acpi_subtable_header *acpi_get_pptt_resource(
> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *node, int resource)
> +{
> +	u32 ref;
> +
> +	if (resource >= node->number_of_priv_resources)
> +		return NULL;
> +
> +	ref = *(u32 *)((u8 *)node + sizeof(struct acpi_pptt_processor) +
> +		      sizeof(u32) * resource);
> +
> +	return fetch_pptt_subtable(table_hdr, ref);
> +}
> +
> +/*
> + * given a pptt resource, verify that it is a cache node, then walk

/s/given/Given/

> + * down each level of caches, counting how many levels are found
> + * as well as checking the cache type (icache, dcache, unified). If a
> + * level & type match, then we set found, and continue the search.
> + * Once the entire cache branch has been walked return its max
> + * depth.
> + */
> +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> +				int local_level,
> +				struct acpi_subtable_header *res,
> +				struct acpi_pptt_cache **found,
> +				int level, int type)
> +{
> +	struct acpi_pptt_cache *cache;
> +
> +	if (res->type != ACPI_PPTT_TYPE_CACHE)
> +		return 0;
> +
> +	cache = (struct acpi_pptt_cache *) res;

please remove whitespace before res

> +	while (cache) {
> +		local_level++;
> +
> +		if ((local_level == level) &&
> +		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
> +		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
> +			if (*found != NULL)
> +				pr_err("Found duplicate cache level/type unable to determine uniqueness\n");
> +
> +			pr_debug("Found cache @ level %d\n", level);
> +			*found = cache;
> +			/*
> +			 * continue looking at this node's resource list
> +			 * to verify that we don't find a duplicate
> +			 * cache node.
> +			 */
> +		}
> +		cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
> +	}
> +	return local_level;
> +}
> +
> +/*
> + * Given a CPU node look for cache levels that exist at this level, and then
> + * for each cache node, count how many levels exist below (logically above) it.
> + * If a level and type are specified, and we find that level/type, abort
> + * processing and return the acpi_pptt_cache structure.
> + */
> +static struct acpi_pptt_cache *acpi_find_cache_level(
> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *cpu_node,
> +	int *starting_level, int level, int type)
> +{
> +	struct acpi_subtable_header *res;
> +	int number_of_levels = *starting_level;
> +	int resource = 0;
> +	struct acpi_pptt_cache *ret = NULL;
> +	int local_level;
> +
> +	/* walk down from the processor node */
> +	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
> +		resource++;
> +
> +		local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
> +						   res, &ret, level, type);
> +		/*
> +		 * we are looking for the max depth. Since its potentially
> +		 * possible for a given node to have resources with differing
> +		 * depths verify that the depth we have found is the largest.
> +		 */
> +		if (number_of_levels < local_level)
> +			number_of_levels = local_level;
> +	}
> +	if (number_of_levels > *starting_level)
> +		*starting_level = number_of_levels;
> +
> +	return ret;
> +}
> +
> +/*
> + * given a processor node containing a processing unit, walk into it and count
> + * how many levels exist solely for it, and then walk up each level until we hit
> + * the root node (ignore the package level because it may be possible to have
> + * caches that exist across packages). Count the number of cache levels that
> + * exist at each level on the way up.
> + */
> +static int acpi_process_node(struct acpi_table_header *table_hdr,
> +			     struct acpi_pptt_processor *cpu_node)
> +{
> +	int total_levels = 0;
> +
> +	do {
> +		acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> +	} while (cpu_node);
> +
> +	return total_levels;
> +}
> +
> +/* determine if the given node is a leaf node */
> +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
> +			       struct acpi_pptt_processor *node)
> +{
> +	struct acpi_subtable_header *entry;
> +	unsigned long table_end;
> +	u32 node_entry;
> +	struct acpi_pptt_processor *cpu_node;
> +
> +	table_end = (unsigned long)table_hdr + table_hdr->length;
> +	node_entry = (u32)((u8 *)node - (u8 *)table_hdr);
> +	entry = (struct acpi_subtable_header *)((u8 *)table_hdr +
> +						sizeof(struct acpi_table_pptt));
> +
> +	while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) < table_end) {
> +		cpu_node = (struct acpi_pptt_processor *)entry;
> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
> +		    (cpu_node->parent == node_entry))
> +			return 0;
> +		entry = (struct acpi_subtable_header *)((u8 *)entry + entry->length);
> +	}
> +	return 1;
> +}
> +
> +/*
> + * Find the subtable entry describing the provided processor
> + */
> +static struct acpi_pptt_processor *acpi_find_processor_node(
> +	struct acpi_table_header *table_hdr,
> +	u32 acpi_cpu_id)
> +{
> +	struct acpi_subtable_header *entry;
> +	unsigned long table_end;
> +	struct acpi_pptt_processor *cpu_node;
> +
> +	table_end = (unsigned long)table_hdr + table_hdr->length;
> +	entry = (struct acpi_subtable_header *)((u8 *)table_hdr +
> +						sizeof(struct acpi_table_pptt));
> +

This is the first time looking at your implementation of the PPTT 
driver. Has it been mentioned before that it is rather inefficient to 
re-parse the table for every cpu in the system? Actually within the cpu 
parsing loop we call acpi_pptt_leaf_node(), which does another table 
parsing loop.

However this is simple.

I do know the version from wangxiongfeng had a kmalloc per node, which 
would also be somewhat inefficient.

But I worry that this implementation does not scale with larger numbers 
of CPUs.

> +	/* find the processor structure associated with this cpuid */
> +	while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) < table_end) {
> +		cpu_node = (struct acpi_pptt_processor *)entry;
> +
> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
> +		    acpi_pptt_leaf_node(table_hdr, cpu_node)) {
> +			pr_debug("checking phy_cpu_id %d against acpi id %d\n",
> +				 acpi_cpu_id, cpu_node->acpi_processor_id);
> +			if (acpi_cpu_id == cpu_node->acpi_processor_id) {
> +				/* found the correct entry */
> +				pr_debug("match found!\n");

Do we really need to add 2 debug messages for case of checking for and 
finding a valid match?

> +				return (struct acpi_pptt_processor *)entry;
> +			}
> +		}
> +
> +		if (entry->length == 0) {
> +			pr_err("Invalid zero length subtable\n");
> +			break;
> +		}
> +		entry = (struct acpi_subtable_header *)
> +			((u8 *)entry + entry->length);
> +	}
> +
> +	return NULL;
> +}
> +
> +/*
> + * Given a acpi_pptt_processor node, walk up until we identify the
> + * package that the node is associated with or we run out of levels
> + * to request.
> + */
> +static struct acpi_pptt_processor *acpi_find_processor_package_id(

One would assume from the name that this function returns an integer 
value, that being an index for the package for that cpu

> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *cpu,

Do you think that "cpu_node" would be a better name?

> +	int level)
> +{
> +	struct acpi_pptt_processor *prev_node;
> +
> +	while (cpu && level && !(cpu->flags & ACPI_PPTT_PHYSICAL_PACKAGE)) {
> +		pr_debug("level %d\n", level);

that's not such a useful message and I can imagine it creates so many prints

> +		prev_node = fetch_pptt_node(table_hdr, cpu->parent);
> +		if (prev_node == NULL)
> +			break;
> +		cpu = prev_node;
> +		level--;
> +	}
> +	return cpu;
> +}
> +
> +static int acpi_parse_pptt(struct acpi_table_header *table_hdr, u32 acpi_cpu_id)
> +{
> +	int number_of_levels = 0;
> +	struct acpi_pptt_processor *cpu;
> +
> +	cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
> +	if (cpu)
> +		number_of_levels = acpi_process_node(table_hdr, cpu);
> +
> +	return number_of_levels;
> +}
> +
> +#define ACPI_6_2_CACHE_TYPE_DATA		      (0x0)
> +#define ACPI_6_2_CACHE_TYPE_INSTR		      (1<<2)
> +#define ACPI_6_2_CACHE_TYPE_UNIFIED		      (1<<3)
> +#define ACPI_6_2_CACHE_POLICY_WB		      (0x0)
> +#define ACPI_6_2_CACHE_POLICY_WT		      (1<<4)
> +#define ACPI_6_2_CACHE_READ_ALLOCATE		      (0x0)
> +#define ACPI_6_2_CACHE_WRITE_ALLOCATE		      (0x01)
> +#define ACPI_6_2_CACHE_RW_ALLOCATE		      (0x02)
> +
> +static u8 acpi_cache_type(enum cache_type type)
> +{
> +	switch (type) {
> +	case CACHE_TYPE_DATA:
> +		pr_debug("Looking for data cache\n");
> +		return ACPI_6_2_CACHE_TYPE_DATA;
> +	case CACHE_TYPE_INST:
> +		pr_debug("Looking for instruction cache\n");
> +		return ACPI_6_2_CACHE_TYPE_INSTR;
> +	default:
> +		pr_debug("Unknown cache type, assume unified\n");
> +	case CACHE_TYPE_UNIFIED:
> +		pr_debug("Looking for unified cache\n");
> +		return ACPI_6_2_CACHE_TYPE_UNIFIED;
> +	}
> +}
> +
> +/* find the ACPI node describing the cache type/level for the given CPU */
> +static struct acpi_pptt_cache *acpi_find_cache_node(
> +	struct acpi_table_header *table_hdr, u32 acpi_cpu_id,
> +	enum cache_type type, unsigned int level,
> +	struct acpi_pptt_processor **node)
> +{
> +	int total_levels = 0;
> +	struct acpi_pptt_cache *found = NULL;
> +	struct acpi_pptt_processor *cpu_node;
> +	u8 acpi_type = acpi_cache_type(type);
> +
> +	pr_debug("Looking for CPU %d's level %d cache type %d\n",
> +		 acpi_cpu_id, level, acpi_type);
> +
> +	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
> +	if (!cpu_node)
> +		return NULL;
> +
> +	do {
> +		found = acpi_find_cache_level(table_hdr, cpu_node, &total_levels, level, acpi_type);
> +		*node = cpu_node;
> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> +	} while ((cpu_node) && (!found));
> +
> +	return found;
> +}
> +
> +int acpi_find_last_cache_level(unsigned int cpu)
> +{
> +	u32 acpi_cpu_id;
> +	struct acpi_table_header *table;
> +	int number_of_levels = 0;
> +	acpi_status status;
> +
> +	pr_debug("Cache Setup find last level cpu=%d\n", cpu);

these prints (and others) probably should be verbose level or removed

> +
> +	acpi_cpu_id = acpi_cpu_get_madt_gicc(cpu)->uid;
> +	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
> +	if (ACPI_FAILURE(status)) {
> +		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");

is this really an error?

> +	} else {
> +		number_of_levels = acpi_parse_pptt(table, acpi_cpu_id);
> +		acpi_put_table(table);
> +	}
> +	pr_debug("Cache Setup find last level level=%d\n", number_of_levels);

I think that this could be better worded

> +
> +	return number_of_levels;
> +}
> +
> +/*
> + * The ACPI spec implies that the fields in the cache structures are used to
> + * extend and correct the information probed from the hardware. In the case
> + * of arm64 the CCSIDR probing has been removed because it might be incorrect.
> + */
> +static void update_cache_properties(struct cacheinfo *this_leaf,
> +				    struct acpi_pptt_cache *found_cache,
> +				    struct acpi_pptt_processor *cpu_node)
> +{
> +	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID)
> +		this_leaf->size = found_cache->size;
> +	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID)
> +		this_leaf->coherency_line_size = found_cache->line_size;
> +	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID)
> +		this_leaf->number_of_sets = found_cache->number_of_sets;
> +	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID)
> +		this_leaf->ways_of_associativity = found_cache->associativity;
> +	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID)
> +		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
> +		case ACPI_6_2_CACHE_POLICY_WT:
> +			this_leaf->attributes = CACHE_WRITE_THROUGH;
> +			break;
> +		case ACPI_6_2_CACHE_POLICY_WB:
> +			this_leaf->attributes = CACHE_WRITE_BACK;
> +			break;
> +		default:
> +			pr_err("Unknown ACPI cache policy %d\n",
> +			      found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY);
> +		}
> +	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID)
> +		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
> +		case ACPI_6_2_CACHE_READ_ALLOCATE:
> +			this_leaf->attributes |= CACHE_READ_ALLOCATE;
> +			break;
> +		case ACPI_6_2_CACHE_WRITE_ALLOCATE:
> +			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
> +			break;
> +		case ACPI_6_2_CACHE_RW_ALLOCATE:
> +			this_leaf->attributes |=
> +				CACHE_READ_ALLOCATE|CACHE_WRITE_ALLOCATE;
> +			break;
> +		default:
> +			pr_err("Unknown ACPI cache allocation policy %d\n",
> +			   found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE);
> +		}
> +}
> +
> +static void cache_setup_acpi_cpu(struct acpi_table_header *table,
> +				 unsigned int cpu)
> +{
> +	struct acpi_pptt_cache *found_cache;
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	u32 acpi_cpu_id = acpi_cpu_get_madt_gicc(cpu)->uid;
> +	struct cacheinfo *this_leaf;
> +	unsigned int index = 0;
> +	struct acpi_pptt_processor *cpu_node = NULL;
> +
> +	while (index < get_cpu_cacheinfo(cpu)->num_leaves) {

cpu does not change, so, for efficiency, can you use 
this_cpu_ci->num_leaves?

> +		this_leaf = this_cpu_ci->info_list + index;
> +		found_cache = acpi_find_cache_node(table, acpi_cpu_id,
> +						   this_leaf->type,
> +						   this_leaf->level,
> +						   &cpu_node);
> +		pr_debug("found = %p %p\n", found_cache, cpu_node);

I am not sure how useful printing pointers is to the user, even if NULL. 
Therse prints are too verbose.

> +		if (found_cache)
> +			update_cache_properties(this_leaf,
> +						found_cache,
> +						cpu_node);
> +
> +		index++;
> +	}
> +}
> +
> +static int topology_setup_acpi_cpu(struct acpi_table_header *table,
> +				    unsigned int cpu, int level)
> +{

It's not clear what is suppossed to be returned from this function, if 
anything, since it's job is seemingly to "setup"

> +	struct acpi_pptt_processor *cpu_node;
> +	u32 acpi_cpu_id = acpi_cpu_get_madt_gicc(cpu)->uid;
> +
> +	cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
> +	if (cpu_node) {
> +		cpu_node = acpi_find_processor_package_id(table, cpu_node, level);
> +		/* Only the first level has a guaranteed id */
> +		if (level == 0)
> +			return cpu_node->acpi_processor_id;
> +		return (int)((u8 *)cpu_node - (u8 *)table);

Sorry, but I just don't get this. As I understand, our intention is to 
find the core/cluster/package index in the system for a given cpu, right?

If so, how is the distance between the table base and cpu level's node 
the same as the system index? I would say that this value is unique, but 
are we expecting sequential system indexing per level?

> +	}
> +	pr_err_once("PPTT table found, but unable to locate core for %d\n",

the code seems to intermix terms "core" and "cpu" - is this intentional?

> +		    cpu);
> +	return -ENOENT;
> +}
> +
> +/*
> + * simply assign a ACPI cache entry to each known CPU cache entry
> + * determining which entries are shared is done later.
> + */
> +int cache_setup_acpi(unsigned int cpu)
> +{
> +	struct acpi_table_header *table;
> +	acpi_status status;
> +
> +	pr_debug("Cache Setup ACPI cpu %d\n", cpu);

Please don't leave whitespace after "cpu"

> +
> +	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
> +	if (ACPI_FAILURE(status)) {
> +		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");
> +		return -ENOENT;
> +	}
> +
> +	cache_setup_acpi_cpu(table, cpu);
> +	acpi_put_table(table);
> +
> +	return status;
> +}
> +
> +/*
> + * Determine a topology unique ID for each thread/core/cluster/socket/etc.
> + * This ID can then be used to group peers.
> + */
> +int setup_acpi_cpu_topology(unsigned int cpu, int level)

I think that you should add a function description to explain what cpu 
and level are.

This function does no setup either. I think that doing a setup would be 
doing something which has a persistent result. Instead, this function 
gets the cpu topology index for a certain hierarchy level.

> +{
> +	struct acpi_table_header *table;
> +	acpi_status status;
> +	int retval;
> +
> +	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
> +	if (ACPI_FAILURE(status)) {
> +		pr_err_once("No PPTT table found, cpu topology may be inaccurate\n");

As before, is this really an error?

> +		return -ENOENT;
> +	}
> +	retval = topology_setup_acpi_cpu(table, cpu, level);
> +	pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n",
> +		 cpu, level, retval);
> +	acpi_put_table(table);
> +
> +	return retval;
> +}
>

Thanks,
John