[PATCH v5 4/6] x86: Add hardware prefetch control driver for x86

Kohei Tarumizu tarumizu.kohei at fujitsu.com
Tue Jun 7 05:05:28 PDT 2022


Add module init/exit code that creates the x86 sysfs attributes
"hardware_prefetcher_enable", "ip_prefetcher_enable" and
"adjacent_cache_line_prefetcher_enable".

This driver works only if the CPU model is mapped to a register
specification type (e.g. TYPE_L12_BASE) in pfctl_match[].

The details of the register (MSR_MISC_FEATURE_CONTROL) that this patch
reads and writes are described in:

"https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html"
    Volume 4

Signed-off-by: Kohei Tarumizu <tarumizu.kohei at fujitsu.com>
---
 arch/x86/kernel/cpu/x86-pfctl.c | 363 ++++++++++++++++++++++++++++++++
 1 file changed, 363 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/x86-pfctl.c

diff --git a/arch/x86/kernel/cpu/x86-pfctl.c b/arch/x86/kernel/cpu/x86-pfctl.c
new file mode 100644
index 000000000000..154e927d092c
--- /dev/null
+++ b/arch/x86/kernel/cpu/x86-pfctl.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 FUJITSU LIMITED
+ *
+ * x86 Hardware Prefetch Control support
+ */
+
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/msr.h>
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has three types of register specifications.
+ *
+ * The register specification of TYPE_L12_BASE is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    Reserved
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:3] Reserved
+ *
+ * The register specification of TYPE_L12_PLUS is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    L2 Adjacent Cache Line Prefetcher Disable (R/W)
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [3]    DCU IP Prefetcher Disable (R/W)
+ * [63:4] Reserved
+ *
+ * The register specification of TYPE_L12_XPHI is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:2] Reserved
+ *
+ * See "Intel 64 and IA-32 Architectures Software Developer's Manual"
+ * (https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html)
+ * for register specification details.
+ */
+enum {
+	TYPE_L12_BASE,	/* L2 and DCU hardware prefetcher bits only */
+	TYPE_L12_PLUS,	/* adds adjacent-cache-line and IP prefetcher bits */
+	TYPE_L12_XPHI,	/* DCU hardware prefetcher bit moves to bit 1 */
+};
+
+struct x86_pfctl_attr {				/* one prefetcher sysfs file */
+	struct device_attribute		attr;	/* show/store recover the wrapper via container_of() */
+	u64				mask;	/* MSR_MISC_FEATURE_CONTROL bit; set bit reads back as 0 */
+};
+
+struct pfctl_group {				/* ties a cache leaf to its attribute groups */
+	unsigned int			level;	/* cache level compared against cacheinfo level */
+	enum cache_type			type;	/* cache type compared against cacheinfo type */
+	const struct attribute_group	**groups; /* groups for the leaf; NULL marks the table end */
+};
+
+static enum cpuhp_state hp_online;	/* state id from cpuhp_setup_state(); only used in this file */
+
+static inline unsigned int pfctl_dev_get_cpu(struct device *pfctl_dev)
+{
+	return *(u32 *)dev_get_drvdata(pfctl_dev);	/* drvdata is the pointer given to cpu_device_create() */
+}
+
+static ssize_t	/* sysfs show: emit 1 if the prefetcher behind @attr is enabled, else 0 */
+pfctl_show(struct device *pfctl_dev, struct device_attribute *attr, char *buf)
+{
+	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
+	u64 val;
+	int ret;	/* non-zero when the cross-CPU MSR read failed */
+
+	ret = rdmsrl_on_cpu(pfctl_dev_get_cpu(pfctl_dev), MSR_MISC_FEATURE_CONTROL, &val);
+	if (ret)	/* val was not filled in, so report the error instead */
+		return ret;
+	return sysfs_emit(buf, "%d\n", val & xa->mask ? 0 : 1);	/* set bit == disabled */
+}
+
+struct write_info {	/* request passed from pfctl_store() to pfctl_write() */
+	u64 mask;	/* MSR bit(s) to change */
+	bool enable;	/* true clears the bit(s), false sets them */
+};
+
+/*
+ * Update MSR_MISC_FEATURE_CONTROL on the CPU this function runs on.
+ *
+ * pfctl_store() invokes this through smp_call_function_single(), so the
+ * read-modify-write below is done inside an interrupt-disabled region and
+ * cannot conflict with MSR writes issued by other drivers on this CPU.
+ */
+static void pfctl_write(void *info)
+{
+	const struct write_info *req = info;
+	u64 msr = 0;
+
+	rdmsrl(MSR_MISC_FEATURE_CONTROL, msr);
+
+	/* A set bit disables the prefetcher, so enabling clears the mask. */
+	msr = req->enable ? (msr & ~req->mask) : (msr | req->mask);
+
+	wrmsrl(MSR_MISC_FEATURE_CONTROL, msr);
+}
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has "core" scope, so pfctl_store() takes this lock
+ * to keep writers on different logical processors of one core from racing.
+ */
+static DEFINE_MUTEX(pfctl_mutex);
+
+static ssize_t	/* sysfs store: parse a boolean from @buf and enable/disable the prefetcher */
+pfctl_store(struct device *pfctl_dev, struct device_attribute *attr,
+	    const char *buf, size_t size)
+{
+	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
+	unsigned int cpu = pfctl_dev_get_cpu(pfctl_dev);
+	struct write_info info = { .mask = xa->mask };
+	int ret;
+
+	if (strtobool(buf, &info.enable) < 0)
+		return -EINVAL;
+
+	/* Serialize the cross-CPU read-modify-write done by pfctl_write(). */
+	mutex_lock(&pfctl_mutex);
+	ret = smp_call_function_single(cpu, pfctl_write, &info, true);
+	mutex_unlock(&pfctl_mutex);
+	if (ret)	/* nothing was written, e.g. the target CPU is no longer online */
+		return ret;
+	return size;
+}
+
+#define PFCTL_ATTR(_name, _level, _bit)					\
+	struct x86_pfctl_attr attr_l##_level##_##_name = {		\
+		.attr = __ATTR(_name, 0600, pfctl_show, pfctl_store),	\
+		.mask = BIT_ULL(_bit), }	/* defines attr_l<_level>_<_name>, file mode 0600 */
+
+static PFCTL_ATTR(hardware_prefetcher_enable,			1, 2);	/* DCU hardware prefetcher, bit 2 */
+static PFCTL_ATTR(hardware_prefetcher_enable,			2, 0);	/* L2 hardware prefetcher, bit 0 */
+static PFCTL_ATTR(ip_prefetcher_enable,				1, 3);	/* DCU IP prefetcher, bit 3 */
+static PFCTL_ATTR(adjacent_cache_line_prefetcher_enable,	2, 1);	/* L2 adjacent cache line prefetcher, bit 1 */
+
+static struct attribute *l1_attrs[] = {	/* not const: x86_pfctl_init() may cut the list at [1] */
+	&attr_l1_hardware_prefetcher_enable.attr.attr,
+	&attr_l1_ip_prefetcher_enable.attr.attr,
+	NULL,
+};
+
+static struct attribute *l2_attrs[] = {	/* not const: x86_pfctl_init() may cut the list at [1] */
+	&attr_l2_hardware_prefetcher_enable.attr.attr,
+	&attr_l2_adjacent_cache_line_prefetcher_enable.attr.attr,
+	NULL,
+};
+
+static struct attribute_group l1_group = {	/* unnamed group wrapping l1_attrs */
+	.attrs = l1_attrs,
+};
+
+static struct attribute_group l2_group = {	/* unnamed group wrapping l2_attrs */
+	.attrs = l2_attrs,
+};
+
+static const struct attribute_group *l1_groups[] = {	/* NULL-terminated list for level 1 */
+	&l1_group,
+	NULL,
+};
+
+static const struct attribute_group *l2_groups[] = {	/* NULL-terminated list for level 2 */
+	&l2_group,
+	NULL,
+};
+
+static const struct pfctl_group pfctl_groups[] = {	/* searched by get_pfctl_attribute_groups() */
+	{
+		.level = 1,
+		.type = CACHE_TYPE_DATA,
+		.groups = l1_groups,
+	},
+	{
+		.level = 2,
+		.type = CACHE_TYPE_UNIFIED,
+		.groups = l2_groups,
+	},
+	{
+		.groups = NULL,	/* sentinel: ends the lookup loop */
+	},
+};
+
+static const struct attribute_group **
+get_pfctl_attribute_groups(unsigned int level, enum cache_type type)
+{
+	const struct pfctl_group *grp;	/* walks pfctl_groups[] up to its sentinel */
+
+	for (grp = pfctl_groups; grp->groups; grp++) {
+		if (grp->level == level && grp->type == type)
+			return grp->groups;
+	}
+
+	return NULL;	/* no prefetch attributes for this level/type pair */
+}
+
+static int remove_pfctl_attr(struct device *index_dev, void *data)
+{
+	struct device *child;
+
+	/* The lookup takes its own reference, dropped by put_device() below. */
+	child = device_find_child_by_name(index_dev, "prefetch_control");
+	if (child) {
+		device_unregister(child);
+		put_device(child);
+	}
+
+	return 0;	/* a missing child is not an error; keep iterating */
+}
+
+static int create_pfctl_attr(struct device *index_dev, void *data)
+{
+	const struct attribute_group **attr_groups;
+	struct cacheinfo *ci = dev_get_drvdata(index_dev);
+	struct device *child;
+
+	/* Cache leaves without an entry in pfctl_groups[] are skipped. */
+	attr_groups = get_pfctl_attribute_groups(ci->level, ci->type);
+	if (!attr_groups)
+		return 0;
+
+	/* @data is handed on as the child's drvdata (see pfctl_dev_get_cpu()). */
+	child = cpu_device_create(index_dev, data, attr_groups,
+				  "prefetch_control");
+
+	return IS_ERR(child) ? PTR_ERR(child) : 0;
+}
+
+static int pfctl_online(unsigned int cpu)	/* hotplug startup: add "prefetch_control" devices */
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct device *cache_dev;
+	int ret;
+
+	cache_dev = device_find_child_by_name(cpu_dev, "cache");
+	if (!cache_dev)
+		return -ENODEV;
+
+	ret = device_for_each_child(cache_dev, &cpu_dev->id, create_pfctl_attr);
+	if (ret)	/* roll back: a failed startup callback gets no teardown call */
+		device_for_each_child(cache_dev, NULL, remove_pfctl_attr);
+	put_device(cache_dev);	/* reference taken by the lookup above */
+	return ret;
+}
+
+static int pfctl_prepare_down(unsigned int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct device *cache;
+
+	/* Undo pfctl_online(); a CPU without a "cache" device is not an error. */
+	cache = device_find_child_by_name(cpu_dev, "cache");
+	if (cache) {
+		/* remove_pfctl_attr() drops each child's "prefetch_control". */
+		device_for_each_child(cache, NULL, remove_pfctl_attr);
+		put_device(cache);
+	}
+
+	return 0;
+}
+
+/*
+ * Only BROADWELL_X has been tested on an actual machine at this point. Other
+ * models were defined based on the information in the "Intel 64 and IA-32
+ * Architectures Software Developer's Manual".
+ */
+static const struct x86_cpu_id pfctl_match[] __initconst = {	/* driver_data selects the TYPE_L12_* layout */
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,	TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	TYPE_L12_XPHI),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	TYPE_L12_XPHI),
+	{},	/* terminator */
+};
+MODULE_DEVICE_TABLE(x86cpu, pfctl_match);
+
+static int __init x86_pfctl_init(void)
+{
+	const struct x86_cpu_id *id;
+	int state;
+
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
+	id = x86_match_cpu(pfctl_match);
+	if (!id)
+		return -ENODEV;
+
+	/* Trim the attribute lists to the bits this register layout has. */
+	switch (id->driver_data) {
+	case TYPE_L12_PLUS:
+		break;
+	case TYPE_L12_XPHI:
+		attr_l1_hardware_prefetcher_enable.mask = BIT_ULL(1);	/* DCU bit is [1] here */
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	case TYPE_L12_BASE:
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/x86-pfctl:online",
+				  pfctl_online, pfctl_prepare_down);
+	if (state < 0) {
+		pr_err("failed to register hotplug callbacks\n");
+		return state;
+	}
+
+	/* Remember the dynamically allocated state for x86_pfctl_exit(). */
+	hp_online = state;
+	return 0;
+}
+
+static void __exit x86_pfctl_exit(void)
+{
+	cpuhp_remove_state(hp_online);	/* unregisters the state saved by x86_pfctl_init() */
+}
+
+late_initcall(x86_pfctl_init);	/* NOTE(review): presumably late so the cacheinfo sysfs devices already exist - confirm */
+module_exit(x86_pfctl_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("FUJITSU LIMITED");
+MODULE_DESCRIPTION("x86 Hardware Prefetch Control Driver");
-- 
2.27.0




More information about the linux-arm-kernel mailing list