[PATCH] arm64: cache: Skip an unnecessary data cache clean PoU operation

Shanker Donthineni shankerd at codeaurora.org
Thu Feb 2 13:45:06 PST 2017


The cache management functions always perform the data cache clean
to PoU (point of unification) operation, even though it is not
required on some systems. There is no need to clean the data cache
to the PoU if all the cache levels below PoUIS are WT (Write-Through)
caches. The unnecessary clean causes a huge performance degradation
when operating on a large memory area, especially THP with a 64K
page size kernel.

For each online CPU, check whether the 'dc cvau' instruction is
needed and update the global variable __skip_dcache_pou. The two
functions __flush_cache_user_range() and __clean_dcache_area_pou()
are patched using the alternatives framework to skip the unnecessary
code. The existing behavior is unchanged if any one of the CPUs has
a WB (Write-Back) cache below the PoUIS level.

Signed-off-by: Shanker Donthineni <shankerd at codeaurora.org>
---
 arch/arm64/include/asm/cachetype.h |  6 ++++++
 arch/arm64/include/asm/cpucaps.h   |  3 ++-
 arch/arm64/kernel/cpufeature.c     | 12 ++++++++++++
 arch/arm64/kernel/cpuinfo.c        | 23 +++++++++++++++++++++++
 arch/arm64/mm/cache.S              |  3 +++
 5 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index f558869..f05974c 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -39,6 +39,12 @@
 
 extern unsigned long __icache_flags;
 
+extern bool __skip_dcache_pou;
+
+#define CLIDR_LOUIS_SHIFT	(21)
+#define CLIDR_LOUIS_MASK	(0x7)
+#define CLIDR_LOUIS(x)		(((x) >> CLIDR_LOUIS_SHIFT) & CLIDR_LOUIS_MASK)
+
 /*
  * NumSets, bits[27:13] - (Number of sets in cache) - 1
  * Associativity, bits[12:3] - (Associativity of cache) - 1
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 4174f09..6f4ea61 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -35,7 +35,8 @@
 #define ARM64_HYP_OFFSET_LOW			14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
 #define ARM64_HAS_NO_FPSIMD			16
+#define ARM64_SKIP_DCACHE_POU			17
 
-#define ARM64_NCAPS				17
+#define ARM64_NCAPS				18
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fdf8f04..eaa86d1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -755,6 +755,12 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 					ID_AA64PFR0_FP_SHIFT) < 0;
 }
 
+static bool check_dcache_pou_skipped(const struct arm64_cpu_capabilities *entry,
+				     int __unused)
+{
+	return __skip_dcache_pou;
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -845,6 +851,12 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 		.min_field_value = 0,
 		.matches = has_no_fpsimd,
 	},
+	{
+		.desc = "Skip data cache clean PoU operation",
+		.capability = ARM64_SKIP_DCACHE_POU,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = check_dcache_pou_skipped,
+	},
 	{},
 };
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 7b7be71..4fdbb55 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -50,6 +50,7 @@
 };
 
 unsigned long __icache_flags;
+bool __skip_dcache_pou = true;
 
 static const char *const hwcap_str[] = {
 	"fp",
@@ -305,6 +306,25 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
+/*
+ * Check that none of the data cache levels below LoUIS supports WB.
+ * Return false if any cache level below LoUIS is a WB cache,
+ * else return true.
+ */
+static bool is_dcache_below_pou_wt(void)
+{
+	u32 louis = CLIDR_LOUIS(read_sysreg(clidr_el1));
+	u32 lvl, csidr;
+
+	for (lvl = 0; lvl < louis; lvl++) {
+		csidr = cache_get_ccsidr(lvl << 1);
+		if (csidr & CCSIDR_EL1_WRITE_BACK)
+			return false;
+	}
+
+	return true;
+}
+
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
 	info->reg_cntfrq = arch_timer_get_cntfrq();
@@ -345,6 +365,9 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	}
 
 	cpuinfo_detect_icache_policy(info);
+
+	if (__skip_dcache_pou)
+		__skip_dcache_pou = is_dcache_below_pou_wt();
 }
 
 void cpuinfo_store_cpu(void)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 83c27b6e..bb3cdb3 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -50,6 +50,7 @@ ENTRY(flush_icache_range)
  */
 ENTRY(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3
+	alternative_insn "nop", "b 2f", ARM64_SKIP_DCACHE_POU
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x0, x3
@@ -60,6 +61,7 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	b.lo	1b
 	dsb	ish
 
+2:
 	icache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x0, x3
@@ -104,6 +106,7 @@ ENDPIPROC(__flush_dcache_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pou)
+	alternative_insn "nop", "ret", ARM64_SKIP_DCACHE_POU
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
 ENDPROC(__clean_dcache_area_pou)
-- 
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.




More information about the linux-arm-kernel mailing list