[PATCH] arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC

Shanker Donthineni shankerd at codeaurora.org
Fri Feb 16 16:57:46 PST 2018


The two point-of-unification cache maintenance operations 'DC CVAU' and
'IC IVAU' are optional for implementors as per the ARMv8 specification.
This patch parses the updated CTR_EL0 register definition and adds
the required changes to skip PoU operations if the hardware reports
CTR_EL0.DIC and/or CTR_EL0.IDC.

CTR_EL0.DIC: Instruction cache invalidation requirements for
 instruction to data coherence. Meaning of bit[29]:
  0: Instruction cache invalidation to the point of unification
     is required for instruction to data coherence.
  1: Instruction cache invalidation to the point of unification is
     not required for instruction to data coherence.

CTR_EL0.IDC: Data cache clean requirements for instruction to data
 coherence. Meaning of bit[28]:
  0: Data cache clean to the point of unification is required for
     instruction to data coherence, unless CLIDR_EL1.LoC == 0b000
     or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000).
  1: Data cache clean to the point of unification is not required
     for instruction to data coherence.

Signed-off-by: Philip Elcan <pelcan at codeaurora.org>
Signed-off-by: Shanker Donthineni <shankerd at codeaurora.org>
---
 arch/arm64/include/asm/assembler.h | 48 ++++++++++++++++++++++++--------------
 arch/arm64/include/asm/cache.h     |  2 ++
 arch/arm64/kernel/cpufeature.c     |  2 ++
 arch/arm64/mm/cache.S              | 26 ++++++++++++++-------
 4 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3c78835..9eaa948 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -30,6 +30,7 @@
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
+#include <asm/cache.h>
 
 	.macro save_and_disable_daif, flags
 	mrs	\flags, daif
@@ -334,9 +335,9 @@
  * raw_dcache_line_size - get the minimum D-cache line size on this CPU
  * from the CTR register.
  */
-	.macro	raw_dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	.macro	raw_dcache_line_size, reg, tmp, ctr
+	mrs	\ctr, ctr_el0			// read CTR
+	ubfm	\tmp, \ctr, #16, #19		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -344,9 +345,9 @@
 /*
  * dcache_line_size - get the safe D-cache line size across all CPUs
  */
-	.macro	dcache_line_size, reg, tmp
-	read_ctr	\tmp
-	ubfm		\tmp, \tmp, #16, #19	// cache line size encoding
+	.macro	dcache_line_size, reg, tmp, ctr
+	read_ctr	\ctr
+	ubfm		\tmp, \ctr, #16, #19	// cache line size encoding
 	mov		\reg, #4		// bytes per word
 	lsl		\reg, \reg, \tmp	// actual cache line size
 	.endm
@@ -355,9 +356,9 @@
  * raw_icache_line_size - get the minimum I-cache line size on this CPU
  * from the CTR register.
  */
-	.macro	raw_icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
+	.macro	raw_icache_line_size, reg, tmp, ctr
+	mrs	\ctr, ctr_el0			// read CTR
+	and	\tmp, \ctr, #0xf		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -365,9 +366,9 @@
 /*
  * icache_line_size - get the safe I-cache line size across all CPUs
  */
-	.macro	icache_line_size, reg, tmp
-	read_ctr	\tmp
-	and		\tmp, \tmp, #0xf	// cache line size encoding
+	.macro	icache_line_size, reg, tmp, ctr
+	read_ctr	\ctr
+	and		\tmp, \ctr, #0xf	// cache line size encoding
 	mov		\reg, #4		// bytes per word
 	lsl		\reg, \reg, \tmp	// actual cache line size
 	.endm
@@ -408,13 +409,21 @@
  * 	size:		size of the region
  * 	Corrupts:	kaddr, size, tmp1, tmp2
  */
-	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
-	dcache_line_size \tmp1, \tmp2
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2, tmp3
+	dcache_line_size \tmp1, \tmp2, \tmp3
 	add	\size, \kaddr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\kaddr, \kaddr, \tmp2
 9998:
-	.if	(\op == cvau || \op == cvac)
+	.if	(\op == cvau)
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	tbnz	\tmp3, #CTR_IDC_SHIFT, 9997f
+	dc	cvau, \kaddr
+alternative_else
+	dc	civac, \kaddr
+	nop
+alternative_endif
+	.elseif (\op == cvac)
 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
 	dc	\op, \kaddr
 alternative_else
@@ -433,6 +442,7 @@
 	cmp	\kaddr, \size
 	b.lo	9998b
 	dsb	\domain
+9997:
 	.endm
 
 /*
@@ -441,10 +451,11 @@
  *
  * 	start, end:	virtual addresses describing the region
  *	label:		A label to branch to on user fault.
- * 	Corrupts:	tmp1, tmp2
+ * 	Corrupts:	tmp1, tmp2, tmp3
  */
-	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
-	icache_line_size \tmp1, \tmp2
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, tmp3, label
+	icache_line_size \tmp1, \tmp2, \tmp3
+	tbnz    \tmp3, #CTR_DIC_SHIFT, 9996f
 	sub	\tmp2, \tmp1, #1
 	bic	\tmp2, \start, \tmp2
 9997:
@@ -454,6 +465,7 @@
 	b.lo	9997b
 	dsb	ish
 	isb
+9996:
 	.endm
 
 /*
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ea9bb4e..aea533b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -22,6 +22,8 @@
 #define CTR_L1IP_MASK		3
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
+#define CTR_IDC_SHIFT		28
+#define CTR_DIC_SHIFT		29
 
 #define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 29b1f87..f42bb5a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -200,6 +200,8 @@ static int __init register_cpu_hwcaps_dumper(void)
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 0),	/* DIC */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 0),	/* IDC */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 758bde7..5764af8 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,6 +24,7 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 #include <asm/asm-uaccess.h>
+#include <asm/cache.h>
 
 /*
  *	flush_icache_range(start,end)
@@ -50,7 +51,12 @@ ENTRY(flush_icache_range)
  */
 ENTRY(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
-	dcache_line_size x2, x3
+	dcache_line_size x2, x3, x4
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	tbnz    x4, #CTR_IDC_SHIFT, 8f
+alternative_else
+	nop
+alternative_endif
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
@@ -60,7 +66,9 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	b.lo	1b
 	dsb	ish
 
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
+8:
+	invalidate_icache_by_line x0, x1, x2, x3, x4, 9f
+
 	mov	x0, #0
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -82,7 +90,7 @@ ENDPROC(__flush_cache_user_range)
 ENTRY(invalidate_icache_range)
 	uaccess_ttbr0_enable x2, x3, x4
 
-	invalidate_icache_by_line x0, x1, x2, x3, 2f
+	invalidate_icache_by_line x0, x1, x2, x3, x4, 2f
 	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -102,7 +110,7 @@ ENDPROC(invalidate_icache_range)
  *	- size    - size in question
  */
 ENTRY(__flush_dcache_area)
-	dcache_by_line_op civac, sy, x0, x1, x2, x3
+	dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__flush_dcache_area)
 
@@ -116,7 +124,7 @@ ENDPIPROC(__flush_dcache_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pou)
-	dcache_by_line_op cvau, ish, x0, x1, x2, x3
+	dcache_by_line_op cvau, ish, x0, x1, x2, x3, x4
 	ret
 ENDPROC(__clean_dcache_area_pou)
 
@@ -140,7 +148,7 @@ ENTRY(__inval_dcache_area)
  */
 __dma_inv_area:
 	add	x1, x1, x0
-	dcache_line_size x2, x3
+	dcache_line_size x2, x3, x4
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
 	bic	x1, x1, x3
@@ -178,7 +186,7 @@ ENTRY(__clean_dcache_area_poc)
  *	- size    - size in question
  */
 __dma_clean_area:
-	dcache_by_line_op cvac, sy, x0, x1, x2, x3
+	dcache_by_line_op cvac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__clean_dcache_area_poc)
 ENDPROC(__dma_clean_area)
@@ -193,7 +201,7 @@ ENDPROC(__dma_clean_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pop)
-	dcache_by_line_op cvap, sy, x0, x1, x2, x3
+	dcache_by_line_op cvap, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__clean_dcache_area_pop)
 
@@ -206,7 +214,7 @@ ENDPIPROC(__clean_dcache_area_pop)
  *	- size    - size in question
  */
 ENTRY(__dma_flush_area)
-	dcache_by_line_op civac, sy, x0, x1, x2, x3
+	dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__dma_flush_area)
 
-- 
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.




More information about the linux-arm-kernel mailing list