[RFC] mixture of cleanups to cache-v7.S
Russell King - ARM Linux
linux at arm.linux.org.uk
Fri Apr 3 03:08:48 PDT 2015
On Thu, Apr 02, 2015 at 11:57:59PM +0100, Russell King - ARM Linux wrote:
> On Thu, Apr 02, 2015 at 11:49:47PM +0100, Russell King - ARM Linux wrote:
> > Several cleanups are in the patch below... I'll separate them out, but
> > I'd like to hear comments on them. Basically:
> >
> > 1. cache-v7.S is built for ARMv7 CPUs, so there's no reason not to
> > use movw and movt when loading large constants, rather than using
> > "ldr rd,=constant"
> >
> > 2. we can do a much more efficient check for the errata in
> > v7_flush_dcache_louis than we were doing - rather than putting the
> > work-around code in the fast path, we can re-organise this such that
> > we only try to run the workaround code if the LoU field is zero.
> >
> > 3. shift the bitfield we want to extract in the CLIDR to the appropriate
> > bit position prior to masking; this reduces the complexity of the
> > code, particularly with the SMP differences in v7_flush_dcache_louis.
> >
> > 4. pre-shift the Cortex A9 MIDR value to be checked, and shift the
> > actual MIDR to lose the bottom four revision bits.
> >
> > 5. as the v7_flush_dcache_louis code is more optimal, I see no reason not
> > to enable this workaround by default now - if people really want it to
> > be disabled, they can still choose that option. This is in addition
> > to Versatile Express enabling it. Given the memory corrupting abilities
> > of not having this errata enabled, I think it's only sane that it's
> > something that should be encouraged to be enabled, even though it only
> > affects r0pX CPUs.
> >
> > One obvious issue comes up here though - in the case that the LoU bits
> > are validly zero, we merely return from v7_flush_dcache_louis with no
> > DSB or ISB. However v7_flush_dcache_all always has a DSB or ISB at the
> > end, even if LoC is zero. Is this an intentional difference, or should
> > v7_flush_dcache_louis always end with a DSB+ISB ?
>
> I should point out that if the DSB+ISB is needed, then the code can
> instead become as below - basically, we just move the CLIDR into the
> appropriate position and call start_flush_levels, which does the DMB,
> applies the mask to extract the appropriate field, and then decides
> whether it has any levels to process.
I've now tested this patch on the Versatile Express and SDP4430, and
both seem to work fine with the patch below.
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 2eb6de9465bf..c26dfef393cd 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -1139,6 +1139,7 @@ config ARM_ERRATA_742231
> config ARM_ERRATA_643719
> bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
> depends on CPU_V7 && SMP
> + default y
> help
> This option enables the workaround for the 643719 Cortex-A9 (prior to
> r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR
> diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
> index b966656d2c2d..14bfdd584385 100644
> --- a/arch/arm/mm/cache-v7.S
> +++ b/arch/arm/mm/cache-v7.S
> @@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1)
> mcr p15, 2, r0, c0, c0, 0
> mrc p15, 1, r0, c0, c0, 0
>
> - ldr r1, =0x7fff
> + movw r1, #0x7fff
> and r2, r1, r0, lsr #13
>
> - ldr r1, =0x3ff
> + movw r1, #0x3ff
>
> and r3, r1, r0, lsr #3 @ NumWays - 1
> add r2, r2, #1 @ NumSets
> @@ -88,23 +88,20 @@ ENDPROC(v7_flush_icache_all)
> */
>
> ENTRY(v7_flush_dcache_louis)
> - dmb @ ensure ordering with previous memory accesses
> mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr
> - ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr
> - ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr
> +ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position
> +ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position
> #ifdef CONFIG_ARM_ERRATA_643719
> - ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register
> - ALT_UP(reteq lr) @ LoUU is zero, so nothing to do
> - ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p?
> - biceq r2, r2, #0x0000000f @ clear minor revision number
> - teqeq r2, r1 @ test for errata affected core and if so...
> - orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne')
> +ALT_SMP(ands r3, r3, #7 << 1) @ extract LoU field from clidr
> +ALT_UP( b start_flush_levels)
> + bne start_flush_levels @ LoU != 0, start flushing
> + mrc p15, 0, r2, c0, c0, 0 @ read main ID register
> + movw r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p?
> + movt r1, #:upper16:(0x410fc090 >> 4)
> + teq r1, r2, lsr #4 @ test for errata affected core and if so...
> + moveq r3, #1 << 1 @ fix LoUIS value (and set flags state to 'ne')
> #endif
> - ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2
> - ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2
> - reteq lr @ return if level == 0
> - mov r10, #0 @ r10 (starting level) = 0
> - b flush_levels @ start flushing cache levels
> + b start_flush_levels @ start flushing cache levels
> ENDPROC(v7_flush_dcache_louis)
>
> /*
> @@ -117,10 +114,11 @@ ENDPROC(v7_flush_dcache_louis)
> * - mm - mm_struct describing address space
> */
> ENTRY(v7_flush_dcache_all)
> - dmb @ ensure ordering with previous memory accesses
> mrc p15, 1, r0, c0, c0, 1 @ read clidr
> - ands r3, r0, #0x7000000 @ extract loc from clidr
> - mov r3, r3, lsr #23 @ left align loc bit field
> + mov r3, r0, lsr #23 @ align LoC
> +start_flush_levels:
> + dmb @ ensure ordering with previous memory accesses
> + ands r3, r3, #7 << 1 @ extract loc from clidr
> beq finished @ if loc is 0, then no need to clean
> mov r10, #0 @ start clean at cache level 0
> flush_levels:
> @@ -140,10 +138,10 @@ flush_levels:
> #endif
> and r2, r1, #7 @ extract the length of the cache lines
> add r2, r2, #4 @ add 4 (line length offset)
> - ldr r4, =0x3ff
> + movw r4, #0x3ff
> ands r4, r4, r1, lsr #3 @ find maximum number on the way size
> clz r5, r4 @ find bit position of way size increment
> - ldr r7, =0x7fff
> + movw r7, #0x7fff
> ands r7, r7, r1, lsr #13 @ extract max number of the index size
> loop1:
> mov r9, r7 @ create working copy of max index
>
> --
> FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
> according to speedtest.net.
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.
More information about the linux-arm-kernel
mailing list