[PATCH 2/5] ARM: Add Broadcom Brahma-B15 readahead cache support

Russell King - ARM Linux linux at arm.linux.org.uk
Mon Mar 16 14:02:51 PDT 2015


On Fri, Mar 06, 2015 at 04:54:50PM -0800, Florian Fainelli wrote:
> This patch adds support for the Broadcom Brahma-B15 CPU readahead cache
> controller. This cache controller sits between the L2 and the memory bus
> and its purpose is to provide a friendlier burst size towards the DDR
> interface than the native cache line size.
> 
> The readahead cache is mostly transparent, except for
> flush_kern_cache_all, flush_kern_cache_louis and flush_icache_all, which
> are precisely what we are overriding here.
> 
> The readahead cache only intercepts reads, not writes, as such, some
> data can remain stale in any of its buffers, such that we need to flush
> it, which is an operation that needs to happen in a particular order:
> 
> - disable the readahead cache
> - flush it
> - call the appropriate cache-v7.S function
> - re-enable
> 
> This patch tries to minimize the impact to the cache-v7.S file by only
> providing a stub in case CONFIG_CACHE_B15_RAC is enabled (default for
> ARCH_BRCMSTB since it is the current user).
> 
> Signed-off-by: Alamy Liu <alamyliu at broadcom.com>
> Signed-off-by: Florian Fainelli <f.fainelli at gmail.com>
> ---
>  arch/arm/include/asm/cacheflush.h             |   2 +-
>  arch/arm/include/asm/glue-cache.h             |   4 +
>  arch/arm/include/asm/hardware/cache-b15-rac.h |  12 ++
>  arch/arm/mm/Kconfig                           |   8 ++
>  arch/arm/mm/Makefile                          |   1 +
>  arch/arm/mm/cache-b15-rac.c                   | 181 ++++++++++++++++++++++++++
>  6 files changed, 207 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm/include/asm/hardware/cache-b15-rac.h
>  create mode 100644 arch/arm/mm/cache-b15-rac.c
> 
> diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
> index 2d46862e7bef..4d847e185cf6 100644
> --- a/arch/arm/include/asm/cacheflush.h
> +++ b/arch/arm/include/asm/cacheflush.h
> @@ -199,7 +199,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
>   */
>  #if (defined(CONFIG_CPU_V7) && \
>       (defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K))) || \
> -	defined(CONFIG_SMP_ON_UP)
> +	defined(CONFIG_SMP_ON_UP) || defined(CONFIG_CACHE_B15_RAC)
>  #define __flush_icache_preferred	__cpuc_flush_icache_all
>  #elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
>  #define __flush_icache_preferred	__flush_icache_all_v7_smp
> diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
> index a3c24cd5b7c8..11f33b5f9284 100644
> --- a/arch/arm/include/asm/glue-cache.h
> +++ b/arch/arm/include/asm/glue-cache.h
> @@ -117,6 +117,10 @@
>  # endif
>  #endif
>  
> +#if defined(CONFIG_CACHE_B15_RAC)
> +# define MULTI_CACHE 1
> +#endif
> +
>  #if defined(CONFIG_CPU_V7M)
>  # ifdef _CACHE
>  #  define MULTI_CACHE 1
> diff --git a/arch/arm/include/asm/hardware/cache-b15-rac.h b/arch/arm/include/asm/hardware/cache-b15-rac.h
> new file mode 100644
> index 000000000000..76b888f53f90
> --- /dev/null
> +++ b/arch/arm/include/asm/hardware/cache-b15-rac.h
> @@ -0,0 +1,12 @@
> +#ifndef __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
> +#define __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
> +
> +#ifndef __ASSEMBLY__
> +
> +void b15_flush_kern_cache_all(void);
> +void b15_flush_kern_cache_louis(void);
> +void b15_flush_icache_all(void);
> +
> +#endif
> +
> +#endif
> diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
> index 9b4f29e595a4..4d5652a39304 100644
> --- a/arch/arm/mm/Kconfig
> +++ b/arch/arm/mm/Kconfig
> @@ -853,6 +853,14 @@ config OUTER_CACHE_SYNC
>  	  The outer cache has a outer_cache_fns.sync function pointer
>  	  that can be used to drain the write buffer of the outer cache.
>  
> +config CACHE_B15_RAC
> +	bool "Enable the Broadcom Brahma-B15 read-ahead cache controller"
> +	depends on ARCH_BRCMSTB
> +	default y
> +	help
> +	  This option enables the Broadcom Brahma-B15 read-ahead cache
> +	  controller. If disabled, the read-ahead cache remains off.
> +
>  config CACHE_FEROCEON_L2
>  	bool "Enable the Feroceon L2 cache controller"
>  	depends on ARCH_MV78XX0 || ARCH_MVEBU
> diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
> index d3afdf9eb65a..a6797fdb6721 100644
> --- a/arch/arm/mm/Makefile
> +++ b/arch/arm/mm/Makefile
> @@ -96,6 +96,7 @@ AFLAGS_proc-v6.o	:=-Wa,-march=armv6
>  AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a
>  
>  obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o
> +obj-$(CONFIG_CACHE_B15_RAC)	+= cache-b15-rac.o
>  obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
>  obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o
>  obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
> diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c
> new file mode 100644
> index 000000000000..1c5bca6e906b
> --- /dev/null
> +++ b/arch/arm/mm/cache-b15-rac.c
> @@ -0,0 +1,181 @@
> +/*
> + * Broadcom Brahma-B15 CPU read-ahead cache management functions
> + *
> + * Copyright (C) 2015, Broadcom Corporation
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/err.h>
> +#include <linux/spinlock.h>
> +#include <linux/io.h>
> +#include <linux/bitops.h>
> +#include <linux/of_address.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/hardware/cache-b15-rac.h>
> +
> +extern void v7_flush_kern_cache_all(void);
> +extern void v7_flush_kern_cache_louis(void);
> +extern void v7_flush_icache_all(void);
> +
> +/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
> +#define RAC_CONFIG0_REG			(0x78)
> +#define  RACENPREF_MASK			(0x3)
> +#define  RACPREFINST_SHIFT		(0)
> +#define  RACENINST_SHIFT		(2)
> +#define  RACPREFDATA_SHIFT		(4)
> +#define  RACENDATA_SHIFT		(6)
> +#define  RAC_CPU_SHIFT			(8)
> +#define  RACCFG_MASK			(0xff)
> +#define RAC_CONFIG1_REG			(0x7c)
> +#define RAC_FLUSH_REG			(0x80)
> +#define  FLUSH_RAC			(1 << 0)

					BIT(0) ?

> +
> +/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
> +#define RAC_DATA_INST_EN_MASK		(1 << RACPREFINST_SHIFT | \
> +					 RACENPREF_MASK << RACENINST_SHIFT | \
> +					 1 << RACPREFDATA_SHIFT | \
> +					 RACENPREF_MASK << RACENDATA_SHIFT)
> +
> +#define RAC_ENABLED			(1 << 0)

					BIT(0) ?

However, you don't use RAC_ENABLED as a bitmask but as a bit index, so
shouldn't this be zero?

> +
> +static void __iomem *b15_rac_base;
> +static DEFINE_SPINLOCK(rac_lock);
> +
> +/* Initialization flag to avoid checking for b15_rac_base, and to prevent
> + * multi-platform kernels from crashing here as well.
> + */
> +static unsigned long b15_rac_flags;
> +
> +static inline u32 __b15_rac_disable(void)
> +{
> +	u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
> +	__raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
> +	dmb();
> +	return val;
> +}
> +
> +static inline void __b15_rac_flush(void)
> +{
> +	u32 reg;
> +
> +	__raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG);
> +	do {
> +		/* This dmb() is required to force the Bus Interface Unit
> +		 * to clean outstanding writes, and forces an idle cycle
> +		 * to be inserted.
> +		 */
> +		dmb();
> +		reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG);
> +	} while (reg & RAC_FLUSH_REG);
> +}
> +
> +static inline u32 b15_rac_disable_and_flush(void)
> +{
> +	u32 reg;
> +
> +	reg = __b15_rac_disable();
> +	__b15_rac_flush();
> +	return reg;
> +}
> +
> +static inline void __b15_rac_enable(u32 val)
> +{
> +	__raw_writel(val, b15_rac_base + RAC_CONFIG0_REG);
> +	/* dsb() is required here to be consistent with __flush_icache_all() */
> +	dsb();
> +}
> +
> +#define BUILD_RAC_CACHE_OP(name, bar)				\
> +void b15_flush_##name(void)					\
> +{								\
> +	unsigned int do_flush;					\
> +	u32 val = 0;						\
> +								\
> +	spin_lock(&rac_lock);					\
> +	do_flush = test_bit(RAC_ENABLED, &b15_rac_flags);	\

Do you need to use test_bit() here?  You set and test this location
under a spinlock, so it's safe to use non-atomic ops here.

> +static void b15_rac_enable(void)
> +{
> +	unsigned int cpu;
> +	u32 enable = 0;
> +
> +	for_each_possible_cpu(cpu)
> +		enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT));

		enable |= RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT);

You don't need the additional parens - the right hand side of |= is
already expected to be an expression by the compiler.

> +	spin_lock(&rac_lock);
> +	reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
> +	for_each_possible_cpu(cpu)
> +		en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT));

		en_mask |= 1 << (RACPREFDATA_SHIFT + cpu * RAC_CPU_SHIFT);

looks nicer, rather than having two shifts.

What happens when the system goes down (e.g., for kexec)?  Does the RAC
need to be disabled for that?

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.



More information about the linux-arm-kernel mailing list