[PATCH] ARM: Improve the L2 cache performance when PL310 is used

Srinidhi Kasagar srinidhikasagar at gmail.com
Tue Feb 15 06:29:23 EST 2011


Just curious: why does the spinlock surrounding
l2x0_cache_sync still exist?  I see that
Catalin's first version defines the lock as a no-op
for PL310, since its cache operations are atomic.

Am I missing some discussion thread that I have
not noticed?

srinidhi

On Sat, Mar 13, 2010 at 7:04 PM, Shilimkar, Santosh
<santosh.shilimkar at ti.com> wrote:
>> -----Original Message-----
>> From: Catalin Marinas [mailto:catalin.marinas at arm.com]
>> Sent: Tuesday, March 09, 2010 3:45 PM
>> To: linux-arm-kernel at lists.infradead.org
>> Cc: Shilimkar, Santosh; Russell King
>> Subject: [PATCH] ARM: Improve the L2 cache performance when PL310 is used
>>
>> With this L2 cache controller, the cache maintenance by PA and sync
>> operations are atomic and do not require a "wait" loop or spinlocks.
>> This patch conditionally defines the cache_wait() function and locking
>> primitives (rather than duplicating the functions or file).
>>
>> Since L2x0 cache controllers do not work with ARMv7 CPUs, the patch
>> automatically enables CACHE_PL310 when CPU_V7 is defined.
>>
>> Signed-off-by: Catalin Marinas <catalin.marinas at arm.com>
>> Cc: Santosh Shilimkar <santosh.shilimkar at ti.com>
>> Cc: Russell King <rmk at arm.linux.org.uk>
>> ---
>>
>> We did some benchmarks and the performance benefit of this patch with a
>> PL310 cache controller is considerable.
>>
> I tested this patch and indeed it improves the performance by
> 12 % to 28 % for buffers ranging from 1 MB to 10 MB.
>
>> I also considered separate functions in the same file or a separate file
>> but this would mean having to move the TI's workaround to a new file as
>> well. Suggestions welcome.
>>
>>
>>  arch/arm/mm/Kconfig      |    7 +++++
>>  arch/arm/mm/cache-l2x0.c |   70 ++++++++++++++++++++++++++++++++--------------
>>  2 files changed, 56 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
>> index ef61c93..0a59071 100644
>> --- a/arch/arm/mm/Kconfig
>> +++ b/arch/arm/mm/Kconfig
>> @@ -781,6 +781,13 @@ config CACHE_L2X0
>>       help
>>         This option enables the L2x0 PrimeCell.
>>
>> +config CACHE_PL310
>> +     bool
>> +     depends on CACHE_L2X0
>> +     default y if CPU_V7
>> +     help
>> +       This option enables support for the PL310 cache controller.
>> +
>>  config CACHE_TAUROS2
>>       bool "Enable the Tauros2 L2 cache controller"
>>       depends on ARCH_DOVE
>> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
>> index 9fc01b4..2a20d95 100644
>> --- a/arch/arm/mm/cache-l2x0.c
>> +++ b/arch/arm/mm/cache-l2x0.c
>> @@ -26,9 +26,21 @@
>>  #define CACHE_LINE_SIZE              32
>>
>>  static void __iomem *l2x0_base;
>> -static DEFINE_SPINLOCK(l2x0_lock);
>>  bool l2x0_disabled;
>>
>> +#ifdef CONFIG_CACHE_PL310
>> +static inline void cache_wait(void __iomem *reg, unsigned long mask)
>> +{
>> +     /* cache operations are atomic */
>> +}
>> +
>> +#define l2x0_lock(lock, flags)               ((void)(flags))
>> +#define l2x0_unlock(lock, flags)     ((void)(flags))
>> +
>> +#define block_end(start, end)                (end)
>> +
>> +#define L2CC_TYPE                    "PL310/L2C-310"
>> +#else
>>  static inline void cache_wait(void __iomem *reg, unsigned long mask)
>>  {
>>       /* wait for the operation to complete */
>> @@ -36,6 +48,22 @@ static inline void cache_wait(void __iomem *reg, unsigned long mask)
>>               ;
>>  }
>>
>> +static DEFINE_SPINLOCK(l2x0_lock);
>> +#define l2x0_lock(lock, flags)               spin_lock_irqsave(lock, flags)
>> +#define l2x0_unlock(lock, flags)     spin_unlock_irqrestore(lock, flags)
>> +
>> +#define block_end(start, end)                ((start) + min((end) - (start), 4096UL))
>> +
>> +#define L2CC_TYPE                    "L2x0"
>> +#endif
>> +
>> +static inline void cache_wait_always(void __iomem *reg, unsigned long mask)
>> +{
>> +     /* wait for the operation to complete */
>> +     while (readl(reg) & mask)
>> +             ;
>> +}
>> +
>>  static inline void cache_sync(void)
>>  {
>>       void __iomem *base = l2x0_base;
>> @@ -99,11 +127,11 @@ static inline void l2x0_inv_all(void)
>>       unsigned long flags;
>>
>>       /* invalidate all ways */
>> -     spin_lock_irqsave(&l2x0_lock, flags);
>> +     l2x0_lock(&l2x0_lock, flags);
>>       writel(0xff, l2x0_base + L2X0_INV_WAY);
>> -     cache_wait(l2x0_base + L2X0_INV_WAY, 0xff);
>> +     cache_wait_always(l2x0_base + L2X0_INV_WAY, 0xff);
>>       cache_sync();
>> -     spin_unlock_irqrestore(&l2x0_lock, flags);
>> +     l2x0_unlock(&l2x0_lock, flags);
>>  }
>>
>>  static void l2x0_inv_range(unsigned long start, unsigned long end)
>> @@ -111,7 +139,7 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
>>       void __iomem *base = l2x0_base;
>>       unsigned long flags;
>>
>> -     spin_lock_irqsave(&l2x0_lock, flags);
>> +     l2x0_lock(&l2x0_lock, flags);
>>       if (start & (CACHE_LINE_SIZE - 1)) {
>>               start &= ~(CACHE_LINE_SIZE - 1);
>>               debug_writel(0x03);
>> @@ -128,7 +156,7 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
>>       }
>>
>>       while (start < end) {
>> -             unsigned long blk_end = start + min(end - start, 4096UL);
>> +             unsigned long blk_end = block_end(start, end);
>>
>>               while (start < blk_end) {
>>                       l2x0_inv_line(start);
>> @@ -136,13 +164,13 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
>>               }
>>
>>               if (blk_end < end) {
>> -                     spin_unlock_irqrestore(&l2x0_lock, flags);
>> -                     spin_lock_irqsave(&l2x0_lock, flags);
>> +                     l2x0_unlock(&l2x0_lock, flags);
>> +                     l2x0_lock(&l2x0_lock, flags);
>>               }
>>       }
>>       cache_wait(base + L2X0_INV_LINE_PA, 1);
>>       cache_sync();
>> -     spin_unlock_irqrestore(&l2x0_lock, flags);
>> +     l2x0_unlock(&l2x0_lock, flags);
>>  }
>>
>>  static void l2x0_clean_range(unsigned long start, unsigned long end)
>> @@ -150,10 +178,10 @@ static void l2x0_clean_range(unsigned long start, unsigned long end)
>>       void __iomem *base = l2x0_base;
>>       unsigned long flags;
>>
>> -     spin_lock_irqsave(&l2x0_lock, flags);
>> +     l2x0_lock(&l2x0_lock, flags);
>>       start &= ~(CACHE_LINE_SIZE - 1);
>>       while (start < end) {
>> -             unsigned long blk_end = start + min(end - start, 4096UL);
>> +             unsigned long blk_end = block_end(start, end);
>>
>>               while (start < blk_end) {
>>                       l2x0_clean_line(start);
>> @@ -161,13 +189,13 @@ static void l2x0_clean_range(unsigned long start, unsigned long end)
>>               }
>>
>>               if (blk_end < end) {
>> -                     spin_unlock_irqrestore(&l2x0_lock, flags);
>> -                     spin_lock_irqsave(&l2x0_lock, flags);
>> +                     l2x0_unlock(&l2x0_lock, flags);
>> +                     l2x0_lock(&l2x0_lock, flags);
>>               }
>>       }
>>       cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
>>       cache_sync();
>> -     spin_unlock_irqrestore(&l2x0_lock, flags);
>> +     l2x0_unlock(&l2x0_lock, flags);
>>  }
>>
>>  static void l2x0_flush_range(unsigned long start, unsigned long end)
>> @@ -175,10 +203,10 @@ static void l2x0_flush_range(unsigned long start, unsigned long end)
>>       void __iomem *base = l2x0_base;
>>       unsigned long flags;
>>
>> -     spin_lock_irqsave(&l2x0_lock, flags);
>> +     l2x0_lock(&l2x0_lock, flags);
>>       start &= ~(CACHE_LINE_SIZE - 1);
>>       while (start < end) {
>> -             unsigned long blk_end = start + min(end - start, 4096UL);
>> +             unsigned long blk_end = block_end(start, end);
>>
>>               debug_writel(0x03);
>>               while (start < blk_end) {
>> @@ -188,13 +216,13 @@ static void l2x0_flush_range(unsigned long start, unsigned long end)
>>               debug_writel(0x00);
>>
>>               if (blk_end < end) {
>> -                     spin_unlock_irqrestore(&l2x0_lock, flags);
>> -                     spin_lock_irqsave(&l2x0_lock, flags);
>> +                     l2x0_unlock(&l2x0_lock, flags);
>> +                     l2x0_lock(&l2x0_lock, flags);
>>               }
>>       }
>>       cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
>>       cache_sync();
>> -     spin_unlock_irqrestore(&l2x0_lock, flags);
>> +     l2x0_unlock(&l2x0_lock, flags);
>>  }
>>
>>  void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
>> @@ -202,7 +230,7 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
>>       __u32 aux;
>>
>>       if (l2x0_disabled) {
>> -             printk(KERN_INFO "L2X0 cache controller disabled\n");
>> +             pr_info(L2CC_TYPE " cache controller disabled\n");
>>               return;
>>       }
>>
>> @@ -232,7 +260,7 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
>>       outer_cache.clean_range = l2x0_clean_range;
>>       outer_cache.flush_range = l2x0_flush_range;
>>
>> -     printk(KERN_INFO "L2X0 cache controller enabled\n");
>> +     pr_info(L2CC_TYPE " cache controller enabled\n");
>>  }
>>
>>  static int __init l2x0_disable(char *unused)
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>



More information about the linux-arm-kernel mailing list