[PATCH 4/6] ARM: locks: prefetch the destination word for write prior to strex
Nicolas Pitre
nicolas.pitre at linaro.org
Tue Jul 23 16:10:33 EDT 2013
On Tue, 23 Jul 2013, Will Deacon wrote:
> The cost of changing a cacheline from shared to exclusive state can be
> significant, especially when this is triggered by an exclusive store,
> since it may result in having to retry the transaction.
>
> This patch prefixes our {spin,read,write}_[try]lock implementations with
> pldw instructions (on CPUs which support them) to try and grab the line
> in exclusive state from the start.
>
> Signed-off-by: Will Deacon <will.deacon at arm.com>
> ---
> arch/arm/include/asm/spinlock.h | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
> index 0de7bec..3e1cc9d 100644
> --- a/arch/arm/include/asm/spinlock.h
> +++ b/arch/arm/include/asm/spinlock.h
> @@ -5,7 +5,7 @@
> #error SMP not supported on pre-ARMv6 CPUs
> #endif
>
> -#include <asm/processor.h>
> +#include <linux/prefetch.h>
>
> /*
> * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
> @@ -70,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
> u32 newval;
> arch_spinlock_t lockval;
>
> + prefetchw((const void *)&lock->slock);

Couldn't that cast be carried in the definition of prefetchw() instead?

Other than that:

Acked-by: Nicolas Pitre <nico at linaro.org>

> __asm__ __volatile__(
> "1: ldrex %0, [%3]\n"
> " add %1, %0, %4\n"
> @@ -93,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
> unsigned long contended, res;
> u32 slock;
>
> + prefetchw((const void *)&lock->slock);
> do {
> __asm__ __volatile__(
> " ldrex %0, [%3]\n"
> @@ -145,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
> {
> unsigned long tmp;
>
> + prefetchw((const void *)&rw->lock);
> __asm__ __volatile__(
> "1: ldrex %0, [%1]\n"
> " teq %0, #0\n"
> @@ -163,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
> {
> unsigned long tmp;
>
> + prefetchw((const void *)&rw->lock);
> __asm__ __volatile__(
> " ldrex %0, [%1]\n"
> " teq %0, #0\n"
> @@ -211,6 +215,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
> {
> unsigned long tmp, tmp2;
>
> + prefetchw((const void *)&rw->lock);
> __asm__ __volatile__(
> "1: ldrex %0, [%2]\n"
> " adds %0, %0, #1\n"
> @@ -231,6 +236,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
>
> smp_mb();
>
> + prefetchw((const void *)&rw->lock);
> __asm__ __volatile__(
> "1: ldrex %0, [%2]\n"
> " sub %0, %0, #1\n"
> @@ -249,6 +255,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
> {
> unsigned long tmp, tmp2 = 1;
>
> + prefetchw((const void *)&rw->lock);
> __asm__ __volatile__(
> " ldrex %0, [%2]\n"
> " adds %0, %0, #1\n"
> --
> 1.8.2.2
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
More information about the linux-arm-kernel mailing list