[PATCH 3/3] arm64/locking: qspinlocks and qrwlocks support

Yury Norov ynorov at caviumnetworks.com
Wed Apr 26 08:39:47 EDT 2017


On Thu, Apr 20, 2017 at 09:05:30PM +0200, Peter Zijlstra wrote:
> On Thu, Apr 20, 2017 at 09:23:18PM +0300, Yury Norov wrote:
> > Is there some test to reproduce the locking failure for the case.
> 
> Possibly sysvsem stress before commit:
> 
>   27d7be1801a4 ("ipc/sem.c: avoid using spin_unlock_wait()")
> 
> Although a similar scheme is also used in nf_conntrack, see commit:
> 
>   b316ff783d17 ("locking/spinlock, netfilter: Fix nf_conntrack_lock() barriers")
> 
> > I
> > ask because I run loctorture for many hours on my qemu (emulating
> > cortex-a57), and I see no failures in the test reports. And Jan did it
> > on ThunderX, and Adam on QDF2400 without any problems. So even if I
> > rework those functions, how could I check them for correctness?
> 
> Running them doesn't prove them correct. Memory ordering bugs have been
> in the kernel for many years without 'ever' triggering. This is stuff
> you have to think about.
> 
> > Anyway, regarding the queued_spin_unlock_wait(), is my understanding
> > correct that you assume adding smp_mb() before entering the for(;;)
> > cycle, and using ldaxr/strxr instead of atomic_read()?
> 
> You'll have to ask Will, I always forget the arm64 details.

So, below is what I have. For queued_spin_unlock_wait() the generated
code is looking like this:
ffff0000080983a0 <queued_spin_unlock_wait>:
ffff0000080983a0:       d5033bbf        dmb     ish
ffff0000080983a4:       b9400007        ldr     w7, [x0]
ffff0000080983a8:       350000c7        cbnz    w7, ffff0000080983c0 <queued_spin_unlock_wait+0x20>
ffff0000080983ac:       1400000e        b       ffff0000080983e4 <queued_spin_unlock_wait+0x44>
ffff0000080983b0:       d503203f        yield
ffff0000080983b4:       d5033bbf        dmb     ish
ffff0000080983b8:       b9400007        ldr     w7, [x0]
ffff0000080983bc:       34000147        cbz     w7, ffff0000080983e4 <queued_spin_unlock_wait+0x44>
ffff0000080983c0:       f2401cff        tst     x7, #0xff
ffff0000080983c4:       54ffff60        b.eq    ffff0000080983b0 <queued_spin_unlock_wait+0x10>
ffff0000080983c8:       14000003        b       ffff0000080983d4 <queued_spin_unlock_wait+0x34>
ffff0000080983cc:       d503201f        nop
ffff0000080983d0:       d503203f        yield
ffff0000080983d4:       d5033bbf        dmb     ish
ffff0000080983d8:       b9400007        ldr     w7, [x0]
ffff0000080983dc:       f2401cff        tst     x7, #0xff
ffff0000080983e0:       54ffff81        b.ne    ffff0000080983d0 <queued_spin_unlock_wait+0x30>
ffff0000080983e4:       d50339bf        dmb     ishld
ffff0000080983e8:       d65f03c0        ret
ffff0000080983ec:       d503201f        nop

If I understand the documentation correctly, it's enough to check the lock
properly. If not - please give me the clue. Will?

Yury

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 22dbde97eefa..2d80161ee367 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -25,6 +25,8 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_USE_QUEUED_RWLOCKS
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/include/asm/qrwlock.h b/arch/arm64/include/asm/qrwlock.h
new file mode 100644
index 000000000000..626f6ebfb52d
--- /dev/null
+++ b/arch/arm64/include/asm/qrwlock.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_ARM64_QRWLOCK_H
+#define _ASM_ARM64_QRWLOCK_H
+
+#include <asm-generic/qrwlock_types.h>
+#include <asm-generic/qrwlock.h>
+
+#endif /* _ASM_ARM64_QRWLOCK_H */
diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h
new file mode 100644
index 000000000000..09ef4f13f549
--- /dev/null
+++ b/arch/arm64/include/asm/qspinlock.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_ARM64_QSPINLOCK_H
+#define _ASM_ARM64_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm/atomic.h>
+
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+#define queued_spin_unlock_wait queued_spin_unlock_wait
+
+#define	queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	smp_store_release((u8 *)lock, 0);
+}
+
+#define queued_spin_is_locked queued_spin_is_locked
+/**
+ * queued_spin_is_locked - is the spinlock locked?
+ * @lock: Pointer to queued spinlock structure
+ * Return: 1 if it is locked, 0 otherwise
+ */
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+	/*
+	 * See queued_spin_unlock_wait().
+	 *
+	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
+	 * isn't immediately observable.
+	 */
+	smp_mb();
+	return atomic_read(&lock->val);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_ARM64_QSPINLOCK_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..37713397e0c5 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -20,6 +20,10 @@
 #include <asm/spinlock_types.h>
 #include <asm/processor.h>
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
 /*
  * Spinlock implementation.
  *
@@ -187,6 +191,12 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#ifdef CONFIG_QUEUED_RWLOCKS
+#include <asm/qrwlock.h>
+#else
+
 /*
  * Write lock implementation.
  *
@@ -351,6 +361,8 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 /* read_can_lock - would read_trylock() succeed? */
 #define arch_read_can_lock(x)		((x)->lock < 0x80000000)
 
+#endif /* CONFIG_QUEUED_RWLOCKS */
+
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 55be59a35e3f..0f0f1561ab6a 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -16,9 +16,9 @@
 #ifndef __ASM_SPINLOCK_TYPES_H
 #define __ASM_SPINLOCK_TYPES_H
 
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
-#endif
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
 
 #include <linux/types.h>
 
@@ -36,10 +36,18 @@ typedef struct {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
 
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#ifdef CONFIG_QUEUED_RWLOCKS
+#include <asm-generic/qrwlock_types.h>
+#else
+
 typedef struct {
 	volatile unsigned int lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
+#endif /* CONFIG_QUEUED_RWLOCKS */
+
 #endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9d56467dc223..f48f6256e893 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -56,6 +56,7 @@ arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
+arm64-obj-$(CONFIG_QUEUED_SPINLOCKS)	+= qspinlock.o
 
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-$(CONFIG_ARM64_ILP32)		+= vdso-ilp32/
diff --git a/arch/arm64/kernel/qspinlock.c b/arch/arm64/kernel/qspinlock.c
new file mode 100644
index 000000000000..924f19953adb
--- /dev/null
+++ b/arch/arm64/kernel/qspinlock.c
@@ -0,0 +1,34 @@
+#include <asm/qspinlock.h>
+#include <asm/processor.h>
+
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		smp_mb();
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	for (;;) {
+		smp_mb();
+		val = atomic_read(&lock->val);
+		if (!(val & _Q_LOCKED_MASK)) /* any unlock is good */
+			break;
+
+		cpu_relax();
+	}
+
+done:
+	smp_acquire__after_ctrl_dep();
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
-- 
2.11.0




More information about the linux-arm-kernel mailing list