[PATCH v3 3/4] riscv: cmpxchg.h: Implement xchg for short
guoren at kernel.org
Thu Mar 25 07:55:36 GMT 2021
From: Guo Ren <guoren at linux.alibaba.com>
RISC-V only supports lr.w/sc.w and lr.d/sc.d, i.e. word and double-word
sized, naturally aligned accesses. There are no lr.h/sc.h instructions,
but qspinlock.c needs xchg on a short (16-bit) variable:

  xchg_tail -> xchg_relaxed(&lock->tail, ...
typedef struct qspinlock {
	union {
		atomic_t val;

		/*
		 * By using the whole 2nd least significant byte for the
		 * pending bit, we can allow better optimization of the lock
		 * acquisition for the pending bit holder.
		 */
		struct {
			u8 locked;
			u8 pending;
		};
		struct {
			u16 locked_pending;
			u16 tail; /* half word */
		};
	};
} arch_spinlock_t;
So emulate the short xchg with a word-sized lr.w/sc.w sequence that
preserves the other half of the aligned word. This only covers
qspinlock's requirement; it is not a general 16-bit xchg.

Michael has sent another implementation; see the Link below.
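For illustration only (not part of this patch), here is the same idea
written in plain C with GCC's __atomic builtins standing in for the
lr.w/sc.w loop; the helper name xchg16_emulated is made up. It
read-modify-writes the naturally aligned 32-bit word that contains the
half word, splicing in the new 16 bits and retrying until the word was
not changed underneath us:

#include <stdint.h>

static inline uint16_t xchg16_emulated(uint16_t *p, uint16_t newval)
{
	/* The naturally aligned 32-bit word that contains *p. */
	uint32_t *aligned = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
	/* Bit offset of the half word in that word: 0 or 16 (little endian). */
	unsigned int shift = ((uintptr_t)p & 0x2) * 8;
	uint32_t mask = 0xffffU << shift;
	uint32_t old = __atomic_load_n(aligned, __ATOMIC_RELAXED);
	uint32_t tmp;

	do {
		/* Keep the other half word, splice in the new value. */
		tmp = (old & ~mask) | ((uint32_t)newval << shift);
	} while (!__atomic_compare_exchange_n(aligned, &old, tmp, 1,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));

	return (uint16_t)(old >> shift);	/* previous half word */
}

The inline asm below implements the same loop directly with lr.w/sc.w,
using slliw/srliw shift pairs instead of an explicit mask.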
Signed-off-by: Guo Ren <guoren at linux.alibaba.com>
Co-developed-by: Michael Clark <michaeljclark at mac.com>
Tested-by: Guo Ren <guoren at linux.alibaba.com>
Link: https://lore.kernel.org/linux-riscv/20190211043829.30096-2-michaeljclark@mac.com/
Cc: Peter Zijlstra <peterz at infradead.org>
Cc: Anup Patel <anup at brainfault.org>
Cc: Arnd Bergmann <arnd at arndb.de>
Cc: Palmer Dabbelt <palmerdabbelt at google.com>
---
arch/riscv/include/asm/cmpxchg.h | 36 ++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 50513b95411d..5ca41152cf4b 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -22,7 +22,43 @@
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(new) __new = (new);					\
 	__typeof__(*(ptr)) __ret;					\
+	register unsigned long __rc, tmp, align, addr;			\
 	switch (size) {							\
+	case 2:								\
+		align = ((unsigned long) __ptr & 0x3);			\
+		addr = ((unsigned long) __ptr & ~0x3);			\
+		if (align) {						\
+			__asm__ __volatile__ (				\
+			"0:	lr.w %0, (%4)\n"			\
+			"	mv %1, %0\n"				\
+			"	slliw %1, %1, 16\n"			\
+			"	srliw %1, %1, 16\n"			\
+			"	mv %2, %3\n"				\
+			"	slliw %2, %2, 16\n"			\
+			"	or %1, %2, %1\n"			\
+			"	sc.w %2, %1, (%4)\n"			\
+			"	bnez %2, 0b\n"				\
+			"	srliw %0, %0, 16\n"			\
+			: "=&r" (__ret), "=&r" (tmp), "=&r" (__rc)	\
+			: "r" (__new), "r" (addr)			\
+			: "memory");					\
+		} else {						\
+			__asm__ __volatile__ (				\
+			"0:	lr.w %0, (%4)\n"			\
+			"	mv %1, %0\n"				\
+			"	srliw %1, %1, 16\n"			\
+			"	slliw %1, %1, 16\n"			\
+			"	mv %2, %3\n"				\
+			"	or %1, %2, %1\n"			\
+			"	sc.w %2, %1, (%4)\n"			\
+			"	bnez %2, 0b\n"				\
+			"	slliw %0, %0, 16\n"			\
+			"	srliw %0, %0, 16\n"			\
+			: "=&r" (__ret), "=&r" (tmp), "=&r" (__rc)	\
+			: "r" (__new), "r" (addr)			\
+			: "memory");					\
+		}							\
+		break;							\
 	case 4:								\
 		__asm__ __volatile__ (					\
 			"	amoswap.w %0, %2, %1\n"			\
--
2.17.1