[PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

Tue May 28 11:16:31 PDT 2024

> +	asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,			\
> +			     RISCV_ISA_EXT_ZACAS, 1)			\
> +			: : : : zacas);					\
> +									\
>  	__asm__ __volatile__ (						\
>  		prepend							\
>  		"0:	lr" lr_sfx " %0, %2\n"				\
>  		"	bne  %0, %z3, 1f\n"				\
> -		"	sc" sc_sfx " %1, %z4, %2\n"			\
> +		"	sc" sc_cas_sfx " %1, %z4, %2\n"			\
>  		"	bnez %1, 0b\n"					\
>  		append							\
>  		"1:\n"							\
>  		: "=&r" (r), "=&r" (__rc), "+A" (*(p))			\
>  		: "rJ" (co o), "rJ" (n)					\
>  		: "memory");						\
> +	goto end;							\
> +									\
> +zacas:									\
> +	__asm__ __volatile__ (						\
> +		prepend							\
> +		"	amocas" sc_cas_sfx " %0, %z2, %1\n"		\
> +		append							\
> +		: "+&r" (r), "+A" (*(p))				\
> +		: "rJ" (n)						\
> +		: "memory");						\

With this, a cmpxchg32() will result in something like

  amocas.w.rl     a5,a4,(s1)
  fence           rw,rw

(cf. my remarks in patch #4); this will/should provide enough
synchronization, but you might want to try the alternative, and currently
more common, mapping for "fully-ordered AMO sequences", namely

  amocas.w.aqrl   a5,a4,(s1)

Similarly for cmpxchg64 and other sizes.

  Andrea