[PATCH v4] arch/riscv: add Zihintpause support

Samuel Holland samuel at sholland.org
Thu Aug 18 22:00:42 PDT 2022


On 8/16/22 11:04 AM, Conor.Dooley at microchip.com wrote:
> On 16/08/2022 16:54, Palmer Dabbelt wrote:
>> On Fri, 12 Aug 2022 00:21:40 PDT (-0700), Conor.Dooley at microchip.com wrote:
>>> On 12/08/2022 07:57, Conor Dooley - M52691 wrote:
>>>> On 11/08/2022 16:17, Palmer Dabbelt wrote:
>>>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>>>
>>>>> On Mon, 20 Jun 2022 13:15:25 PDT (-0700), daolu at rivosinc.com wrote:
>>>>>> Implement support for the ZiHintPause extension.
>>>>>>
>>>>>> The PAUSE instruction is a HINT that indicates the current hart’s rate
>>>>>> of instruction retirement should be temporarily reduced or paused.
>>>>>>
>>>>>> Reviewed-by: Heiko Stuebner <heiko at sntech.de>
>>>>>> Tested-by: Heiko Stuebner <heiko at sntech.de>
>>>>>> Signed-off-by: Dao Lu <daolu at rivosinc.com>
>>>>>> ---
>>>>>>
>>>>>> v1 -> v2:
>>>>>>  Remove the usage of static branch, use PAUSE if toolchain supports it
>>>>>> v2 -> v3:
>>>>>>  Added the static branch back, cpu_relax() behavior is kept the same for
>>>>>> systems that do not support ZiHintPause
>>>>>> v3 -> v4:
>>>>>>  Adopted the newly added unified static keys for extensions
>>>>>> ---
>>>>>>  arch/riscv/Makefile                     |  4 ++++
>>>>>>  arch/riscv/include/asm/hwcap.h          |  5 +++++
>>>>>>  arch/riscv/include/asm/vdso/processor.h | 21 ++++++++++++++++++---
>>>>>>  arch/riscv/kernel/cpu.c                 |  1 +
>>>>>>  arch/riscv/kernel/cpufeature.c          |  1 +
>>>>>>  5 files changed, 29 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>>>>>> index 34cf8a598617..6ddacc6f44b9 100644
>>>>>> --- a/arch/riscv/Makefile
>>>>>> +++ b/arch/riscv/Makefile
>>>>>> @@ -56,6 +56,10 @@ riscv-march-$(CONFIG_RISCV_ISA_C)  := $(riscv-march-y)c
>>>>>>  toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
>>>>>>  riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
>>>>>>
>>>>>> +# Check if the toolchain supports Zihintpause extension
>>>>>> +toolchain-supports-zihintpause := $(call cc-option-yn, -march=$(riscv-march-y)_zihintpause)
>>>>>> +riscv-march-$(toolchain-supports-zihintpause) := $(riscv-march-y)_zihintpause
>>>>>> +
>>>>>>  KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
>>>>>>  KBUILD_AFLAGS += -march=$(riscv-march-y)
>>>>>>
>>>>>> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
>>>>>> index e48eebdd2631..dc47019a0b38 100644
>>>>>> --- a/arch/riscv/include/asm/hwcap.h
>>>>>> +++ b/arch/riscv/include/asm/hwcap.h
>>>>>> @@ -8,6 +8,7 @@
>>>>>>  #ifndef _ASM_RISCV_HWCAP_H
>>>>>>  #define _ASM_RISCV_HWCAP_H
>>>>>>
>>>>>> +#include <asm/errno.h>
>>>>>>  #include <linux/bits.h>
>>>>>>  #include <uapi/asm/hwcap.h>
>>>>>>
>>>>>> @@ -54,6 +55,7 @@ extern unsigned long elf_hwcap;
>>>>>>  enum riscv_isa_ext_id {
>>>>>>       RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
>>>>>>       RISCV_ISA_EXT_SVPBMT,
>>>>>> +     RISCV_ISA_EXT_ZIHINTPAUSE,
>>>>>>       RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
>>>>>>  };
>>>>>>
>>>>>> @@ -64,6 +66,7 @@ enum riscv_isa_ext_id {
>>>>>>   */
>>>>>>  enum riscv_isa_ext_key {
>>>>>>       RISCV_ISA_EXT_KEY_FPU,          /* For 'F' and 'D' */
>>>>>> +     RISCV_ISA_EXT_KEY_ZIHINTPAUSE,
>>>>>>       RISCV_ISA_EXT_KEY_MAX,
>>>>>>  };
>>>>>>
>>>>>> @@ -83,6 +86,8 @@ static __always_inline int riscv_isa_ext2key(int num)
>>>>>>               return RISCV_ISA_EXT_KEY_FPU;
>>>>>>       case RISCV_ISA_EXT_d:
>>>>>>               return RISCV_ISA_EXT_KEY_FPU;
>>>>>> +     case RISCV_ISA_EXT_ZIHINTPAUSE:
>>>>>> +             return RISCV_ISA_EXT_KEY_ZIHINTPAUSE;
>>>>>>       default:
>>>>>>               return -EINVAL;
>>>>>>       }
>>>>>> diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
>>>>>> index 134388cbaaa1..1e4f8b4aef79 100644
>>>>>> --- a/arch/riscv/include/asm/vdso/processor.h
>>>>>> +++ b/arch/riscv/include/asm/vdso/processor.h
>>>>>> @@ -4,15 +4,30 @@
>>>>>>
>>>>>>  #ifndef __ASSEMBLY__
>>>>>>
>>>>>> +#include <linux/jump_label.h>
>>>>>>  #include <asm/barrier.h>
>>>>>> +#include <asm/hwcap.h>
>>>>>>
>>>>>>  static inline void cpu_relax(void)
>>>>>>  {
>>>>>> +     if (!static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_ZIHINTPAUSE])) {
>>>>>>  #ifdef __riscv_muldiv
>>>>>> -     int dummy;
>>>>>> -     /* In lieu of a halt instruction, induce a long-latency stall. */
>>>>>> -     __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
>>>>>> +             int dummy;
>>>>>> +             /* In lieu of a halt instruction, induce a long-latency stall. */
>>>>>> +             __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
>>>>>>  #endif
>>>>>> +     } else {
>>>>>> +             /*
>>>>>> +              * Reduce instruction retirement.
>>>>>> +              * This assumes the PC changes.
>>>>>> +              */
>>>>>> +#ifdef __riscv_zihintpause
>>>>>> +             __asm__ __volatile__ ("pause");
>>>>>> +#else
>>>>>> +             /* Encoding of the pause instruction */
>>>>>> +             __asm__ __volatile__ (".4byte 0x100000F");
>>>>>> +#endif
>>>>>> +     }
>>>>>>       barrier();
>>>>>>  }
>>>>>>
>>>>>> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
>>>>>> index fba9e9f46a8c..a123e92b14dd 100644
>>>>>> --- a/arch/riscv/kernel/cpu.c
>>>>>> +++ b/arch/riscv/kernel/cpu.c
>>>>>> @@ -89,6 +89,7 @@ int riscv_of_parent_hartid(struct device_node *node)
>>>>>>  static struct riscv_isa_ext_data isa_ext_arr[] = {
>>>>>>       __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
>>>>>>       __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
>>>>>> +     __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
>>>>>>       __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
>>>>>>  };
>>>>>>
>>>>>> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>>>>>> index 1b3ec44e25f5..708df2c0bc34 100644
>>>>>> --- a/arch/riscv/kernel/cpufeature.c
>>>>>> +++ b/arch/riscv/kernel/cpufeature.c
>>>>>> @@ -198,6 +198,7 @@ void __init riscv_fill_hwcap(void)
>>>>>>                       } else {
>>>>>>                               SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
>>>>>>                               SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
>>>>>> +                             SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
>>>>>>                       }
>>>>>>  #undef SET_ISA_EXT_MAP
>>>>>>               }
>>>>>
>>>>> Thanks, this is on for-next.  It needs a sparse patch, which I put in as a link.
>>>>
>>>> This breaks the C=1 build for all toolchains, not just new ones as your sparse
>>>> patch suggests. I amn't 100% what my CI is running, but I replicated this on
>>>> my own machine with:
>>>
>>> Argh, I went poking around and my toolchain's binutils etc is newer than I thought.
>>> Good for people searching on lore I suppose...
>>> Sorry!
>>
>> So just to be clear: you're saying this only breaks with new binutils?
> 
> Yes, sorry - I *thought* the binutils in our CI predated Zihintpause but
> it doesn't. It (and so would the Zicbom stuff) breaks the builds in our CI
> as they run with C=1. I manually patched sparse to get that going again & I
> /suspect/ it may have impacted LKP too.
> 
> "New" is relative though - it breaks C=1 for anyone running a toolchain
> from riscv-collab/riscv-gnu-toolchain.
> 
> I guess it's just that Zicbom is newer so not many people will be on a
> toolchain that supports that. My GCC doesn't only my clang-15.
> 
>> I ask because Dao is also seeing some crashes, if it's breaking arbitrary
>> builds too then it's a stronger hint to revert it.
> 
> It is breaking module loading on RISC-V in general from what it seems.
> See:
> https://lore.kernel.org/linux-riscv/728ecbd5-975a-168e-efab-3c0030be21d5@w6rz.net/

This patch also completely breaks the build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y:

In file included from <command-line>:
./arch/riscv/include/asm/jump_label.h: In function 'cpu_relax':
././include/linux/compiler_types.h:285:33: warning: 'asm' operand 0 probably
does not match constraints
  285 | #define asm_volatile_goto(x...) asm goto(x)
      |                                 ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro
'asm_volatile_goto'
   41 |         asm_volatile_goto(
      |         ^~~~~~~~~~~~~~~~~
././include/linux/compiler_types.h:285:33: error: impossible constraint in 'asm'
  285 | #define asm_volatile_goto(x...) asm goto(x)
      |                                 ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro
'asm_volatile_goto'
   41 |         asm_volatile_goto(
      |         ^~~~~~~~~~~~~~~~~
make[1]: *** [scripts/Makefile.build:249:
arch/riscv/kernel/vdso/vgettimeofday.o] Error 1
make: *** [arch/riscv/Makefile:128: vdso_prepare] Error 2

Putting a static branch in such a widely-inlined function seems like a bad idea
in general. (A quick measurement of my somewhat-minimal config shows this single
static branch as responsible for 40% of the jump table.)

I don't think it's even necessary in this case. Why can't we unconditionally run
both instructions? If Zihintpause is supported, we trade the nop from the static
branch for a div. If it is unsupported, we trade the jump from the static branch
for a nop.

Regards,
Samuel



More information about the linux-riscv mailing list