[PATCH v4 1/1] riscv: mm: use svinval instructions instead of sfence.vma

Alexandre Ghiti alex at ghiti.fr
Wed Jun 21 08:44:25 PDT 2023


Hi Mayuresh,

On 21/06/2023 14:41, Mayuresh Chitale wrote:
> When svinval is supported, the local_flush_tlb_page* functions use the
> following sequence instead of a simple sfence.vma to optimize the TLB
> flushes:
>
> sfence.w.inval
> svinval.vma
>    .
>    .
> svinval.vma
> sfence.inval.ir
>
> The maximum number of consecutive svinval.vma instructions
> that can be executed in local_flush_tlb_page* functions is
> limited to PTRS_PER_PTE. This is required to avoid soft
> lockups and the approach is similar to that used in arm64.
>
> Signed-off-by: Mayuresh Chitale <mchitale at ventanamicro.com>
> ---
>   arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
>   1 file changed, 56 insertions(+), 6 deletions(-)
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 77be59aadc73..ade0b5cf8b47 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -5,6 +5,12 @@
>   #include <linux/sched.h>
>   #include <asm/sbi.h>
>   #include <asm/mmu_context.h>
> +#include <asm/hwcap.h>
> +#include <asm/insn-def.h>
> +
> +#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> +
> +static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;


This threshold seems quite high to me: internally, we computed that
something like 50 would be a good bet, because 512 sfence.vma (or the
svinval equivalent) takes way more time than just flushing the whole
TLB, even accounting for the refill afterwards. How did you get this
number? And since this value is micro-architecture dependent, we need
to find a consensus or a mechanism that allows a vendor to change it.

FYI, the x86 threshold is 33 (sorry, I can't find the pointer right now).
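
Something simple could already help here, e.g. exposing the threshold as
a writable parameter so a vendor/platform can tune it without rebuilding.
A rough sketch only (the default of 50 is just the number from above, and
core_param vs DT vs boot-time benchmarking is exactly what we need to
agree on, so don't take this as the final interface):

	#include <linux/moduleparam.h>

	/* Default is a guess; a platform can override it with
	 * tlb_flush_all_threshold=<n> on the kernel command line. */
	static unsigned long tlb_flush_all_threshold __read_mostly = 50;
	core_param(tlb_flush_all_threshold, tlb_flush_all_threshold, ulong, 0644);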


>   
>   static inline void local_flush_tlb_all_asid(unsigned long asid)
>   {
> @@ -26,19 +32,63 @@ static inline void local_flush_tlb_page_asid(unsigned long addr,
>   static inline void local_flush_tlb_range(unsigned long start,
>   		unsigned long size, unsigned long stride)
>   {
> -	if (size <= stride)
> -		local_flush_tlb_page(start);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {


If size is not aligned on stride, there is one more page to flush, so the
threshold is not respected here. In my patchset, I used DIV_ROUND_UP.
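I.e. something along these lines (just to illustrate on this hunk, not a
full diff):

	/* Round up so a partially covered page still counts against the threshold. */
	if (DIV_ROUND_UP(size, stride) <= tlb_flush_all_threshold) {

DIV_ROUND_UP keeps the comparison in "number of pages to flush", which is
what the threshold is really about.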


> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, zero)
> +					     : : "r" (start) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page(start);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>   		local_flush_tlb_all();
> +	}
>   }
>   
>   static inline void local_flush_tlb_range_asid(unsigned long start,
>   		unsigned long size, unsigned long stride, unsigned long asid)
>   {
> -	if (size <= stride)
> -		local_flush_tlb_page_asid(start, asid);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {
> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
> +					     "r" (asid) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page_asid(start, asid);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>   		local_flush_tlb_all_asid(asid);
> +	}
>   }
>   
>   static void __ipi_flush_tlb_all(void *info)


