[PATCH v2] arm64: optimize flush tlb kernel range
Kefeng Wang
wangkefeng.wang at huawei.com
Fri Sep 20 04:17:36 PDT 2024
On 2024/9/20 14:10, Anshuman Khandual wrote:
>
>
> On 9/20/24 09:25, Kefeng Wang wrote:
>> Currently the kernel TLB is flushed page by page if the target
>> VA range is less than MAX_DVM_OPS * PAGE_SIZE; otherwise we'll
>> brutally issue a TLBI ALL.
>>
>> But we can optimize the case where the CPU supports TLB range
>> operations: convert to use __flush_tlb_range_op() like the other
>> TLB range flushes to improve performance.
>>
>> Co-developed-by: Yicong Yang <yangyicong at hisilicon.com>
>> Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang at huawei.com>
>> ---
>> v2:
>> - address Catalin's comments and use __flush_tlb_range_op() directly
>>
>> arch/arm64/include/asm/tlbflush.h | 24 +++++++++++++++++-------
>> 1 file changed, 17 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index 95fbc8c05607..42f0ec14fb2c 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -492,19 +492,29 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>>
>>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>>  {
>> -        unsigned long addr;
>> +        const unsigned long stride = PAGE_SIZE;
>> +        unsigned long pages;
>> +
>> +        start = round_down(start, stride);
>> +        end = round_up(end, stride);
>> +        pages = (end - start) >> PAGE_SHIFT;
>>
>> -        if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
>> +        /*
>> +         * When the CPU does not support TLB range ops, we can
>> +         * handle up to (MAX_DVM_OPS - 1) pages;
>> +         * when it does support TLB range ops, we can handle up to
>> +         * MAX_TLBI_RANGE_PAGES pages.
>> +         */
>> +        if ((!system_supports_tlb_range() &&
>> +             (end - start) >= (MAX_DVM_OPS * stride)) ||
>> +            pages > MAX_TLBI_RANGE_PAGES) {
>>                  flush_tlb_all();
>>                  return;
>>          }
>
> Could the above conditional check for flush_tlb_all() be factored out
> in a helper, which can also be used in __flush_tlb_range_nosync()?
How about adding this helper? I'm not good at naming:
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 42f0ec14fb2c..b7043ff0945f 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -431,6 +431,23 @@ do { \
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
         __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
+static inline int __flush_tlb_range_limit_excess(unsigned long start,
+                unsigned long end, unsigned long pages, unsigned long stride)
+{
+        /*
+         * When the CPU does not support TLB range ops, we can
+         * handle up to (MAX_DVM_OPS - 1) pages;
+         * when it does support TLB range ops, we can handle up to
+         * MAX_TLBI_RANGE_PAGES pages.
+         */
+        if ((!system_supports_tlb_range() &&
+             (end - start) >= (MAX_DVM_OPS * stride)) ||
+            pages > MAX_TLBI_RANGE_PAGES)
+                return -ERANGE;
+
+        return 0;
+}
+
 static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
                                             unsigned long start, unsigned long end,
                                             unsigned long stride, bool last_level,
@@ -442,15 +459,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
         end = round_up(end, stride);
         pages = (end - start) >> PAGE_SHIFT;
-        /*
-         * When not uses TLB range ops, we can handle up to
-         * (MAX_DVM_OPS - 1) pages;
-         * When uses TLB range ops, we can handle up to
-         * MAX_TLBI_RANGE_PAGES pages.
-         */
-        if ((!system_supports_tlb_range() &&
-             (end - start) >= (MAX_DVM_OPS * stride)) ||
-            pages > MAX_TLBI_RANGE_PAGES) {
+        if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
                 flush_tlb_mm(vma->vm_mm);
                 return;
         }