[PATCH v2] arm64: optimize flush tlb kernel range
Kefeng Wang
wangkefeng.wang at huawei.com
Fri Sep 20 04:17:36 PDT 2024
On 2024/9/20 14:10, Anshuman Khandual wrote:
>
>
> On 9/20/24 09:25, Kefeng Wang wrote:
>> Currently the kernel TLB is flushed page by page if the target
>> VA range is less than MAX_DVM_OPS * PAGE_SIZE; otherwise we'll
>> brutally issue a TLBI ALL.
>>
>> But we can optimize the case where the CPU supports TLB range
>> operations: convert to use __flush_tlb_range_op() like the other
>> TLB range flushes to improve performance.
>>
>> Co-developed-by: Yicong Yang <yangyicong at hisilicon.com>
>> Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang at huawei.com>
>> ---
>> v2:
>> - address Catalin's comments and use __flush_tlb_range_op() directly
>>
>> arch/arm64/include/asm/tlbflush.h | 24 +++++++++++++++++-------
>> 1 file changed, 17 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index 95fbc8c05607..42f0ec14fb2c 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -492,19 +492,29 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>>
>>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>>  {
>> -        unsigned long addr;
>> +        const unsigned long stride = PAGE_SIZE;
>> +        unsigned long pages;
>> +
>> +        start = round_down(start, stride);
>> +        end = round_up(end, stride);
>> +        pages = (end - start) >> PAGE_SHIFT;
>>
>> -        if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
>> +        /*
>> +         * When the CPU does not support TLB range ops, we can
>> +         * handle up to (MAX_DVM_OPS - 1) pages;
>> +         * when it does support TLB range ops, we can handle up to
>> +         * MAX_TLBI_RANGE_PAGES pages.
>> +         */
>> +        if ((!system_supports_tlb_range() &&
>> +             (end - start) >= (MAX_DVM_OPS * stride)) ||
>> +            pages > MAX_TLBI_RANGE_PAGES) {
>>                  flush_tlb_all();
>>                  return;
>>          }
>
> Could the above conditional check for flush_tlb_all() be factored out
> in a helper, which can also be used in __flush_tlb_range_nosync()?
How about adding this helper? I'm not good at naming:
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 42f0ec14fb2c..b7043ff0945f 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -431,6 +431,23 @@ do { \
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
         __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
+static inline int __flush_tlb_range_limit_excess(unsigned long start,
+                unsigned long end, unsigned long pages, unsigned long stride)
+{
+        /*
+         * When the CPU does not support TLB range ops, we can
+         * handle up to (MAX_DVM_OPS - 1) pages;
+         * when it does support TLB range ops, we can handle up to
+         * MAX_TLBI_RANGE_PAGES pages.
+         */
+        if ((!system_supports_tlb_range() &&
+             (end - start) >= (MAX_DVM_OPS * stride)) ||
+            pages > MAX_TLBI_RANGE_PAGES)
+                return -ERANGE;
+
+        return 0;
+}
+
 static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
                                             unsigned long start, unsigned long end,
                                             unsigned long stride, bool last_level,
@@ -442,15 +459,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
         end = round_up(end, stride);
         pages = (end - start) >> PAGE_SHIFT;
-        /*
-         * When not uses TLB range ops, we can handle up to
-         * (MAX_DVM_OPS - 1) pages;
-         * When uses TLB range ops, we can handle up to
-         * MAX_TLBI_RANGE_PAGES pages.
-         */
-        if ((!system_supports_tlb_range() &&
-             (end - start) >= (MAX_DVM_OPS * stride)) ||
-            pages > MAX_TLBI_RANGE_PAGES) {
+        if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
                 flush_tlb_mm(vma->vm_mm);
                 return;
         }