[PATCH-next v2] arm32: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION

liuyuntao (F) liuyuntao12 at huawei.com
Sat Mar 9 05:24:26 PST 2024



On 2024/3/9 16:20, Arnd Bergmann wrote:
> On Sat, Mar 9, 2024, at 07:14, liuyuntao (F) wrote:
>> On 2024/3/8 21:15, Arnd Bergmann wrote:
>>> On Thu, Mar 7, 2024, at 16:12, Yuntao Liu wrote:
>>
>> Thanks for the tests, CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and
>> CONFIG_TRIM_UNUSED_KSYMS do indeed result in a significant improvement.
>> I found that arm32 still doesn't support CONFIG_LTO_CLANG. I've done
>> some work on it, but without success. I'd like to learn more about the
>> CONFIG_LTO_CLANG patch. Do you have any relevant links?
> 
> I did not try to get it to boot and gave up when I did not see
> any size improvement. I think there were previous attempts to
> do it elsewhere, which I did not try to find.
> 

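I tested this patch. The size improvement was tiny (between roughly 2 and
18 parts per ten thousand, depending on the configuration), the extra
compilation time amounted to about a quarter of the LTO build time (i.e.
roughly a third more than the non-LTO build), and the kernel did not boot.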

Strangely, LTO has actually increased the compilation time 
significantly, which seems contrary to its purpose.

           +          +trim      +dce       +trim+dce
no lto    5995384    5858720    5841024    5299032
lto       5990040    5854544    5839992    5289576
shrink    8.9‱     7.1‱     1.7‱     17.8‱


           +          +trim      +dce       +trim+dce
no lto    34.616     33.03      36.093     32.211
lto       46.881     45.324     47.247     43.246
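increase  26.20%     27.10%     23.60%     25.50%
(increase = (lto - no lto) / lto; relative to the no-lto build time the
 increase is 35.4%, 37.2%, 30.9% and 34.3% respectively)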



> The patch below makes it build, but it still requires disabling
> CONFIG_THUMB2_KERNEL, which totally defeats the purpose of shrinking
> the kernel as it adds some 40% size overhead in the vmlinux.
> There are probably also runtime bugs that get introduced by this.
> 
>       Arnd
> 
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index de78ceb821df..7ebfda4839e8 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -2,6 +2,8 @@
>   config ARM
>   	bool
>   	default y
> +	select ARCH_SUPPORTS_LTO_CLANG
> +	select ARCH_SUPPORTS_LTO_CLANG_THIN
>   	select ARCH_32BIT_OFF_T
>   	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
>   	select ARCH_HAS_BINFMT_FLAT
> diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
> index 726ecabcef09..f2ddce451ab9 100644
> --- a/arch/arm/boot/compressed/Makefile
> +++ b/arch/arm/boot/compressed/Makefile
> @@ -9,6 +9,8 @@ OBJS		=
>   
>   HEAD	= head.o
>   OBJS	+= misc.o decompress.o
> +CFLAGS_REMOVE_misc.o += $(CC_FLAGS_LTO)
> +CFLAGS_REMOVE_decompress.o += $(CC_FLAGS_LTO)

Wow, I've run into this issue before and didn't think of solving it this
way. You really have a thorough understanding of these parameters.
On a side note: if CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is enabled but
only a few rodata sections are removed and no functions are eliminated,
are there any compiler or linker options that can control this behavior?
Thanks.

>   ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y)
>   OBJS	+= debug.o
>   AFLAGS_head.o += -DDEBUG
> diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
> index d19d140a10c7..aee9e13023a8 100644
> --- a/arch/arm/mm/flush.c
> +++ b/arch/arm/mm/flush.c
> @@ -38,15 +38,14 @@ EXPORT_SYMBOL(arm_heavy_mb);
>   static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
>   {
>   	unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
> -	const int zero = 0;
>   
>   	set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL));
>   
> -	asm(	"mcrr	p15, 0, %1, %0, c14\n"
> -	"	mcr	p15, 0, %2, c7, c10, 4"
> +	asm("mcrr	p15, 0, %1, %0, c14"
>   	    :
> -	    : "r" (to), "r" (to + PAGE_SIZE - 1), "r" (zero)
> +	    : "r" (to), "r" (to + PAGE_SIZE - 1)
>   	    : "cc");
> +	dsb();
>   }
>   
>   static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len)
> @@ -68,11 +67,11 @@ void flush_cache_mm(struct mm_struct *mm)
>   	}
>   
>   	if (cache_is_vipt_aliasing()) {
> -		asm(	"mcr	p15, 0, %0, c7, c14, 0\n"
> -		"	mcr	p15, 0, %0, c7, c10, 4"
> +		asm("mcr	p15, 0, %0, c7, c14, 0"
>   		    :
>   		    : "r" (0)
>   		    : "cc");
> +		dsb();
>   	}
>   }
>   
> @@ -84,11 +83,11 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
>   	}
>   
>   	if (cache_is_vipt_aliasing()) {
> -		asm(	"mcr	p15, 0, %0, c7, c14, 0\n"
> -		"	mcr	p15, 0, %0, c7, c10, 4"
> +		asm("mcr	p15, 0, %0, c7, c14, 0"
>   		    :
>   		    : "r" (0)
>   		    : "cc");
> +		dsb();
>   	}
>   
>   	if (vma->vm_flags & VM_EXEC)



More information about the linux-arm-kernel mailing list