[PATCH v2] ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions

Måns Rullgård mans at mansr.com
Tue Nov 12 13:03:56 EST 2013


Nicolas Pitre <nicolas.pitre at linaro.org> writes:

> On Tue, 12 Nov 2013, Måns Rullgård wrote:
>
>> Nicolas Pitre <nicolas.pitre at linaro.org> writes:
>> 
>> > On Tue, 12 Nov 2013, Ben Dooks wrote:
>> >
>> >> Given these are single instructions for ARM, is it possible we could
>> >> make a table of all the callers and fix them up when we initialise
>> >> as we do for the SMP/UP case and for page-offset?
>> >
>> > Not really.  Calls to those functions are generated by the compiler 
>> > implicitly when a divisor operand is used and therefore we cannot 
>> > annotate those calls.  We'd have to use special accessors everywhere to 
>> > replace the standard division operand (like we do for 64 by 32 bit 
>> > divisions) but I doubt that people would accept that.
>> 
>> It might be possible to extract this information from relocation tables.
>
> True, but only for individual .o files.  Once the linker puts them 
> together the information is lost, and trying to infer what the linker 
> has done is insane.
>
> Filtering the compiler output to annotate idiv calls before it is 
> assembled would probably be a better solution.

OK, here's an extremely ugly hootenanny of a patch.  It seems to work on
an A7 Cubieboard2.  I would never suggest actually doing this, but maybe
it can be useful for comparing performance against the more palatable
solutions.

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7397db6..cf1cd30 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -113,7 +113,7 @@ endif
 endif
 
 # Need -Uarm for gcc < 3.x
-KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
+KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -include asm/divhack.h
 KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
 
 CHECKFLAGS	+= -D__arm__
diff --git a/arch/arm/include/asm/divhack.h b/arch/arm/include/asm/divhack.h
new file mode 100644
index 0000000..c750b78
--- /dev/null
+++ b/arch/arm/include/asm/divhack.h
@@ -0,0 +1,23 @@
+__asm__ (".macro dobl tgt                                       \n"
+         "    .ifc \\tgt, __aeabi_idiv                          \n"
+         "        .L.sdiv.\\@:                                  \n"
+         "        .pushsection .sdiv_tab.init, \"a\", %progbits \n"
+         "        .word    .L.sdiv.\\@                          \n"
+         "        .popsection                                   \n"
+         "    .endif                                            \n"
+         "    .ifc \\tgt, __aeabi_uidiv                         \n"
+         "        .L.udiv.\\@:                                  \n"
+         "        .pushsection .udiv_tab.init, \"a\", %progbits \n"
+         "        .word    .L.udiv.\\@                          \n"
+         "        .popsection                                   \n"
+         "    .endif                                            \n"
+         "    bl \\tgt                                          \n"
+         ".endm                                                 \n"
+         ".macro defbl                                          \n"
+         "    .macro bl tgt                                     \n"
+         "        .purgem bl                                    \n"
+         "        dobl \\tgt                                    \n"
+         "        defbl                                         \n"
+         "    .endm                                             \n"
+         ".endm                                                 \n"
+         "defbl                                                 \n");
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 067815c1..b3a3fe1 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -375,6 +375,18 @@ static void __init cpuid_init_hwcaps(void)
 	case 1:
 		elf_hwcap |= HWCAP_IDIVT;
 	}
+
+	if (!IS_ENABLED(CONFIG_THUMB2_KERNEL) && (elf_hwcap & HWCAP_IDIVA)) {
+		extern u32 __sdiv_tab_start, __sdiv_tab_end;
+		extern u32 __udiv_tab_start, __udiv_tab_end;
+		u32 *div;
+
+		for (div = &__sdiv_tab_start; div < &__sdiv_tab_end; div++)
+			*(u32 *)*div = 0xe710f110;
+
+		for (div = &__udiv_tab_start; div < &__udiv_tab_end; div++)
+			*(u32 *)*div = 0xe730f110;
+	}
 }
 
 static void __init feat_v6_fixup(void)
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb..3d5c103 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -176,6 +176,8 @@ SECTIONS
 		CON_INITCALL
 		SECURITY_INITCALL
 		INIT_RAM_FS
+		__sdiv_tab_start = .; *(.sdiv_tab.init); __sdiv_tab_end = .;
+		__udiv_tab_start = .; *(.udiv_tab.init); __udiv_tab_end = .;
 	}
 #ifndef CONFIG_XIP_KERNEL
 	.exit.data : {


-- 
Måns Rullgård
mans at mansr.com



More information about the linux-arm-kernel mailing list