[PATCH v2] ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions
Måns Rullgård
mans at mansr.com
Tue Nov 12 13:03:56 EST 2013
Nicolas Pitre <nicolas.pitre at linaro.org> writes:
> On Tue, 12 Nov 2013, Måns Rullgård wrote:
>
>> Nicolas Pitre <nicolas.pitre at linaro.org> writes:
>>
>> > On Tue, 12 Nov 2013, Ben Dooks wrote:
>> >
>> >> Given these are single instructions for ARM, is it possible we could
>> >> make a table of all the callers and fix them up when we initialise
>> >> as we do for the SMP/UP case and for page-offset?
>> >
>> > Not really. Calls to those functions are generated by the compiler
>> > implicitly when a divisor operand is used and therefore we cannot
>> > annotate those calls. We'd have to use special accessors everywhere to
>> > replace the standard division operand (like we do for 64 by 32 bit
>> > divisions) but I doubt that people would accept that.
>>
>> It might be possible to extract this information from relocation tables.
>
> True, but only for individual .o files. Once the linker puts them
> together the information is lost, and trying to infer what the linker
> has done is insane.
>
> Filtering the compiler output to annotate idiv calls before it is
> assembled would probably be a better solution.
OK, here's an extremely ugly hootenanny of a patch. It seems to work on
an A7 Cubieboard2. I would never suggest actually doing this, but maybe
it can be useful for comparing performance against the more palatable
solutions.
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7397db6..cf1cd30 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -113,7 +113,7 @@ endif
endif
# Need -Uarm for gcc < 3.x
-KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
+KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -include asm/divhack.h
KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
CHECKFLAGS += -D__arm__
diff --git a/arch/arm/include/asm/divhack.h b/arch/arm/include/asm/divhack.h
new file mode 100644
index 0000000..c750b78
--- /dev/null
+++ b/arch/arm/include/asm/divhack.h
@@ -0,0 +1,23 @@
+__asm__ (".macro dobl tgt \n" /* dobl: emit "bl tgt", first recording the call site if tgt is one of the EABI integer-divide helpers */
+ " .ifc \\tgt, __aeabi_idiv \n" /* string-compare the branch target against the signed-divide helper name */
+ " .L.sdiv.\\@: \n" /* local label placed at the upcoming bl; \@ is expanded by the assembler so each macro expansion gets a distinct label (see GNU as .macro docs) */
+ " .pushsection .sdiv_tab.init, \"a\", %progbits \n" /* temporarily switch to the signed-divide call-site table section */
+ " .word .L.sdiv.\\@ \n" /* store the address of the call-site label as one 32-bit table entry */
+ " .popsection \n" /* return to the section the caller was assembling into */
+ " .endif \n"
+ " .ifc \\tgt, __aeabi_uidiv \n" /* same bookkeeping for the unsigned-divide helper */
+ " .L.udiv.\\@: \n" /* local label placed at the upcoming bl */
+ " .pushsection .udiv_tab.init, \"a\", %progbits \n" /* temporarily switch to the unsigned-divide call-site table section */
+ " .word .L.udiv.\\@ \n" /* store the address of the call-site label as one 32-bit table entry */
+ " .popsection \n"
+ " .endif \n"
+ " bl \\tgt \n" /* the actual branch; dobl is only invoked after the bl macro has been purged (see defbl), so this is the real instruction */
+ ".endm \n"
+ ".macro defbl \n" /* defbl: (re)install a macro named "bl" that shadows the bl mnemonic */
+ " .macro bl tgt \n" /* every "bl <target>" assembled after this point expands this macro instead of the instruction */
+ " .purgem bl \n" /* remove the shadowing macro first so the bl inside dobl does not recurse into it */
+ " dobl \\tgt \n" /* record the call site if applicable and emit the real branch */
+ " defbl \n" /* re-install the shadowing macro for the next bl in this translation unit */
+ " .endm \n"
+ ".endm \n"
+ "defbl \n"); /* arm the bl override at the point where this header is included */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 067815c1..b3a3fe1 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -375,6 +375,18 @@ static void __init cpuid_init_hwcaps(void)
case 1:
elf_hwcap |= HWCAP_IDIVT;
}
+
+ if (!IS_ENABLED(CONFIG_THUMB2_KERNEL) && (elf_hwcap & HWCAP_IDIVA)) {
+ extern u32 __sdiv_tab_start, __sdiv_tab_end;
+ extern u32 __udiv_tab_start, __udiv_tab_end;
+ u32 *div;
+
+ for (div = &__sdiv_tab_start; div < &__sdiv_tab_end; div++)
+ *(u32 *)*div = 0xe710f110;
+
+ for (div = &__udiv_tab_start; div < &__udiv_tab_end; div++)
+ *(u32 *)*div = 0xe730f110;
+ }
}
static void __init feat_v6_fixup(void)
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb..3d5c103 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -176,6 +176,8 @@ SECTIONS
CON_INITCALL
SECURITY_INITCALL
INIT_RAM_FS
+ __sdiv_tab_start = .; *(.sdiv_tab.init); __sdiv_tab_end = .;
+ __udiv_tab_start = .; *(.udiv_tab.init); __udiv_tab_end = .;
}
#ifndef CONFIG_XIP_KERNEL
.exit.data : {
--
Måns Rullgård
mans at mansr.com
More information about the linux-arm-kernel
mailing list