[PATCH v3 4/6] ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses
Sricharan R
r.sricharan at ti.com
Fri Oct 4 01:37:33 EDT 2013
Hi,
On Friday 04 October 2013 05:47 AM, Nicolas Pitre wrote:
> On Thu, 3 Oct 2013, Santosh Shilimkar wrote:
>
>> From: Sricharan R <r.sricharan at ti.com>
>>
>> The current phys_to_virt patching mechanism works only for 32 bit
>> physical addresses and this patch extends the idea for 64bit physical
>> addresses.
>>
>> The 64bit v2p patching mechanism patches the higher 8 bits of physical
>> address with a constant using 'mov' instruction and lower 32bits are patched
>> using 'add'. While this is correct, on those platforms where the lowmem addressable
>> physical memory spans across the 4GB boundary, a carry bit can be produced as a
>> result of the addition of the lower 32 bits. This has to be taken into account and added
>> into the upper 32 bits. The patched __pv_offset and VA are added in the lower 32 bits, where
>> __pv_offset can be in two's complement form when PA_START < VA_START and that can
>> result in a false carry bit.
>>
>> e.g
>> 1) PA = 0x80000000; VA = 0xC0000000
>> __pv_offset = PA - VA = 0xC0000000 (2's complement)
>>
>> 2) PA = 0x2 80000000; VA = 0xC0000000
>> __pv_offset = PA - VA = 0x1 C0000000
>>
>> So adding __pv_offset + VA should never result in a true overflow for (1).
>> So in order to differentiate between a true carry, a __pv_offset is extended
>> to 64bit and the upper 32bits will have 0xffffffff if __pv_offset is
>> 2's complement. So 'mvn #0' is inserted instead of 'mov' while patching
>> for the same reason. Since the mov, add and sub instructions are to be patched
>> with different constants inside the same stub, the rotation field
>> of the opcode is used to differentiate between them.
>>
>> So the above examples for v2p translation becomes for VA=0xC0000000,
>> 1) PA[63:32] = 0xffffffff
>> PA[31:0] = VA + 0xC0000000 --> results in a carry
>> PA[63:32] = PA[63:32] + carry
>>
>> PA[63:0] = 0x0 80000000
>>
>> 2) PA[63:32] = 0x1
>> PA[31:0] = VA + 0xC0000000 --> results in a carry
>> PA[63:32] = PA[63:32] + carry
>>
>> PA[63:0] = 0x2 80000000
>>
>> The above ideas were suggested by Nicolas Pitre <nico at linaro.org> as
>> part of the review of first and second versions of the subject patch.
>>
>> There is no corresponding change on the phys_to_virt() side, because
>> computations on the upper 32-bits would be discarded anyway.
>>
>> Cc: Nicolas Pitre <nico at linaro.org>
>> Cc: Russell King <linux at arm.linux.org.uk>
>>
>> Signed-off-by: Sricharan R <r.sricharan at ti.com>
>> Signed-off-by: Santosh Shilimkar <santosh.shilimkar at ti.com>
> Almost there ...
>
>> ---
>> arch/arm/include/asm/memory.h | 35 +++++++++++++++++++++---
>> arch/arm/kernel/armksyms.c | 1 +
>> arch/arm/kernel/head.S | 60 ++++++++++++++++++++++++++---------------
>> arch/arm/kernel/patch.c | 3 +++
>> 4 files changed, 75 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
>> index d9b96c65..942ad84 100644
>> --- a/arch/arm/include/asm/memory.h
>> +++ b/arch/arm/include/asm/memory.h
>> @@ -172,9 +172,12 @@
>> * so that all we need to do is modify the 8-bit constant field.
>> */
>> #define __PV_BITS_31_24 0x81000000
>> +#define __PV_BITS_7_0 0x81
>>
>> extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
>> -extern unsigned long __pv_phys_offset;
>> +extern u64 __pv_phys_offset;
>> +extern u64 __pv_offset;
>> +
>> #define PHYS_OFFSET __pv_phys_offset
>>
>> #define __pv_stub(from,to,instr,type) \
>> @@ -186,10 +189,36 @@ extern unsigned long __pv_phys_offset;
>> : "=r" (to) \
>> : "r" (from), "I" (type))
>>
>> +#define __pv_stub_mov_hi(t) \
>> + __asm__ volatile("@ __pv_stub_mov\n" \
>> + "1: mov %R0, %1\n" \
>> + " .pushsection .pv_table,\"a\"\n" \
>> + " .long 1b\n" \
>> + " .popsection\n" \
>> + : "=r" (t) \
>> + : "I" (__PV_BITS_7_0))
>> +
>> +#define __pv_add_carry_stub(x, y) \
>> + __asm__ volatile("@ __pv_add_carry_stub\n" \
>> + "1: adds %Q0, %1, %2\n" \
>> + " adc %R0, %R0, #0\n" \
>> + " .pushsection .pv_table,\"a\"\n" \
>> + " .long 1b\n" \
>> + " .popsection\n" \
>> + : "+r" (y) \
>> + : "r" (x), "I" (__PV_BITS_31_24) \
> The third operand i.e. __PV_BITS_31_24 is useless here.
This is used to encode the correct rotation and we use
this in the patching code to identify the offset to
be patched.
>> + : "cc")
>> +
>> static inline phys_addr_t __virt_to_phys(unsigned long x)
>> {
>> - unsigned long t;
>> - __pv_stub(x, t, "add", __PV_BITS_31_24);
>> + phys_addr_t t;
>> +
>> + if (sizeof(phys_addr_t) == 4) {
>> + __pv_stub(x, t, "add", __PV_BITS_31_24);
>> + } else {
>> + __pv_stub_mov_hi(t);
>> + __pv_add_carry_stub(x, t);
>> + }
>> return t;
>> }
>>
>> diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
>> index 60d3b73..1f031dd 100644
>> --- a/arch/arm/kernel/armksyms.c
>> +++ b/arch/arm/kernel/armksyms.c
>> @@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc);
>>
>> #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>> EXPORT_SYMBOL(__pv_phys_offset);
>> +EXPORT_SYMBOL(__pv_offset);
>> #endif
>> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
>> index 2c7cc1e..90d04d7 100644
>> --- a/arch/arm/kernel/head.S
>> +++ b/arch/arm/kernel/head.S
>> @@ -536,6 +536,14 @@ ENTRY(fixup_smp)
>> ldmfd sp!, {r4 - r6, pc}
>> ENDPROC(fixup_smp)
>>
>> +#ifdef __ARMEB__
>> +#define LOW_OFFSET 0x4
>> +#define HIGH_OFFSET 0x0
>> +#else
>> +#define LOW_OFFSET 0x0
>> +#define HIGH_OFFSET 0x4
>> +#endif
>> +
>> #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>>
>> /* __fixup_pv_table - patch the stub instructions with the delta between
>> @@ -546,17 +554,20 @@ ENDPROC(fixup_smp)
>> __HEAD
>> __fixup_pv_table:
>> adr r0, 1f
>> - ldmia r0, {r3-r5, r7}
>> - sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
>> + ldmia r0, {r3-r7}
>> + mvn ip, #0
>> + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
>> add r4, r4, r3 @ adjust table start address
>> add r5, r5, r3 @ adjust table end address
>> - add r7, r7, r3 @ adjust __pv_phys_offset address
>> - str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset
>> + add r6, r6, r3 @ adjust __pv_phys_offset address
>> + add r7, r7, r3 @ adjust __pv_offset address
>> + str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset
>> + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits
>> mov r6, r3, lsr #24 @ constant for add/sub instructions
>> teq r3, r6, lsl #24 @ must be 16MiB aligned
>> THUMB( it ne @ cross section branch )
>> bne __error
>> - str r6, [r7, #4] @ save to __pv_offset
>> + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits
>> b __fixup_a_pv_table
>> ENDPROC(__fixup_pv_table)
>>
>> @@ -565,9 +576,18 @@ ENDPROC(__fixup_pv_table)
>> .long __pv_table_begin
>> .long __pv_table_end
>> 2: .long __pv_phys_offset
>> + .long __pv_offset
>>
>> .text
>> __fixup_a_pv_table:
>> + adr r0, 3f
>> + ldr r6, [r0]
>> + add r6, r6, r3
>> + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
>> + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
>> + mov r6, r6, lsr #24
>> + cmn r0, #1
>> + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
>> #ifdef CONFIG_THUMB2_KERNEL
>> lsls r6, #24
>> beq 2f
>> @@ -582,9 +602,15 @@ __fixup_a_pv_table:
>> b 2f
>> 1: add r7, r3
>> ldrh ip, [r7, #2]
>> - and ip, 0x8f00
>> - orr ip, r6 @ mask in offset bits 31-24
>> + tst ip, #0x4000
>> + and ip, #0x8f00
>> + orrne ip, r6 @ mask in offset bits 31-24
>> + orreq ip, r0 @ mask in offset bits 7-0
>> strh ip, [r7, #2]
>> + ldrheq ip, [r7]
>> + biceq ip, #0x20
>> + orreq ip, ip, r0, lsr #16
>> + strheq ip, [r7]
>> 2: cmp r4, r5
>> ldrcc r7, [r4], #4 @ use branch for delay slot
>> bcc 1b
>> @@ -593,7 +619,10 @@ __fixup_a_pv_table:
>> b 2f
>> 1: ldr ip, [r7, r3]
>> bic ip, ip, #0x000000ff
>> - orr ip, ip, r6 @ mask in offset bits 31-24
>> + tst ip, #0xf00 @ check the rotation field
>> + orrne ip, ip, r6 @ mask in offset bits 31-24
>> + biceq ip, ip, #0x400000 @ clear bit 22
>> + orreq ip, ip, r0 @ mask in offset bits 7-0
>> str ip, [r7, r3]
>> 2: cmp r4, r5
>> ldrcc r7, [r4], #4 @ use branch for delay slot
>> @@ -602,28 +631,17 @@ __fixup_a_pv_table:
>> #endif
>> ENDPROC(__fixup_a_pv_table)
>>
>> +3: .long __pv_offset
>> +
>> ENTRY(fixup_pv_table)
>> stmfd sp!, {r4 - r7, lr}
>> - ldr r2, 2f @ get address of __pv_phys_offset
>> mov r3, #0 @ no offset
>> mov r4, r0 @ r0 = table start
>> add r5, r0, r1 @ r1 = table size
>> - ldr r6, [r2, #4] @ get __pv_offset
>> bl __fixup_a_pv_table
>> ldmfd sp!, {r4 - r7, pc}
>> ENDPROC(fixup_pv_table)
>>
>> - .align
>> -2: .long __pv_phys_offset
>> -
>> - .data
>> - .globl __pv_phys_offset
>> - .type __pv_phys_offset, %object
>> -__pv_phys_offset:
>> - .long 0
>> - .size __pv_phys_offset, . - __pv_phys_offset
>> -__pv_offset:
>> - .long 0
>> #endif
>>
>> #include "head-common.S"
>> diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
>> index 07314af..8356312 100644
>> --- a/arch/arm/kernel/patch.c
>> +++ b/arch/arm/kernel/patch.c
>> @@ -8,6 +8,9 @@
>>
>> #include "patch.h"
>>
>> +u64 __pv_phys_offset __attribute__((section(".data")));
>> +u64 __pv_offset __attribute__((section(".data")));
> Please add a comment explaining why you force those variables out of the
> .bss section. This is unlikely to be obvious to people.
>
> In fact, is there a reason why you moved those out of head.S? You only
> needed to replace the .long with .quad to match the u64 type.
>
> I think I might have suggested moving them out if they were to be typed
> with phys_addr_t, but using a fixed u64 is simpler.
Yes, I moved it here after your comments :-). Since it is always u64,
I can move it to head.S with .quad as well.
Regards,
Sricharan
More information about the linux-arm-kernel
mailing list