Kexec on arm64

Arun Chandran achandran at mvista.com
Thu Aug 7 22:46:07 PDT 2014


Hi,

On Fri, Aug 8, 2014 at 1:37 AM, Geoff Levand <geoff at infradead.org> wrote:
> Hi Arun,
>
> On Wed, 2014-08-06 at 19:24 +0530, Arun Chandran wrote:
>
>> I have managed to run this test till 72 times with the
>> below changes.
>>
>> ############################
>> diff --git a/arch/arm64/kernel/machine_kexec.c
>> b/arch/arm64/kernel/machine_kexec.c
>> index 363a246..7de11ee 100644
>> --- a/arch/arm64/kernel/machine_kexec.c
>> +++ b/arch/arm64/kernel/machine_kexec.c
>> @@ -623,7 +623,6 @@ static void kexec_list_flush_cb(void *ctx ,
>> unsigned int flag,
>>   break;
>>   case IND_SOURCE:
>>   __flush_dcache_area(addr, PAGE_SIZE);
>> - __flush_dcache_area(dest, PAGE_SIZE);
>>   break;
>>   default:
>>   break;
>> @@ -641,6 +640,8 @@ void machine_kexec(struct kimage *image)
>>   phys_addr_t reboot_code_buffer_phys;
>>   void *reboot_code_buffer;
>>   struct kexec_ctx *ctx = kexec_image_to_ctx(image);
>> + unsigned long start, end;
>> + int i;
>>
>>   BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
>>   BUG_ON(num_online_cpus() > 1);
>> @@ -698,6 +699,20 @@ void machine_kexec(struct kimage *image)
>>
>>   kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
>>
>> + start = image->segment[0].mem;
>> + end = image->segment[0].mem + image->segment[0].memsz;
>> + for (i = 0; i < image->nr_segments; i++) {
>> + if (image->segment[i].mem > end)
>> + end = image->segment[i].mem + image->segment[i].memsz;
>> + }
>> +
>> + start = (unsigned long)phys_to_virt(start);
>> + end = (unsigned long)phys_to_virt(end);
>> + pr_info("flushing from %lx to %lx size = %lx\n", start, end, end - start);
>> + __flush_dcache_area((void *)start, end - start);
>> + //flush_icache_range(start, end);
>> + //mdelay(10);
>> +
>>   soft_restart(reboot_code_buffer_phys);
>>  }
>
> Doing the flush in kexec_list_flush_cb() is almost the same
> as using the image->segment to flush.  Did you see a
> difference on your system?
>

Yes I can see the difference. Let me explain it in detail.

I am running a stress test of "kexec -e" using the reboot
script below.

################################
#!/bin/sh

sleep 5
i=$RANDOM
j=$(( $i % 2))

mount /dev/mmcblk0p1 /mnt
count=`cat /mnt/cnt`

if [ $j -eq 0 ] ; then
    echo "KEXEC rebootng to BE count = $count"
    echo $RANDOM > /mnt/"$count""_BE"
    kexec -l /mnt/vmlinux_BE.strip
--command-line="console=ttyS0,115200 earlyprintk=uart8
250-32bit,0x1c020000 debug swiotlb=65536 log_buf_len=4M"
else
   echo "KEXEC rebooting to LE count = $count"
   echo $RANDOM > /mnt/"$count""_LE"
    kexec -l /mnt/vmlinux_LE.strip
--command-line="console=ttyS0,115200 earlyprintk=uart8
250-32bit,0x1c020000 debug swiotlb=65536 log_buf_len=4M"
fi

count=$(( $count + 1 ))
echo "$count">/mnt/cnt
umount /mnt
kexec -e
exit $?
###############################

Observations with the default code
@https://git.linaro.org/people/geoff.levand/linux-kexec.git
Changed last on "Mon, 4 Aug 2014 23:24:10 +0000 (16:24 -0700)"

a) LE to LE worked with the L3 cache turned off
b) BE to BE worked with the L3 cache turned off
c) Random endian switching does not work in either case (with or without L3).
    It is unstable and breaks very early.

Now, with the modifications below:

#############################
 diff --git a/arch/arm64/kernel/machine_kexec.c
b/arch/arm64/kernel/machine_kexec.c
index 363a246..571b68d 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -623,7 +623,6 @@ static void kexec_list_flush_cb(void *ctx ,
unsigned int flag,
                break;
        case IND_SOURCE:
                __flush_dcache_area(addr, PAGE_SIZE);
-               __flush_dcache_area(dest, PAGE_SIZE);
                break;
        default:
                break;
@@ -636,11 +635,13 @@ static void kexec_list_flush_cb(void *ctx ,
unsigned int flag,
  * Called from the core kexec code for a sys_reboot with
LINUX_REBOOT_CMD_KEXEC.
  */

+unsigned long dflush_start, dflush_end;
 void machine_kexec(struct kimage *image)
 {
        phys_addr_t reboot_code_buffer_phys;
        void *reboot_code_buffer;
        struct kexec_ctx *ctx = kexec_image_to_ctx(image);
+       int i;

        BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
        BUG_ON(num_online_cpus() > 1);
@@ -698,6 +699,19 @@ void machine_kexec(struct kimage *image)

        kexec_list_walk(NULL, image->head, kexec_list_flush_cb);

+       dflush_start = image->segment[0].mem;
+       dflush_end = image->segment[0].mem + image->segment[0].memsz;
+       for (i = 0; i < image->nr_segments; i++) {
+               if (image->segment[i].mem > dflush_end)
+                       dflush_end = image->segment[i].mem +
image->segment[i].memsz;
+       }
+
+       dflush_start = (unsigned long)phys_to_virt(dflush_start);
+       dflush_end = (unsigned long)phys_to_virt(dflush_end);
+
+       __flush_dcache_area((void *)&dflush_start, sizeof(dflush_start));
+       __flush_dcache_area((void *)&dflush_end, sizeof(dflush_end));
+
        soft_restart(reboot_code_buffer_phys);
 }

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index aa13521..b8c58d8 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -57,6 +57,7 @@ unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif

+extern unsigned long dflush_start, dflush_end;
 static void setup_restart(void)
 {
        /*
@@ -78,6 +79,8 @@ static void setup_restart(void)

        /* Push out any further dirty data, and ensure cache is empty */
        flush_cache_all();
+
+       __flush_dcache_area((void*)dflush_start, dflush_end - dflush_start);
 }

 void soft_restart(unsigned long addr)
diff --git a/arch/arm64/kernel/relocate_kernel.S
b/arch/arm64/kernel/relocate_kernel.S
index 4b077e1..a49549e 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -61,13 +61,13 @@ relocate_new_kernel:
        mov x20, x13
        mov x21, x14

-       prfm    pldl1strm, [x21, #64]
+       /*prfm  pldl1strm, [x21, #64] */
 1:     ldp     x22, x23, [x21]
        ldp     x24, x25, [x21, #16]
        ldp     x26, x27, [x21, #32]
        ldp     x28, x29, [x21, #48]
        add     x21, x21, #64
-       prfm    pldl1strm, [x21, #64]
+       /*prfm  pldl1strm, [x21, #64]*/
        stnp    x22, x23, [x20]
        stnp    x24, x25, [x20, #16]
        stnp    x26, x27, [x20, #32]
@@ -115,6 +115,8 @@ relocate_new_kernel:
        mov     x3, xzr

        ldr     x4, kexec_kimage_start
+       dsb     sy
+       isb
        br      x4

 .align 3
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index f1619c0..7d81b86 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -52,6 +52,12 @@
  */
 ENTRY(cpu_cache_off)
        mrs     x0, sctlr_el1
+       bic     x0, x0, #1 << 12                // clear SCTLR.C
+       msr     sctlr_el1, x0
+       isb
+       dsb     sy
+
+       mrs     x0, sctlr_el1
        bic     x0, x0, #1 << 2                 // clear SCTLR.C
        msr     sctlr_el1, x0
        isb
###########################

a) I am able to run the random endian switching test for
11.5 hours without any failures.

It rebooted 6542 times in total.
total LE boots = 3241
total BE boots = 3301

Out of that 1625 times it switched from "LE to BE"
or "BE to LE"

One major modification is flushing the Dcache area of
the new Image after turning off CPU caches.

This makes sure that L3 contains no lines in the
new "kernel Image area". I am still not sure what happens
with the other lines still in L3. Does the new kernel have
to do a __flush_dcache_area() on all the new pages
it is going to give to userspace?

Please refer to the discussion at
http://lists.linaro.org/pipermail/linaro-kernel/2013-August/006155.html
for more details.

It says that L3 cache becomes transparent when
lower level caches are OFF. So we need to clean
L3 when it is transparent.

As for adding the barriers and removing the prefetching in
arch/arm64/kernel/relocate_kernel.S: I think this makes
sure that the CPU holds no stale data while
performing an endian switch. Am I right here?

There is one more change, which turns off the Icache (the
SCTLR bit 12 clear added to cpu_cache_off()).
I am not sure why I did this. I will now rerun the test
without this change.

>> diff --git a/arch/arm64/kernel/relocate_kernel.S
>> b/arch/arm64/kernel/relocate_kernel.S
>> index 4b077e1..a49549e 100644
>> --- a/arch/arm64/kernel/relocate_kernel.S
>> +++ b/arch/arm64/kernel/relocate_kernel.S
>
> I think these changes are good.  I'll add them in.

Yes, please. Thank you for taking them in.

--Arun



More information about the linux-arm-kernel mailing list