Kexec on arm64

Arun Chandran achandran at mvista.com
Mon Aug 11 22:42:55 PDT 2014


Hi Geoff, Mark,


Sorry for top posting. I hope we have solved almost all the problems
with kexec in the uni-processor scenario, except converting soft_restart()
to assembly (I will give that a try).

kexec was stress-tested with the L3 cache on, using the changes below.
It ran for 17 hours and rebooted a total of 8226 times without any problem.

Total LE boots - 4122
Total BE boots - 4104

Total LE-to-BE or BE-to-LE switches - 4112

##########################
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 363a246..5b15a00 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -623,7 +623,6 @@ static void kexec_list_flush_cb(void *ctx , unsigned int flag,
                break;
        case IND_SOURCE:
                __flush_dcache_area(addr, PAGE_SIZE);
-               __flush_dcache_area(dest, PAGE_SIZE);
                break;
        default:
                break;
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 4b077e1..e890516 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -31,6 +31,13 @@

 .align 3

+.macro dcache_line_size, reg, tmp
+mrs    \tmp, ctr_el0                   // read CTR
+ubfm   \tmp, \tmp, #16, #19            // cache line size encoding
+mov    \reg, #4                        // bytes per word
+lsl    \reg, \reg, \tmp                // actual cache line size
+.endm
+
 .globl relocate_new_kernel
 relocate_new_kernel:

@@ -56,25 +63,51 @@ relocate_new_kernel:
 .Ltest_source:
        tbz     x10, IND_SOURCE_BIT, .Ltest_indirection

-       /* source: copy_page(x20 = dest, x21 = addr) */
+       mov     x0, x13
+       add     x1, x13, #PAGE_SIZE
+
+       /* Invalidate the destination cache area to make sure that
+        * all the data required for the second stage kernel is
+        * intact at PoC. This is the safest place to do this activity
+        * as we are running with MMU and D-cache off.
+        */
+__inval_cache_range:
+       dcache_line_size x2, x3
+       sub     x3, x2, #1
+       tst     x1, x3                          // end cache line aligned?
+       bic     x1, x1, x3
+       b.eq    1f
+       dc      ivac, x1                        // invalidate D / U line
+1:     tst     x0, x3                          // start cache line aligned?
+       bic     x0, x0, x3
+       b.eq    2f
+       dc      ivac, x0                        // invalidate D / U line
+       b       3f
+2:     dc      ivac, x0                        // invalidate D / U line
+3:     add     x0, x0, x2
+       cmp     x0, x1
+       b.lo    2b
+       dsb     sy

+       /* source: copy_page(x20 = dest, x21 = addr) */
        mov x20, x13
        mov x21, x14

-       prfm    pldl1strm, [x21, #64]
-1:     ldp     x22, x23, [x21]
+       /*prfm  pldl1strm, [x21, #64] */
+.Lcopy_data:
+       ldp     x22, x23, [x21]
        ldp     x24, x25, [x21, #16]
        ldp     x26, x27, [x21, #32]
        ldp     x28, x29, [x21, #48]
        add     x21, x21, #64
-       prfm    pldl1strm, [x21, #64]
+       /*prfm  pldl1strm, [x21, #64]*/
        stnp    x22, x23, [x20]
        stnp    x24, x25, [x20, #16]
        stnp    x26, x27, [x20, #32]
        stnp    x28, x29, [x20, #48]
        add     x20, x20, #64
        tst     x21, #(PAGE_SIZE - 1)
-       b.ne    1b
+       b.ne    .Lcopy_data

        /* dest += PAGE_SIZE */

@@ -115,6 +148,11 @@ relocate_new_kernel:
        mov     x3, xzr

        ldr     x4, kexec_kimage_start
+
+       /* Invalidate entire I-cache */
+       ic      ialluis
+       isb
+       dsb     sy
        br      x4

 .align 3
################################
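
For reference, here is a rough C sketch of what the new dcache_line_size
macro and the __inval_cache_range loop above are doing (the function names
are made up for the sketch; the real code has to stay in relocate_kernel.S
since it runs at EL1 with the MMU and D-cache off):

#include <stdint.h>

/* CTR_EL0[19:16] (DminLine) is log2 of the D-cache line size in words */
static inline uint64_t dcache_line_size(void)
{
        uint64_t ctr;

        asm volatile("mrs %0, ctr_el0" : "=r" (ctr));
        return 4UL << ((ctr >> 16) & 0xf);      /* 4 bytes per word */
}

/* Invalidate (without cleaning) every D-cache line covering [start, end) */
static void inval_dcache_range(uintptr_t start, uintptr_t end)
{
        uint64_t line = dcache_line_size();
        uint64_t mask = line - 1;

        start &= ~mask;                 /* round start down to a line */
        end = (end + mask) & ~mask;     /* round end up to a line */

        for (; start < end; start += line)
                asm volatile("dc ivac, %0" : : "r" (start) : "memory");

        asm volatile("dsb sy" : : : "memory");  /* complete before the copy */
}

Invalidating the destination pages to the PoC before copy_page() keeps stale
cache lines from masking the data the second stage kernel needs, and the
"ic ialluis" added before "br x4" then discards any stale instruction-cache
entries before we jump into the new image.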

I have attached the patch for this.
Now I will try kexec in an SMP configuration.

--Arun

On Fri, Aug 8, 2014 at 3:33 PM, Arun Chandran <achandran at mvista.com> wrote:
> On Fri, Aug 8, 2014 at 11:16 AM, Arun Chandran <achandran at mvista.com> wrote:
>> Hi,
>>
>> On Fri, Aug 8, 2014 at 1:37 AM, Geoff Levand <geoff at infradead.org> wrote:
>>> Hi Arun,
>>>
>>> On Wed, 2014-08-06 at 19:24 +0530, Arun Chandran wrote:
>>>
>>>> I have managed to run this test till 72 times with the
>>>> below changes.
>>>>
>>>> ############################
>>>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>>>> index 363a246..7de11ee 100644
>>>> --- a/arch/arm64/kernel/machine_kexec.c
>>>> +++ b/arch/arm64/kernel/machine_kexec.c
>>>> @@ -623,7 +623,6 @@ static void kexec_list_flush_cb(void *ctx , unsigned int flag,
>>>>   break;
>>>>   case IND_SOURCE:
>>>>   __flush_dcache_area(addr, PAGE_SIZE);
>>>> - __flush_dcache_area(dest, PAGE_SIZE);
>>>>   break;
>>>>   default:
>>>>   break;
>>>> @@ -641,6 +640,8 @@ void machine_kexec(struct kimage *image)
>>>>   phys_addr_t reboot_code_buffer_phys;
>>>>   void *reboot_code_buffer;
>>>>   struct kexec_ctx *ctx = kexec_image_to_ctx(image);
>>>> + unsigned long start, end;
>>>> + int i;
>>>>
>>>>   BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
>>>>   BUG_ON(num_online_cpus() > 1);
>>>> @@ -698,6 +699,20 @@ void machine_kexec(struct kimage *image)
>>>>
>>>>   kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
>>>>
>>>> + start = image->segment[0].mem;
>>>> + end = image->segment[0].mem + image->segment[0].memsz;
>>>> + for (i = 0; i < image->nr_segments; i++) {
>>>> + if (image->segment[i].mem > end)
>>>> + end = image->segment[i].mem + image->segment[i].memsz;
>>>> + }
>>>> +
>>>> + start = (unsigned long)phys_to_virt(start);
>>>> + end = (unsigned long)phys_to_virt(end);
>>>> + pr_info("flushing from %lx to %lx size = %lx\n", start, end, end - start);
>>>> + __flush_dcache_area((void *)start, end - start);
>>>> + //flush_icache_range(start, end);
>>>> + //mdelay(10);
>>>> +
>>>>   soft_restart(reboot_code_buffer_phys);
>>>>  }
>>>
>>> Doing the flush in kexec_list_flush_cb() is almost the same
>>> as using the image->segment to flush.  Did you see a
>>> difference on your system?
>>>
>>
>> Yes, I can see the difference. Let me explain it in detail.
>>
>> I am doing a stress test of "kexec -e" with the below reboot
>> script.
>>
>> ################################
>> #!/bin/sh
>>
>> sleep 5
>> i=$RANDOM
>> j=$(( $i % 2))
>>
>> mount /dev/mmcblk0p1 /mnt
>> count=`cat /mnt/cnt`
>>
>> if [ $j -eq 0 ] ; then
>>     echo "KEXEC rebootng to BE count = $count"
>>     echo $RANDOM > /mnt/"$count""_BE"
>>     kexec -l /mnt/vmlinux_BE.strip
>> --command-line="console=ttyS0,115200 earlyprintk=uart8
>> 250-32bit,0x1c020000 debug swiotlb=65536 log_buf_len=4M"
>> else
>>    echo "KEXEC rebooting to LE count = $count"
>>    echo $RANDOM > /mnt/"$count""_LE"
>>     kexec -l /mnt/vmlinux_LE.strip
>> --command-line="console=ttyS0,115200 earlyprintk=uart8
>> 250-32bit,0x1c020000 debug swiotlb=65536 log_buf_len=4M"
>> fi
>>
>> count=$(( $count + 1 ))
>> echo "$count">/mnt/cnt
>> umount /mnt
>> kexec -e
>> exit $?
>> ###############################
>>
>> Observations with the default code
>> @https://git.linaro.org/people/geoff.levand/linux-kexec.git
>> Changed last on "Mon, 4 Aug 2014 23:24:10 +0000 (16:24 -0700)"
>>
>> a) LE to LE worked without L3 cache on
>> b) BE to BE worked without L3 cache on
>> c) Random endian switching does not work in any case (with or without L3).
>>     It breaks very early and is unstable.
>>
>> Now with the below modifications
>>
>
> I think the cleaner approach is to invalidate
> the cache lines from arch/arm64/kernel/relocate_kernel.S,
> as this code is already aware of the destination to which it has
> to copy the 2nd stage kernel.
>
>
> ########################
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> index 4b077e1..6880c1a 100644
> --- a/arch/arm64/kernel/relocate_kernel.S
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -31,6 +31,13 @@
>
>  .align 3
>
> +.macro dcache_line_size, reg, tmp
> +mrs    \tmp, ctr_el0                   // read CTR
> +ubfm   \tmp, \tmp, #16, #19            // cache line size encoding
> +mov    \reg, #4                        // bytes per word
> +lsl    \reg, \reg, \tmp                // actual cache line size
> +.endm
> +
>  .globl relocate_new_kernel
>  relocate_new_kernel:
>
> @@ -58,23 +65,46 @@ relocate_new_kernel:
>
>         /* source: copy_page(x20 = dest, x21 = addr) */
>
> +       mov     x0, x13
> +       add     x1, x13, #PAGE_SIZE
> +
> +       /* Invalidate the destination cache area */
> +__inval_cache_range:
> +       dcache_line_size x2, x3
> +       sub     x3, x2, #1
> +       tst     x1, x3                          // end cache line aligned?
> +       bic     x1, x1, x3
> +       b.eq    1f
> +       dc      civac, x1                       // clean & invalidate D / U line
> +1:     tst     x0, x3                          // start cache line aligned?
> +       bic     x0, x0, x3
> +       b.eq    2f
> +       dc      civac, x0                       // clean & invalidate D / U line
> +       b       3f
> +2:     dc      ivac, x0                        // invalidate D / U line
> +3:     add     x0, x0, x2
> +       cmp     x0, x1
> +       b.lo    2b
> +       dsb     sy
> +
>         mov x20, x13
>         mov x21, x14
>
> -       prfm    pldl1strm, [x21, #64]
> -1:     ldp     x22, x23, [x21]
> +       /*prfm  pldl1strm, [x21, #64] */
> +.Lcopy_data:
> +       ldp     x22, x23, [x21]
>         ldp     x24, x25, [x21, #16]
>         ldp     x26, x27, [x21, #32]
>         ldp     x28, x29, [x21, #48]
>         add     x21, x21, #64
> -       prfm    pldl1strm, [x21, #64]
> +       /*prfm  pldl1strm, [x21, #64]*/
>         stnp    x22, x23, [x20]
>         stnp    x24, x25, [x20, #16]
>         stnp    x26, x27, [x20, #32]
>         stnp    x28, x29, [x20, #48]
>         add     x20, x20, #64
>         tst     x21, #(PAGE_SIZE - 1)
> -       b.ne    1b
> +       b.ne    .Lcopy_data
>
>         /* dest += PAGE_SIZE */
>
> @@ -115,6 +145,8 @@ relocate_new_kernel:
>         mov     x3, xzr
>
>         ldr     x4, kexec_kimage_start
> +       dsb     sy
> +       isb
>         br      x4
>
>  .align 3
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index f1619c0..c62cba7 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -52,6 +52,13 @@
>   */
>  ENTRY(cpu_cache_off)
>         mrs     x0, sctlr_el1
> +       /* Turn off I-Cache */
> +       bic     x0, x0, #1 << 12                // clear SCTLR.I
> +       msr     sctlr_el1, x0
> +       isb
> +       dsb     sy
> +
> +       mrs     x0, sctlr_el1
>         bic     x0, x0, #1 << 2                 // clear SCTLR.C
>         msr     sctlr_el1, x0
>         isb
>
> #############################
>
> --Arun
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-LE-BE-switching-worked-with-L3.patch
Type: text/x-patch
Size: 2946 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20140812/1d732f96/attachment.bin>

