Kexec and KVM not working gracefully together
Frediano Ziglio
freddy77 at gmail.com
Thu Feb 5 08:56:42 PST 2015
2015-02-05 14:27 GMT+00:00 Frediano Ziglio <freddy77 at gmail.com>:
> 2015-02-05 13:32 GMT+00:00 Marc Zyngier <marc.zyngier at arm.com>:
>> On 05/02/15 10:51, Frediano Ziglio wrote:
>>> 2015-02-05 10:27 GMT+00:00 AKASHI Takahiro <takahiro.akashi at linaro.org>:
>>>> On 02/05/2015 06:43 PM, Marc Zyngier wrote:
>>>>>
>>>>> On Thu, 5 Feb 2015 06:17:04 +0000
>>>>> AKASHI Takahiro <takahiro.akashi at linaro.org> wrote:
>>>>>
>>>>>> Frediano
>>>>>> Cc: Marc
>>>>>>
>>>>>> Are you going to fix this issue on arm?
>>>>>> As I'm now working on the same issue on arm64,
>>>>>> we should share the idea and some code.
>>>>>
>>>>>
>>>>> Just to be perfectly clear: KVM support for Kexec on arm and arm64
>>>>> should be exactly the same, with the exception of some (very
>>>>> limited) assembly code for the MMU teardown.
>>>>
>>>>
>>>> Yep, that's exactly what I meant.
>>>>
>>>>> And I'd like to see both architectures addressed at the same time,
>>>>> without statements such as "we'll do that as a next step". As such, I'd
>>>>> very much recommend that Frediano and you work together to address this
>>>>> problem.
>>>>
>>>>
>>>> I hope so.
>>>>
>>>> Thanks,
>>>> -Takahiro AKASHI
>>>>
>>>>> Thanks,
>>>>>
>>>>> M.
>>>>>
>>>
>>>
>>>
>>> Well, let's let the code speak. I'm actually testing on an ARM32
>>> platform with a single CPU (the platform has 16 cores, but I'm using a
>>> non-SMP kernel for my tests for unrelated reasons; I should be able to
>>> test SMP in a couple of days).
>>>
>>> The key point is the switch-back code (kvm_cpu_reset in arm.c). The idea:
>>> - at boot time I save HSCTLR value
>>
>> Why do you need this? The content of that register is known at any point
>> of the lifetime of the kernel, and so is its value at reboot time
>> (basically MMU and caches disabled, ARM mode).
>>
>> This should be part of your MMU teardown process.
>>
>
> I should have put an RFC in the subject. I can probably just adjust the
> value without saving the boot one.
>
>>> - restore boot page tables and trampoline before cpu reset
>>> - call kvm_cpu_reset instead of cpu_reset (this will call cpu_reset if
>>> kvm is disabled)
>>> - set hvbar to trampoline (virtual memory)
>>> - set hvbar to physical trampoline and boot pages (with trampoline
>>> mapped at physical and virtual that is identity)
>>> - flush memory while disabling caching on hypervisor mode (not doing
>>> cause a lot of instability, now I rebooted more than 100 times in a
>>> row)
>>> - call hypervisor mode to do the reset cpu restoring the boot HSCTLR value
>>>
>>>
>>> Hope gmail does not screw up my patch...
>>>
>>>
>>> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
>>> index 3a67bec..71380a1 100644
>>> --- a/arch/arm/include/asm/kvm_asm.h
>>> +++ b/arch/arm/include/asm/kvm_asm.h
>>> @@ -97,7 +97,24 @@ extern char __kvm_hyp_code_end[];
>>> extern void __kvm_flush_vm_context(void);
>>> extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
>>>
>>> +extern void __kvm_set_vectors(unsigned long phys_vector_base);
>>> +
>>> extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>>> +
>>> +#ifdef CONFIG_HAVE_KVM
>>
>> That doesn't seem right. Shouldn't that be #ifndef?
>>
>
> Well... I don't know where I got that define, and the name is wrong too.
> However, I think the kernel should switch back to hypervisor mode
> whether KVM is enabled or not. head.S tests for CONFIG_ARM_VIRT_EXT,
> so I should probably use the same test here. I should also try to
> compile with KVM both enabled and disabled, just to check that the
> kernel links correctly.
>
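For the header, what I have in mind for v2 is roughly this (just a
sketch of the guard; the stub for kernels without virt extensions simply
falls through to the plain SVC reset):

#ifndef CONFIG_ARM_VIRT_EXT
/* No virtualization extensions: nothing to tear down, plain reset. */
static inline void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
{
	phys_reset(addr);
}
#else
extern void kvm_cpu_reset(void (*phys_reset)(void *), void *addr);
#endif
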
>>> +static void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
>>> +{
>>> + phys_reset(addr);
>>> +}
>>> +static inline int kvm_mmu_reset_prepare(void)
>>> +{
>>> + return 0;
>>> +}
>>> +#else
>>> +extern void kvm_cpu_reset(void (*phys_reset)(void *), void *addr);
>>> +extern int kvm_mmu_reset_prepare(void);
>>> +#endif
>>> +
>>> #endif
>>>
>>> #endif /* __ARM_KVM_ASM_H__ */
>>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>>> index 04b4ea0..c0b6c20 100644
>>> --- a/arch/arm/include/asm/kvm_host.h
>>> +++ b/arch/arm/include/asm/kvm_host.h
>>> @@ -192,6 +192,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu,
>>> const struct kvm_one_reg *);
>>> int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>> int exception_index);
>>>
>>> +extern unsigned kvm_saved_sctlr;
>>> +
>>> static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
>>> phys_addr_t pgd_ptr,
>>> unsigned long hyp_stack_ptr,
>>> @@ -213,7 +215,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
>>> * PCS!).
>>> */
>>>
>>> - kvm_call_hyp(NULL, 0, boot_pgd_ptr);
>>> + kvm_saved_sctlr = (unsigned) kvm_call_hyp(NULL, 0, boot_pgd_ptr);
>>>
>>> kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
>>> }
>>> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
>>> index fdfa3a7..026a1e4 100644
>>> --- a/arch/arm/kernel/process.c
>>> +++ b/arch/arm/kernel/process.c
>>> @@ -41,6 +41,7 @@
>>> #include <asm/system_misc.h>
>>> #include <asm/mach/time.h>
>>> #include <asm/tls.h>
>>> +#include <asm/kvm_asm.h>
>>>
>>> #ifdef CONFIG_CC_STACKPROTECTOR
>>> #include <linux/stackprotector.h>
>>> @@ -60,7 +61,7 @@ static const char *isa_modes[] __maybe_unused = {
>>> };
>>>
>>> extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
>>> -typedef void (*phys_reset_t)(unsigned long);
>>> +typedef void (*phys_reset_t)(void *);
>>>
>>> /*
>>> * A temporary stack to use for CPU reset. This is static so that we
>>> @@ -89,7 +90,7 @@ static void __soft_restart(void *addr)
>>>
>>> /* Switch to the identity mapping. */
>>> phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
>>> - phys_reset((unsigned long)addr);
>>> + kvm_cpu_reset(phys_reset, addr);
>>>
>>> /* Should never get here. */
>>> BUG();
>>> @@ -107,6 +108,8 @@ void soft_restart(unsigned long addr)
>>> if (num_online_cpus() == 1)
>>> outer_disable();
>>>
>>> + kvm_mmu_reset_prepare();
>>> +
>>> /* Change to the new stack and continue with the reset. */
>>> call_with_stack(__soft_restart, (void *)addr, (void *)stack);
>>>
>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>>> index 0b0d58a..7727678 100644
>>> --- a/arch/arm/kvm/arm.c
>>> +++ b/arch/arm/kvm/arm.c
>>> @@ -63,6 +63,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock);
>>>
>>> static bool vgic_present;
>>>
>>> +unsigned kvm_saved_sctlr;
>>> +
>>> static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
>>> {
>>> BUG_ON(preemptible());
>>> @@ -1079,6 +1081,37 @@ out_err:
>>> return err;
>>> }
>>>
>>> +void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
>>> +{
>>> + unsigned long vector;
>>> +
>>> + if (!is_hyp_mode_available()) {
>>> + phys_reset(addr);
>>> + return;
>>> + }
>>> +
>>> + vector = (unsigned long) kvm_get_idmap_vector();
>>> +
>>> + kvm_call_hyp(__kvm_flush_vm_context);
>>
>> What are you trying to achieve here? This only makes sense if you're
>> reassigning VMIDs. I would certainly hope that, by the time you're
>> tearing KVM down for reboot, no VM is up and running.
>>
>
> Leftover from too many attempts at flushing the cache... I can remove
> this flush.
>
>>> +
>>> + /* set vectors so we call initialization routines */
>>> + /* trampoline is still on memory, physical ram is not mapped */
>>
>> Comment is incredibly misleading. Physical memory *IS* mapped. What you
>> have is a kernel mapping of that page.
>>
>
> Right, it should be: "the trampoline is mapped at TRAMPOLINE_VA but not
> at its physical address, so we don't have an identity mapping that
> would let us disable the MMU".
>
>>> + kvm_call_hyp(__kvm_set_vectors,(unsigned long) (TRAMPOLINE_VA + (vector & (PAGE_SIZE-1))));
>>
>> That really deserves a comment as to *why* you're doing that offsetting.
>>
>
> I think "Set exception vectors at trampoline at TRAMPOLINE_VA address
> which is mapped".
>
>>> +
>>> + /* set HVBAR to physical, page table to identity to do the switch */
>>> + kvm_call_hyp((void*) 0x100, (unsigned long) vector, kvm_mmu_get_boot_httbr());
>>
>> Where is that 0x100 (stack?) coming from?
>>
>
> It could be 0x4; it only has to look like a valid stack pointer. I
> don't actually use the stack, I just need a value that lets me reuse
> the phase-2 path in the identity map.
>
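To summarize the r0 dispatch in __do_hyp_init from the patch above (an
illustrative comment only, matching the code as posted):

/*
 * r0 == 0 -> phase-1 init (r2/r3 carry the boot PGD pointer)
 * r0 == 1 -> cpu_proc_fin: disable caches in HYP mode
 * r0 == 3 -> restore: restore HSCTLR from r1, drop MMU/caches,
 *            jump to r2
 * other   -> phase-2 init, with r0 used as the stack pointer, which
 *            is why any even value such as 0x100 (or 0x4) will do
 */
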
>>> + flush_cache_all();
>>
>> Why do you need a flush_all? Why can't you do it by VA?
>>> +
>>> + /* Turn off caching in Hypervisor mode */
>>> + kvm_call_hyp((void*) 1);
>>> +
>>> + flush_cache_all();
>>
>> The fact that you have to do it twice is indicative of other problems.
>>
>
> It's the same code as in __soft_restart. The first flush makes sure
> that code running after the caches are disabled sees up-to-date values;
> the second makes sure that code doesn't see stale cached values after
> the cache is re-enabled (which happens when the second kernel enables
> it). Without these flushes you get memory corruption. Yes, I could
> probably do it by VA, but we are on a slow code path restarting the
> system with a single CPU, so better safe than sorry.
>
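Spelled out, the sequence is (same pattern as __soft_restart, shown here
only for clarity):

	flush_cache_all();        /* write dirty lines back while the
	                           * caches are still on, so uncached
	                           * code sees current data */
	kvm_call_hyp((void *) 1); /* turn off caches in HYP mode */
	flush_cache_all();        /* invalidate, so nothing stale is hit
	                           * once the next kernel re-enables the
	                           * caches */
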
>>> + /* restore execution */
>>> + kvm_call_hyp((void*) 3, kvm_saved_sctlr, addr);
>>
>> That seems useless to me (as mentioned above). The whole sequence can
>> probably be reduced to a couple of HYP calls, and made much more
>> straightforward.
>>
>
> Yes, it could be. However, it's better to call the flush functions from
> supervisor mode, as all the kernel code is mainly written for that
> mode. Besides restoring the saved HSCTLR, this call also jumps to the
> code that sets up the new kernel, so I would say it's quite useful.
>
>>> +}
>>> +
>>> /* NOP: Compiling as a module not supported */
>>> void kvm_arch_exit(void)
>>> {
>>> diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
>>> index 3988e72..79f435a 100644
>>> --- a/arch/arm/kvm/init.S
>>> +++ b/arch/arm/kvm/init.S
>>> @@ -68,12 +68,21 @@ __kvm_hyp_init:
>>> W(b) .
>>>
>>> __do_hyp_init:
>>> + @ check for special values; an odd number can't be a stack pointer
>>> + cmp r0, #1
>>> + beq cpu_proc_fin
>>> + cmp r0, #3
>>> + beq restore
>>> +
>>> cmp r0, #0 @ We have a SP?
>>> bne phase2 @ Yes, second stage init
>>>
>>> @ Set the HTTBR to point to the hypervisor PGD pointer passed
>>> mcrr p15, 4, rr_lo_hi(r2, r3), c2
>>>
>>> + @ Save HSCTLR so that we can return it to the caller
>>> + mrc p15, 4, r3, c1, c0, 0 @ HSCTLR
>>> +
>>> @ Set the HTCR and VTCR to the same shareability and cacheability
>>> @ settings as the non-secure TTBCR and with T0SZ == 0.
>>> mrc p15, 4, r0, c2, c0, 2 @ HTCR
>>> @@ -125,6 +134,9 @@ __do_hyp_init:
>>> isb
>>> mcr p15, 4, r0, c1, c0, 0 @ HSCR
>>>
>>> + @ Return the initial HSCTLR value
>>> + mov r0, r3
>>> +
>>> @ End of init phase-1
>>> eret
>>>
>>> @@ -151,6 +163,29 @@ target: @ We're now in the trampoline code, switch page tables
>>>
>>> eret
>>>
>>> +cpu_proc_fin:
>>> + mrc p15, 4, r0, c1, c0, 0 @ ctrl register
>>> + bic r0, r0, #0x1000 @ ...i............
>>> + bic r0, r0, #0x0006 @ .............ca.
>>> + mcr p15, 4, r0, c1, c0, 0 @ disable caches
>>> + eret
>>> +
>>> +restore:
>>> + isb
>>> + mcr p15, 4, r1, c1, c0, 0 @ HSCTLR
>>> + isb
>>> + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
>>> + isb
>>> + dsb ish
>>> +
>>> + @ Disable MMU, caches and Thumb in SCTLR
>>> + mrc p15, 0, r1, c1, c0, 0 @ SCTLR
>>> + ldr r0, =0x40003807 @ TE | V | I | Z | A | C | M
>>> + bic r1, r1, r0
>>> + mcr p15, 0, r1, c1, c0, 0
>>> +
>>> + bx r2
>>> +
>>> .ltorg
>>>
>>> .globl __kvm_hyp_init_end
>>> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
>>> index 01dcb0e..449e7dd 100644
>>> --- a/arch/arm/kvm/interrupts.S
>>> +++ b/arch/arm/kvm/interrupts.S
>>> @@ -87,6 +87,18 @@ ENDPROC(__kvm_flush_vm_context)
>>>
>>>
>>> /********************************************************************
>>> + * Set HVBAR
>>> + *
>>> + * void __kvm_set_vectors(unsigned long phys_vector_base);
>>> + */
>>> +ENTRY(__kvm_set_vectors)
>>> + mcr p15, 4, r0, c12, c0, 0 @ set HVBAR
>>> + dsb ish
>>> + isb
>>> + bx lr
>>> +ENDPROC(__kvm_set_vectors)
>>> +
>>> +/********************************************************************
>>> * Hypervisor world-switch code
>>> *
>>> *
>>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>>> index 1366625..9149ae5 100644
>>> --- a/arch/arm/kvm/mmu.c
>>> +++ b/arch/arm/kvm/mmu.c
>>> @@ -46,6 +46,8 @@ static phys_addr_t hyp_idmap_vector;
>>>
>>> #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
>>>
>>> +static int kvm_mmu_boot_init(void);
>>> +
>>> static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
>>> {
>>> /*
>>> @@ -369,6 +371,11 @@ void free_boot_hyp_pgd(void)
>>> free_page((unsigned long)init_bounce_page);
>>> init_bounce_page = NULL;
>>>
>>> + /* avoid reusing possibly stale values once the bounce page is freed */
>>> + hyp_idmap_start = 0;
>>> + hyp_idmap_end = 0;
>>> + hyp_idmap_vector = 0;
>>> +
>>
>> Well, in this case, why do we free the page? We'd better keep it around
>> if we have Kexec...
>>
>
> Well... yes, it's just that I thought people who freed those 16kB
> (should be) of memory actually want those 16kB back :)
> Keeping the trampoline and boot PGD around is much easier indeed.
>
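So for v2 I will simply keep the allocation around whenever kexec may
need it, i.e. (as in the second version below):

#if !defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_KEXEC)
	free_boot_hyp_pgd();
#endif
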
>>> mutex_unlock(&kvm_hyp_pgd_mutex);
>>> }
>>>
>>> @@ -1252,7 +1259,28 @@ phys_addr_t kvm_get_idmap_vector(void)
>>> return hyp_idmap_vector;
>>> }
>>>
>>> -int kvm_mmu_init(void)
>>> +/* ensure the MMU is set up correctly */
>>> +int kvm_mmu_reset_prepare(void)
>>> +{
>>> + int err = 0;
>>> +
>>> + if (boot_hyp_pgd)
>>> + goto out;
>>> +
>>> + err = kvm_mmu_boot_init();
>>> + if (err)
>>> + goto out;
>>> +
>>> + err = __create_hyp_mappings(hyp_pgd,
>>> + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
>>> + __phys_to_pfn(hyp_idmap_start),
>>> + PAGE_HYP);
>>> +
>>> +out:
>>> + return err;
>>> +}
>>> +
>>
>> ... and this code can then go away.
>>
>
> Ok, I'll remove
>
>>> +static int kvm_mmu_boot_init(void)
>>> {
>>> int err;
>>>
>>> @@ -1294,10 +1322,9 @@ int kvm_mmu_init(void)
>>> (unsigned long)phys_base);
>>> }
>>>
>>> - hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
>>> boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
>>>
>>> - if (!hyp_pgd || !boot_hyp_pgd) {
>>> + if (!boot_hyp_pgd) {
>>> kvm_err("Hyp mode PGD not allocated\n");
>>> err = -ENOMEM;
>>> goto out;
>>> @@ -1326,6 +1353,27 @@ int kvm_mmu_init(void)
>>> goto out;
>>> }
>>>
>>> + return 0;
>>> +out:
>>> + free_boot_hyp_pgd();
>>> + return err;
>>> +}
>>> +
>>> +int kvm_mmu_init(void)
>>> +{
>>> + int err;
>>> +
>>> + err = kvm_mmu_boot_init();
>>> + if (err)
>>> + goto out0;
>>> +
>>> + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
>>> + if (!hyp_pgd) {
>>> + kvm_err("Hyp mode PGD not allocated\n");
>>> + err = -ENOMEM;
>>> + goto out;
>>> + }
>>> +
>>> /* Map the same page again into the runtime page tables */
>>> err = __create_hyp_mappings(hyp_pgd,
>>> TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
>>> @@ -1340,6 +1388,7 @@ int kvm_mmu_init(void)
>>> return 0;
>>> out:
>>> free_hyp_pgds();
>>> +out0:
>>> return err;
>>> }
>>>
>>>
>>> Frediano
>>>
>>
>> Thanks for having a look into this, as you're the first one to actually
>> do something. That still requires a lot of work, and some careful
>> thoughts as to how to do this on arm64.
>>
>> Looking forward to your next patch,
>>
>> M.
>
>
> So, my todo list:
> - remove the saved value and test again
> - remove the VM flush, it's useless
> - fix the compile check in the header
> - try with KVM enabled and disabled: compile, link and test
> - rewrite comments, add more where necessary
> - remove the code that frees and reallocates the boot PGD and related
> structures; keep them in memory if KEXEC is enabled
> - try SMP
> Aside from the last step, I should be able to get something done before
> next week.
>
> Frediano
New version. Changes:
- removed the saved value and retested
- removed the useless VM flush
- fixed the compile check in the header
- tried with KVM enabled and disabled: compile, link and test
- rewrote comments, added more where necessary
- removed the code that frees and reallocates the boot PGD and related
structures; they are kept in memory if KEXEC is enabled
Still not testing SMP, and this is still to be considered an RFC. I
tried different compile options (KEXEC and KVM turned on/off). I
realized that I should still test that the code works correctly when the
kernel is started in SVC mode. ARM_VIRT_EXT is always turned on for V7
CPUs.
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3a67bec..985f0a3 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -97,7 +97,19 @@ extern char __kvm_hyp_code_end[];
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_set_vectors(unsigned long phys_vector_base);
+
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+#ifndef CONFIG_ARM_VIRT_EXT
+static void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
+{
+ phys_reset(addr);
+}
+#else
+extern void kvm_cpu_reset(void (*phys_reset)(void *), void *addr);
+#endif
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 2a55373..30339ad 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -164,13 +164,63 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE
bx lr @ The boot CPU mode is left in r4.
ENDPROC(__hyp_stub_install_secondary)
+#undef CPU_RESET
+#if defined(CONFIG_ARM_VIRT_EXT) && !defined(CONFIG_KVM) && !defined(ZIMAGE)
+# define CPU_RESET 1
+#endif
+
__hyp_stub_do_trap:
+#ifdef CPU_RESET
+ cmp r0, #-2
+ bne 1f
+
+ mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
+ ldr r2, =0x40003807 @ TE | V | I | Z | A | C | M
+ bic r0, r0, r2
+ mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
+ isb
+
+ @ Disable MMU, caches and Thumb in SCTLR
+ mrc p15, 0, r0, c1, c0, 0 @ SCTLR
+ bic r0, r0, r2
+ mcr p15, 0, r0, c1, c0, 0
+
+ bx r1
+ .ltorg
+1:
+#endif
cmp r0, #-1
mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
mcrne p15, 4, r0, c12, c0, 0 @ set HVBAR
__ERET
ENDPROC(__hyp_stub_do_trap)
+#ifdef CPU_RESET
+/*
+ * r0 cpu_reset function
+ * r1 address to jump to
+ */
+ENTRY(kvm_cpu_reset)
+ mov r2, r0
+
+ @ __boot_cpu_mode should be HYP_MODE
+ adr r0, .L__boot_cpu_mode_offset
+ ldr r3, [r0]
+ ldr r0, [r0, r3]
+ cmp r0, #HYP_MODE
+ beq reset_to_hyp
+
+ @ Call SVC mode cpu_reset
+ mov r0, r1
+THUMB( orr r2, r2, 1 )
+ bx r2
+
+reset_to_hyp:
+ mov r0, #-2
+ b __hyp_set_vectors
+ENDPROC(kvm_cpu_reset)
+#endif
+
/*
* __hyp_set_vectors: Call this after boot to set the initial hypervisor
* vectors as part of hypervisor installation. On an SMP system, this should
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fdfa3a7..c018719 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -41,6 +41,7 @@
#include <asm/system_misc.h>
#include <asm/mach/time.h>
#include <asm/tls.h>
+#include <asm/kvm_asm.h>
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
@@ -60,7 +61,7 @@ static const char *isa_modes[] __maybe_unused = {
};
extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
-typedef void (*phys_reset_t)(unsigned long);
+typedef void (*phys_reset_t)(void *);
/*
* A temporary stack to use for CPU reset. This is static so that we
@@ -89,7 +90,8 @@ static void __soft_restart(void *addr)
/* Switch to the identity mapping. */
phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
- phys_reset((unsigned long)addr);
+
+ kvm_cpu_reset(phys_reset, addr);
/* Should never get here. */
BUG();
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0b0d58a..e4d4a2b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1009,7 +1009,7 @@ static int init_hyp_mode(void)
if (err)
goto out_free_mappings;
-#ifndef CONFIG_HOTPLUG_CPU
+#if !defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_KEXEC)
free_boot_hyp_pgd();
#endif
@@ -1079,6 +1079,53 @@ out_err:
return err;
}
+void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
+{
+ unsigned long vector;
+
+ if (!is_hyp_mode_available()) {
+ phys_reset(addr);
+ return;
+ }
+
+ vector = (unsigned long) kvm_get_idmap_vector();
+
+ /*
+ * Set vectors so we call initialization routines.
+ * trampoline is mapped at TRAMPOLINE_VA but not at its physical
+ * address so we don't have an identity map to be able to
+ * disable MMU. We need to set exception vector at trampoline
+ * at TRAMPOLINE_VA address which is mapped.
+ */
+ kvm_call_hyp(__kvm_set_vectors,(unsigned long) (TRAMPOLINE_VA + (vector & (PAGE_SIZE-1))));
+
+ /*
+ * Set HVBAR to the physical vectors and the page tables to the identity map to do the switch
+ */
+ kvm_call_hyp((void*) 4, (unsigned long) vector, kvm_mmu_get_boot_httbr());
+
+ /*
+ * Flush so that code running after the caches are disabled sees updated values
+ */
+ flush_cache_all();
+
+ /*
+ * Turn off caching in Hypervisor mode
+ */
+ kvm_call_hyp((void*) 1);
+
+ /*
+ * Flush to make sure code doesn't see stale cached values after the
+ * cache is re-enabled
+ */
+ flush_cache_all();
+
+ /*
+ * Restore execution
+ */
+ kvm_call_hyp((void*) 3, addr);
+}
+
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72..c2f1d4d 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -68,6 +68,12 @@ __kvm_hyp_init:
W(b) .
__do_hyp_init:
+ @ check for special values; an odd number can't be a stack pointer
+ cmp r0, #1
+ beq cpu_proc_fin
+ cmp r0, #3
+ beq restore
+
cmp r0, #0 @ We have a SP?
bne phase2 @ Yes, second stage init
@@ -151,6 +157,33 @@ target: @ We're now in the trampoline code, switch page tables
eret
+cpu_proc_fin:
+ mrc p15, 4, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1000 @ ...i............
+ bic r0, r0, #0x0006 @ .............ca.
+ mcr p15, 4, r0, c1, c0, 0 @ disable caches
+ eret
+
+restore:
+ @ Disable MMU, caches and Thumb in HSCTLR
+ mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
+ ldr r2, =0x40003807 @ TE | V | I | Z | A | C | M
+ bic r0, r0, r2
+ mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
+ isb
+
+ @ Invalidate old TLBs
+ mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
+ isb
+ dsb ish
+
+ @ Disable MMU, caches and Thumb in SCTLR
+ mrc p15, 0, r0, c1, c0, 0 @ SCTLR
+ bic r0, r0, r2
+ mcr p15, 0, r0, c1, c0, 0
+
+ bx r1
+
.ltorg
.globl __kvm_hyp_init_end
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e..449e7dd 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -87,6 +87,18 @@ ENDPROC(__kvm_flush_vm_context)
/********************************************************************
+ * Set HVBAR
+ *
+ * void __kvm_set_vectors(unsigned long phys_vector_base);
+ */
+ENTRY(__kvm_set_vectors)
+ mcr p15, 4, r0, c12, c0, 0 @ set HVBAR
+ dsb ish
+ isb
+ bx lr
+ENDPROC(__kvm_set_vectors)
+
+/********************************************************************
* Hypervisor world-switch code
*
*
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1366625..f853858 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -369,6 +369,11 @@ void free_boot_hyp_pgd(void)
free_page((unsigned long)init_bounce_page);
init_bounce_page = NULL;
+ /* avoid reusing possibly stale values once the bounce page is freed */
+ hyp_idmap_start = 0;
+ hyp_idmap_end = 0;
+ hyp_idmap_vector = 0;
+
mutex_unlock(&kvm_hyp_pgd_mutex);
}
Frediano