Kexec and KVM not working gracefully together
Frediano Ziglio
freddy77 at gmail.com
Thu Feb 5 02:51:58 PST 2015
2015-02-05 10:27 GMT+00:00 AKASHI Takahiro <takahiro.akashi at linaro.org>:
> On 02/05/2015 06:43 PM, Marc Zyngier wrote:
>>
>> On Thu, 5 Feb 2015 06:17:04 +0000
>> AKASHI Takahiro <takahiro.akashi at linaro.org> wrote:
>>
>>> Frediano
>>> Cc: Marc
>>>
>>> Are you going to fix this issue on arm?
>>> As I'm now working on the same issue on arm64,
>>> we should share the idea and some code.
>>
>>
>> Just to be perfectly clear: KVM support for Kexec on arm and arm64
>> should be exactly the same, with the exception of some (very
>> limited) assembly code for the MMU teardown.
>
>
> Yep, that's exactly what I meant.
>
>> And I'd like to see both architectures addressed at the same time,
>> without statements such as "we'll do that as a next step". As such, I'd
>> very much recommend that Frediano and you work together to address this
>> problem.
>
>
> I hope so.
>
> Thanks,
> -Takahiro AKASHI
>
>> Thanks,
>>
>> M.
>>
Well, let's let the code speak. I'm actually testing it with an ARM32 platform
with a single CPU (the platform has 16 cores, but I'm using a non-SMP
kernel for my tests for unrelated reasons; I should be able to test SMP
in a couple of days).
The key point is the switch-back code (arm.c, kvm_cpu_reset). The idea:
- at boot time I save HSCTLR value
- restore boot page tables and trampoline before cpu reset
- call kvm_cpu_reset instead of cpu_reset (this will call cpu_reset if
kvm is disabled)
- set hvbar to trampoline (virtual memory)
- set hvbar to physical trampoline and boot pages (with trampoline
mapped at physical and virtual that is identity)
- flush memory while disabling caching in hypervisor mode (not doing so
caused a lot of instability; with it I have now rebooted more than 100
times in a row)
- call hypervisor mode to do the reset cpu restoring the boot HSCTLR value
I hope gmail does not mangle my patch...
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3a67bec..71380a1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -97,7 +97,24 @@ extern char __kvm_hyp_code_end[];
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_set_vectors(unsigned long phys_vector_base);
+
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+#ifdef CONFIG_HAVE_KVM
+static void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
+{
+ phys_reset(addr);
+}
+static inline int kvm_mmu_reset_prepare(void)
+{
+ return 0;
+}
+#else
+extern void kvm_cpu_reset(void (*phys_reset)(void *), void *addr);
+extern int kvm_mmu_reset_prepare(void);
+#endif
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 04b4ea0..c0b6c20 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -192,6 +192,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
+extern unsigned kvm_saved_sctlr;
+
static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
@@ -213,7 +215,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t
boot_pgd_ptr,
* PCS!).
*/
- kvm_call_hyp(NULL, 0, boot_pgd_ptr);
+ kvm_saved_sctlr = (unsigned) kvm_call_hyp(NULL, 0, boot_pgd_ptr);
kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
}
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fdfa3a7..026a1e4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -41,6 +41,7 @@
#include <asm/system_misc.h>
#include <asm/mach/time.h>
#include <asm/tls.h>
+#include <asm/kvm_asm.h>
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
@@ -60,7 +61,7 @@ static const char *isa_modes[] __maybe_unused = {
};
extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
-typedef void (*phys_reset_t)(unsigned long);
+typedef void (*phys_reset_t)(void *);
/*
* A temporary stack to use for CPU reset. This is static so that we
@@ -89,7 +90,7 @@ static void __soft_restart(void *addr)
/* Switch to the identity mapping. */
phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
- phys_reset((unsigned long)addr);
+ kvm_cpu_reset(phys_reset, addr);
/* Should never get here. */
BUG();
@@ -107,6 +108,8 @@ void soft_restart(unsigned long addr)
if (num_online_cpus() == 1)
outer_disable();
+ kvm_mmu_reset_prepare();
+
/* Change to the new stack and continue with the reset. */
call_with_stack(__soft_restart, (void *)addr, (void *)stack);
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0b0d58a..7727678 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -63,6 +63,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
+unsigned kvm_saved_sctlr;
+
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@@ -1079,6 +1081,37 @@ out_err:
return err;
}
+void kvm_cpu_reset(void (*phys_reset)(void *), void *addr)
+{
+ unsigned long vector;
+
+ if (!is_hyp_mode_available()) {
+ phys_reset(addr);
+ return;
+ }
+
+ vector = (unsigned long) kvm_get_idmap_vector();
+
+ kvm_call_hyp(__kvm_flush_vm_context);
+
+ /* set vectors so we call initialization routines */
+ /* trampoline is still on memory, physical ram is not mapped */
+ kvm_call_hyp(__kvm_set_vectors,(unsigned long) (TRAMPOLINE_VA +
(vector & (PAGE_SIZE-1))));
+
+ /* set HVBAR to physical, page table to identity to do the switch */
+ kvm_call_hyp((void*) 0x100, (unsigned long) vector,
kvm_mmu_get_boot_httbr());
+
+ flush_cache_all();
+
+ /* Turn off caching on Hypervisor mode */
+ kvm_call_hyp((void*) 1);
+
+ flush_cache_all();
+
+ /* restore execution */
+ kvm_call_hyp((void*) 3, kvm_saved_sctlr, addr);
+}
+
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72..79f435a 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -68,12 +68,21 @@ __kvm_hyp_init:
W(b) .
__do_hyp_init:
+ @ check for special values, odd numbers can't be a stack pointer
+ cmp r0, #1
+ beq cpu_proc_fin
+ cmp r0, #3
+ beq restore
+
cmp r0, #0 @ We have a SP?
bne phase2 @ Yes, second stage init
@ Set the HTTBR to point to the hypervisor PGD pointer passed
mcrr p15, 4, rr_lo_hi(r2, r3), c2
+ @ Save HSCR to be able to return it, save and restore
+ mrc p15, 4, r3, c1, c0, 0 @ HSCR
+
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
mrc p15, 4, r0, c2, c0, 2 @ HTCR
@@ -125,6 +134,9 @@ __do_hyp_init:
isb
mcr p15, 4, r0, c1, c0, 0 @ HSCR
+ @ Return initial HSCR register
+ mov r0, r3
+
@ End of init phase-1
eret
@@ -151,6 +163,29 @@ target: @ We're now in the trampoline code,
switch page tables
eret
+cpu_proc_fin:
+ mrc p15, 4, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1000 @ ...i............
+ bic r0, r0, #0x0006 @ .............ca.
+ mcr p15, 4, r0, c1, c0, 0 @ disable caches
+ eret
+
+restore:
+ isb
+ mcr p15, 4, r1, c1, c0, 0 @ HSCR
+ isb
+ mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
+ isb
+ dsb ish
+
+ @ Disable MMU, caches and Thumb in SCTLR
+ mrc p15, 0, r1, c1, c0, 0 @ SCTLR
+ ldr r0, =0x40003807
+ bic r1, r1, r0
+ mcr p15, 0, r1, c1, c0, 0
+
+ bx r2
+
.ltorg
.globl __kvm_hyp_init_end
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e..449e7dd 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -87,6 +87,18 @@ ENDPROC(__kvm_flush_vm_context)
/********************************************************************
+ * Set HVBAR
+ *
+ * void __kvm_set_vectors(unsigned long phys_vector_base);
+ */
+ENTRY(__kvm_set_vectors)
+ mcr p15, 4, r0, c12, c0, 0 @ set HVBAR
+ dsb ish
+ isb
+ bx lr
+ENDPROC(__kvm_set_vectors)
+
+/********************************************************************
* Hypervisor world-switch code
*
*
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1366625..9149ae5 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -46,6 +46,8 @@ static phys_addr_t hyp_idmap_vector;
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
+static int kvm_mmu_boot_init(void);
+
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
/*
@@ -369,6 +371,11 @@ void free_boot_hyp_pgd(void)
free_page((unsigned long)init_bounce_page);
init_bounce_page = NULL;
+ /* avoid to reuse possibly invalid values if bounce page is freed */
+ hyp_idmap_start = 0;
+ hyp_idmap_end = 0;
+ hyp_idmap_vector = 0;
+
mutex_unlock(&kvm_hyp_pgd_mutex);
}
@@ -1252,7 +1259,28 @@ phys_addr_t kvm_get_idmap_vector(void)
return hyp_idmap_vector;
}
-int kvm_mmu_init(void)
+/* assure we have MMU setup correctly */
+int kvm_mmu_reset_prepare(void)
+{
+ int err = 0;
+
+ if (boot_hyp_pgd)
+ goto out;
+
+ err = kvm_mmu_boot_init();
+ if (err)
+ goto out;
+
+ err = __create_hyp_mappings(hyp_pgd,
+ TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+ __phys_to_pfn(hyp_idmap_start),
+ PAGE_HYP);
+
+out:
+ return err;
+}
+
+static int kvm_mmu_boot_init(void)
{
int err;
@@ -1294,10 +1322,9 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
- hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
hyp_pgd_order);
boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
hyp_pgd_order);
- if (!hyp_pgd || !boot_hyp_pgd) {
+ if (!boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
err = -ENOMEM;
goto out;
@@ -1326,6 +1353,27 @@ int kvm_mmu_init(void)
goto out;
}
+ return 0;
+out:
+ free_boot_hyp_pgd();
+ return err;
+}
+
+int kvm_mmu_init(void)
+{
+ int err;
+
+ err = kvm_mmu_boot_init();
+ if (err)
+ goto out0;
+
+ hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ if (!hyp_pgd) {
+ kvm_err("Hyp mode PGD not allocated\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
/* Map the same page again into the runtime page tables */
err = __create_hyp_mappings(hyp_pgd,
TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
@@ -1340,6 +1388,7 @@ int kvm_mmu_init(void)
return 0;
out:
free_hyp_pgds();
+out0:
return err;
}
Frediano
More information about the linux-arm-kernel
mailing list