[PATCH v12 12/16] arm64: kdump: implement machine_crash_shutdown()
Marc Zyngier
marc.zyngier at arm.com
Thu Dec 10 05:43:38 PST 2015
On 10/12/15 12:55, AKASHI Takahiro wrote:
> On 12/10/2015 08:44 PM, Marc Zyngier wrote:
>> On 10/12/15 11:34, AKASHI Takahiro wrote:
>>> Marc,
>>>
>>> I am back from my vacation.
>>>
>>> On 11/27/2015 11:39 PM, Marc Zyngier wrote:
>>>> On 24/11/15 22:25, Geoff Levand wrote:
>>>>> From: AKASHI Takahiro <takahiro.akashi at linaro.org>
>>>>>
>>>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>>>> save registers' status in per-cpu ELF notes before starting the crash
>>>>> dump kernel. See kernel_kexec().
>>>>>
>>>>> ipi_cpu_stop() is a bit modified and used to support this behavior.
>>>>>
>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi at linaro.org>
>>>>> ---
>>>>> arch/arm64/include/asm/kexec.h | 34 +++++++++++++++++++++++++++++++++-
>>>>> arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>>>>> arch/arm64/kernel/smp.c | 16 ++++++++++++++--
>>>>> 3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>>> index 46d63cd..555a955 100644
>>>>> --- a/arch/arm64/include/asm/kexec.h
>>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>>> @@ -30,6 +30,8 @@
>>>>>
>>>>> #if !defined(__ASSEMBLY__)
>>>>>
>>>>> +extern bool in_crash_kexec;
>>>>> +
>>>>> /**
>>>>> * crash_setup_regs() - save registers for the panic kernel
>>>>> *
>>>>> @@ -40,7 +42,37 @@
>>>>> static inline void crash_setup_regs(struct pt_regs *newregs,
>>>>> struct pt_regs *oldregs)
>>>>> {
>>>>> - /* Empty routine needed to avoid build errors. */
>>>>> + if (oldregs) {
>>>>> + memcpy(newregs, oldregs, sizeof(*newregs));
>>>>> + } else {
>>>>> + __asm__ __volatile__ (
>>>>> + "stp x0, x1, [%3, #16 * 0]\n"
>>>>> + "stp x2, x3, [%3, #16 * 1]\n"
>>>>> + "stp x4, x5, [%3, #16 * 2]\n"
>>>>> + "stp x6, x7, [%3, #16 * 3]\n"
>>>>> + "stp x8, x9, [%3, #16 * 4]\n"
>>>>> + "stp x10, x11, [%3, #16 * 5]\n"
>>>>> + "stp x12, x13, [%3, #16 * 6]\n"
>>>>> + "stp x14, x15, [%3, #16 * 7]\n"
>>>>> + "stp x16, x17, [%3, #16 * 8]\n"
>>>>> + "stp x18, x19, [%3, #16 * 9]\n"
>>>>> + "stp x20, x21, [%3, #16 * 10]\n"
>>>>> + "stp x22, x23, [%3, #16 * 11]\n"
>>>>> + "stp x24, x25, [%3, #16 * 12]\n"
>>>>> + "stp x26, x27, [%3, #16 * 13]\n"
>>>>> + "stp x28, x29, [%3, #16 * 14]\n"
>>>>> + "str x30, [%3, #16 * 15]\n"
>>>>> + "mov %0, sp\n"
>>>>> + "adr %1, 1f\n"
>>>>> + "mrs %2, spsr_el1\n"
>>>>> + "1:"
>>>>> + : "=r" (newregs->sp),
>>>>> + "=r" (newregs->pc),
>>>>> + "=r" (newregs->pstate)
>>>>> + : "r" (&newregs->regs)
>>>>> + : "memory"
>>>>> + );
>>>>
>>>> I wonder how useful this thing is, given that it starts by corrupting
>>>> whatever register is holding newregs->regs. Maybe this is not supposed
>>>> to be accurate anyway...
>>>
>>> I'm not quite sure about what part of my code you're mentioning here, but
>>> crash_setup_regs() is solely called by crash_kexec(), and panic() is
>>> the only caller of crash_kexec() with NULL argument which, in turn, is
>>> used as 'oldregs' in crash_setup_regs().
>>
>> You have this assembly sequence:
>>
>> stp x0, x1, [%3, #16 * 0]
>> [...]
>>
>> where %3 itself is one of the x[0..30] registers.
>
> Not only %3, but also %0, %1 and %2 (the output operands are allocated to general-purpose registers as well).
>
>> So you are saving
>> things that have already been corrupted by the saving procedure. Not
>> sure how useful that is, but as I said, maybe it is not supposed to be
>> completely accurate.
>
> x0, x1 ... are the current values in panic(), and not the exact cpu contexts
> at the place we are really interested in.
> We have no way here in panic() to know them, but sp and pc would still be useful
> for back-tracing in later investigation of dump file.
>
> Please note that the same problem exists in the arm (and x86) implementations.
As I said: if people don't expect to have a precise dump of the register
file, then fine.
>>> Given this fact, I think that the values saved in newregs as indicated above
>>> will be the best estimate of current cpu contexts.
>>>
>>> The other caller of crash_kexec() is die() in traps.c, but here we call
>>> it with explicit cpu contexts at exception.
>>>
>>>>
>>>>> + }
>>>>> }
>>>>>
>>>>> #endif /* !defined(__ASSEMBLY__) */
>>>>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>>>>> index da28a26..d2d7e90 100644
>>>>> --- a/arch/arm64/kernel/machine_kexec.c
>>>>> +++ b/arch/arm64/kernel/machine_kexec.c
>>>>> @@ -9,6 +9,7 @@
>>>>> * published by the Free Software Foundation.
>>>>> */
>>>>>
>>>>> +#include <linux/kernel.h>
>>>>> #include <linux/kexec.h>
>>>>> #include <linux/of_fdt.h>
>>>>> #include <linux/slab.h>
>>>>> @@ -23,6 +24,7 @@
>>>>> extern const unsigned char arm64_relocate_new_kernel[];
>>>>> extern const unsigned long arm64_relocate_new_kernel_size;
>>>>>
>>>>> +bool in_crash_kexec;
>>>>> static unsigned long kimage_start;
>>>>>
>>>>> /**
>>>>> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>>>>> */
>>>>>
>>>>> cpu_soft_restart(virt_to_phys(cpu_reset),
>>>>> - is_hyp_mode_available(),
>>>>> + in_crash_kexec ? 0 : is_hyp_mode_available(),
>>>>> reboot_code_buffer_phys, kimage->head, kimage_start);
>>>>>
>>>>> BUG(); /* Should never get here. */
>>>>> }
>>>>>
>>>>> +/**
>>>>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>>>>> + */
>>>>> void machine_crash_shutdown(struct pt_regs *regs)
>>>>> {
>>>>> - /* Empty routine needed to avoid build errors. */
>>>>> + struct pt_regs dummy_regs;
>>>>> + int cpu;
>>>>> +
>>>>> + local_irq_disable();
>>>>> +
>>>>> + in_crash_kexec = true;
>>>>> +
>>>>> + /*
>>>>> + * clear and initialize the per-cpu info. This is necessary
>>>>> + * because, otherwise, slots for offline cpus would never be
>>>>> + * filled up. See smp_send_stop().
>>>>> + */
>>>>> + memset(&dummy_regs, 0, sizeof(dummy_regs));
>>>>> + for_each_possible_cpu(cpu)
>>>>> + crash_save_cpu(&dummy_regs, cpu);
>>>>> +
>>>>> + /* shutdown non-boot cpus */
>>>>> + smp_send_stop();
>>>>> +
>>>>> + /* for boot cpu */
>>>>> + crash_save_cpu(regs, smp_processor_id());
>>>>> +
>>>>> + pr_info("Starting crashdump kernel...\n");
>>>>> }
>>>>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>>>>> index b1adc51..15aabef 100644
>>>>> --- a/arch/arm64/kernel/smp.c
>>>>> +++ b/arch/arm64/kernel/smp.c
>>>>> @@ -37,6 +37,7 @@
>>>>> #include <linux/completion.h>
>>>>> #include <linux/of.h>
>>>>> #include <linux/irq_work.h>
>>>>> +#include <linux/kexec.h>
>>>>>
>>>>> #include <asm/alternative.h>
>>>>> #include <asm/atomic.h>
>>>>> @@ -54,6 +55,8 @@
>>>>> #include <asm/ptrace.h>
>>>>> #include <asm/virt.h>
>>>>>
>>>>> +#include "cpu-reset.h"
>>>>> +
>>>>> #define CREATE_TRACE_POINTS
>>>>> #include <trace/events/ipi.h>
>>>>>
>>>>> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>>>> /*
>>>>> * ipi_cpu_stop - handle IPI from smp_send_stop()
>>>>> */
>>>>> -static void ipi_cpu_stop(unsigned int cpu)
>>>>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>>>> {
>>>>> +#ifdef CONFIG_KEXEC
>>>>> + /* printing messages may slow down the shutdown. */
>>>>> + if (!in_crash_kexec)
>>>>> +#endif
>>>>> if (system_state == SYSTEM_BOOTING ||
>>>>> system_state == SYSTEM_RUNNING) {
>>>>> raw_spin_lock(&stop_lock);
>>>>
>>>> Irrespective of how useful this change is, how about having a predicate
>>>> instead? Something like:
>>>>
>>>> static inline bool is_in_crash_kexec(void)
>>>> {
>>>> #ifdef CONFIG_KEXEC
>>>> return in_crash_kexec;
>>>> #else
>>>> return false;
>>>> #endif
>>>> }
>>>
>>> OK, I will take your idea.
>>>
>>>> located in machine_kexec.c (making in_crash_kexec static), and then
>>>
>>> but we cannot make in_crash_kexec static because it is used in both smp.c
>>> and machine_kexec.c.
>>
>> smp.c only reads from in_crash_kexec (at least from what I can see in
>> this patch), so it should be able to use the accessor.
>
> Only if we define the accessor as a real function, not an inline function in a header :)
I'll leave the implementation details in your capable hands. :-)
Thanks,
M.
--
Jazz is not dead. It just smells funny...
More information about the kexec
mailing list