[V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path

Corey Minyard cminyard at mvista.com
Fri Aug 12 06:55:41 PDT 2016


I'll try to test this, but I have one comment inline...

On 08/11/2016 10:17 PM, Dave Young wrote:
> On 08/10/16 at 05:09pm, Hidehiro Kawai wrote:
>> Daniel Walker reported problems which happen when
>> crash_kexec_post_notifiers kernel option is enabled
>> (https://lkml.org/lkml/2015/6/24/44).
>>
>> In that case, smp_send_stop() is called before entering kdump routines
>> which assume other CPUs are still online.  As a result, kdump
>> routines fail to save other CPUs' registers.  Additionally, for MIPS
>> OCTEON, it fails to stop the watchdog timer.
>>
>> To fix this problem, call a new kdump friendly function,
>> crash_smp_send_stop(), instead of the smp_send_stop() when
>> crash_kexec_post_notifiers is enabled.  crash_smp_send_stop() is a
>> weak function, and it just calls smp_send_stop().  Architecture
>> code should override it so that kdump can work appropriately.
>> This patch provides MIPS version.
>>
>> Reported-by: Daniel Walker <dwalker at fifo99.com>
>> Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" option)
>> Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez at hitachi.com>
>> Cc: Ralf Baechle <ralf at linux-mips.org>
>> Cc: David Daney <david.daney at cavium.com>
>> Cc: Aaro Koskinen <aaro.koskinen at iki.fi>
>> Cc: "Steven J. Hill" <steven.hill at cavium.com>
>> Cc: Corey Minyard <cminyard at mvista.com>
>>
>> ---
>> I'm not familiar with MIPS and I don't have a test environment, so I
>> only did build tests.  Please don't apply this patch until someone
>> has tested it sufficiently; otherwise simply drop this patch.
>> ---
>>   arch/mips/cavium-octeon/setup.c  |   14 ++++++++++++++
>>   arch/mips/include/asm/kexec.h    |    1 +
>>   arch/mips/kernel/crash.c         |   18 +++++++++++++++++-
>>   arch/mips/kernel/machine_kexec.c |    1 +
>>   4 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
>> index cb16fcc..5537f95 100644
>> --- a/arch/mips/cavium-octeon/setup.c
>> +++ b/arch/mips/cavium-octeon/setup.c
>> @@ -267,6 +267,17 @@ static void octeon_crash_shutdown(struct pt_regs *regs)
>>   	default_machine_crash_shutdown(regs);
>>   }
>>   
>> +#ifdef CONFIG_SMP
>> +void octeon_crash_smp_send_stop(void)
>> +{
>> +	int cpu;
>> +
>> +	/* disable watchdogs */
>> +	for_each_online_cpu(cpu)
>> +		cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
>> +}
>> +#endif
>> +
>>   #endif /* CONFIG_KEXEC */
>>   
>>   #ifdef CONFIG_CAVIUM_RESERVE32
>> @@ -911,6 +922,9 @@ void __init prom_init(void)
>>   	_machine_kexec_shutdown = octeon_shutdown;
>>   	_machine_crash_shutdown = octeon_crash_shutdown;
>>   	_machine_kexec_prepare = octeon_kexec_prepare;
>> +#ifdef CONFIG_SMP
>> +	_crash_smp_send_stop = octeon_crash_smp_send_stop;
>> +#endif
>>   #endif
>>   
>>   	octeon_user_io_init();
>> diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h
>> index ee25ebb..493a3cc 100644
>> --- a/arch/mips/include/asm/kexec.h
>> +++ b/arch/mips/include/asm/kexec.h
>> @@ -45,6 +45,7 @@ extern const unsigned char kexec_smp_wait[];
>>   extern unsigned long secondary_kexec_args[4];
>>   extern void (*relocated_kexec_smp_wait) (void *);
>>   extern atomic_t kexec_ready_to_reboot;
>> +extern void (*_crash_smp_send_stop)(void);
>>   #endif
>>   #endif
>>   
>> diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
>> index 610f0f3..1723b17 100644
>> --- a/arch/mips/kernel/crash.c
>> +++ b/arch/mips/kernel/crash.c
>> @@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs)
>>   
>>   static void crash_kexec_prepare_cpus(void)
>>   {
>> +	static int cpus_stopped;
>>   	unsigned int msecs;
>> +	unsigned int ncpus;
>>   
>> -	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
>> +	if (cpus_stopped)
>> +		return;

Wouldn't you want an atomic operation and some special handling here to
ensure that only one CPU does this?  That way, if a CPU comes in here while
another CPU is already in the process of stopping the CPUs, it won't result
in a deadlock.

-corey

>> +
>> +	ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
>>   
>>   	dump_send_ipi(crash_shutdown_secondary);
>>   	smp_wmb();
>> @@ -64,6 +69,17 @@ static void crash_kexec_prepare_cpus(void)
>>   		cpu_relax();
>>   		mdelay(1);
>>   	}
>> +
>> +	cpus_stopped = 1;
>> +}
>> +
>> +/* Override the weak function in kernel/panic.c */
>> +void crash_smp_send_stop(void)
>> +{
>> +	if (_crash_smp_send_stop)
>> +		_crash_smp_send_stop();
>> +
>> +	crash_kexec_prepare_cpus();
>>   }
>>   
>>   #else /* !defined(CONFIG_SMP)  */
>> diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c
>> index 50980bf3..5972520 100644
>> --- a/arch/mips/kernel/machine_kexec.c
>> +++ b/arch/mips/kernel/machine_kexec.c
>> @@ -25,6 +25,7 @@ void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
>>   #ifdef CONFIG_SMP
>>   void (*relocated_kexec_smp_wait) (void *);
>>   atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
>> +void (*_crash_smp_send_stop)(void) = NULL;
>>   #endif
>>   
>>   int
>>
>>
> Can any MIPS people review this patch and test it?
>
> Thanks
> Dave
>




More information about the kexec mailing list