[PATCH v5 5/6] crash: forward memory_notify args to arch crash hotplug handler
Eric DeVolder
eric.devolder at oracle.com
Tue Nov 22 10:03:15 PST 2022
On 11/20/22 17:25, Sourabh Jain wrote:
> Because of the way memory hot remove is handled on PowerPC, it is hard to
> update the elfcorehdr without the memory_notify args.
>
> On PowerPC, the memblock data structure is used to prepare the elfcorehdr for
> kdump. Since the notifier used for the memory hotplug crash handler is invoked
> before the memblock data structure is updated (as depicted below), the newly
> prepared elfcorehdr still holds the old memory regions. So if the system
> crashes with an obsolete elfcorehdr, makedumpfile fails to collect the
> vmcore.
>
> Sequence of actions done on PowerPC to serve the memory hot remove:
>
> Initiate memory hot remove
> |
> v
> offline pages
> |
> v
> initiate memory notify call chain
> for MEM_OFFLINE event.
> (same is used for crash update)
> |
> v
> prepare new elfcorehdr for kdump using
> memblock data structure
> |
> v
> update memblock data structure
>
> How will passing memory_notify to the arch crash hotplug handler help?
>
> memory_notify holds the start PFN and page count. With these, the base address
> and size of the hot-unplugged memory can be calculated, and the same can be used
> to keep the hot-unplugged memory region from being added to the elfcorehdr.
>
> Signed-off-by: Sourabh Jain <sourabhjain at linux.ibm.com>
> ---
> arch/powerpc/include/asm/kexec.h | 2 +-
> arch/powerpc/kexec/core_64.c | 3 ++-
> arch/x86/include/asm/kexec.h | 2 +-
> arch/x86/kernel/crash.c | 3 ++-
> include/linux/kexec.h | 3 ++-
> kernel/crash_core.c | 16 +++++++---------
> 6 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index a4c0a035cb407..f32126a22f6ed 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -109,7 +109,7 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges);
> int machine_kexec_post_load(struct kimage *image);
> #define machine_kexec_post_load machine_kexec_post_load
>
> -void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action);
> +void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action, void *arg);
Be aware in the latest patch series, the hp_action argument was removed. You'll need to introduce it
again since ppc needs it.
> #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>
> #endif
> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> index 3dea1ce6b469c..6803d7e352a96 100644
> --- a/arch/powerpc/kexec/core_64.c
> +++ b/arch/powerpc/kexec/core_64.c
> @@ -575,11 +575,12 @@ int update_cpus_node(void *fdt)
> * arch_crash_hotplug_handler() - Handle hotplug kexec segements changes FDT, elfcorehdr
> * @image: the active struct kimage
> * @hp_action: the hot un/plug action being handled
> + * @arg: struct memory_notify data handler
> *
> * To accurately reflect CPU hot un/plug changes, the FDT must be updated with the
> * new list of CPUs.
> */
> -void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action)
> +void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action, void *arg)
> {
> void *fdt;
>
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index d72d347bd1d3b..e105a6b8a51b6 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -213,7 +213,7 @@ extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
> extern void kdump_nmi_shootdown_cpus(void);
>
> void arch_crash_handle_hotplug_event(struct kimage *image,
> - unsigned int hp_action);
> + unsigned int hp_action, void *arg);
> #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>
> #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 2687acf28977f..428121560c3cd 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -457,13 +457,14 @@ int crash_load_segments(struct kimage *image)
> * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
> * @image: the active struct kimage
> * @hp_action: the hot un/plug action being handled
> + * @arg: struct memory_notify data handler
> *
> * To accurately reflect hot un/plug changes, the new elfcorehdr
> * is prepared in a kernel buffer, and then it is written on top
> * of the existing/old elfcorehdr.
> */
> void arch_crash_handle_hotplug_event(struct kimage *image,
> - unsigned int hp_action)
> + unsigned int hp_action, void *arg)
> {
> unsigned long mem, memsz;
> unsigned long elfsz = 0;
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index e2dbbcbf37dcc..43b668484264b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -537,7 +537,8 @@ static inline void arch_unmap_crash_pages(void *ptr)
>
> #ifndef arch_crash_handle_hotplug_event
> static inline void arch_crash_handle_hotplug_event(struct kimage *image,
> - unsigned int hp_action)
> + unsigned int hp_action,
> + void *arg)
> {
> }
> #endif
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index f6cccdcadc9f3..3132466b5e429 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -641,7 +641,7 @@ subsys_initcall(crash_save_vmcoreinfo_init);
> * list of segments it checks (since the elfcorehdr changes and thus
> * would require an update to purgatory itself to update the digest).
> */
> -static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg)
> {
> /* Obtain lock while changing crash information */
> if (kexec_trylock()) {
> @@ -704,7 +704,7 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> cpu : KEXEC_CRASH_HP_INVALID_CPU;
>
> /* Now invoke arch-specific update handler */
> - arch_crash_handle_hotplug_event(image, hp_action);
> + arch_crash_handle_hotplug_event(image, hp_action, arg);
>
> /* No longer handling a hotplug event */
> image->hotplug_event = false;
> @@ -719,17 +719,15 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> }
> }
>
> -static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg)
> {
> switch (val) {
> case MEM_ONLINE:
> - handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
> - KEXEC_CRASH_HP_INVALID_CPU);
> + handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0, arg);
> break;
>
> case MEM_OFFLINE:
> - handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
> - KEXEC_CRASH_HP_INVALID_CPU);
> + handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0, arg);
> break;
> }
> return NOTIFY_OK;
> @@ -742,13 +740,13 @@ static struct notifier_block crash_memhp_nb = {
>
> static int crash_cpuhp_online(unsigned int cpu)
> {
> - handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
> + handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL);
> return 0;
> }
>
> static int crash_cpuhp_offline(unsigned int cpu)
> {
> - handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
> + handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL);
> return 0;
> }
>
>
Otherwise this lgtm.
eric
More information about the kexec
mailing list