[PATCH v5 5/6] crash: forward memory_notify args to arch crash hotplug handler

Eric DeVolder eric.devolder at oracle.com
Tue Nov 22 10:03:15 PST 2022



On 11/20/22 17:25, Sourabh Jain wrote:
> Because of the way memory hot remove is handled on PowerPC, it is hard to
> update the elfcorehdr without the memory_notify args.
> 
> On PowerPC, the memblock data structure is used to prepare the elfcorehdr
> for kdump. Since the notifier used for the memory hotplug crash handler is
> invoked before the memblock data structure is updated (as depicted below),
> the newly prepared elfcorehdr still holds the old memory regions. So if
> the system crashes with an obsolete elfcorehdr, makedumpfile fails to
> collect the vmcore.
> 
> Sequence of actions done on PowerPC to serve the memory hot remove:
> 
>   Initiate memory hot remove
>            |
>            v
>   offline pages
>            |
>            v
>   initiate memory notify call chain
>   for MEM_OFFLINE event.
>   (same is used for crash update)
>            |
>            v
>   prepare new elfcorehdr for kdump using
>   memblock data structure
>            |
>            v
>   update memblock data structure
> 
> How will passing memory_notify to the arch crash hotplug handler help?
> 
> memory_notify holds the start PFN and the page count; from these, the base
> address and size of the hot unplugged memory can be calculated and used to
> keep the hot unplugged memory region from being added to the elfcorehdr.
> 
> Signed-off-by: Sourabh Jain <sourabhjain at linux.ibm.com>
> ---
>   arch/powerpc/include/asm/kexec.h |  2 +-
>   arch/powerpc/kexec/core_64.c     |  3 ++-
>   arch/x86/include/asm/kexec.h     |  2 +-
>   arch/x86/kernel/crash.c          |  3 ++-
>   include/linux/kexec.h            |  3 ++-
>   kernel/crash_core.c              | 16 +++++++---------
>   6 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index a4c0a035cb407..f32126a22f6ed 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -109,7 +109,7 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges);
>   int machine_kexec_post_load(struct kimage *image);
>   #define machine_kexec_post_load machine_kexec_post_load
>   
> -void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action);
> +void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action, void *arg);

Be aware that in the latest patch series the hp_action argument was removed.
You'll need to reintroduce it, since ppc needs it.

>   #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>   
>   #endif
> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> index 3dea1ce6b469c..6803d7e352a96 100644
> --- a/arch/powerpc/kexec/core_64.c
> +++ b/arch/powerpc/kexec/core_64.c
> @@ -575,11 +575,12 @@ int update_cpus_node(void *fdt)
>    * arch_crash_hotplug_handler() - Handle hotplug kexec segements changes FDT, elfcorehdr
>    * @image: the active struct kimage
>    * @hp_action: the hot un/plug action being handled
> + * @arg: struct memory_notify data handler
>    *
>    * To accurately reflect CPU hot un/plug changes, the FDT must be updated with the
>    * new list of CPUs.
>    */
> -void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action)
> +void arch_crash_handle_hotplug_event(struct kimage *image, unsigned int hp_action, void *arg)
>   {
>   	void *fdt;
>   
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index d72d347bd1d3b..e105a6b8a51b6 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -213,7 +213,7 @@ extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
>   extern void kdump_nmi_shootdown_cpus(void);
>   
>   void arch_crash_handle_hotplug_event(struct kimage *image,
> -				    unsigned int hp_action);
> +				     unsigned int hp_action, void *arg);
>   #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>   
>   #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 2687acf28977f..428121560c3cd 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -457,13 +457,14 @@ int crash_load_segments(struct kimage *image)
>    * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
>    * @image: the active struct kimage
>    * @hp_action: the hot un/plug action being handled
> + * @arg: struct memory_notify data handler
>    *
>    * To accurately reflect hot un/plug changes, the new elfcorehdr
>    * is prepared in a kernel buffer, and then it is written on top
>    * of the existing/old elfcorehdr.
>    */
>   void arch_crash_handle_hotplug_event(struct kimage *image,
> -				    unsigned int hp_action)
> +				     unsigned int hp_action, void *arg)
>   {
>   	unsigned long mem, memsz;
>   	unsigned long elfsz = 0;
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index e2dbbcbf37dcc..43b668484264b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -537,7 +537,8 @@ static inline void arch_unmap_crash_pages(void *ptr)
>   
>   #ifndef arch_crash_handle_hotplug_event
>   static inline void arch_crash_handle_hotplug_event(struct kimage *image,
> -						  unsigned int hp_action)
> +						   unsigned int hp_action,
> +						   void *arg)
>   {
>   }
>   #endif
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index f6cccdcadc9f3..3132466b5e429 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -641,7 +641,7 @@ subsys_initcall(crash_save_vmcoreinfo_init);
>    * list of segments it checks (since the elfcorehdr changes and thus
>    * would require an update to purgatory itself to update the digest).
>    */
> -static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg)
>   {
>   	/* Obtain lock while changing crash information */
>   	if (kexec_trylock()) {
> @@ -704,7 +704,7 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>   					cpu : KEXEC_CRASH_HP_INVALID_CPU;
>   
>   			/* Now invoke arch-specific update handler */
> -			arch_crash_handle_hotplug_event(image, hp_action);
> +			arch_crash_handle_hotplug_event(image, hp_action, arg);
>   
>   			/* No longer handling a hotplug event */
>   			image->hotplug_event = false;
> @@ -719,17 +719,15 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>   	}
>   }
>   
> -static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg)
>   {
>   	switch (val) {
>   	case MEM_ONLINE:
> -		handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
> -			KEXEC_CRASH_HP_INVALID_CPU);
> +		handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0, arg);
>   		break;
>   
>   	case MEM_OFFLINE:
> -		handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
> -			KEXEC_CRASH_HP_INVALID_CPU);
> +		handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0, arg);
>   		break;
>   	}
>   	return NOTIFY_OK;
> @@ -742,13 +740,13 @@ static struct notifier_block crash_memhp_nb = {
>   
>   static int crash_cpuhp_online(unsigned int cpu)
>   {
> -	handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
> +	handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL);
>   	return 0;
>   }
>   
>   static int crash_cpuhp_offline(unsigned int cpu)
>   {
> -	handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
> +	handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL);
>   	return 0;
>   }
>   
> 

Otherwise this lgtm.
eric



More information about the kexec mailing list