[PATCH v10 4/5] crash: forward memory_notify args to arch crash hotplug handler

Eric DeVolder eric.devolder at oracle.com
Mon Apr 24 07:02:17 PDT 2023



On 4/23/23 05:52, Sourabh Jain wrote:
> On PowePC memblock regions are used to prepare elfcorehdr which
s/PowePC/PowerPC/

> describes the memory regions of the running kernel to the kdump kernel.
> Since the notifier used for the memory hotplug crash handler gets
> initiated before the update of the memblock region happens (as depicted
> below) the newly prepared elfcorehdr still holds the old memory regions.
> If the elfcorehdr is prepared with stale memblock regions then the newly
> prepared elfcorehdr will still be holding stale memory regions. And dump
> collection with stale elfcorehdr will lead to dump collection failure or
> incomplete dump collection.
> 
> The sequence of actions done on PowerPC when an LMB memory hot removed:
> 
>   Initiate memory hot remove
>            |
>            v
>   offline pages
>            |
>            v
>   initiate memory notify call
>   chain for MEM_OFFLINE event  <---> Prepare new elfcorehdr and replace
>   				    it with old one
>            |
>            v
>   update memblock regions
> 
> Such challenges only exist for memory remove case. For the memory add
> case the memory regions are updated first and then memory notify calls
> the arch crash hotplug handler to update the elfcorehdr.
> 
> This patch passes additional information about the hot removed LMB to
> the arch crash hotplug handler in the form of memory_notify object.
> 
> How passing memory_notify to arch crash hotplug handler will help?
> 
> memory_notify holds the start PFN and page count of the hot removed
> memory. With that base address and the size of the hot removed memory
> can be calculated and same can be used to avoid adding hot removed
> memory region to get added in the elfcorehdr.
> 
> Signed-off-by: Sourabh Jain <sourabhjain at linux.ibm.com>
> Reviewed-by: Laurent Dufour <laurent.dufour at fr.ibm.com>
> ---
>   arch/powerpc/include/asm/kexec.h |  2 +-
>   arch/powerpc/kexec/core_64.c     |  3 ++-
>   arch/x86/include/asm/kexec.h     |  2 +-
>   arch/x86/kernel/crash.c          |  3 ++-
>   include/linux/kexec.h            |  2 +-
>   kernel/crash_core.c              | 14 +++++++-------
>   6 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index f01ba767af56e..7e811bad5ec92 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -104,7 +104,7 @@ struct crash_mem;
>   int update_cpus_node(void *fdt);
>   int get_crash_memory_ranges(struct crash_mem **mem_ranges);
>   #if defined(CONFIG_CRASH_HOTPLUG)
> -void arch_crash_handle_hotplug_event(struct kimage *image);
> +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
>   #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>   #endif
>   #endif
> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> index 611b89bcea2be..147ea6288a526 100644
> --- a/arch/powerpc/kexec/core_64.c
> +++ b/arch/powerpc/kexec/core_64.c
> @@ -551,10 +551,11 @@ int update_cpus_node(void *fdt)
>    * arch_crash_hotplug_handler() - Handle crash CPU/Memory hotplug events to update the
>    *                                necessary kexec segments based on the hotplug event.
s/arch/crash_hotplug_handler/arch_crash_handle_hotplug_event/

>    * @image: the active struct kimage
> + * @arg: struct memory_notify handler for memory add/remove case and NULL for CPU case.
>    *
>    * Update FDT segment to include newly added CPU. No action for CPU remove case.
>    */
> -void arch_crash_handle_hotplug_event(struct kimage *image)
> +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
>   {
>   	void *fdt, *ptr;
>   	unsigned long mem;
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index 1bc852ce347d4..70c3b23b468b6 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -213,7 +213,7 @@ extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
>   extern void kdump_nmi_shootdown_cpus(void);
>   
>   #ifdef CONFIG_CRASH_HOTPLUG
> -void arch_crash_handle_hotplug_event(struct kimage *image);
> +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
>   #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
>   
>   #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index ead602636f3e0..b45d13193b579 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -445,11 +445,12 @@ int crash_load_segments(struct kimage *image)
>   /**
>    * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
>    * @image: the active struct kimage
> + * @arg: struct memory_notify handler for memory add/remove case and NULL for CPU case.
>    *
>    * The new elfcorehdr is prepared in a kernel buffer, and then it is
>    * written on top of the existing/old elfcorehdr.
>    */
> -void arch_crash_handle_hotplug_event(struct kimage *image)
> +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
>   {
>   	void *elfbuf = NULL, *old_elfcorehdr;
>   	unsigned long nr_mem_ranges;
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 0ac41f48de0b1..69765e6a92d0d 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -506,7 +506,7 @@ static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) {
>   #endif
>   
>   #ifndef arch_crash_handle_hotplug_event
> -static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
> +static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { }
>   #endif
>   
>   #ifndef crash_hotplug_cpu_support
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 4aa3c7a6b390f..7afe5f60d2bfe 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -718,7 +718,7 @@ subsys_initcall(crash_save_vmcoreinfo_init);
>    * list of segments it checks (since the elfcorehdr changes and thus
>    * would require an update to purgatory itself to update the digest).
>    */
> -static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg)
>   {
>   	struct kimage *image;
>   
> @@ -775,7 +775,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>   	image->hp_action = hp_action;
>   
>   	/* Now invoke arch-specific update handler */
> -	arch_crash_handle_hotplug_event(image);
> +	arch_crash_handle_hotplug_event(image, arg);
>   
>   	/* No longer handling a hotplug event */
>   	image->hp_action = KEXEC_CRASH_HP_NONE;
> @@ -789,17 +789,17 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>   	kexec_unlock();
>   }
>   
> -static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg)
>   {
>   	switch (val) {
>   	case MEM_ONLINE:
>   		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
> -			KEXEC_CRASH_HP_INVALID_CPU);
> +			KEXEC_CRASH_HP_INVALID_CPU, arg);
>   		break;
>   
>   	case MEM_OFFLINE:
>   		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
> -			KEXEC_CRASH_HP_INVALID_CPU);
> +			KEXEC_CRASH_HP_INVALID_CPU, arg);
>   		break;
>   	}
>   	return NOTIFY_OK;
> @@ -812,13 +812,13 @@ static struct notifier_block crash_memhp_nb = {
>   
>   static int crash_cpuhp_online(unsigned int cpu)
>   {
> -	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL);
>   	return 0;
>   }
>   
>   static int crash_cpuhp_offline(unsigned int cpu)
>   {
> -	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL);
>   	return 0;
>   }
>   



More information about the kexec mailing list