[PATCH v20 2/8] crash: add generic infrastructure for crash hotplug support

Eric DeVolder eric.devolder at oracle.com
Mon Mar 20 08:06:29 PDT 2023



On 3/20/23 03:13, Baoquan He wrote:
> On 03/17/23 at 05:21pm, Eric DeVolder wrote:
> ......
>> @@ -697,3 +700,137 @@ static int __init crash_save_vmcoreinfo_init(void)
>>   }
>>   
>>   subsys_initcall(crash_save_vmcoreinfo_init);
>> +
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> +#undef pr_fmt
>> +#define pr_fmt(fmt) "crash hp: " fmt
>> +/*
>> + * To accurately reflect hot un/plug changes of cpu and memory resources
>> + * (including onlining and offlining of those resources), the elfcorehdr
>> + * (which is passed to the crash kernel via the elfcorehdr= parameter)
>> + * must be updated with the new list of CPUs and memories.
>> + *
>> + * In order to make changes to elfcorehdr, two conditions are needed:
>> + * First, the segment containing the elfcorehdr must be large enough
>> + * to permit a growing number of resources; the elfcorehdr memory size
>> + * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
>> + * Second, purgatory must explicitly exclude the elfcorehdr from the
>> + * list of segments it checks (since the elfcorehdr changes and thus
>> + * would require an update to purgatory itself to update the digest).
>> + */
>> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>> +{
>> +	/* Obtain lock while changing crash information */
>> +	if (!kexec_trylock())
>> +		return;
>> +
>> +	/* Check kdump is loaded */
>> +	if (kexec_crash_image) {
> 
> Here, what I mean is:
> 
> 	/* Obtain lock while changing crash information */
> 	if (!kexec_trylock())
> 		return;
> 
> 	/* If kdump is not loaded */
> 	if (!kexec_crash_image)
> 		goto out;	
> 
> Then we reduce one tab of indentation for the following code block, e.g.,
> the for loop block will be under less pressure to exceed the 80-character
> limit.
> 

Ah, yes, ok. I'll make that change. Do you prefer I post that soon, or give this v20 some more time?
eric

>> +		struct kimage *image = kexec_crash_image;
>> +
>> +		if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>> +			hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>> +			pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>> +		else
>> +			pr_debug("hp_action %u\n", hp_action);
>> +
>> +		/*
>> +		 * When the struct kimage is allocated, the elfcorehdr_index
>> +		 * is set to -1. Find the segment containing the elfcorehdr,
>> +		 * if not already found. This works for both the kexec_load
>> +		 * and kexec_file_load paths.
>> +		 */
>> +		if (image->elfcorehdr_index < 0) {
>> +			unsigned long mem;
>> +			unsigned char *ptr;
>> +			unsigned int n;
>> +
>> +			for (n = 0; n < image->nr_segments; n++) {
>> +				mem = image->segment[n].mem;
>> +				ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
>> +				if (ptr) {
>> +					/* The segment containing elfcorehdr */
>> +					if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
>> +						image->elfcorehdr_index = (int)n;
>> +					kunmap_local(ptr);
>> +				}
>> +			}
>> +		}
>> +
>> +		if (image->elfcorehdr_index < 0) {
>> +			pr_err("unable to locate elfcorehdr segment");
>> +			goto out;
>> +		}
>> +
>> +		/* Needed in order for the segments to be updated */
>> +		arch_kexec_unprotect_crashkres();
>> +
>> +		/* Differentiate between normal load and hotplug update */
>> +		image->hp_action = hp_action;
>> +
>> +		/* Now invoke arch-specific update handler */
>> +		arch_crash_handle_hotplug_event(image);
>> +
>> +		/* No longer handling a hotplug event */
>> +		image->hp_action = KEXEC_CRASH_HP_NONE;
>> +		image->elfcorehdr_updated = true;
>> +
>> +		/* Change back to read-only */
>> +		arch_kexec_protect_crashkres();
>> +	}
>> +
>> +out:
>> +	/* Release lock now that update complete */
>> +	kexec_unlock();
>> +}
>> +
>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>> +{
>> +	switch (val) {
>> +	case MEM_ONLINE:
>> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +
>> +	case MEM_OFFLINE:
>> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +	}
>> +	return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block crash_memhp_nb = {
>> +	.notifier_call = crash_memhp_notifier,
>> +	.priority = 0
>> +};
>> +
>> +static int crash_cpuhp_online(unsigned int cpu)
>> +{
>> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int crash_cpuhp_offline(unsigned int cpu)
>> +{
>> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int __init crash_hotplug_init(void)
>> +{
>> +	int result = 0;
>> +
>> +	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
>> +		register_memory_notifier(&crash_memhp_nb);
>> +
>> +	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
>> +		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
>> +			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
>> +	}
>> +
>> +	return result;
>> +}
>> +
>> +subsys_initcall(crash_hotplug_init);
>> +#endif
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index 3d578c6fefee..8296d019737c 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -277,6 +277,12 @@ struct kimage *do_kimage_alloc_init(void)
>>   	/* Initialize the list of unusable pages */
>>   	INIT_LIST_HEAD(&image->unusable_pages);
>>   
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> +	image->hp_action = KEXEC_CRASH_HP_NONE;
>> +	image->elfcorehdr_index = -1;
>> +	image->elfcorehdr_updated = false;
>> +#endif
>> +
>>   	return image;
>>   }
>>   
>> -- 
>> 2.31.1
>>
> 



More information about the kexec mailing list