[PATCH v1 07/11] fs/proc/vmcore: introduce PROC_VMCORE_DEVICE_RAM to detect device RAM ranges in 2nd kernel

Baoquan He bhe at redhat.com
Thu Nov 21 23:31:19 PST 2024


On 10/25/24 at 05:11pm, David Hildenbrand wrote:
......snip...
> diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
> index 3e90416ee54e..c332a9a4920b 100644
> --- a/fs/proc/vmcore.c
> +++ b/fs/proc/vmcore.c
> @@ -69,6 +69,8 @@ static LIST_HEAD(vmcore_cb_list);
>  /* Whether the vmcore has been opened once. */
>  static bool vmcore_opened;
>  
> +static void vmcore_process_device_ram(struct vmcore_cb *cb);
> +
>  void register_vmcore_cb(struct vmcore_cb *cb)
>  {
>  	INIT_LIST_HEAD(&cb->next);
> @@ -80,6 +82,8 @@ void register_vmcore_cb(struct vmcore_cb *cb)
>  	 */
>  	if (vmcore_opened)
>  		pr_warn_once("Unexpected vmcore callback registration\n");
> +	else if (cb->get_device_ram)
> +		vmcore_process_device_ram(cb);

The global variable 'vmcore_opened' indicates whether /proc/vmcore has
been opened. With vmcore_mutex we don't need to worry about concurrent
opening and modification. However, if userspace just opens /proc/vmcore
to check it and closes it again, the flag remains set, so a later
callback registration skips vmcore_process_device_ram() and s390 will
miss the device RAM in the vmcore dump. Is that acceptable?
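
To make the concern concrete, here is a minimal sketch of the sequence
I have in mind (simplified from my reading of the open path in
fs/proc/vmcore.c in this series; details may differ):

	static int open_vmcore(struct inode *inode, struct file *file)
	{
		mutex_lock(&vmcore_mutex);
		vmcore_opened = true;	/* sticky: never cleared on release */
		mutex_unlock(&vmcore_mutex);
		return 0;
	}

	/*
	 * 1) userspace opens /proc/vmcore, inspects it, closes it
	 *    -> vmcore_opened stays true
	 * 2) the s390 driver calls register_vmcore_cb() afterwards
	 *    -> cb->get_device_ram() is never invoked, so the device
	 *       RAM ranges are missing from any subsequent dump
	 */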

>  	mutex_unlock(&vmcore_mutex);
>  }
>  EXPORT_SYMBOL_GPL(register_vmcore_cb);
> @@ -1511,6 +1515,158 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
......
> +
> +static void vmcore_process_device_ram(struct vmcore_cb *cb)
> +{
> +	unsigned char *e_ident = (unsigned char *)elfcorebuf;
> +	struct vmcore_mem_node *first, *m;
> +	LIST_HEAD(list);
> +	int count;
> +
> +	if (cb->get_device_ram(cb, &list)) {
> +		pr_err("Kdump: obtaining device ram ranges failed\n");
> +		return;
> +	}
> +	count = list_count_nodes(&list);
> +	if (!count)
> +		return;
> +
> +	/* We only support Elf64 dumps for now. */
> +	if (WARN_ON_ONCE(e_ident[EI_CLASS] != ELFCLASS64)) {
> +		pr_err("Kdump: device ram ranges only support Elf64\n");
> +		goto out_free;
> +	}

The Elf64-only restriction looks like a basic sanity check; should it
be moved to the beginning of the function? Otherwise we go to the
trouble of calling cb->get_device_ram() and collecting the ranges,
only to fail afterwards.
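
E.g. something like this (untested, simply reordering the hunk quoted
above):

	static void vmcore_process_device_ram(struct vmcore_cb *cb)
	{
		unsigned char *e_ident = (unsigned char *)elfcorebuf;
		struct vmcore_mem_node *first, *m;
		LIST_HEAD(list);
		int count;

		/* Do the cheap sanity check before doing any real work. */
		if (WARN_ON_ONCE(e_ident[EI_CLASS] != ELFCLASS64)) {
			pr_err("Kdump: device ram ranges only support Elf64\n");
			return;
		}

		if (cb->get_device_ram(cb, &list)) {
			pr_err("Kdump: obtaining device ram ranges failed\n");
			return;
		}
		count = list_count_nodes(&list);
		if (!count)
			return;
		...
	}

With the check done first, nothing has been allocated yet on that
path, so the goto out_free can become a plain return.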

> +
> +	/*
> +	 * For some reason these ranges are already known? Might happen
> +	 * with unusual register->unregister->register sequences; we'll simply
> +	 * sanity check using the first range.
> +	 */
> +	first = list_first_entry(&list, struct vmcore_mem_node, list);
> +	list_for_each_entry(m, &vmcore_list, list) {
> +		unsigned long long m_end = m->paddr + m->size;
> +		unsigned long long first_end = first->paddr + first->size;
> +
> +		if (first->paddr < m_end && m->paddr < first_end)
> +			goto out_free;
> +	}
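
(Side note for other readers: the condition above is the standard
overlap test for half-open intervals. Expressed as a helper purely
for illustration, 'ranges_overlap' is hypothetical and not part of
this patch:

	/*
	 * [a, a_end) and [b, b_end) intersect iff each range starts
	 * before the other one ends.
	 */
	static inline bool ranges_overlap(u64 a, u64 a_end, u64 b, u64 b_end)
	{
		return a < b_end && b < a_end;
	}

E.g. [0x1000, 0x2000) and [0x1800, 0x3000) overlap, while
[0x1000, 0x2000) and [0x2000, 0x3000) merely touch and do not.)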
> +
> +	/* If adding the mem nodes succeeds, they must not be freed. */
> +	if (!vmcore_add_device_ram_elf64(&list, count))
> +		return;
> +out_free:
> +	vmcore_free_mem_nodes(&list);
> +}
> +#else /* !CONFIG_PROC_VMCORE_DEVICE_RAM */
> +static void vmcore_process_device_ram(struct vmcore_cb *cb)
> +{
> +}
> +#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
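
By the way, the "must not be freed" comment documents the usual list
ownership-transfer idiom. A hypothetical sketch of the pattern, not
the actual vmcore_add_device_ram_elf64() body (which is snipped
above):

	/*
	 * On success the nodes are spliced onto the global list and
	 * ownership transfers to it; on failure the caller keeps the
	 * nodes and must free them itself.
	 */
	static int add_ranges_sketch(struct list_head *list)
	{
		if (list_empty(list))
			return -EINVAL;	/* failure: caller still owns the list */

		list_splice_tail_init(list, &vmcore_list);
		return 0;		/* success: caller must not free the nodes */
	}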
> +
>  /* Free all dumps in vmcore device dump list */
>  static void vmcore_free_device_dumps(void)
>  {
> diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
> index 722dbcff7371..8e581a053d7f 100644
> --- a/include/linux/crash_dump.h
> +++ b/include/linux/crash_dump.h
