[PATCH v5 09/14] efi: passing kexec necessary efi data via setup_data

Borislav Petkov bp at alien8.de
Wed Dec 11 17:20:57 EST 2013


On Mon, Dec 09, 2013 at 05:42:22PM +0800, Dave Young wrote:
> Add a new setup_data type SETUP_EFI for kexec use.
> Passing the saved fw_vendor, runtime, config tables and efi runtime mappings.
> 
> When entering virtual mode, directly mapping the efi runtime ragions which
> we passed in previously. And skip the step to call SetVirtualAddressMap.
> 
> Specially for HP z420 workstation we need save the smbios physical address.
> The kernel boot sequence proceeds in the following order.  Step 2
> requires efi.smbios to be the physical address.  However, I found that on
> HP z420 EFI system table has a virtual address of SMBIOS in step 1.  Hence,
> we need set it back to the physical address with the smbios in
> efi_setup_data.  (When it is still the physical address, it simply sets
> the same value.)
> 
> 1. efi_init() - Set efi.smbios from EFI system table
> 2. dmi_scan_machine() - Temporary map efi.smbios to access SMBIOS table
> 3. efi_enter_virtual_mode() - Map EFI ranges
> 
> Tested on ovmf+qemu, lenovo thinkpad, a dell laptop and an
> HP z420 workstation.
> 
> v2: refresh based on previous patch changes, code cleanup.
> v3: use ioremap instead of phys_to_virt for efi_setup
> v5: improve some code structure per comments from Matt
>     Boris: improve code structure, spell fix, etc.
>     Improve changelog from Toshi.
>     change the variable efi_setup to the physical address of efi setup_data
>     instead of the ioremapped virt address
> 
> Signed-off-by: Dave Young <dyoung at redhat.com>
> ---
>  arch/x86/include/asm/efi.h            |  11 ++
>  arch/x86/include/uapi/asm/bootparam.h |   1 +
>  arch/x86/kernel/setup.c               |   3 +
>  arch/x86/platform/efi/efi.c           | 195 ++++++++++++++++++++++++++++++----
>  4 files changed, 187 insertions(+), 23 deletions(-)

...

> @@ -115,6 +116,25 @@ static int __init setup_storage_paranoia(char *arg)
>  }
>  early_param("efi_no_storage_paranoia", setup_storage_paranoia);
>  
> +void __init parse_efi_setup(u64 phys_addr)
> +{
> +	struct setup_data *sd;
> +
> +	if (!efi_enabled(EFI_64BIT)) {
> +		pr_warn("SETUP_EFI not supported on 32-bit\n");
> +		return;
> +	}

Shouldn't this function be in two versions in efi_64.c and efi_32.c?
This way you don't need this check with cryptic printk message.

> +
> +	sd = early_memremap(phys_addr, sizeof(struct setup_data));
> +	if (!sd) {
> +		pr_warn("efi: early_memremap setup_data failed\n");
> +		return;
> +	}
> +	efi_setup = phys_addr + sizeof(struct setup_data);
> +	nr_efi_runtime_map = (sd->len - sizeof(struct efi_setup_data)) /
> +			     sizeof(efi_memory_desc_t);
> +	early_memunmap(sd, sizeof(struct setup_data));
> +}
>  
>  static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
>  {
> @@ -494,18 +514,28 @@ static int __init efi_systab_init(void *phys)
>  {
>  	if (efi_enabled(EFI_64BIT)) {
>  		efi_system_table_64_t *systab64;
> +		struct efi_setup_data *data = NULL;
>  		u64 tmp = 0;
>  
> +		if (efi_setup) {
> +			data = early_memremap(efi_setup, sizeof(*data));
> +			if (!data)
> +				return -ENOMEM;
> +		}
>  		systab64 = early_memremap((unsigned long)phys,
>  					 sizeof(*systab64));
>  		if (systab64 == NULL) {
>  			pr_err("Couldn't map the system table!\n");
> +			if (data)
> +				early_memunmap(data, sizeof(*data));
>  			return -ENOMEM;
>  		}
>  
>  		efi_systab.hdr = systab64->hdr;
> -		efi_systab.fw_vendor = systab64->fw_vendor;
> -		tmp |= systab64->fw_vendor;
> +
> +		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
> +					      systab64->fw_vendor;
> +		tmp |= efi_systab.fw_vendor;
>  		efi_systab.fw_revision = systab64->fw_revision;
>  		efi_systab.con_in_handle = systab64->con_in_handle;
>  		tmp |= systab64->con_in_handle;
> @@ -519,15 +549,20 @@ static int __init efi_systab_init(void *phys)
>  		tmp |= systab64->stderr_handle;
>  		efi_systab.stderr = systab64->stderr;
>  		tmp |= systab64->stderr;
> -		efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
> -		tmp |= systab64->runtime;
> +		efi_systab.runtime = data ?
> +				     (void *)(unsigned long)data->runtime :
> +				     (void *)(unsigned long)systab64->runtime;
> +		tmp |= (unsigned long)efi_systab.runtime;
>  		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
>  		tmp |= systab64->boottime;
>  		efi_systab.nr_tables = systab64->nr_tables;
> -		efi_systab.tables = systab64->tables;
> -		tmp |= systab64->tables;
> +		efi_systab.tables = data ? (unsigned long)data->tables :
> +					   systab64->tables;
> +		tmp |= efi_systab.tables;
>  
>  		early_memunmap(systab64, sizeof(*systab64));
> +		if (data)
> +			early_memunmap(data, sizeof(*data));
>  #ifdef CONFIG_X86_32
>  		if (tmp >> 32) {
>  			pr_err("EFI data located above 4GB, disabling EFI.\n");
> @@ -631,6 +666,61 @@ static int __init efi_memmap_init(void)
>  	return 0;
>  }
>  
> +/*
> + * For kexec kernel there's some special config table entries which could be
> + * converted to virtual addresses after entering virtual mode. In kexec kernel
> + * we need the physical addresses instead, thus passing them via setup_data
> + * and update the entries to physical addresses in this function.

Rewrite:

"A number of config table entries get remapped to virtual addresses
after entering EFI virtual mode. However, the kexec kernel requires
their physical addresses therefore we pass them via setup_data and
correct those entries to their respective physical addresses here."

> + *
> + * Currently only handles smbios which is necessary for HP z420.

Didn't we say that this behavior is coming from a generic UEFI fw
implementation and if so, no need to mention z420?

> + */
> +static int __init efi_reuse_config(u64 tables, int nr_tables)
> +{
> +	int i, sz, ret = 0;
> +	void *p, *tablep;
> +	struct efi_setup_data *data;
> +
> +	if (!efi_setup)
> +		return 0;
> +
> +	if (!efi_enabled(EFI_64BIT))
> +		return 0;
> +
> +	data = early_memremap(efi_setup, sizeof(*data));
> +	if (!data) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	if (!data->smbios)
> +		goto out_memremap;
> +
> +	sz = sizeof(efi_config_table_64_t);
> +
> +	p = tablep = early_memremap(tables, nr_tables * sz);
> +	if (!p) {
> +		pr_err("Could not map Configuration table!\n");
> +		ret = -ENOMEM;
> +		goto out_memremap;
> +	}
> +
> +	for (i = 0; i < efi.systab->nr_tables; i++) {
> +		efi_guid_t guid;
> +
> +		guid = ((efi_config_table_64_t *)p)->guid;
> +
> +		if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
> +			((efi_config_table_64_t *)p)->table = data->smbios;
> +		p += sz;
> +	}
> +	early_memunmap(tablep, nr_tables * sz);
> +
> +out_memremap:
> +	early_memunmap(data, sizeof(*data));
> +out:
> +	return ret;
> +}
> +
>  void __init efi_init(void)
>  {
>  	efi_char16_t *c16;
> @@ -676,6 +766,8 @@ void __init efi_init(void)
>  		efi.systab->hdr.revision >> 16,
>  		efi.systab->hdr.revision & 0xffff, vendor);
>  
> +	efi_reuse_config(efi.systab->tables, efi.systab->nr_tables);
> +
>  	if (efi_config_init(arch_tables))
>  		return;
>  
> @@ -886,6 +978,50 @@ out_krealloc:
>  }
>  
>  /*
> + * Map efi regions which was passed via setup_data. The virt_addr is a fixed

			   were

> + * addr which was used in first kernel in case kexec boot.
						 ^
						of a

> + */
> +static int __init map_regions_fixed(void)
> +{
> +	int i, s, ret = 0;
> +	u64 end, systab;
> +	unsigned long size;
> +	efi_memory_desc_t *md;
> +	struct efi_setup_data *data;
> +
> +	s = sizeof(*data) + nr_efi_runtime_map * sizeof(data->map[0]);
> +	data = early_memremap(efi_setup, s);
> +	if (!data) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}

newline.

> +	for (i = 0, md = data->map; i < nr_efi_runtime_map; i++, md++) {
> +		efi_map_region_fixed(md); /* FIXME: add error handling */
> +		size = md->num_pages << PAGE_SHIFT;
> +		end = md->phys_addr + size;
> +
> +		systab = (u64) (unsigned long) efi_phys.systab;
> +		if (md->phys_addr <= systab && systab < end) {
> +			systab += md->virt_addr - md->phys_addr;
> +			efi.systab = (efi_system_table_t *)(unsigned long)systab;
> +		}
> +		ret = save_runtime_map(md, i);

Wait a minute, this is executed in the second, kexec-ed kernel, right?
Why do we need to save the map there too?

> +		if (ret)
> +			goto out_save_runtime;
> +	}
> +
> +	early_memunmap(data, s);
> +	return 0;
> +
> +out_save_runtime:
> +	kfree(efi_runtime_map);
> +	nr_efi_runtime_map = 0;
> +	early_memunmap(data, s);
> +out:
> +	return ret;
> +}
> +
> +/*
>   * This function will switch the EFI runtime services to virtual mode.
>   * Essentially, we look through the EFI memmap and map every region that
>   * has the runtime attribute bit set in its memory descriptor into the
> @@ -901,12 +1037,16 @@ out_krealloc:
>   * so that we're in a different address space when calling a runtime
>   * function. For function arguments passing we do copy the PGDs of the
>   * kernel page table into ->trampoline_pgd prior to each call.
> + *
> + * Specially for kexec boot, efi runtime maps in previous kernel should
> + * be passed in via setup_data. In that case runtime ranges will be mapped
> + * to the same virtual addresses exactly same as the ones in previous kernel.

"... to the same ..exactly same as ... " sounds funny. What's wrong with

"... to the same virtual addresses as the first kernel."

or if you really insist on "exact":

"... to the same exact virtual addresses as the first kernel."

>   */
>  void __init efi_enter_virtual_mode(void)
>  {

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--



More information about the kexec mailing list