[RFC] kexec_file: Add support for purgatory built as PIE

Baoquan He bhe at redhat.com
Fri Nov 4 00:38:40 PDT 2016


On 11/02/16 at 04:00am, Thiago Jung Bauermann wrote:
> Hello,
> 
> The kexec_file code currently builds the purgatory as a partially linked object 
> (using ld -r). Is there a particular reason to use that instead of a position 
> independent executable (PIE)?

It was originally built with "-r" (relocatable) in the user-space kexec-tools too.
I think Vivek just kept it the same when moving it into the kernel.

> 
> I found a discussion from 2013 in the archives but from what I understood it 
> was about the purgatory as a separate object vs having it linked into the 
> kernel, which is different from what I'm asking:
> 
> http://lists.infradead.org/pipermail/kexec/2013-December/010535.html
> 
> Here is my motivation for this question:
> 
>  On ppc64 purgatory.ro has 12 relocation types when built as a partially 
> linked object. This makes arch_kexec_apply_relocations_add duplicate a lot of 
> code with module_64.c:apply_relocate_add to implement these relocations. The 
> alternative is to do some refactoring so that both functions can share the 
> implementation of the relocations. This is done in patches 5 and 6 of the 
> kexec_file_load implementation for powerpc:

Do you also have this problem in the user-space kexec-tools utility?

> 
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-October/149984.html
> 
> Michael Ellerman would prefer if module_64.c didn't need to be changed, and 
> suggested that the purgatory could be a position independent executable. 
> Indeed, in that case there are only 4 relocation types in purgatory.ro (which 
> aren't even implemented in module_64.c:apply_relocate_add), so the relocation 
> code for the purgatory can leave that file alone and have its own relocation 
> implementation.
> 
> Also, the purgatory is an executable and not an intermediary output from the 
> compiler, so in my mind it makes sense conceptually that it is easier to build 
> it as a PIE than as a partially linked object.
> 
> The patch below adds the support needed in kexec_file.c to allow powerpc-
> specific code to load and relocate a purgatory binary built as PIE. This is WIP 
> and can probably be refined a bit. Would you accept a change along these lines?
> 
> Signed-off-by: Thiago Jung Bauermann <bauerman at linux.vnet.ibm.com>
> ---
>  arch/Kconfig            |   3 +
>  kernel/kexec_file.c     | 159 ++++++++++++++++++++++++++++++++++++++++++++++--
>  kernel/kexec_internal.h |  26 ++++++++
>  3 files changed, 183 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 659bdd079277..7fd6879be222 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -5,6 +5,9 @@
>  config KEXEC_CORE
>  	bool
>  
> +config HAVE_KEXEC_FILE_PIE_PURGATORY
> +	bool
> +
>  config OPROFILE
>  	tristate "OProfile system profiling"
>  	depends on PROFILING
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index 0c2df7f73792..dfc3e015160d 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -633,7 +633,149 @@ static int kexec_calculate_store_digests(struct kimage *image)
>  	return ret;
>  }
>  
> -/* Actually load purgatory. Lot of code taken from kexec-tools */
> +#ifdef CONFIG_HAVE_KEXEC_FILE_PIE_PURGATORY
> +/* Load PIE purgatory using the program header information. */
> +static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
> +				  unsigned long max, int top_down)
> +{
> +	struct purgatory_info *pi = &image->purgatory_info;
> +	unsigned long first_offset;
> +	unsigned long orig_load_addr = 0;
> +	const void *src;
> +	int i, ret;
> +	const Elf_Phdr *phdrs = (const void *) pi->ehdr + pi->ehdr->e_phoff;
> +	const Elf_Phdr *phdr;
> +	const Elf_Shdr *sechdrs_c;
> +	Elf_Shdr *sechdr;
> +	Elf_Shdr *sechdrs = NULL;
> +	struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
> +				  .buf_min = min, .buf_max = max,
> +				  .top_down = top_down };
> +
> +	/*
> +	 * sechdrs_c points to section headers in purgatory and are read
> +	 * only. No modifications allowed.
> +	 */
> +	sechdrs_c = (void *) pi->ehdr + pi->ehdr->e_shoff;
> +
> +	/*
> +	 * We can not modify sechdrs_c[] and its fields. It is read only.
> +	 * Copy it over to a local copy where one can store some temporary
> +	 * data and free it at the end. We need to modify ->sh_addr and
> +	 * ->sh_offset fields to keep track of permanent and temporary
> +	 * locations of sections.
> +	 */
> +	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
> +	if (!sechdrs)
> +		return -ENOMEM;
> +
> +	memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
> +
> +	/*
> +	 * We seem to have multiple copies of sections. First copy is which
> +	 * is embedded in kernel in read only section. Some of these sections
> +	 * will be copied to a temporary buffer and relocated. And these
> +	 * sections will finally be copied to their final destination at
> +	 * segment load time.
> +	 *
> +	 * Use ->sh_offset to reflect section address in memory. It will
> +	 * point to original read only copy if section is not allocatable.
> +	 * Otherwise it will point to temporary copy which will be relocated.
> +	 *
> +	 * Use ->sh_addr to contain final address of the section where it
> +	 * will go during execution time.
> +	 */
> +	for (sechdr = sechdrs; sechdr < sechdrs + pi->ehdr->e_shnum; sechdr++) {
> +		if (sechdr->sh_type == SHT_NOBITS)
> +			continue;
> +
> +		sechdr->sh_offset = (unsigned long) pi->ehdr + sechdr->sh_offset;
> +	}
> +
> +	/* Determine how much memory is needed to load the executable. */
> +	for (phdr = phdrs; phdr < phdrs + pi->ehdr->e_phnum; phdr++) {
> +		if (phdr->p_type != PT_LOAD)
> +			continue;
> +
> +		if (!orig_load_addr) {
> +			orig_load_addr = phdr->p_vaddr - phdr->p_offset;
> +			kbuf.bufsz = first_offset = phdr->p_offset;
> +		}
> +
> +		if (kbuf.buf_align < phdr->p_align) {
> +			pr_debug("buf_align was %lx, now is %llx\n",
> +				 kbuf.buf_align, phdr->p_align);
> +			kbuf.buf_align = phdr->p_align;
> +		}
> +
> +		kbuf.bufsz += phdr->p_memsz;
> +	}
> +
> +	/* Allocate buffer for purgatory. */
> +	kbuf.buffer = vzalloc(kbuf.bufsz);
> +	if (!kbuf.buffer) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	/* Add buffer to segment list. */
> +	kbuf.memsz = kbuf.bufsz;
> +	ret = kexec_add_buffer(&kbuf);
> +	if (ret)
> +		goto out;
> +
> +	pi->purgatory_load_addr = kbuf.mem;
> +
> +	/* Load executable. */
> +	for (phdr = phdrs; phdr < phdrs + pi->ehdr->e_phnum; phdr++) {
> +		if (phdr->p_type != PT_LOAD)
> +			continue;
> +
> +		src = (const void *) pi->ehdr + phdr->p_offset;
> +		memcpy(kbuf.buffer + phdr->p_offset, src, phdr->p_filesz);
> +
> +		pr_debug("loaded segment of size %llx at %llx (base = %lx, offset = %llx)\n",
> +			 phdr->p_memsz, pi->purgatory_load_addr + phdr->p_offset, pi->purgatory_load_addr, phdr->p_offset);
> +
> +		/*
> +		 * Find sections within this segment and update their
> +		 * ->sh_offset to point to within the buffer.
> +		 */
> +		for (i = 0; i < pi->ehdr->e_shnum; i++) {
> +			if (sechdrs[i].sh_addr >= phdr->p_vaddr &&
> +			    sechdrs[i].sh_addr + sechdrs[i].sh_size <= phdr->p_vaddr + phdr->p_memsz) {
> +				sechdrs[i].sh_addr = sechdrs[i].sh_addr - orig_load_addr + pi->purgatory_load_addr;
> +				sechdrs[i].sh_offset = (unsigned long long) kbuf.buffer + sechdrs_c[i].sh_offset;
> +			}
> +		}
> +	}
> +
> +	/* Make kernel jump to purgatory after shutdown */
> +	image->start = pi->ehdr->e_entry - orig_load_addr + pi->purgatory_load_addr;
> +
> +	/* Used later to get/set symbol values */
> +	pi->sechdrs = sechdrs;
> +
> +	/*
> +	 * Used later to identify which section is purgatory and skip it
> +	 * from checksumming.
> +	 */
> +	pi->purgatory_buf = kbuf.buffer;
> +
> +	pr_debug("purgatory entry point at %lx\n", image->start);
> +
> +	return 0;
> +out:
> +	vfree(sechdrs);
> +	vfree(kbuf.buffer);
> +
> +	return ret;
> +}
> +#else /* CONFIG_HAVE_KEXEC_FILE_PIE_PURGATORY */
> +/*
> + * Load relocatable object purgatory using the section header information.
> + * A lot of code taken from kexec-tools.
> + */
>  static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
>  				  unsigned long max, int top_down)
>  {
> @@ -813,6 +955,7 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
>  	vfree(kbuf.buffer);
>  	return ret;
>  }
> +#endif /* CONFIG_HAVE_KEXEC_FILE_PIE_PURGATORY */
>  
>  static int kexec_apply_relocations(struct kimage *image)
>  {
> @@ -886,7 +1029,7 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
>  	pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
>  
>  	if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
> -	    || pi->ehdr->e_type != ET_REL
> +	    || pi->ehdr->e_type != PURGATORY_ELF_TYPE
>  	    || !elf_check_arch(pi->ehdr)
>  	    || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
>  		return -ENOEXEC;
> @@ -942,7 +1085,13 @@ static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
>  
>  		/* Go through symbols for a match */
>  		for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
> -			if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
> +			/*
> +			 * FIXME: See if we can or should export the .TOC.
> +			 * symbol as global instead of searching local symbols
> +			 * here.
> +			 */
> +			if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL &&
> +			    ELF_ST_BIND(syms[k].st_info) != STB_LOCAL)
>  				continue;
>  
>  			if (strcmp(strtab + syms[k].st_name, name) != 0)
> @@ -979,7 +1128,7 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
>  	 * Returns the address where symbol will finally be loaded after
>  	 * kexec_load_segment()
>  	 */
> -	return (void *)(sechdr->sh_addr + sym->st_value);
> +	return (void *)(sechdr->sh_addr + sym_value_offset(pi, sym));
>  }
>  
>  /*
> @@ -1013,7 +1162,7 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
>  	}
>  
>  	sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
> -					sym->st_value;
> +					sym_value_offset(pi, sym);
>  
>  	if (get_value)
>  		memcpy((void *)buf, sym_buf, size);
> diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
> index 4cef7e4706b0..c253b00f88d0 100644
> --- a/kernel/kexec_internal.h
> +++ b/kernel/kexec_internal.h
> @@ -20,6 +20,32 @@ struct kexec_sha_region {
>  	unsigned long len;
>  };
>  
> +#ifdef CONFIG_HAVE_KEXEC_FILE_PIE_PURGATORY
> +#define PURGATORY_ELF_TYPE ET_EXEC
> +
> +/*
> + * In position-independent executables, the symbol value is an absolute address,
> + * so convert it to a section-relative offset.
> + */
> +static inline Elf_Addr sym_value_offset(struct purgatory_info *pi, Elf_Sym *sym)
> +{
> +	const Elf_Shdr *sechdrs_c = (const void *) pi->ehdr + pi->ehdr->e_shoff;
> +
> +	return sym->st_value - sechdrs_c[sym->st_shndx].sh_addr;
> +}
> +#else
> +#define PURGATORY_ELF_TYPE ET_REL
> +
> +/*
> + * In a relocatable object, the symbol value already is a section-relative
> + * offset.
> + */
> +static inline Elf_Addr sym_value_offset(struct purgatory_info *pi, Elf_Sym *sym)
> +{
> +	return sym->st_value;
> +}
> +#endif
> +
>  void kimage_file_post_load_cleanup(struct kimage *image);
>  #else /* CONFIG_KEXEC_FILE */
>  static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
> -- 
> 2.7.4
> 
> 
> 
> _______________________________________________
> kexec mailing list
> kexec at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec



More information about the kexec mailing list