[PATCH v5sub1 8/8] arm64: allow kernel Image to be loaded anywhere in physical memory

Fu Wei wefu at redhat.com
Mon Feb 1 08:28:07 PST 2016


Hi Mark

On 02/01/2016 10:50 PM, Mark Rutland wrote:
> On Mon, Feb 01, 2016 at 11:54:53AM +0100, Ard Biesheuvel wrote:
>> This relaxes the kernel Image placement requirements, so that it
>> may be placed at any 2 MB aligned offset in physical memory.
>>
>> This is accomplished by ignoring PHYS_OFFSET when installing
>> memblocks, and accounting for the apparent virtual offset of
>> the kernel Image. As a result, virtual address references
>> below PAGE_OFFSET are correctly mapped onto physical references
>> into the kernel Image regardless of where it sits in memory.
>>
>> Note that limiting memory using mem= is not unambiguous anymore after
>> this change, considering that the kernel may be at the top of physical
>> memory, and clipping from the bottom rather than the top will discard
>> any 32-bit DMA addressable memory first. To deal with this, the handling
>> of mem= is reimplemented to clip top down, but take special care not to
>> clip memory that covers the kernel image.
>>
>> Since mem= should not be considered a production feature, a panic notifier
>> handler is installed that dumps the memory limit at panic time if one was
>> set.
>
> Good idea!
>
> It would be great if we could follow up with a sizes.h update for SZ_4G,
> though that's only a nice-to-have, and in no way should block this.
>
> Other than that, this looks good. Thanks for putting this together!
>
> Reviewed-by: Mark Rutland <mark.rutland at arm.com>
>
> For the Documentation/arm64 parts we'll need to ask Fu Wei to update the
> zh_CN/ translation to match.

Many thanks for the information.
Yes, I will be working on it.

>
> Mark.
>
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>>   Documentation/arm64/booting.txt         |  20 ++--
>>   arch/arm64/include/asm/boot.h           |   6 ++
>>   arch/arm64/include/asm/kernel-pgtable.h |  12 +++
>>   arch/arm64/include/asm/kvm_asm.h        |   2 +-
>>   arch/arm64/include/asm/memory.h         |  15 +--
>>   arch/arm64/kernel/head.S                |   6 +-
>>   arch/arm64/kernel/image.h               |  13 ++-
>>   arch/arm64/mm/init.c                    | 100 +++++++++++++++++++-
>>   arch/arm64/mm/mmu.c                     |   3 +
>>   9 files changed, 155 insertions(+), 22 deletions(-)
>>
>> diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
>> index 701d39d3171a..56d6d8b796db 100644
>> --- a/Documentation/arm64/booting.txt
>> +++ b/Documentation/arm64/booting.txt
>> @@ -109,7 +109,13 @@ Header notes:
>>   			1 - 4K
>>   			2 - 16K
>>   			3 - 64K
>> -  Bits 3-63:	Reserved.
>> +  Bit 3:	Kernel physical placement
>> +			0 - 2MB aligned base should be as close as possible
>> +			    to the base of DRAM, since memory below it is not
>> +			    accessible via the linear mapping
>> +			1 - 2MB aligned base may be anywhere in physical
>> +			    memory
>> +  Bits 4-63:	Reserved.
>>
>>   - When image_size is zero, a bootloader should attempt to keep as much
>>     memory as possible free for use by the kernel immediately after the
>> @@ -117,14 +123,14 @@ Header notes:
>>     depending on selected features, and is effectively unbound.
>>
>>   The Image must be placed text_offset bytes from a 2MB aligned base
>> -address near the start of usable system RAM and called there. Memory
>> -below that base address is currently unusable by Linux, and therefore it
>> -is strongly recommended that this location is the start of system RAM.
>> -The region between the 2 MB aligned base address and the start of the
>> -image has no special significance to the kernel, and may be used for
>> -other purposes.
>> +address anywhere in usable system RAM and called there. The region
>> +between the 2 MB aligned base address and the start of the image has no
>> +special significance to the kernel, and may be used for other purposes.
>>   At least image_size bytes from the start of the image must be free for
>>   use by the kernel.
>> +NOTE: versions prior to v4.6 cannot make use of memory below the
>> +physical offset of the Image so it is recommended that the Image be
>> +placed as close as possible to the start of system RAM.
>>
>>   Any memory described to the kernel (even that below the start of the
>>   image) which is not marked as reserved from the kernel (e.g., with a
>> diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
>> index 81151b67b26b..ebf2481889c3 100644
>> --- a/arch/arm64/include/asm/boot.h
>> +++ b/arch/arm64/include/asm/boot.h
>> @@ -11,4 +11,10 @@
>>   #define MIN_FDT_ALIGN		8
>>   #define MAX_FDT_SIZE		SZ_2M
>>
>> +/*
>> + * arm64 requires the kernel image to placed
>> + * TEXT_OFFSET bytes beyond a 2 MB aligned base
>> + */
>> +#define MIN_KIMG_ALIGN		SZ_2M
>> +
>>   #endif
>> diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
>> index a459714ee29e..5c6375d8528b 100644
>> --- a/arch/arm64/include/asm/kernel-pgtable.h
>> +++ b/arch/arm64/include/asm/kernel-pgtable.h
>> @@ -79,5 +79,17 @@
>>   #define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
>>   #endif
>>
>> +/*
>> + * To make optimal use of block mappings when laying out the linear
>> + * mapping, round down the base of physical memory to a size that can
>> + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
>> + * (64k granule), or a multiple that can be mapped using contiguous bits
>> + * in the page tables: 32 * PMD_SIZE (16k granule)
>> + */
>> +#ifdef CONFIG_ARM64_64K_PAGES
>> +#define ARM64_MEMSTART_ALIGN	SZ_512M
>> +#else
>> +#define ARM64_MEMSTART_ALIGN	SZ_1G
>> +#endif
>>
>>   #endif	/* __ASM_KERNEL_PGTABLE_H */
>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>> index f5aee6e764e6..054ac25e7c2e 100644
>> --- a/arch/arm64/include/asm/kvm_asm.h
>> +++ b/arch/arm64/include/asm/kvm_asm.h
>> @@ -26,7 +26,7 @@
>>   #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
>>   #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
>>
>> -#define kvm_ksym_ref(sym)		((void *)&sym - KIMAGE_VADDR + PAGE_OFFSET)
>> +#define kvm_ksym_ref(sym)		phys_to_virt((u64)&sym - kimage_voffset)
>>
>>   #ifndef __ASSEMBLY__
>>   struct kvm;
>> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
>> index 4388651d1f0d..61005e7dd6cb 100644
>> --- a/arch/arm64/include/asm/memory.h
>> +++ b/arch/arm64/include/asm/memory.h
>> @@ -88,10 +88,10 @@
>>   #define __virt_to_phys(x) ({						\
>>   	phys_addr_t __x = (phys_addr_t)(x);				\
>>   	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
>> -			     (__x - KIMAGE_VADDR + PHYS_OFFSET); })
>> +			     (__x - kimage_voffset); })
>>
>>   #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
>> -#define __phys_to_kimg(x)	((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR))
>> +#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
>>
>>   /*
>>    * Convert a page to/from a physical address
>> @@ -127,13 +127,14 @@ extern phys_addr_t		memstart_addr;
>>   /* PHYS_OFFSET - the physical address of the start of memory. */
>>   #define PHYS_OFFSET		({ memstart_addr; })
>>
>> +/* the offset between the kernel virtual and physical mappings */
>> +extern u64			kimage_voffset;
>> +
>>   /*
>> - * The maximum physical address that the linear direct mapping
>> - * of system RAM can cover. (PAGE_OFFSET can be interpreted as
>> - * a 2's complement signed quantity and negated to derive the
>> - * maximum size of the linear mapping.)
>> + * Allow all memory at the discovery stage. We will clip it later.
>>    */
>> -#define MAX_MEMBLOCK_ADDR	({ memstart_addr - PAGE_OFFSET - 1; })
>> +#define MIN_MEMBLOCK_ADDR	0
>> +#define MAX_MEMBLOCK_ADDR	U64_MAX
>>
>>   /*
>>    * PFNs are used to describe any physical page; this means
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 04d38a058b19..05b98289093e 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -428,7 +428,11 @@ __mmap_switched:
>>   	and	x4, x4, #~(THREAD_SIZE - 1)
>>   	msr	sp_el0, x4			// Save thread_info
>>   	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
>> -	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
>> +
>> +	ldr	x4, =KIMAGE_VADDR		// Save the offset between
>> +	sub	x4, x4, x24			// the kernel virtual and
>> +	str_l	x4, kimage_voffset, x5		// physical mappings
>> +
>>   	mov	x29, #0
>>   #ifdef CONFIG_KASAN
>>   	bl	kasan_early_init
>> diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
>> index 999633bd7294..c9c62cab25a4 100644
>> --- a/arch/arm64/kernel/image.h
>> +++ b/arch/arm64/kernel/image.h
>> @@ -42,15 +42,18 @@
>>   #endif
>>
>>   #ifdef CONFIG_CPU_BIG_ENDIAN
>> -#define __HEAD_FLAG_BE	1
>> +#define __HEAD_FLAG_BE		1
>>   #else
>> -#define __HEAD_FLAG_BE	0
>> +#define __HEAD_FLAG_BE		0
>>   #endif
>>
>> -#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
>> +#define __HEAD_FLAG_PAGE_SIZE	((PAGE_SHIFT - 10) / 2)
>>
>> -#define __HEAD_FLAGS	((__HEAD_FLAG_BE << 0) |	\
>> -			 (__HEAD_FLAG_PAGE_SIZE << 1))
>> +#define __HEAD_FLAG_PHYS_BASE	1
>> +
>> +#define __HEAD_FLAGS		((__HEAD_FLAG_BE << 0) |	\
>> +				 (__HEAD_FLAG_PAGE_SIZE << 1) |	\
>> +				 (__HEAD_FLAG_PHYS_BASE << 3))
>>
>>   /*
>>    * These will output as part of the Image header, which should be little-endian
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 1d627cd8121c..e8e853a1024c 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -35,8 +35,10 @@
>>   #include <linux/efi.h>
>>   #include <linux/swiotlb.h>
>>
>> +#include <asm/boot.h>
>>   #include <asm/fixmap.h>
>>   #include <asm/kasan.h>
>> +#include <asm/kernel-pgtable.h>
>>   #include <asm/memory.h>
>>   #include <asm/sections.h>
>>   #include <asm/setup.h>
>> @@ -158,9 +160,80 @@ static int __init early_mem(char *p)
>>   }
>>   early_param("mem", early_mem);
>>
>> +/*
>> + * clip_mem_range() - remove memblock memory between @min and @max until
>> + *                    we meet the limit in 'memory_limit'.
>> + */
>> +static void __init clip_mem_range(u64 min, u64 max)
>> +{
>> +	u64 mem_size, to_remove;
>> +	int i;
>> +
>> +again:
>> +	mem_size = memblock_phys_mem_size();
>> +	if (mem_size <= memory_limit || max <= min)
>> +		return;
>> +
>> +	to_remove = mem_size - memory_limit;
>> +
>> +	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
>> +		struct memblock_region *r = memblock.memory.regions + i;
>> +		u64 start = max(min, r->base);
>> +		u64 end = min(max, r->base + r->size);
>> +
>> +		if (start >= max || end <= min)
>> +			continue;
>> +
>> +		if (end > min) {
>> +			u64 size = min(to_remove, end - max(start, min));
>> +
>> +			memblock_remove(end - size, size);
>> +		} else {
>> +			memblock_remove(start, min(max - start, to_remove));
>> +		}
>> +		goto again;
>> +	}
>> +}
>> +
>>   void __init arm64_memblock_init(void)
>>   {
>> -	memblock_enforce_memory_limit(memory_limit);
>> +	const s64 linear_region_size = -(s64)PAGE_OFFSET;
>> +
>> +	/*
>> +	 * Select a suitable value for the base of physical memory.
>> +	 */
>> +	memstart_addr = round_down(memblock_start_of_DRAM(),
>> +				   ARM64_MEMSTART_ALIGN);
>> +
>> +	/*
>> +	 * Remove the memory that we will not be able to cover with the
>> +	 * linear mapping. Take care not to clip the kernel which may be
>> +	 * high in memory.
>> +	 */
>> +	memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)),
>> +			ULLONG_MAX);
>> +	if (memblock_end_of_DRAM() > linear_region_size)
>> +		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
>> +
>> +	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
>> +		u64 kbase = round_down(__pa(_text), MIN_KIMG_ALIGN);
>> +		u64 kend = PAGE_ALIGN(__pa(_end));
>> +		u64 const sz_4g = 0x100000000UL;
>> +
>> +		/*
>> +		 * Clip memory in order of preference:
>> +		 * - above the kernel and above 4 GB
>> +		 * - between 4 GB and the start of the kernel (if the kernel
>> +		 *   is loaded high in memory)
>> +		 * - between the kernel and 4 GB (if the kernel is loaded
>> +		 *   low in memory)
>> +		 * - below 4 GB
>> +		 */
>> +		clip_mem_range(max(sz_4g, kend), ULLONG_MAX);
>> +		clip_mem_range(sz_4g, kbase);
>> +		clip_mem_range(kend, sz_4g);
>> +		clip_mem_range(0, min(kbase, sz_4g));
>> +	}
>>
>>   	/*
>>   	 * Register the kernel text, kernel data, initrd, and initial
>> @@ -381,3 +454,28 @@ static int __init keepinitrd_setup(char *__unused)
>>
>>   __setup("keepinitrd", keepinitrd_setup);
>>   #endif
>> +
>> +/*
>> + * Dump out memory limit information on panic.
>> + */
>> +static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
>> +{
>> +	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
>> +		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
>> +	} else {
>> +		pr_emerg("Memory Limit: none\n");
>> +	}
>> +	return 0;
>> +}
>> +
>> +static struct notifier_block mem_limit_notifier = {
>> +	.notifier_call = dump_mem_limit,
>> +};
>> +
>> +static int __init register_mem_limit_dumper(void)
>> +{
>> +	atomic_notifier_chain_register(&panic_notifier_list,
>> +				       &mem_limit_notifier);
>> +	return 0;
>> +}
>> +__initcall(register_mem_limit_dumper);
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 4c4b15932963..8dda38378959 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -46,6 +46,9 @@
>>
>>   u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
>>
>> +u64 kimage_voffset __read_mostly;
>> +EXPORT_SYMBOL(kimage_voffset);
>> +
>>   /*
>>    * Empty_zero_page is a special page that is used for zero-initialized data
>>    * and COW.
>> --
>> 2.5.0
>>



More information about the linux-arm-kernel mailing list