[RFC V2] mm: Enable generic pfn_valid() to handle early sections with memmap holes

David Hildenbrand david at redhat.com
Tue May 25 03:04:40 PDT 2021


On 25.05.21 12:03, Mike Rapoport wrote:
> On Tue, May 25, 2021 at 03:22:53PM +0530, Anshuman Khandual wrote:
>>
>>
>> On 5/25/21 12:02 PM, Mike Rapoport wrote:
>>> On Tue, May 25, 2021 at 11:30:15AM +0530, Anshuman Khandual wrote:
>>>>
>>>> On 5/24/21 12:22 PM, Mike Rapoport wrote:
>>>>> Hello Anshuman,
>>>>>
>>>>> On Mon, May 24, 2021 at 10:28:32AM +0530, Anshuman Khandual wrote:
>>>>>>
>>>>>> On 4/22/21 1:20 PM, Anshuman Khandual wrote:
>>>>>>> Platforms like arm and arm64 have redefined pfn_valid() because their early
>>>>>>> memory sections might have contained memmap holes after freeing parts of it
>>>>>>> during boot, which should be skipped while validating a pfn for struct page
>>>>>>> backing. This scenario on certain platforms where memmap is not continuous,
>>>>>>> could be captured with a new option CONFIG_HAVE_EARLY_SECTION_MEMMAP_HOLES.
>>>>>>> Then the generic pfn_valid() can be improved to accommodate such platforms.
>>>>>>> This reduces overall code footprint and also improves maintainability.
>>>>>>>
>>>>>>> free_unused_memmap() and pfn_to_online_page() have been updated to include
>>>>>>> such cases. This also exports memblock_is_memory() for all drivers that use
>>>>>>> pfn_valid() but lack required visibility. After the new config is in place,
>>>>>>> drop CONFIG_HAVE_ARCH_PFN_VALID from arm64 platforms.
>>>>>>>
>>>>>>> Cc: Catalin Marinas <catalin.marinas at arm.com>
>>>>>>> Cc: Will Deacon <will at kernel.org>
>>>>>>> Cc: Andrew Morton <akpm at linux-foundation.org>
>>>>>>> Cc: Mike Rapoport <rppt at kernel.org>
>>>>>>> Cc: David Hildenbrand <david at redhat.com>
>>>>>>> Cc: linux-arm-kernel at lists.infradead.org
>>>>>>> Cc: linux-kernel at vger.kernel.org
>>>>>>> Cc: linux-mm at kvack.org
>>>>>>> Suggested-by: David Hildenbrand <david at redhat.com>
>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual at arm.com>
>>>>>>> ---
>>>>>>> This patch applies on the latest mainline kernel after Mike's series
>>>>>>> regarding arm64 based pfn_valid().
>>>>>>>
>>>>>>> https://lore.kernel.org/linux-mm/20210422061902.21614-1-rppt@kernel.org/T/#t
>>>>>>>
>>>>>>> Changes in RFC V2:
>>>>>>>
>>>>>>> - Dropped support for arm (32 bit)
>>>>>>> - Replaced memblock_is_map_memory() check with memblock_is_memory()
>>>>>>> - MEMBLOCK_NOMAP memory is no longer skipped for pfn_valid()
>>>>>>> - Updated pfn_to_online_page() per David
>>>>>>> - Updated free_unused_memmap() to preserve existing semantics per Mike
>>>>>>> - Exported memblock_is_memory() instead of memblock_is_map_memory()
>>>>>>>
>>>>>>> Changes in RFC V1:
>>>>>>>
>>>>>>> - https://patchwork.kernel.org/project/linux-mm/patch/1615174073-10520-1-git-send-email-anshuman.khandual@arm.com/
>>>>>>>
>>>>>>>   arch/arm64/Kconfig            |  2 +-
>>>>>>>   arch/arm64/include/asm/page.h |  1 -
>>>>>>>   arch/arm64/mm/init.c          | 41 -----------------------------------
>>>>>>>   include/linux/mmzone.h        | 18 ++++++++++++++-
>>>>>>>   mm/Kconfig                    |  9 ++++++++
>>>>>>>   mm/memblock.c                 |  8 +++++--
>>>>>>>   mm/memory_hotplug.c           |  5 +++++
>>>>>>>   7 files changed, 38 insertions(+), 46 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>>>>>> index b4a9b493ce72..4cdc3570ffa9 100644
>>>>>>> --- a/arch/arm64/Kconfig
>>>>>>> +++ b/arch/arm64/Kconfig
>>>>>>> @@ -144,7 +144,6 @@ config ARM64
>>>>>>>   	select HAVE_ARCH_KGDB
>>>>>>>   	select HAVE_ARCH_MMAP_RND_BITS
>>>>>>>   	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
>>>>>>> -	select HAVE_ARCH_PFN_VALID
>>>>>>>   	select HAVE_ARCH_PREL32_RELOCATIONS
>>>>>>>   	select HAVE_ARCH_SECCOMP_FILTER
>>>>>>>   	select HAVE_ARCH_STACKLEAK
>>>>>>> @@ -167,6 +166,7 @@ config ARM64
>>>>>>>   		if $(cc-option,-fpatchable-function-entry=2)
>>>>>>>   	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
>>>>>>>   		if DYNAMIC_FTRACE_WITH_REGS
>>>>>>> +	select HAVE_EARLY_SECTION_MEMMAP_HOLES
>>>>>>>   	select HAVE_EFFICIENT_UNALIGNED_ACCESS
>>>>>>>   	select HAVE_FAST_GUP
>>>>>>>   	select HAVE_FTRACE_MCOUNT_RECORD
>>>>>>> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
>>>>>>> index 75ddfe671393..fcbef3eec4b2 100644
>>>>>>> --- a/arch/arm64/include/asm/page.h
>>>>>>> +++ b/arch/arm64/include/asm/page.h
>>>>>>> @@ -37,7 +37,6 @@ void copy_highpage(struct page *to, struct page *from);
>>>>>>>   
>>>>>>>   typedef struct page *pgtable_t;
>>>>>>>   
>>>>>>> -int pfn_valid(unsigned long pfn);
>>>>>>>   int pfn_is_map_memory(unsigned long pfn);
>>>>>>>   
>>>>>>>   #include <asm/memory.h>
>>>>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>>>>> index f431b38d0837..5731a11550d8 100644
>>>>>>> --- a/arch/arm64/mm/init.c
>>>>>>> +++ b/arch/arm64/mm/init.c
>>>>>>> @@ -217,47 +217,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>>>>>>>   	free_area_init(max_zone_pfns);
>>>>>>>   }
>>>>>>>   
>>>>>>> -int pfn_valid(unsigned long pfn)
>>>>>>> -{
>>>>>>> -	phys_addr_t addr = PFN_PHYS(pfn);
>>>>>>> -
>>>>>>> -	/*
>>>>>>> -	 * Ensure the upper PAGE_SHIFT bits are clear in the
>>>>>>> -	 * pfn. Else it might lead to false positives when
>>>>>>> -	 * some of the upper bits are set, but the lower bits
>>>>>>> -	 * match a valid pfn.
>>>>>>> -	 */
>>>>>>> -	if (PHYS_PFN(addr) != pfn)
>>>>>>> -		return 0;
>>>>>>> -
>>>>>>> -#ifdef CONFIG_SPARSEMEM
>>>>>>> -{
>>>>>>> -	struct mem_section *ms;
>>>>>>> -
>>>>>>> -	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
>>>>>>> -		return 0;
>>>>>>> -
>>>>>>> -	ms = __pfn_to_section(pfn);
>>>>>>> -	if (!valid_section(ms))
>>>>>>> -		return 0;
>>>>>>> -
>>>>>>> -	/*
>>>>>>> -	 * ZONE_DEVICE memory does not have the memblock entries.
>>>>>>> -	 * memblock_is_memory() check for ZONE_DEVICE based
>>>>>>> -	 * addresses will always fail. Even the normal hotplugged
>>>>>>> -	 * memory will never have MEMBLOCK_NOMAP flag set in their
>>>>>>> -	 * memblock entries. Skip memblock search for all non early
>>>>>>> -	 * memory sections covering all of hotplug memory including
>>>>>>> -	 * both normal and ZONE_DEVICE based.
>>>>>>> -	 */
>>>>>>> -	if (!early_section(ms))
>>>>>>> -		return pfn_section_valid(ms, pfn);
>>>>>>> -}
>>>>>>> -#endif
>>>>>>> -	return memblock_is_memory(addr);
>>>>>>> -}
>>>>>>> -EXPORT_SYMBOL(pfn_valid);
>>>>>>> -
>>>>>>>   int pfn_is_map_memory(unsigned long pfn)
>>>>>>>   {
>>>>>>>   	phys_addr_t addr = PFN_PHYS(pfn);
>>>>>>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>>>>>>> index 961f0eeefb62..18bf71665211 100644
>>>>>>> --- a/include/linux/mmzone.h
>>>>>>> +++ b/include/linux/mmzone.h
>>>>>>> @@ -1421,10 +1421,22 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
>>>>>>>    *
>>>>>>>    * Return: 1 for PFNs that have memory map entries and 0 otherwise
>>>>>>>    */
>>>>>>> +bool memblock_is_memory(phys_addr_t addr);
>>>>>>> +
>>>>>>>   static inline int pfn_valid(unsigned long pfn)
>>>>>>>   {
>>>>>>> +	phys_addr_t addr = PFN_PHYS(pfn);
>>>>>>>   	struct mem_section *ms;
>>>>>>>   
>>>>>>> +	/*
>>>>>>> +	 * Ensure the upper PAGE_SHIFT bits are clear in the
>>>>>>> +	 * pfn. Else it might lead to false positives when
>>>>>>> +	 * some of the upper bits are set, but the lower bits
>>>>>>> +	 * match a valid pfn.
>>>>>>> +	 */
>>>>>>> +	if (PHYS_PFN(addr) != pfn)
>>>>>>> +		return 0;
>>>>>>> +
>>>>>>>   	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
>>>>>>>   		return 0;
>>>>>>>   	ms = __nr_to_section(pfn_to_section_nr(pfn));
>>>>>>> @@ -1434,7 +1446,11 @@ static inline int pfn_valid(unsigned long pfn)
>>>>>>>   	 * Traditionally early sections always returned pfn_valid() for
>>>>>>>   	 * the entire section-sized span.
>>>>>>>   	 */
>>>>>>> -	return early_section(ms) || pfn_section_valid(ms, pfn);
>>>>>>> +	if (early_section(ms))
>>>>>>> +		return IS_ENABLED(CONFIG_HAVE_EARLY_SECTION_MEMMAP_HOLES) ?
>>>>>>> +			memblock_is_memory(pfn << PAGE_SHIFT) : 1;
>>>>>>> +
>>>>>>> +	return pfn_section_valid(ms, pfn);
>>>>>>>   }
>>>>>>>   #endif
>>>>>>
>>>>>> Hello David/Mike,
>>>>>>
>>>>>> Now that pfn_is_map_memory() usage has been decoupled from pfn_valid() and
>>>>>> SPARSEMEM_VMEMMAP is the only available memory model on arm64, I am
>>>>>> wondering if we still need this HAVE_EARLY_SECTION_MEMMAP_HOLES proposal?
>>>>>> Please do kindly suggest. Thank you.
>>>>>
>>>>> Even now arm64 still frees parts of the memory map and pfn_valid() should
>>>>> be able to tell if a part of a section is freed or not.
>>>>>
>>>>> For instance for the following memory configuration
>>>>>      
>>>>>          |<----section---->|<----hole---->|<----section---->|
>>>>>          +--------+--------+--------------+--------+--------+
>>>>>          | bank 0 | unused |              | bank 1 | unused |
>>>>>          +--------+--------+--------------+--------+--------+
>>>>>
>>>>> the memory map corresponding to the "unused" areas is freed, but the generic
>>>>> pfn_valid() will still return 1 there.
>>>>
>>>> But doesn't free_unused_memmap() return early when CONFIG_SPARSEMEM_VMEMMAP
>>>> is enabled, which is the only option now on arm64? Then how can the memmap
>>>> have holes (from unused areas) anymore? Am I missing something here?
>>>   
>>> Ah, you are right, I missed this detail myself :)
>>>
>>> With CONFIG_SPARSEMEM_VMEMMAP as the only memory model for arm64, we can
>>> simply get rid of arm64::pfn_valid() without any changes to the generic
>>> version.
>>
>> Though I just moved the pfn bits sanity check into the generic pfn_valid().
>> I hope this looks okay.
>>
>>  From 7a63f460bcb6ae171c2081bfad81edd9e8f3b7a0 Mon Sep 17 00:00:00 2001
>> From: Anshuman Khandual <anshuman.khandual at arm.com>
>> Date: Tue, 25 May 2021 10:27:09 +0100
>> Subject: [PATCH] arm64/mm: Drop HAVE_ARCH_PFN_VALID
>>
>> CONFIG_SPARSEMEM_VMEMMAP is now the only available memory model on arm64
>> platforms and free_unused_memmap() would just return without creating any
>> holes in the memmap mapping. There is no need for any special handling in
>> pfn_valid() and HAVE_ARCH_PFN_VALID can just be dropped. This also moves
>> the pfn upper bits sanity check into generic pfn_valid().
>>
>> Signed-off-by: Anshuman Khandual <anshuman.khandual at arm.com>
> 
> Acked-by: Mike Rapoport <rppt at linux.ibm.com>
> 

Indeed, looks good

Acked-by: David Hildenbrand <david at redhat.com>

>> ---
>>   arch/arm64/Kconfig            |  1 -
>>   arch/arm64/include/asm/page.h |  1 -
>>   arch/arm64/mm/init.c          | 37 -----------------------------------
>>   include/linux/mmzone.h        |  9 +++++++++
>>   4 files changed, 9 insertions(+), 39 deletions(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index d7dc8698cf8e..7904728befcc 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -154,7 +154,6 @@ config ARM64
>>   	select HAVE_ARCH_KGDB
>>   	select HAVE_ARCH_MMAP_RND_BITS
>>   	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
>> -	select HAVE_ARCH_PFN_VALID
>>   	select HAVE_ARCH_PREL32_RELOCATIONS
>>   	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
>>   	select HAVE_ARCH_SECCOMP_FILTER
>> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
>> index 75ddfe671393..fcbef3eec4b2 100644
>> --- a/arch/arm64/include/asm/page.h
>> +++ b/arch/arm64/include/asm/page.h
>> @@ -37,7 +37,6 @@ void copy_highpage(struct page *to, struct page *from);
>>   
>>   typedef struct page *pgtable_t;
>>   
>> -int pfn_valid(unsigned long pfn);
>>   int pfn_is_map_memory(unsigned long pfn);
>>   
>>   #include <asm/memory.h>
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 725aa84f2faa..49019ea0c8a8 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -219,43 +219,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>>   	free_area_init(max_zone_pfns);
>>   }
>>   
>> -int pfn_valid(unsigned long pfn)
>> -{
>> -	phys_addr_t addr = PFN_PHYS(pfn);
>> -	struct mem_section *ms;
>> -
>> -	/*
>> -	 * Ensure the upper PAGE_SHIFT bits are clear in the
>> -	 * pfn. Else it might lead to false positives when
>> -	 * some of the upper bits are set, but the lower bits
>> -	 * match a valid pfn.
>> -	 */
>> -	if (PHYS_PFN(addr) != pfn)
>> -		return 0;
>> -
>> -	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
>> -		return 0;
>> -
>> -	ms = __pfn_to_section(pfn);
>> -	if (!valid_section(ms))
>> -		return 0;
>> -
>> -	/*
>> -	 * ZONE_DEVICE memory does not have the memblock entries.
>> -	 * memblock_is_map_memory() check for ZONE_DEVICE based
>> -	 * addresses will always fail. Even the normal hotplugged
>> -	 * memory will never have MEMBLOCK_NOMAP flag set in their
>> -	 * memblock entries. Skip memblock search for all non early
>> -	 * memory sections covering all of hotplug memory including
>> -	 * both normal and ZONE_DEVICE based.
>> -	 */
>> -	if (!early_section(ms))
>> -		return pfn_section_valid(ms, pfn);
>> -
>> -	return memblock_is_memory(addr);
>> -}
>> -EXPORT_SYMBOL(pfn_valid);
>> -
>>   int pfn_is_map_memory(unsigned long pfn)
>>   {
>>   	phys_addr_t addr = PFN_PHYS(pfn);
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index a9b263d4cf9d..d0c4fc506fa3 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -1443,6 +1443,15 @@ static inline int pfn_valid(unsigned long pfn)
>>   {
>>   	struct mem_section *ms;
>>   
>> +	/*
>> +	 * Ensure the upper PAGE_SHIFT bits are clear in the
>> +	 * pfn. Else it might lead to false positives when
>> +	 * some of the upper bits are set, but the lower bits
>> +	 * match a valid pfn.
>> +	 */
>> +	if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
>> +		return 0;
>> +
>>   	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
>>   		return 0;
>>   	ms = __nr_to_section(pfn_to_section_nr(pfn));
>> -- 
>> 2.20.1
> 


-- 
Thanks,

David / dhildenb




More information about the linux-arm-kernel mailing list