[PATCH Makedumpfile 2/4] x86_64: translate all VA to PA using page table values
Pratyush Anand
panand at redhat.com
Tue Oct 25 08:12:02 PDT 2016
Hi Atsushi,
Thanks for the review.
On Tuesday 25 October 2016 02:50 PM, Atsushi Kumagai wrote:
> Hello Pratyush,
>
>> Currently we translate some of the VA areas using linear mapping, while some
>> others (which cannot be linearly mapped) are translated using the page table.
>>
>> However, a page will have an entry in the page table irrespective of its
>> virtual region. So, we can always look into the page table for any VA to PA
>> translation. This approach will remove a lot of complexity from makedumpfile.
>> It will in turn remove the dependency on variables like VMALLOC_START,
>> MODULES_VADDR, etc., whose definitions keep changing in newer kernel versions.
>>
>> Moreover, I do not see any side effects of this approach in terms of
>> execution time. I tested on an IBM x3950 X6 machine with 4136359 MB of
>> memory. These are the makedumpfile execution times:
>>
>> Without this patch:
>> ===================
>> With -d 31:
>> Trial 1: 237.59526248 S
>> Trial 2: 235.236914962 S
>> Trial 3: 237.678712045 S
>>
>> With -d 1:
>> Trial 1: 2548.905296877 S
>> Trial 2: 2549.759881756 S
>>
>> With this patch:
>> ===================
>> With -d 31:
>> Trial 1: 232.713841516 S
>> Trial 2: 228.45697177 S
>> Trial 3: 232.942262441 S
>>
>> With -d 1:
>> Trial 1: 2768.424565806 S
>> Trial 2: 2749.622115455 S
>> Trial 3: 2537.770359073 S
>
> Could you increase the number of trials ?
OK, I can do that. It might take some time, as I will have to arrange access
to that high-memory machine again.
> If the average time is close to the results of Trial 1 (2768s) and Trial 2
> (2749s), the regression rate is 8%, which sounds neither large nor small.
> If the average is around 2500s, like Trial 3, that would be ideal.
>
>> Signed-off-by: Pratyush Anand <panand at redhat.com>
>> ---
>> arch/x86_64.c | 42 ++++++++----------------------------------
>> makedumpfile.h | 4 ++--
>> 2 files changed, 10 insertions(+), 36 deletions(-)
>>
>> diff --git a/arch/x86_64.c b/arch/x86_64.c
>> index a96fd8ae00a1..fe2764a8bec2 100644
>> --- a/arch/x86_64.c
>> +++ b/arch/x86_64.c
>> @@ -203,6 +203,12 @@ vtop4_x86_64(unsigned long vaddr)
>> {
>> unsigned long page_dir, pml4, pgd_paddr, pgd_pte, pmd_paddr, pmd_pte;
>> unsigned long pte_paddr, pte;
>> + unsigned long phys_base;
>> +
>> + if (SYMBOL(phys_base) != NOT_FOUND_SYMBOL)
>> + phys_base = info->phys_base;
>> + else
>> + phys_base = 0;
>>
>> if (SYMBOL(init_level4_pgt) == NOT_FOUND_SYMBOL) {
>> ERRMSG("Can't get the symbol of init_level4_pgt.\n");
>> @@ -212,9 +218,9 @@ vtop4_x86_64(unsigned long vaddr)
>> /*
>> * Get PGD.
>> */
>> - page_dir = SYMBOL(init_level4_pgt);
>> + page_dir = SYMBOL(init_level4_pgt) - __START_KERNEL_map + phys_base;
>
> I want to confirm that this VA to PA translation is always safe;
> otherwise, we should do the condition check that was done in
> vaddr_to_paddr_x86_64(), shouldn't we?
>
I think this should be safe; however, an x86 expert can comment better.
Baoquan, any comments here?
~Pratyush
>
> Thanks,
> Atsushi Kumagai
>
>> page_dir += pml4_index(vaddr) * sizeof(unsigned long);
>> - if (!readmem(VADDR, page_dir, &pml4, sizeof pml4)) {
>> + if (!readmem(PADDR, page_dir, &pml4, sizeof pml4)) {
>> ERRMSG("Can't get pml4 (page_dir:%lx).\n", page_dir);
>> return NOT_PADDR;
>> }
>> @@ -285,38 +291,6 @@ vtop4_x86_64(unsigned long vaddr)
>> return (pte & ENTRY_MASK) + PAGEOFFSET(vaddr);
>> }
>>
>> -unsigned long long
>> -vaddr_to_paddr_x86_64(unsigned long vaddr)
>> -{
>> - unsigned long phys_base;
>> - unsigned long long paddr;
>> -
>> - /*
>> - * Check the relocatable kernel.
>> - */
>> - if (SYMBOL(phys_base) != NOT_FOUND_SYMBOL)
>> - phys_base = info->phys_base;
>> - else
>> - phys_base = 0;
>> -
>> - if (is_vmalloc_addr_x86_64(vaddr)) {
>> - if ((paddr = vtop4_x86_64(vaddr)) == NOT_PADDR) {
>> - ERRMSG("Can't convert a virtual address(%lx) to " \
>> - "physical address.\n", vaddr);
>> - return NOT_PADDR;
>> - }
>> - } else if (vaddr >= __START_KERNEL_map) {
>> - paddr = vaddr - __START_KERNEL_map + phys_base;
>> -
>> - } else {
>> - if (is_xen_memory())
>> - paddr = vaddr - PAGE_OFFSET_XEN_DOM0;
>> - else
>> - paddr = vaddr - PAGE_OFFSET;
>> - }
>> - return paddr;
>> -}
>> -
>> /*
>> * for Xen extraction
>> */
>> diff --git a/makedumpfile.h b/makedumpfile.h
>> index a5955ff750e5..13559651feb6 100644
>> --- a/makedumpfile.h
>> +++ b/makedumpfile.h
>> @@ -863,12 +863,12 @@ int is_vmalloc_addr_x86_64(ulong vaddr);
>> int get_phys_base_x86_64(void);
>> int get_machdep_info_x86_64(void);
>> int get_versiondep_info_x86_64(void);
>> -unsigned long long vaddr_to_paddr_x86_64(unsigned long vaddr);
>> +unsigned long long vtop4_x86_64(unsigned long vaddr);
>> #define find_vmemmap() find_vmemmap_x86_64()
>> #define get_phys_base() get_phys_base_x86_64()
>> #define get_machdep_info() get_machdep_info_x86_64()
>> #define get_versiondep_info() get_versiondep_info_x86_64()
>> -#define vaddr_to_paddr(X) vaddr_to_paddr_x86_64(X)
>> +#define vaddr_to_paddr(X) vtop4_x86_64(X)
>> #define is_phys_addr(X) (!is_vmalloc_addr_x86_64(X))
>> #endif /* x86_64 */
>>
>> --
>> 2.7.4
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
More information about the kexec
mailing list