[Xen-devel] [PATCH 4/9] kexec: extend hypercall with improved load/unload ops

Don Slutz dslutz at verizon.com
Thu Nov 7 15:56:35 EST 2013


For what it is worth.

Reviewed-by: Don Slutz <dslutz at verizon.com>
     -Don Slutz

On 11/06/13 09:49, David Vrabel wrote:
> From: David Vrabel <david.vrabel at citrix.com>
>
> In the existing kexec hypercall, the load and unload ops depend on
> internals of the Linux kernel (the page list and code page provided by
> the kernel).  The code page is used to transition between Xen context
> and the image, so using kernel code doesn't make sense and will not
> work for PVH guests.
>
> Add replacement KEXEC_CMD_kexec_load and KEXEC_CMD_kexec_unload ops
> that no longer require a code page to be provided by the guest -- Xen
> now provides the code for calling the image directly.
>
> The new load op looks similar to the Linux kexec_load system call and
> allows the guest to provide the image data to be loaded.  The guest
> specifies the architecture of the image which may be a 32-bit subarch
> of the hypervisor's architecture (i.e., an EM_386 image on an
> EM_X86_64 hypervisor).
>
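
For illustration (not part of the patch): a minimal sketch of how a caller
might describe a single-segment image with the new op.  The do_kexec_op()
wrapper and the image_data/image_size/dest/entry values are placeholders;
only the structure fields and the command come from this series.

    xen_kexec_segment_t seg = {
        .buf_size   = image_size,      /* bytes supplied by the caller */
        .dest_maddr = dest,            /* machine address to load at */
        .dest_size  = image_size,      /* size of the destination region */
    };
    xen_kexec_load_t load = {
        .type        = KEXEC_TYPE_DEFAULT,
        .arch        = EM_X86_64,      /* or EM_386 for a 32-bit image */
        .entry_maddr = entry,          /* where Xen will jump to */
        .nr_segments = 1,
    };
    int ret;

    set_xen_guest_handle(seg.buf.h, image_data);
    set_xen_guest_handle(load.segments.h, &seg);
    ret = do_kexec_op(KEXEC_CMD_kexec_load, &load);  /* placeholder wrapper */
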
> The toolstack can now load images without kernel involvement.  This is
> required for supporting kexec when using a dom0 with an upstream
> kernel.
>
> Crash images are copied directly into the crash region on load.
> Default images are copied into domheap pages and a list of source and
> destination machine addresses is created.  This list is used in
> kexec_reloc() to relocate the image to its destination.
>
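
For illustration, a rough C rendering of the list walk that the new
relocate_pages assembly routine in kexec_reloc.S performs; the IND_* flags
live in the low bits of each entry, and machine addresses are used directly
as pointers because the relocation page tables identity-map them:

    for ( ; ; entry++ )
    {
        unsigned long e = *entry;

        if ( e & IND_DESTINATION )       /* set the copy destination */
            dest = e & PAGE_MASK;
        else if ( e & IND_INDIRECTION )  /* chain to the next entry page */
            entry = (kimage_entry_t *)(e & PAGE_MASK) - 1; /* ++ lands on it */
        else if ( e & IND_DONE )         /* end of the list */
            break;
        else if ( e & IND_SOURCE )       /* copy one source page */
        {
            memcpy((void *)dest, (void *)(e & PAGE_MASK), PAGE_SIZE);
            dest += PAGE_SIZE;
        }
        else if ( e & IND_ZERO )         /* zero one destination page */
        {
            memset((void *)dest, 0, PAGE_SIZE);
            dest += PAGE_SIZE;
        }
    }
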
> The old load and unload sub-ops are still available (as
> KEXEC_CMD_load_v1 and KEXEC_CMD_unload_v1) and are implemented on top
> of the new infrastructure.
>
> Signed-off-by: David Vrabel <david.vrabel at citrix.com>
> Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
> ---
>   xen/arch/x86/machine_kexec.c        |  192 +++++++++++------
>   xen/arch/x86/x86_64/Makefile        |    2 +-
>   xen/arch/x86/x86_64/compat_kexec.S  |  187 ----------------
>   xen/arch/x86/x86_64/kexec_reloc.S   |  198 +++++++++++++++++
>   xen/common/kexec.c                  |  398 +++++++++++++++++++++++++++++------
>   xen/common/kimage.c                 |  122 +++++++++++-
>   xen/include/asm-x86/fixmap.h        |    3 -
>   xen/include/asm-x86/machine_kexec.h |   16 ++
>   xen/include/xen/kexec.h             |   16 +-
>   xen/include/xen/kimage.h            |    6 +
>   10 files changed, 804 insertions(+), 336 deletions(-)
>   delete mode 100644 xen/arch/x86/x86_64/compat_kexec.S
>   create mode 100644 xen/arch/x86/x86_64/kexec_reloc.S
>   create mode 100644 xen/include/asm-x86/machine_kexec.h
>
> diff --git a/xen/arch/x86/machine_kexec.c b/xen/arch/x86/machine_kexec.c
> index 68b9705..b70d5a6 100644
> --- a/xen/arch/x86/machine_kexec.c
> +++ b/xen/arch/x86/machine_kexec.c
> @@ -1,9 +1,18 @@
>   /******************************************************************************
>    * machine_kexec.c
>    *
> + * Copyright (C) 2013 Citrix Systems R&D Ltd.
> + *
> + * Portions derived from Linux's arch/x86/kernel/machine_kexec_64.c.
> + *
> + *   Copyright (C) 2002-2005 Eric Biederman  <ebiederm at xmission.com>
> + *
>    * Xen port written by:
>    * - Simon 'Horms' Horman <horms at verge.net.au>
>    * - Magnus Damm <magnus at valinux.co.jp>
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2.  See the file COPYING for more details.
>    */
>   
>   #include <xen/types.h>
> @@ -11,63 +20,124 @@
>   #include <xen/guest_access.h>
>   #include <asm/fixmap.h>
>   #include <asm/hpet.h>
> +#include <asm/page.h>
> +#include <asm/machine_kexec.h>
>   
> -typedef void (*relocate_new_kernel_t)(
> -                unsigned long indirection_page,
> -                unsigned long *page_list,
> -                unsigned long start_address,
> -                unsigned int preserve_context);
> -
> -int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
> +/*
> + * Add a mapping for a page to the page tables used during kexec.
> + */
> +int machine_kexec_add_page(struct kexec_image *image, unsigned long vaddr,
> +                           unsigned long maddr)
>   {
> -    unsigned long prev_ma = 0;
> -    int fix_base = FIX_KEXEC_BASE_0 + (slot * (KEXEC_XEN_NO_PAGES >> 1));
> -    int k;
> +    struct page_info *l4_page;
> +    struct page_info *l3_page;
> +    struct page_info *l2_page;
> +    struct page_info *l1_page;
> +    l4_pgentry_t *l4 = NULL;
> +    l3_pgentry_t *l3 = NULL;
> +    l2_pgentry_t *l2 = NULL;
> +    l1_pgentry_t *l1 = NULL;
> +    int ret = -ENOMEM;
> +
> +    l4_page = image->aux_page;
> +    if ( !l4_page )
> +    {
> +        l4_page = kimage_alloc_control_page(image, 0);
> +        if ( !l4_page )
> +            goto out;
> +        image->aux_page = l4_page;
> +    }
>   
> -    /* setup fixmap to point to our pages and record the virtual address
> -     * in every odd index in page_list[].
> -     */
> +    l4 = __map_domain_page(l4_page);
> +    l4 += l4_table_offset(vaddr);
> +    if ( !(l4e_get_flags(*l4) & _PAGE_PRESENT) )
> +    {
> +        l3_page = kimage_alloc_control_page(image, 0);
> +        if ( !l3_page )
> +            goto out;
> +        l4e_write(l4, l4e_from_page(l3_page, __PAGE_HYPERVISOR));
> +    }
> +    else
> +        l3_page = l4e_get_page(*l4);
> +
> +    l3 = __map_domain_page(l3_page);
> +    l3 += l3_table_offset(vaddr);
> +    if ( !(l3e_get_flags(*l3) & _PAGE_PRESENT) )
> +    {
> +        l2_page = kimage_alloc_control_page(image, 0);
> +        if ( !l2_page )
> +            goto out;
> +        l3e_write(l3, l3e_from_page(l2_page, __PAGE_HYPERVISOR));
> +    }
> +    else
> +        l2_page = l3e_get_page(*l3);
> +
> +    l2 = __map_domain_page(l2_page);
> +    l2 += l2_table_offset(vaddr);
> +    if ( !(l2e_get_flags(*l2) & _PAGE_PRESENT) )
> +    {
> +        l1_page = kimage_alloc_control_page(image, 0);
> +        if ( !l1_page )
> +            goto out;
> +        l2e_write(l2, l2e_from_page(l1_page, __PAGE_HYPERVISOR));
> +    }
> +    else
> +        l1_page = l2e_get_page(*l2);
> +
> +    l1 = __map_domain_page(l1_page);
> +    l1 += l1_table_offset(vaddr);
> +    l1e_write(l1, l1e_from_pfn(maddr >> PAGE_SHIFT, __PAGE_HYPERVISOR));
> +
> +    ret = 0;
> +out:
> +    if ( l1 )
> +        unmap_domain_page(l1);
> +    if ( l2 )
> +        unmap_domain_page(l2);
> +    if ( l3 )
> +        unmap_domain_page(l3);
> +    if ( l4 )
> +        unmap_domain_page(l4);
> +    return ret;
> +}
>   
> -    for ( k = 0; k < KEXEC_XEN_NO_PAGES; k++ )
> +int machine_kexec_load(struct kexec_image *image)
> +{
> +    void *code_page;
> +    int ret;
> +
> +    switch ( image->arch )
>       {
> -        if ( (k & 1) == 0 )
> -        {
> -            /* Even pages: machine address. */
> -            prev_ma = image->page_list[k];
> -        }
> -        else
> -        {
> -            /* Odd pages: va for previous ma. */
> -            if ( is_pv_32on64_domain(dom0) )
> -            {
> -                /*
> -                 * The compatability bounce code sets up a page table
> -                 * with a 1-1 mapping of the first 1G of memory so
> -                 * VA==PA here.
> -                 *
> -                 * This Linux purgatory code still sets up separate
> -                 * high and low mappings on the control page (entries
> -                 * 0 and 1) but it is harmless if they are equal since
> -                 * that PT is not live at the time.
> -                 */
> -                image->page_list[k] = prev_ma;
> -            }
> -            else
> -            {
> -                set_fixmap(fix_base + (k >> 1), prev_ma);
> -                image->page_list[k] = fix_to_virt(fix_base + (k >> 1));
> -            }
> -        }
> +    case EM_386:
> +    case EM_X86_64:
> +        break;
> +    default:
> +        return -EINVAL;
>       }
>   
> +    code_page = __map_domain_page(image->control_code_page);
> +    memcpy(code_page, kexec_reloc, kexec_reloc_size);
> +    unmap_domain_page(code_page);
> +
> +    /*
> +     * Add a mapping for the control code page to the same virtual
> +     * address as kexec_reloc.  This allows us to keep running after
> +     * these page tables are loaded in kexec_reloc.
> +     */
> +    ret = machine_kexec_add_page(image, (unsigned long)kexec_reloc,
> +                                 page_to_maddr(image->control_code_page));
> +    if ( ret < 0 )
> +        return ret;
> +
>       return 0;
>   }
>   
> -void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
> +void machine_kexec_unload(struct kexec_image *image)
>   {
> +    /* no-op. kimage_free() frees all control pages. */
>   }
>   
> -void machine_reboot_kexec(xen_kexec_image_t *image)
> +void machine_reboot_kexec(struct kexec_image *image)
>   {
>       BUG_ON(smp_processor_id() != 0);
>       smp_send_stop();
> @@ -75,13 +145,10 @@ void machine_reboot_kexec(xen_kexec_image_t *image)
>       BUG();
>   }
>   
> -void machine_kexec(xen_kexec_image_t *image)
> +void machine_kexec(struct kexec_image *image)
>   {
> -    struct desc_ptr gdt_desc = {
> -        .base = (unsigned long)(boot_cpu_gdt_table - FIRST_RESERVED_GDT_ENTRY),
> -        .limit = LAST_RESERVED_GDT_BYTE
> -    };
>       int i;
> +    unsigned long reloc_flags = 0;
>   
>       /* We are about to permanently jump out of the Xen context into the kexec
>        * purgatory code.  We really don't want to still be servicing interrupts.
> @@ -109,29 +176,12 @@ void machine_kexec(xen_kexec_image_t *image)
>        * not like running with NMIs disabled. */
>       enable_nmis();
>   
> -    /*
> -     * compat_machine_kexec() returns to idle pagetables, which requires us
> -     * to be running on a static GDT mapping (idle pagetables have no GDT
> -     * mappings in their per-domain mapping area).
> -     */
> -    asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
> +    if ( image->arch == EM_386 )
> +        reloc_flags |= KEXEC_RELOC_FLAG_COMPAT;
>   
> -    if ( is_pv_32on64_domain(dom0) )
> -    {
> -        compat_machine_kexec(image->page_list[1],
> -                             image->indirection_page,
> -                             image->page_list,
> -                             image->start_address);
> -    }
> -    else
> -    {
> -        relocate_new_kernel_t rnk;
> -
> -        rnk = (relocate_new_kernel_t) image->page_list[1];
> -        (*rnk)(image->indirection_page, image->page_list,
> -               image->start_address,
> -               0 /* preserve_context */);
> -    }
> +    kexec_reloc(page_to_maddr(image->control_code_page),
> +                page_to_maddr(image->aux_page),
> +                image->head, image->entry_maddr, reloc_flags);
>   }
>   
>   int machine_kexec_get(xen_kexec_range_t *range)
> diff --git a/xen/arch/x86/x86_64/Makefile b/xen/arch/x86/x86_64/Makefile
> index d56e12d..7f8fb3d 100644
> --- a/xen/arch/x86/x86_64/Makefile
> +++ b/xen/arch/x86/x86_64/Makefile
> @@ -11,11 +11,11 @@ obj-y += mmconf-fam10h.o
>   obj-y += mmconfig_64.o
>   obj-y += mmconfig-shared.o
>   obj-y += compat.o
> -obj-bin-y += compat_kexec.o
>   obj-y += domain.o
>   obj-y += physdev.o
>   obj-y += platform_hypercall.o
>   obj-y += cpu_idle.o
>   obj-y += cpufreq.o
> +obj-bin-y += kexec_reloc.o
>   
>   obj-$(crash_debug)   += gdbstub.o
> diff --git a/xen/arch/x86/x86_64/compat_kexec.S b/xen/arch/x86/x86_64/compat_kexec.S
> deleted file mode 100644
> index fc92af9..0000000
> --- a/xen/arch/x86/x86_64/compat_kexec.S
> +++ /dev/null
> @@ -1,187 +0,0 @@
> -/*
> - * Compatibility kexec handler.
> - */
> -
> -/*
> - * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
> - * currently true but if it ever changes then compat_pg_table will
> - * need to be moved back below 4G at run time.
> - */
> -
> -#include <xen/config.h>
> -
> -#include <asm/asm_defns.h>
> -#include <asm/msr.h>
> -#include <asm/page.h>
> -
> -/* The unrelocated physical address of a symbol. */
> -#define SYM_PHYS(sym)          ((sym) - __XEN_VIRT_START)
> -
> -/* Load physical address of symbol into register and relocate it. */
> -#define RELOCATE_SYM(sym,reg)  mov $SYM_PHYS(sym), reg ; \
> -                               add xen_phys_start(%rip), reg
> -
> -/*
> - * Relocate a physical address in memory. Size of temporary register
> - * determines size of the value to relocate.
> - */
> -#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
> -                               add xen_phys_start(%rip), reg ; \
> -                               mov reg, addr(%rip)
> -
> -        .text
> -
> -        .code64
> -
> -ENTRY(compat_machine_kexec)
> -        /* x86/64                        x86/32  */
> -        /* %rdi - relocate_new_kernel_t  CALL    */
> -        /* %rsi - indirection page       4(%esp) */
> -        /* %rdx - page_list              8(%esp) */
> -        /* %rcx - start address         12(%esp) */
> -        /*        cpu has pae           16(%esp) */
> -
> -        /* Shim the 64 bit page_list into a 32 bit page_list. */
> -        mov $12,%r9
> -        lea compat_page_list(%rip), %rbx
> -1:      dec %r9
> -        movl (%rdx,%r9,8),%eax
> -        movl %eax,(%rbx,%r9,4)
> -        test %r9,%r9
> -        jnz 1b
> -
> -        RELOCATE_SYM(compat_page_list,%rdx)
> -
> -        /* Relocate compatibility mode entry point address. */
> -        RELOCATE_MEM(compatibility_mode_far,%eax)
> -
> -        /* Relocate compat_pg_table. */
> -        RELOCATE_MEM(compat_pg_table,     %rax)
> -        RELOCATE_MEM(compat_pg_table+0x8, %rax)
> -        RELOCATE_MEM(compat_pg_table+0x10,%rax)
> -        RELOCATE_MEM(compat_pg_table+0x18,%rax)
> -
> -        /*
> -         * Setup an identity mapped region in PML4[0] of idle page
> -         * table.
> -         */
> -        RELOCATE_SYM(l3_identmap,%rax)
> -        or  $0x63,%rax
> -        mov %rax, idle_pg_table(%rip)
> -
> -        /* Switch to idle page table. */
> -        RELOCATE_SYM(idle_pg_table,%rax)
> -        movq %rax, %cr3
> -
> -        /* Switch to identity mapped compatibility stack. */
> -        RELOCATE_SYM(compat_stack,%rax)
> -        movq %rax, %rsp
> -
> -        /* Save xen_phys_start for 32 bit code. */
> -        movq xen_phys_start(%rip), %rbx
> -
> -        /* Jump to low identity mapping in compatibility mode. */
> -        ljmp *compatibility_mode_far(%rip)
> -        ud2
> -
> -compatibility_mode_far:
> -        .long SYM_PHYS(compatibility_mode)
> -        .long __HYPERVISOR_CS32
> -
> -        /*
> -         * We use 5 words of stack for the arguments passed to the kernel. The
> -         * kernel only uses 1 word before switching to its own stack. Allocate
> -         * 16 words to give "plenty" of room.
> -         */
> -        .fill 16,4,0
> -compat_stack:
> -
> -        .code32
> -
> -#undef RELOCATE_SYM
> -#undef RELOCATE_MEM
> -
> -/*
> - * Load physical address of symbol into register and relocate it. %rbx
> - * contains xen_phys_start(%rip) saved before jump to compatibility
> - * mode.
> - */
> -#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
> -                              add %ebx, reg
> -
> -compatibility_mode:
> -        /* Setup some sane segments. */
> -        movl $__HYPERVISOR_DS32, %eax
> -        movl %eax, %ds
> -        movl %eax, %es
> -        movl %eax, %fs
> -        movl %eax, %gs
> -        movl %eax, %ss
> -
> -        /* Push arguments onto stack. */
> -        pushl $0   /* 20(%esp) - preserve context */
> -        pushl $1   /* 16(%esp) - cpu has pae */
> -        pushl %ecx /* 12(%esp) - start address */
> -        pushl %edx /*  8(%esp) - page list */
> -        pushl %esi /*  4(%esp) - indirection page */
> -        pushl %edi /*  0(%esp) - CALL */
> -
> -        /* Disable paging and therefore leave 64 bit mode. */
> -        movl %cr0, %eax
> -        andl $~X86_CR0_PG, %eax
> -        movl %eax, %cr0
> -
> -        /* Switch to 32 bit page table. */
> -        RELOCATE_SYM(compat_pg_table, %eax)
> -        movl  %eax, %cr3
> -
> -        /* Clear MSR_EFER[LME], disabling long mode */
> -        movl    $MSR_EFER,%ecx
> -        rdmsr
> -        btcl    $_EFER_LME,%eax
> -        wrmsr
> -
> -        /* Re-enable paging, but only 32 bit mode now. */
> -        movl %cr0, %eax
> -        orl $X86_CR0_PG, %eax
> -        movl %eax, %cr0
> -        jmp 1f
> -1:
> -
> -        popl %eax
> -        call *%eax
> -        ud2
> -
> -        .data
> -        .align 4
> -compat_page_list:
> -        .fill 12,4,0
> -
> -        .align 32,0
> -
> -        /*
> -         * These compat page tables contain an identity mapping of the
> -         * first 4G of the physical address space.
> -         */
> -compat_pg_table:
> -        .long SYM_PHYS(compat_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
> -        .long SYM_PHYS(compat_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
> -        .long SYM_PHYS(compat_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
> -        .long SYM_PHYS(compat_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
> -
> -        .section .data.page_aligned, "aw", @progbits
> -        .align PAGE_SIZE,0
> -compat_pg_table_l2:
> -        .macro identmap from=0, count=512
> -        .if \count-1
> -        identmap "(\from+0)","(\count/2)"
> -        identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
> -        .else
> -        .quad 0x00000000000000e3 + \from
> -        .endif
> -        .endm
> -
> -        identmap 0x00000000
> -        identmap 0x40000000
> -        identmap 0x80000000
> -        identmap 0xc0000000
> diff --git a/xen/arch/x86/x86_64/kexec_reloc.S b/xen/arch/x86/x86_64/kexec_reloc.S
> new file mode 100644
> index 0000000..7a16c85
> --- /dev/null
> +++ b/xen/arch/x86/x86_64/kexec_reloc.S
> @@ -0,0 +1,198 @@
> +/*
> + * Relocate a kexec_image to its destination and call it.
> + *
> + * Copyright (C) 2013 Citrix Systems R&D Ltd.
> + *
> + * Portions derived from Linux's arch/x86/kernel/relocate_kernel_64.S.
> + *
> + *   Copyright (C) 2002-2005 Eric Biederman  <ebiederm at xmission.com>
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2.  See the file COPYING for more details.
> + */
> +#include <xen/config.h>
> +#include <xen/kimage.h>
> +
> +#include <asm/asm_defns.h>
> +#include <asm/msr.h>
> +#include <asm/page.h>
> +#include <asm/machine_kexec.h>
> +
> +        .text
> +        .align PAGE_SIZE
> +        .code64
> +
> +ENTRY(kexec_reloc)
> +        /* %rdi - code page maddr */
> +        /* %rsi - page table maddr */
> +        /* %rdx - indirection page maddr */
> +        /* %rcx - entry maddr (%rbp) */
> +        /* %r8 - flags */
> +
> +        movq    %rcx, %rbp
> +
> +        /* Setup stack. */
> +        leaq    (reloc_stack - kexec_reloc)(%rdi), %rsp
> +
> +        /* Load reloc page table. */
> +        movq    %rsi, %cr3
> +
> +        /* Jump to identity mapped code. */
> +        leaq    (identity_mapped - kexec_reloc)(%rdi), %rax
> +        jmpq    *%rax
> +
> +identity_mapped:
> +        /*
> +         * Set cr0 to a known state:
> +         *  - Paging enabled
> +         *  - Alignment check disabled
> +         *  - Write protect disabled
> +         *  - No task switch
> +         *  - Don't do FP software emulation.
> +         *  - Protected mode enabled
> +         */
> +        movq    %cr0, %rax
> +        andl    $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax
> +        orl     $(X86_CR0_PG | X86_CR0_PE), %eax
> +        movq    %rax, %cr0
> +
> +        /*
> +         * Set cr4 to a known state:
> +         *  - physical address extension enabled
> +         */
> +        movl    $X86_CR4_PAE, %eax
> +        movq    %rax, %cr4
> +
> +        movq    %rdx, %rdi
> +        call    relocate_pages
> +
> +        /* Need to switch to 32-bit mode? */
> +        testq   $KEXEC_RELOC_FLAG_COMPAT, %r8
> +        jnz     call_32_bit
> +
> +call_64_bit:
> +        /* Call the image entry point.  This should never return. */
> +        callq   *%rbp
> +        ud2
> +
> +call_32_bit:
> +        /* Setup IDT. */
> +        lidt    compat_mode_idt(%rip)
> +
> +        /* Load compat GDT. */
> +        leaq    compat_mode_gdt(%rip), %rax
> +        movq    %rax, (compat_mode_gdt_desc + 2)(%rip)
> +        lgdt    compat_mode_gdt_desc(%rip)
> +
> +        /* Relocate compatibility mode entry point address. */
> +        leal    compatibility_mode(%rip), %eax
> +        movl    %eax, compatibility_mode_far(%rip)
> +
> +        /* Enter compatibility mode. */
> +        ljmp    *compatibility_mode_far(%rip)
> +
> +relocate_pages:
> +        /* %rdi - indirection page maddr */
> +        pushq   %rbx
> +
> +        cld
> +        movq    %rdi, %rbx
> +        xorl    %edi, %edi
> +        xorl    %esi, %esi
> +
> +next_entry: /* top, read another word for the indirection page */
> +
> +        movq    (%rbx), %rcx
> +        addq    $8, %rbx
> +is_dest:
> +        testb   $IND_DESTINATION, %cl
> +        jz      is_ind
> +        movq    %rcx, %rdi
> +        andq    $PAGE_MASK, %rdi
> +        jmp     next_entry
> +is_ind:
> +        testb   $IND_INDIRECTION, %cl
> +        jz      is_done
> +        movq    %rcx, %rbx
> +        andq    $PAGE_MASK, %rbx
> +        jmp     next_entry
> +is_done:
> +        testb   $IND_DONE, %cl
> +        jnz     done
> +is_source:
> +        testb   $IND_SOURCE, %cl
> +        jz      is_zero
> +        movq    %rcx, %rsi      /* For every source page do a copy */
> +        andq    $PAGE_MASK, %rsi
> +        movl    $(PAGE_SIZE / 8), %ecx
> +        rep movsq
> +        jmp     next_entry
> +is_zero:
> +        testb   $IND_ZERO, %cl
> +        jz      next_entry
> +        movl    $(PAGE_SIZE / 8), %ecx  /* Zero the destination page. */
> +        xorl    %eax, %eax
> +        rep stosq
> +        jmp     next_entry
> +done:
> +        popq    %rbx
> +        ret
> +
> +        .code32
> +
> +compatibility_mode:
> +        /* Setup some sane segments. */
> +        movl    $0x0008, %eax
> +        movl    %eax, %ds
> +        movl    %eax, %es
> +        movl    %eax, %fs
> +        movl    %eax, %gs
> +        movl    %eax, %ss
> +
> +        /* Disable paging and therefore leave 64 bit mode. */
> +        movl    %cr0, %eax
> +        andl    $~X86_CR0_PG, %eax
> +        movl    %eax, %cr0
> +
> +        /* Disable long mode */
> +        movl    $MSR_EFER, %ecx
> +        rdmsr
> +        andl    $~EFER_LME, %eax
> +        wrmsr
> +
> +        /* Clear cr4 to disable PAE. */
> +        xorl    %eax, %eax
> +        movl    %eax, %cr4
> +
> +        /* Call the image entry point.  This should never return. */
> +        call    *%ebp
> +        ud2
> +
> +        .align 4
> +compatibility_mode_far:
> +        .long 0x00000000             /* set in call_32_bit above */
> +        .word 0x0010
> +
> +compat_mode_gdt_desc:
> +        .word (3*8)-1
> +        .quad 0x0000000000000000     /* set in call_32_bit above */
> +
> +        .align 8
> +compat_mode_gdt:
> +        .quad 0x0000000000000000     /* null                              */
> +        .quad 0x00cf92000000ffff     /* 0x0008 ring 0 data                */
> +        .quad 0x00cf9a000000ffff     /* 0x0010 ring 0 code, compatibility */
> +
> +compat_mode_idt:
> +        .word 0                      /* limit */
> +        .long 0                      /* base */
> +
> +        /*
> +         * 16 words of stack are more than enough.
> +         */
> +        .fill 16,8,0
> +reloc_stack:
> +
> +        .globl kexec_reloc_size
> +kexec_reloc_size:
> +        .long . - kexec_reloc
> diff --git a/xen/common/kexec.c b/xen/common/kexec.c
> index 7b23df0..c5450ba 100644
> --- a/xen/common/kexec.c
> +++ b/xen/common/kexec.c
> @@ -25,6 +25,7 @@
>   #include <xen/version.h>
>   #include <xen/console.h>
>   #include <xen/kexec.h>
> +#include <xen/kimage.h>
>   #include <public/elfnote.h>
>   #include <xsm/xsm.h>
>   #include <xen/cpu.h>
> @@ -47,7 +48,7 @@ static Elf_Note *xen_crash_note;
>   
>   static cpumask_t crash_saved_cpus;
>   
> -static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
> +static struct kexec_image *kexec_image[KEXEC_IMAGE_NR];
>   
>   #define KEXEC_FLAG_DEFAULT_POS   (KEXEC_IMAGE_NR + 0)
>   #define KEXEC_FLAG_CRASH_POS     (KEXEC_IMAGE_NR + 1)
> @@ -55,8 +56,6 @@ static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
>   
>   static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */
>   
> -static spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;
> -
>   static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>   static size_t vmcoreinfo_size = 0;
>   
> @@ -311,14 +310,14 @@ void kexec_crash(void)
>       kexec_common_shutdown();
>       kexec_crash_save_cpu();
>       machine_crash_shutdown();
> -    machine_kexec(&kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);
> +    machine_kexec(kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);
>   
>       BUG();
>   }
>   
>   static long kexec_reboot(void *_image)
>   {
> -    xen_kexec_image_t *image = _image;
> +    struct kexec_image *image = _image;
>   
>       kexecing = TRUE;
>   
> @@ -734,63 +733,264 @@ static void crash_save_vmcoreinfo(void)
>   #endif
>   }
>   
> -static int kexec_load_unload_internal(unsigned long op, xen_kexec_load_v1_t *load)
> +static void kexec_unload_image(struct kexec_image *image)
>   {
> -    xen_kexec_image_t *image;
> +    if ( !image )
> +        return;
> +
> +    machine_kexec_unload(image);
> +    kimage_free(image);
> +}
> +
> +static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg)
> +{
> +    xen_kexec_exec_t exec;
> +    struct kexec_image *image;
> +    int base, bit, pos, ret = -EINVAL;
> +
> +    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
> +        return -EFAULT;
> +
> +    if ( kexec_load_get_bits(exec.type, &base, &bit) )
> +        return -EINVAL;
> +
> +    pos = (test_bit(bit, &kexec_flags) != 0);
> +
> +    /* Only allow kexec/kdump into loaded images */
> +    if ( !test_bit(base + pos, &kexec_flags) )
> +        return -ENOENT;
> +
> +    switch (exec.type)
> +    {
> +    case KEXEC_TYPE_DEFAULT:
> +        image = kexec_image[base + pos];
> +        ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
> +        break;
> +    case KEXEC_TYPE_CRASH:
> +        kexec_crash(); /* Does not return */
> +        break;
> +    }
> +
> +    return -EINVAL; /* never reached */
> +}
> +
> +static int kexec_swap_images(int type, struct kexec_image *new,
> +                             struct kexec_image **old)
> +{
> +    static DEFINE_SPINLOCK(kexec_lock);
>       int base, bit, pos;
> -    int ret = 0;
> +    int new_slot, old_slot;
> +
> +    *old = NULL;
> +
> +    spin_lock(&kexec_lock);
> +
> +    if ( test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
> +    {
> +        spin_unlock(&kexec_lock);
> +        return -EBUSY;
> +    }
>   
> -    if ( kexec_load_get_bits(load->type, &base, &bit) )
> +    if ( kexec_load_get_bits(type, &base, &bit) )
>           return -EINVAL;
>   
>       pos = (test_bit(bit, &kexec_flags) != 0);
> +    old_slot = base + pos;
> +    new_slot = base + !pos;
>   
> -    /* Load the user data into an unused image */
> -    if ( op == KEXEC_CMD_kexec_load )
> +    if ( new )
>       {
> -        image = &kexec_image[base + !pos];
> +        kexec_image[new_slot] = new;
> +        set_bit(new_slot, &kexec_flags);
> +    }
> +    change_bit(bit, &kexec_flags);
>   
> -        BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */
> +    clear_bit(old_slot, &kexec_flags);
> +    *old = kexec_image[old_slot];
>   
> -        memcpy(image, &load->image, sizeof(*image));
> +    spin_unlock(&kexec_lock);
>   
> -        if ( !(ret = machine_kexec_load(load->type, base + !pos, image)) )
> -        {
> -            /* Set image present bit */
> -            set_bit((base + !pos), &kexec_flags);
> +    return 0;
> +}
>   
> -            /* Make new image the active one */
> -            change_bit(bit, &kexec_flags);
> -        }
> +static int kexec_load_slot(struct kexec_image *kimage)
> +{
> +    struct kexec_image *old_kimage;
> +    int ret = -ENOMEM;
> +
> +    ret = machine_kexec_load(kimage);
> +    if ( ret < 0 )
> +        return ret;
> +
> +    crash_save_vmcoreinfo();
> +
> +    ret = kexec_swap_images(kimage->type, kimage, &old_kimage);
> +    if ( ret < 0 )
> +        return ret;
> +
> +    kexec_unload_image(old_kimage);
> +
> +    return 0;
> +}
> +
> +static uint16_t kexec_load_v1_arch(void)
> +{
> +#ifdef CONFIG_X86
> +    return is_pv_32on64_domain(dom0) ? EM_386 : EM_X86_64;
> +#else
> +    return EM_NONE;
> +#endif
> +}
>   
> -        crash_save_vmcoreinfo();
> +static int kexec_segments_add_segment(
> +    unsigned int *nr_segments, xen_kexec_segment_t *segments,
> +    unsigned long mfn)
> +{
> +    paddr_t maddr = (paddr_t)mfn << PAGE_SHIFT;
> +    unsigned int n = *nr_segments;
> +
> +    /* Need a new segment? */
> +    if ( n == 0
> +         || segments[n-1].dest_maddr + segments[n-1].dest_size != maddr )
> +    {
> +        n++;
> +        if ( n > KEXEC_SEGMENT_MAX )
> +            return -EINVAL;
> +        *nr_segments = n;
> +
> +        set_xen_guest_handle(segments[n-1].buf.h, NULL);
> +        segments[n-1].buf_size = 0;
> +        segments[n-1].dest_maddr = maddr;
> +        segments[n-1].dest_size = 0;
>       }
>   
> -    /* Unload the old image if present and load successful */
> -    if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
> +    return 0;
> +}
> +
> +static int kexec_segments_from_ind_page(unsigned long mfn,
> +                                        unsigned int *nr_segments,
> +                                        xen_kexec_segment_t *segments,
> +                                        bool_t compat)
> +{
> +    void *page;
> +    kimage_entry_t *entry;
> +    int ret = 0;
> +
> +    page = map_domain_page(mfn);
> +
> +    /*
> +     * Walk the indirection page list, adding destination pages to the
> +     * segments.
> +     */
> +    for ( entry = page; ; )
>       {
> -        if ( test_and_clear_bit((base + pos), &kexec_flags) )
> +        unsigned long ind;
> +
> +        ind = kimage_entry_ind(entry, compat);
> +        mfn = kimage_entry_mfn(entry, compat);
> +
> +        switch ( ind )
>           {
> -            image = &kexec_image[base + pos];
> -            machine_kexec_unload(load->type, base + pos, image);
> +        case IND_DESTINATION:
> +            ret = kexec_segments_add_segment(nr_segments, segments, mfn);
> +            if ( ret < 0 )
> +                goto done;
> +            break;
> +        case IND_INDIRECTION:
> +            unmap_domain_page(page);
> +            entry = page = map_domain_page(mfn);
> +            continue;
> +        case IND_DONE:
> +            goto done;
> +        case IND_SOURCE:
> +            if ( *nr_segments == 0 )
> +            {
> +                ret = -EINVAL;
> +                goto done;
> +            }
> +            segments[*nr_segments-1].dest_size += PAGE_SIZE;
> +            break;
> +        default:
> +            ret = -EINVAL;
> +            goto done;
>           }
> +        entry = kimage_entry_next(entry, compat);
>       }
> +done:
> +    unmap_domain_page(page);
> +    return ret;
> +}
>   
> +static int kexec_do_load_v1(xen_kexec_load_v1_t *load, int compat)
> +{
> +    struct kexec_image *kimage = NULL;
> +    xen_kexec_segment_t *segments;
> +    uint16_t arch;
> +    unsigned int nr_segments = 0;
> +    unsigned long ind_mfn = load->image.indirection_page >> PAGE_SHIFT;
> +    int ret;
> +
> +    arch = kexec_load_v1_arch();
> +    if ( arch == EM_NONE )
> +        return -ENOSYS;
> +
> +    segments = xmalloc_array(xen_kexec_segment_t, KEXEC_SEGMENT_MAX);
> +    if ( segments == NULL )
> +        return -ENOMEM;
> +
> +    /*
> +     * Work out the image segments (destination only) from the
> +     * indirection pages.
> +     *
> +     * This is needed so we don't allocate pages that will overlap
> +     * with the destination when building the new set of indirection
> +     * pages below.
> +     */
> +    ret = kexec_segments_from_ind_page(ind_mfn, &nr_segments, segments, compat);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    ret = kimage_alloc(&kimage, load->type, arch, load->image.start_address,
> +                       nr_segments, segments);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    /*
> +     * Build a new set of indirection pages in the native format.
> +     *
> +     * This walks the guest-provided indirection pages a second time.
> +     * The guest could have altered them, invalidating the segment
> +     * information constructed above.  This will only result in the
> +     * image being potentially unrelocatable.
> +     */
> +    ret = kimage_build_ind(kimage, ind_mfn, compat);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    ret = kexec_load_slot(kimage);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    return 0;
> +
> +error:
> +    if ( !kimage )
> +        xfree(segments);
> +    kimage_free(kimage);
>       return ret;
>   }
>   
> -static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) uarg)
> +static int kexec_load_v1(XEN_GUEST_HANDLE_PARAM(void) uarg)
>   {
>       xen_kexec_load_v1_t load;
>   
>       if ( unlikely(copy_from_guest(&load, uarg, 1)) )
>           return -EFAULT;
>   
> -    return kexec_load_unload_internal(op, &load);
> +    return kexec_do_load_v1(&load, 0);
>   }
>   
> -static int kexec_load_unload_compat(unsigned long op,
> -                                    XEN_GUEST_HANDLE_PARAM(void) uarg)
> +static int kexec_load_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
>   {
>   #ifdef CONFIG_COMPAT
>       compat_kexec_load_v1_t compat_load;
> @@ -809,49 +1009,113 @@ static int kexec_load_unload_compat(unsigned long op,
>       load.type = compat_load.type;
>       XLAT_kexec_image(&load.image, &compat_load.image);
>   
> -    return kexec_load_unload_internal(op, &load);
> -#else /* CONFIG_COMPAT */
> +    return kexec_do_load_v1(&load, 1);
> +#else
>       return 0;
> -#endif /* CONFIG_COMPAT */
> +#endif
>   }
>   
> -static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg)
> +static int kexec_load(XEN_GUEST_HANDLE_PARAM(void) uarg)
>   {
> -    xen_kexec_exec_t exec;
> -    xen_kexec_image_t *image;
> -    int base, bit, pos, ret = -EINVAL;
> +    xen_kexec_load_t load;
> +    xen_kexec_segment_t *segments;
> +    struct kexec_image *kimage = NULL;
> +    int ret;
>   
> -    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
> +    if ( copy_from_guest(&load, uarg, 1) )
>           return -EFAULT;
>   
> -    if ( kexec_load_get_bits(exec.type, &base, &bit) )
> +    if ( load.nr_segments >= KEXEC_SEGMENT_MAX )
>           return -EINVAL;
>   
> -    pos = (test_bit(bit, &kexec_flags) != 0);
> -
> -    /* Only allow kexec/kdump into loaded images */
> -    if ( !test_bit(base + pos, &kexec_flags) )
> -        return -ENOENT;
> +    segments = xmalloc_array(xen_kexec_segment_t, load.nr_segments);
> +    if ( segments == NULL )
> +        return -ENOMEM;
>   
> -    switch (exec.type)
> +    if ( copy_from_guest(segments, load.segments.h, load.nr_segments) )
>       {
> -    case KEXEC_TYPE_DEFAULT:
> -        image = &kexec_image[base + pos];
> -        ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
> -        break;
> -    case KEXEC_TYPE_CRASH:
> -        kexec_crash(); /* Does not return */
> -        break;
> +        ret = -EFAULT;
> +        goto error;
>       }
>   
> -    return -EINVAL; /* never reached */
> +    ret = kimage_alloc(&kimage, load.type, load.arch, load.entry_maddr,
> +                       load.nr_segments, segments);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    ret = kimage_load_segments(kimage);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    ret = kexec_load_slot(kimage);
> +    if ( ret < 0 )
> +        goto error;
> +
> +    return 0;
> +
> +error:
> +    if ( !kimage )
> +        xfree(segments);
> +    kimage_free(kimage);
> +    return ret;
> +}
> +
> +static int kexec_do_unload(xen_kexec_unload_t *unload)
> +{
> +    struct kexec_image *old_kimage;
> +    int ret;
> +
> +    ret = kexec_swap_images(unload->type, NULL, &old_kimage);
> +    if ( ret < 0 )
> +        return ret;
> +
> +    kexec_unload_image(old_kimage);
> +
> +    return 0;
> +}
> +
> +static int kexec_unload_v1(XEN_GUEST_HANDLE_PARAM(void) uarg)
> +{
> +    xen_kexec_load_v1_t load;
> +    xen_kexec_unload_t unload;
> +
> +    if ( copy_from_guest(&load, uarg, 1) )
> +        return -EFAULT;
> +
> +    unload.type = load.type;
> +    return kexec_do_unload(&unload);
> +}
> +
> +static int kexec_unload_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
> +{
> +#ifdef CONFIG_COMPAT
> +    compat_kexec_load_v1_t compat_load;
> +    xen_kexec_unload_t unload;
> +
> +    if ( copy_from_guest(&compat_load, uarg, 1) )
> +        return -EFAULT;
> +
> +    unload.type = compat_load.type;
> +    return kexec_do_unload(&unload);
> +#else
> +    return 0;
> +#endif
> +}
> +
> +static int kexec_unload(XEN_GUEST_HANDLE_PARAM(void) uarg)
> +{
> +    xen_kexec_unload_t unload;
> +
> +    if ( unlikely(copy_from_guest(&unload, uarg, 1)) )
> +        return -EFAULT;
> +
> +    return kexec_do_unload(&unload);
>   }
>   
>   static int do_kexec_op_internal(unsigned long op,
>                                   XEN_GUEST_HANDLE_PARAM(void) uarg,
>                                   bool_t compat)
>   {
> -    unsigned long flags;
>       int ret = -EINVAL;
>   
>       ret = xsm_kexec(XSM_PRIV);
> @@ -867,20 +1131,26 @@ static int do_kexec_op_internal(unsigned long op,
>                   ret = kexec_get_range(uarg);
>           break;
>       case KEXEC_CMD_kexec_load_v1:
> +        if ( compat )
> +            ret = kexec_load_v1_compat(uarg);
> +        else
> +            ret = kexec_load_v1(uarg);
> +        break;
>       case KEXEC_CMD_kexec_unload_v1:
> -        spin_lock_irqsave(&kexec_lock, flags);
> -        if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
> -        {
> -                if (compat)
> -                        ret = kexec_load_unload_compat(op, uarg);
> -                else
> -                        ret = kexec_load_unload(op, uarg);
> -        }
> -        spin_unlock_irqrestore(&kexec_lock, flags);
> +        if ( compat )
> +            ret = kexec_unload_v1_compat(uarg);
> +        else
> +            ret = kexec_unload_v1(uarg);
>           break;
>       case KEXEC_CMD_kexec:
>           ret = kexec_exec(uarg);
>           break;
> +    case KEXEC_CMD_kexec_load:
> +        ret = kexec_load(uarg);
> +        break;
> +    case KEXEC_CMD_kexec_unload:
> +        ret = kexec_unload(uarg);
> +        break;
>       }
>   
>       return ret;
> diff --git a/xen/common/kimage.c b/xen/common/kimage.c
> index 02ee37e..10fb785 100644
> --- a/xen/common/kimage.c
> +++ b/xen/common/kimage.c
> @@ -175,11 +175,20 @@ static int do_kimage_alloc(struct kexec_image **rimage, paddr_t entry,
>       image->control_code_page = kimage_alloc_control_page(image, MEMF_bits(32));
>       if ( !image->control_code_page )
>           goto out;
> +    result = machine_kexec_add_page(image,
> +                                    page_to_maddr(image->control_code_page),
> +                                    page_to_maddr(image->control_code_page));
> +    if ( result < 0 )
> +        goto out;
>   
>       /* Add an empty indirection page. */
>       image->entry_page = kimage_alloc_control_page(image, 0);
>       if ( !image->entry_page )
>           goto out;
> +    result = machine_kexec_add_page(image, page_to_maddr(image->entry_page),
> +                                    page_to_maddr(image->entry_page));
> +    if ( result < 0 )
> +        goto out;
>   
>       image->head = page_to_maddr(image->entry_page);
>   
> @@ -595,7 +604,7 @@ static struct page_info *kimage_alloc_page(struct kexec_image *image,
>           if ( addr == destination )
>           {
>               page_list_del(page, &image->dest_pages);
> -            return page;
> +            goto found;
>           }
>       }
>       page = NULL;
> @@ -647,6 +656,8 @@ static struct page_info *kimage_alloc_page(struct kexec_image *image,
>               page_list_add(page, &image->dest_pages);
>           }
>       }
> +found:
> +    machine_kexec_add_page(image, page_to_maddr(page), page_to_maddr(page));
>       return page;
>   }
>   
> @@ -753,6 +764,7 @@ static int kimage_load_crash_segment(struct kexec_image *image,
>   static int kimage_load_segment(struct kexec_image *image, xen_kexec_segment_t *segment)
>   {
>       int result = -ENOMEM;
> +    paddr_t addr;
>   
>       if ( !guest_handle_is_null(segment->buf.h) )
>       {
> @@ -767,6 +779,14 @@ static int kimage_load_segment(struct kexec_image *image, xen_kexec_segment_t *s
>           }
>       }
>   
> +    for ( addr = segment->dest_maddr & PAGE_MASK;
> +          addr < segment->dest_maddr + segment->dest_size; addr += PAGE_SIZE )
> +    {
> +        result = machine_kexec_add_page(image, addr, addr);
> +        if ( result < 0 )
> +            break;
> +    }
> +
>       return result;
>   }
>   
> @@ -810,6 +830,106 @@ int kimage_load_segments(struct kexec_image *image)
>       return 0;
>   }
>   
> +kimage_entry_t *kimage_entry_next(kimage_entry_t *entry, bool_t compat)
> +{
> +    if ( compat )
> +        return (kimage_entry_t *)((uint32_t *)entry + 1);
> +    return entry + 1;
> +}
> +
> +unsigned long kimage_entry_mfn(kimage_entry_t *entry, bool_t compat)
> +{
> +    if ( compat )
> +        return *(uint32_t *)entry >> PAGE_SHIFT;
> +    return *entry >> PAGE_SHIFT;
> +}
> +
> +unsigned long kimage_entry_ind(kimage_entry_t *entry, bool_t compat)
> +{
> +    if ( compat )
> +        return *(uint32_t *)entry & 0xf;
> +    return *entry & 0xf;
> +}
> +
> +int kimage_build_ind(struct kexec_image *image, unsigned long ind_mfn,
> +                     bool_t compat)
> +{
> +    void *page;
> +    kimage_entry_t *entry;
> +    int ret = 0;
> +    paddr_t dest = KIMAGE_NO_DEST;
> +
> +    page = map_domain_page(ind_mfn);
> +    if ( !page )
> +        return -ENOMEM;
> +
> +    /*
> +     * Walk the guest-supplied indirection pages, adding entries to
> +     * the image's indirection pages.
> +     */
> +    for ( entry = page; ;  )
> +    {
> +        unsigned long ind;
> +        unsigned long mfn;
> +
> +        ind = kimage_entry_ind(entry, compat);
> +        mfn = kimage_entry_mfn(entry, compat);
> +
> +        switch ( ind )
> +        {
> +        case IND_DESTINATION:
> +            dest = (paddr_t)mfn << PAGE_SHIFT;
> +            ret = kimage_set_destination(image, dest);
> +            if ( ret < 0 )
> +                goto done;
> +            break;
> +        case IND_INDIRECTION:
> +            unmap_domain_page(page);
> +            page = map_domain_page(mfn);
> +            entry = page;
> +            continue;
> +        case IND_DONE:
> +            kimage_terminate(image);
> +            goto done;
> +        case IND_SOURCE:
> +        {
> +            struct page_info *guest_page, *xen_page;
> +
> +            guest_page = mfn_to_page(mfn);
> +            if ( !get_page(guest_page, current->domain) )
> +            {
> +                ret = -EFAULT;
> +                goto done;
> +            }
> +
> +            xen_page = kimage_alloc_page(image, dest);
> +            if ( !xen_page )
> +            {
> +                put_page(guest_page);
> +                ret = -ENOMEM;
> +                goto done;
> +            }
> +
> +            copy_domain_page(page_to_mfn(xen_page), mfn);
> +            put_page(guest_page);
> +
> +            ret = kimage_add_page(image, page_to_maddr(xen_page));
> +            if ( ret < 0 )
> +                goto done;
> +            dest += PAGE_SIZE;
> +            break;
> +        }
> +        default:
> +            ret = -EINVAL;
> +            goto done;
> +        }
> +        entry = kimage_entry_next(entry, compat);
> +    }
> +done:
> +    unmap_domain_page(page);
> +    return ret;
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
> index 8b4266d..48c5676 100644
> --- a/xen/include/asm-x86/fixmap.h
> +++ b/xen/include/asm-x86/fixmap.h
> @@ -56,9 +56,6 @@ enum fixed_addresses {
>       FIX_ACPI_BEGIN,
>       FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
>       FIX_HPET_BASE,
> -    FIX_KEXEC_BASE_0,
> -    FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \
> -      + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1,
>       FIX_TBOOT_SHARED_BASE,
>       FIX_MSIX_IO_RESERV_BASE,
>       FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
> diff --git a/xen/include/asm-x86/machine_kexec.h b/xen/include/asm-x86/machine_kexec.h
> new file mode 100644
> index 0000000..ba0d469
> --- /dev/null
> +++ b/xen/include/asm-x86/machine_kexec.h
> @@ -0,0 +1,16 @@
> +#ifndef __X86_MACHINE_KEXEC_H__
> +#define __X86_MACHINE_KEXEC_H__
> +
> +#define KEXEC_RELOC_FLAG_COMPAT 0x1 /* 32-bit image */
> +
> +#ifndef __ASSEMBLY__
> +
> +extern void kexec_reloc(unsigned long reloc_code, unsigned long reloc_pt,
> +                        unsigned long ind_maddr, unsigned long entry_maddr,
> +                        unsigned long flags);
> +
> +extern unsigned int kexec_reloc_size;
> +
> +#endif
> +
> +#endif /* __X86_MACHINE_KEXEC_H__ */
> diff --git a/xen/include/xen/kexec.h b/xen/include/xen/kexec.h
> index 1a5dda1..bd17747 100644
> --- a/xen/include/xen/kexec.h
> +++ b/xen/include/xen/kexec.h
> @@ -6,6 +6,7 @@
>   #include <public/kexec.h>
>   #include <asm/percpu.h>
>   #include <xen/elfcore.h>
> +#include <xen/kimage.h>
>   
>   typedef struct xen_kexec_reserve {
>       unsigned long size;
> @@ -40,11 +41,13 @@ extern enum low_crashinfo low_crashinfo_mode;
>   extern paddr_t crashinfo_maxaddr_bits;
>   void kexec_early_calculations(void);
>   
> -int machine_kexec_load(int type, int slot, xen_kexec_image_t *image);
> -void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image);
> +int machine_kexec_add_page(struct kexec_image *image, unsigned long vaddr,
> +                           unsigned long maddr);
> +int machine_kexec_load(struct kexec_image *image);
> +void machine_kexec_unload(struct kexec_image *image);
>   void machine_kexec_reserved(xen_kexec_reserve_t *reservation);
> -void machine_reboot_kexec(xen_kexec_image_t *image);
> -void machine_kexec(xen_kexec_image_t *image);
> +void machine_reboot_kexec(struct kexec_image *image);
> +void machine_kexec(struct kexec_image *image);
>   void kexec_crash(void);
>   void kexec_crash_save_cpu(void);
>   crash_xen_info_t *kexec_crash_save_info(void);
> @@ -52,11 +55,6 @@ void machine_crash_shutdown(void);
>   int machine_kexec_get(xen_kexec_range_t *range);
>   int machine_kexec_get_xen(xen_kexec_range_t *range);
>   
> -void compat_machine_kexec(unsigned long rnk,
> -                          unsigned long indirection_page,
> -                          unsigned long *page_list,
> -                          unsigned long start_address);
> -
>   /* vmcoreinfo stuff */
>   #define VMCOREINFO_BYTES           (4096)
>   #define VMCOREINFO_NOTE_NAME       "VMCOREINFO_XEN"
> diff --git a/xen/include/xen/kimage.h b/xen/include/xen/kimage.h
> index 0ebd37a..d10ebf7 100644
> --- a/xen/include/xen/kimage.h
> +++ b/xen/include/xen/kimage.h
> @@ -47,6 +47,12 @@ int kimage_load_segments(struct kexec_image *image);
>   struct page_info *kimage_alloc_control_page(struct kexec_image *image,
>                                               unsigned memflags);
>   
> +kimage_entry_t *kimage_entry_next(kimage_entry_t *entry, bool_t compat);
> +unsigned long kimage_entry_mfn(kimage_entry_t *entry, bool_t compat);
> +unsigned long kimage_entry_ind(kimage_entry_t *entry, bool_t compat);
> +int kimage_build_ind(struct kexec_image *image, unsigned long ind_mfn,
> +                     bool_t compat);
> +
>   #endif /* __ASSEMBLY__ */
>   
>   #endif /* __XEN_KIMAGE_H__ */
