[PATCH 2/2] mshv: Add kexec blocking support
Mukesh R
mrathor at linux.microsoft.com
Thu Feb 12 14:11:13 PST 2026
On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> Add kexec notifier to prevent kexec when VMs are active or memory
> is deposited. The notifier blocks kexec operations if:
> - Active VMs exist in the partition table
> - Pages are still deposited to the hypervisor
>
> The kernel cannot access hypervisor deposited pages: any access
> triggers a GPF. Until the deposited page state can be handed over
> to the next kernel, kexec must be blocked if there is any shared
> state between kernel and hypervisor.
>
> For L1 host virtualization, attempt to withdraw all deposited memory before
> allowing kexec to proceed. If withdrawal fails or pages remain deposited,
> block the kexec operation.
>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii at linux.microsoft.com>
> ---
> drivers/hv/Makefile | 1 +
> drivers/hv/hv_proc.c | 4 ++
> drivers/hv/mshv_kexec.c | 66 ++++++++++++++++++++++++++++++++++++++++
> drivers/hv/mshv_root.h | 14 ++++++++
> drivers/hv/mshv_root_hv_call.c | 2 +
> drivers/hv/mshv_root_main.c | 7 ++++
> 6 files changed, 94 insertions(+)
> create mode 100644 drivers/hv/mshv_kexec.c
>
> diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> index a49f93c2d245..bb72be5cc525 100644
> --- a/drivers/hv/Makefile
> +++ b/drivers/hv/Makefile
> @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
> hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
> mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
> mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> +mshv_root-$(CONFIG_KEXEC) += mshv_kexec.o
> mshv_vtl-y := mshv_vtl_main.o
>
> # Code that must be built-in
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 89870c1b0087..39bbbedb0340 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -15,6 +15,8 @@
> */
> #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
>
> +atomic_t hv_pages_deposited;
> +
> /* Deposits exact number of pages. Must be called with interrupts enabled. */
> int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> {
> @@ -93,6 +95,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> goto err_free_allocations;
> }
>
> + atomic_add(page_count, &hv_pages_deposited);
> +
> ret = 0;
> goto free_buf;
>
> diff --git a/drivers/hv/mshv_kexec.c b/drivers/hv/mshv_kexec.c
> new file mode 100644
> index 000000000000..5222b2e4ff97
> --- /dev/null
> +++ b/drivers/hv/mshv_kexec.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026, Microsoft Corporation.
> + *
> + * Live update orchestration management for mshv_root module.
> + *
> + * Author: Stanislav Kinsburskii <skinsburskii at linux.microsoft.com>
> + */
> +
> +#include <linux/kexec.h>
> +#include <linux/notifier.h>
> +#include <asm/mshyperv.h>
> +#include "mshv_root.h"
> +
> +static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
> +
> +static int mshv_block_kexec_notify(struct notifier_block *nb,
> + unsigned long action, void *arg)
> +{
> + if (!hash_empty(mshv_root.pt_htable)) {
> + pr_warn("mshv: Cannot perform kexec while VMs are active\n");
> + return -EBUSY;
> + }
> +
> + if (hv_l1vh_partition()) {
> + int err;
> +
> + /* Attempt to withdraw all the deposited pages */
> + err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE,
> + hv_current_partition_id);
> + if (err) {
> + pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n",
> + err);
> + return err;
> + }
> + }
> +
> + if (atomic_read(&hv_pages_deposited)) {
> + pr_warn("mshv: Cannot perform kexec while pages are deposited\n");
> + return -EBUSY;
> + }
> + return 0;
> +}
> +
What guarantees that another deposit won't happen after this check? Are all
CPUs "locked" in the kexec path and not doing anything at this point?
Thanks,
-Mukesh
> +static struct notifier_block mshv_kexec_notifier = {
> + .notifier_call = mshv_block_kexec_notify,
> +};
> +
> +int __init mshv_kexec_init(void)
> +{
> + int err;
> +
> + err = kexec_block_notifier_register(&mshv_kexec_notifier);
> + if (err) {
> + pr_err("mshv: Could not register kexec notifier: %pe\n",
> + ERR_PTR(err));
> + return err;
> + }
> +
> + return 0;
> +}
> +
> +void __exit mshv_kexec_exit(void)
> +{
> + (void)kexec_block_notifier_unregister(&mshv_kexec_notifier);
> +}
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index 3c1d88b36741..311f76262d10 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -17,6 +17,7 @@
> #include <linux/build_bug.h>
> #include <linux/mmu_notifier.h>
> #include <uapi/linux/mshv.h>
> +#include <hyperv/hvhdk.h>
>
> /*
> * Hypervisor must be between these version numbers (inclusive)
> @@ -319,6 +320,7 @@ int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 a
> extern struct mshv_root mshv_root;
> extern enum hv_scheduler_type hv_scheduler_type;
> extern u8 * __percpu *hv_synic_eventring_tail;
> +extern atomic_t hv_pages_deposited;
>
> struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
> u64 uaddr, u32 flags);
> @@ -333,4 +335,16 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
> void mshv_region_movable_fini(struct mshv_mem_region *region);
> bool mshv_region_movable_init(struct mshv_mem_region *region);
>
> +#if IS_ENABLED(CONFIG_KEXEC)
> +int mshv_kexec_init(void);
> +void mshv_kexec_exit(void);
> +#else
> +static inline int mshv_kexec_init(void)
> +{
> + return 0;
> +}
> +
> +static inline void mshv_kexec_exit(void) { }
> +#endif
> +
> #endif /* _MSHV_ROOT_H_ */
> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index 06f2bac8039d..4203af5190ee 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -73,6 +73,8 @@ int hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
> for (i = 0; i < completed; i++)
> __free_page(pfn_to_page(output_page->gpa_page_list[i]));
>
> + atomic_sub(completed, &hv_pages_deposited);
> +
> if (!hv_result_success(status)) {
> if (hv_result(status) == HV_STATUS_NO_RESOURCES)
> status = HV_STATUS_SUCCESS;
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 5fc572e31cd7..d55aa69d130c 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -2330,6 +2330,10 @@ static int __init mshv_parent_partition_init(void)
> if (ret)
> goto deinit_root_scheduler;
>
> + ret = mshv_kexec_init();
> + if (ret)
> + goto deinit_irqfd_wq;
> +
> spin_lock_init(&mshv_root.pt_ht_lock);
> hash_init(mshv_root.pt_htable);
>
> @@ -2337,6 +2341,8 @@ static int __init mshv_parent_partition_init(void)
>
> return 0;
>
> +deinit_irqfd_wq:
> + mshv_irqfd_wq_cleanup();
> deinit_root_scheduler:
> root_scheduler_deinit();
> exit_partition:
> @@ -2356,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void)
> hv_setup_mshv_handler(NULL);
> mshv_port_table_fini();
> misc_deregister(&mshv_dev);
> + mshv_kexec_exit();
> mshv_irqfd_wq_cleanup();
> root_scheduler_deinit();
> if (hv_root_partition())
>
>
More information about the kexec
mailing list