[PATCH v2 07/15] arm64: hyperv: Add support for mshv_vtl_return_call

Thu Apr 23 06:56:47 PDT 2026

On Thu, Apr 23, 2026 at 12:41:57PM +0000, Naman Jain wrote:
> Add the arm64 variant of mshv_vtl_return_call() to support the MSHV_VTL
> driver on arm64. This function enables the transition between Virtual
> Trust Levels (VTLs) in MSHV_VTL when the kernel acts as a paravisor.
> 
> Signed-off-by: Roman Kisel <romank at linux.microsoft.com>
> Reviewed-by: Roman Kisel <vdso at mailbox.org>
> Signed-off-by: Naman Jain <namjain at linux.microsoft.com>
> ---
>  arch/arm64/hyperv/Makefile        |   1 +
>  arch/arm64/hyperv/hv_vtl.c        | 158 ++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/mshyperv.h |  13 +++
>  arch/x86/include/asm/mshyperv.h   |   2 -
>  drivers/hv/mshv_vtl.h             |   3 +
>  include/asm-generic/mshyperv.h    |   2 +
>  6 files changed, 177 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arm64/hyperv/hv_vtl.c
> 
> diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile
> index 87c31c001da9..9701a837a6e1 100644
> --- a/arch/arm64/hyperv/Makefile
> +++ b/arch/arm64/hyperv/Makefile
> @@ -1,2 +1,3 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-y		:= hv_core.o mshyperv.o
> +obj-$(CONFIG_HYPERV_VTL_MODE)	+= hv_vtl.o
> diff --git a/arch/arm64/hyperv/hv_vtl.c b/arch/arm64/hyperv/hv_vtl.c
> new file mode 100644
> index 000000000000..59cbeb74e7b9
> --- /dev/null
> +++ b/arch/arm64/hyperv/hv_vtl.c
> @@ -0,0 +1,158 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Microsoft, Inc.
> + *
> + * Authors:
> + *     Roman Kisel <romank at linux.microsoft.com>
> + *     Naman Jain <namjain at linux.microsoft.com>
> + */
> +
> +#include <asm/mshyperv.h>
> +#include <asm/neon.h>
> +#include <linux/export.h>
> +
> +void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
> +{
> +	struct user_fpsimd_state fpsimd_state;
> +	u64 base_ptr = (u64)vtl0->x;
> +
> +	/*
> +	 * Obtain the CPU FPSIMD registers for VTL context switch.
> +	 * This saves the current task's FP/NEON state and allows us to
> +	 * safely load VTL0's FP/NEON context for the hypercall.
> +	 */
> +	kernel_neon_begin(&fpsimd_state);
> +
> +	/*
> +	 * VTL switch for ARM64 platform - managing VTL0's CPU context.
> +	 * We explicitly use the stack to save the base pointer, and use x16
> +	 * as our working register for accessing the context structure.
> +	 *
> +	 * Register Handling:
> +	 * - X0-X17: Saved/restored (general-purpose, shared for VTL communication)
> +	 * - X18: NOT touched - hypervisor-managed per-VTL (platform register)
> +	 * - X19-X30: Saved/restored (part of VTL0's execution context)
> +	 * - Q0-Q31: Saved/restored (128-bit NEON/floating-point registers, shared)
> +	 * - SP: Not in structure, hypervisor-managed per-VTL
> +	 *
> +	 * X29 (FP) and X30 (LR) are in the structure and must be saved/restored
> +	 * as part of VTL0's complete execution state.
> +	 */
> +	asm __volatile__ (
> +		/* Save base pointer to stack explicitly, then load into x16 */
> +		"str %0, [sp, #-16]!\n\t"     /* Push base pointer onto stack */
> +		"mov x16, %0\n\t"             /* Load base pointer into x16 */
> +		/* Volatile registers (Windows ARM64 ABI: x0-x17) */
> +		"ldp x0, x1, [x16]\n\t"
> +		"ldp x2, x3, [x16, #(2*8)]\n\t"
> +		"ldp x4, x5, [x16, #(4*8)]\n\t"
> +		"ldp x6, x7, [x16, #(6*8)]\n\t"
> +		"ldp x8, x9, [x16, #(8*8)]\n\t"
> +		"ldp x10, x11, [x16, #(10*8)]\n\t"
> +		"ldp x12, x13, [x16, #(12*8)]\n\t"
> +		"ldp x14, x15, [x16, #(14*8)]\n\t"
> +		/* x16 will be loaded last, after saving base pointer */
> +		"ldr x17, [x16, #(17*8)]\n\t"
> +		/* x18 is hypervisor-managed per-VTL - DO NOT LOAD */
> +
> +		/* General-purpose registers: x19-x30 */
> +		"ldp x19, x20, [x16, #(19*8)]\n\t"
> +		"ldp x21, x22, [x16, #(21*8)]\n\t"
> +		"ldp x23, x24, [x16, #(23*8)]\n\t"
> +		"ldp x25, x26, [x16, #(25*8)]\n\t"
> +		"ldp x27, x28, [x16, #(27*8)]\n\t"
> +
> +		/* Frame pointer and link register */
> +		"ldp x29, x30, [x16, #(29*8)]\n\t"
> +
> +		/* Shared NEON/FP registers: Q0-Q31 (128-bit) */
> +		"ldp q0, q1, [x16, #(32*8)]\n\t"
> +		"ldp q2, q3, [x16, #(32*8 + 2*16)]\n\t"
> +		"ldp q4, q5, [x16, #(32*8 + 4*16)]\n\t"
> +		"ldp q6, q7, [x16, #(32*8 + 6*16)]\n\t"
> +		"ldp q8, q9, [x16, #(32*8 + 8*16)]\n\t"
> +		"ldp q10, q11, [x16, #(32*8 + 10*16)]\n\t"
> +		"ldp q12, q13, [x16, #(32*8 + 12*16)]\n\t"
> +		"ldp q14, q15, [x16, #(32*8 + 14*16)]\n\t"
> +		"ldp q16, q17, [x16, #(32*8 + 16*16)]\n\t"
> +		"ldp q18, q19, [x16, #(32*8 + 18*16)]\n\t"
> +		"ldp q20, q21, [x16, #(32*8 + 20*16)]\n\t"
> +		"ldp q22, q23, [x16, #(32*8 + 22*16)]\n\t"
> +		"ldp q24, q25, [x16, #(32*8 + 24*16)]\n\t"
> +		"ldp q26, q27, [x16, #(32*8 + 26*16)]\n\t"
> +		"ldp q28, q29, [x16, #(32*8 + 28*16)]\n\t"
> +		"ldp q30, q31, [x16, #(32*8 + 30*16)]\n\t"
> +
> +		/* Now load x16 itself */
> +		"ldr x16, [x16, #(16*8)]\n\t"
> +
> +		/* Return to the lower VTL */
> +		"hvc #3\n\t"

NAK to this.

* This is a non-SMCCC hypercall, which we have NAK'd in general in the
  past for various reasons that I am not going to rehash here.

* It's not clear how this is going to be extended with necessary
  architecture state in future (e.g. SVE, SME). This is not
  future-proof, and I don't believe this is maintainable.

* This breaks general requirements for reliable stacktracing by
  clobbering state (e.g. x29) that we depend upon being valid AT ALL
  TIMES outside of entry code.

* IMO, if this register state needs to be saved/restored, that should
  happen in whatever you are calling.

Mark.