[RFC PATCH 6/7] KVM: arm64: Configure PBHA bits for stage2
Marc Zyngier
maz at kernel.org
Sat Oct 16 06:46:55 PDT 2021
On Fri, 15 Oct 2021 17:14:15 +0100,
James Morse <james.morse at arm.com> wrote:
>
> There are two conflicting use-cases for PBHA at stage2. We could copy
> the stage1 PBHA bits to stage2; this would ensure the VMM's memory is
> exactly reproduced for the guest, including the PBHA bits. The problem
> here is how the VMM's memory gets allocated with the PBHA bits set in
> the first place.
>
> The other is allowing the guest to configure PBHA directly. This would
> allow guest device drivers to map memory with the appropriate PBHA bits.
> This would only be safe if the guest can be trusted to generate PBHA
> values that only affect performance.
>
> The Arm ARM doesn't describe how the stage1 and stage2 bits are combined.
> Arm's implementations all appear to have the same behaviour, according
> to their TRMs: stage2 wins.
>
> For these CPUs, we can allow a guest to use a PBHA bit by disabling it
> in VTCR_EL2. We just need to know which bits...
>
> The DT describes the values that only affect performance, but if value-5
> is safe for use, we can't prevent the guest from using value-1 and value-4.
> These 'decomposed' values would also need to be listed as only affecting
> performance.
>
> Add a cpufeature for CPUs that have this 'stage2 wins' behaviour.
> Decompose each performance-only value (5 -> 5, 4, 1), and check that each
> of these values is listed as only affecting performance. If so, the bits
> of the original value (5) can be used by the guest at stage1 (by clearing
> those bits in VTCR_EL2).
>
> Signed-off-by: James Morse <james.morse at arm.com>
> ---
> I've checked the TRMs for the listed CPUs.
> There are more; I've asked for the TRMs to always describe this.
> ---
> arch/arm64/include/asm/cpufeature.h | 1 +
> arch/arm64/include/asm/cputype.h | 4 ++
> arch/arm64/kernel/cpufeature.c | 105 ++++++++++++++++++++++++++++
> arch/arm64/kernel/image-vars.h | 3 +
> arch/arm64/kvm/hyp/pgtable.c | 8 ++-
> arch/arm64/tools/cpucaps | 1 +
> 6 files changed, 120 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index ef6be92b1921..ec800ce15308 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -101,6 +101,7 @@ struct arm64_ftr_reg {
> };
>
> extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
> +extern unsigned long arm64_pbha_stage2_safe_bits;
>
> /*
> * CPU capabilities:
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 6231e1f0abe7..4d7f18749d23 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -73,6 +73,8 @@
> #define ARM_CPU_PART_CORTEX_A76 0xD0B
> #define ARM_CPU_PART_NEOVERSE_N1 0xD0C
> #define ARM_CPU_PART_CORTEX_A77 0xD0D
> +#define ARM_CPU_PART_CORTEX_A78 0xD41
> +#define ARM_CPU_PART_CORTEX_X1 0xD44
>
> #define APM_CPU_PART_POTENZA 0x000
>
> @@ -113,6 +115,8 @@
> #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
> #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
> #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
> +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
> +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
> #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
> #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
> #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 88f0f805b938..ad2b3b179ab1 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -77,6 +77,7 @@
> #include <asm/cpu.h>
> #include <asm/cpufeature.h>
> #include <asm/cpu_ops.h>
> +#include <asm/cputype.h>
> #include <asm/fpsimd.h>
> #include <asm/insn.h>
> #include <asm/kvm_host.h>
> @@ -113,6 +114,7 @@ EXPORT_SYMBOL(arm64_use_ng_mappings);
>
> unsigned long __ro_after_init arm64_pbha_perf_only_values;
> EXPORT_SYMBOL(arm64_pbha_perf_only_values);
> +unsigned long __ro_after_init arm64_pbha_stage2_safe_bits;
>
> /*
> * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
> @@ -1680,13 +1682,50 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
>
> #endif
>
> +
> #ifdef CONFIG_ARM64_PBHA
> static u8 pbha_stage1_enable_bits;
> +static DEFINE_SPINLOCK(pbha_dt_lock);
> +
> +/* For the value 5, return a bitmap with bits 5, 4, and 1 set. */
> +static unsigned long decompose_pbha_values(u8 val)
> +{
> + int i;
> + unsigned long mask = 0;
> +
> + for (i = 1; i <= 15; i++) {
> + if ((i & val) == i)
> + set_bit(i, &mask);
> + }
> +
> + return mask;
> +}
> +
> +/*
> + * The bits of a value are safe if all values that can be built from those
> + * enabled bits are listed as only affecting performance.
> + * e.g. 5 would also need 1 and 4 to be listed.
> + *
> + * When there is a conflict with the bits already enabled, the new value is
> + * skipped.
> + * e.g. if 5 already caused bit-0 and bit-2 to be enabled, adding 3 to the list
> + * would need to test 7 as bit-2 is already enabled. If 7 is not listed, 3 is
> + * skipped and bit-1 is not enabled.
> + */
> +static void stage2_test_pbha_value(u8 val)
> +{
> + unsigned long mask;
> +
> + mask = decompose_pbha_values(val | arm64_pbha_stage2_safe_bits);
> + if ((arm64_pbha_perf_only_values & mask) == mask)
> + arm64_pbha_stage2_safe_bits |= val;
> +}
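
To check my understanding of the two helpers above, here is a quick
standalone sketch (userspace re-implementation, illustration only, assuming
the DT lists 1, 4 and 5 as performance-only values):

	#include <stdio.h>

	/* Same idea as decompose_pbha_values(): for 5, set bits 5, 4 and 1 */
	static unsigned long decompose(unsigned char val)
	{
		unsigned long mask = 0;

		for (int i = 1; i <= 15; i++)
			if ((i & val) == i)
				mask |= 1UL << i;
		return mask;
	}

	int main(void)
	{
		/* DT lists 1, 4 and 5 as performance-only */
		unsigned long perf_only = (1UL << 1) | (1UL << 4) | (1UL << 5);
		unsigned long safe_bits = 0;
		unsigned char vals[] = { 1, 4, 5 };

		for (unsigned int i = 0; i < sizeof(vals); i++) {
			unsigned long mask = decompose(vals[i] | safe_bits);

			/* accept a value only if everything it decomposes to is listed */
			if ((perf_only & mask) == mask)
				safe_bits |= vals[i];
		}

		/* prints 0x5: PBHA bits 0 and 2 could be left to the guest's stage1 */
		printf("safe bits: %#lx\n", safe_bits);
		return 0;
	}

If the DT only listed 5, the decomposed values 1 and 4 would be missing from
the perf-only mask and safe_bits would stay 0, matching the "decomposed
values must also be listed" rule from the commit message.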
>
> static bool plat_can_use_pbha_stage1(const struct arm64_cpu_capabilities *cap,
> int scope)
> {
> u8 val;
> + static bool dt_check_done;
> struct device_node *cpus;
> const u8 *perf_only_vals;
> int num_perf_only_vals, i;
> @@ -1702,6 +1741,10 @@ static bool plat_can_use_pbha_stage1(const struct arm64_cpu_capabilities *cap,
> if (scope == SCOPE_LOCAL_CPU)
> return true;
>
> + spin_lock(&pbha_dt_lock);
> + if (dt_check_done)
> + goto out_unlock;
> +
> cpus = of_find_node_by_path("/cpus");
> if (!cpus)
> goto done;
> @@ -1721,9 +1764,24 @@ static bool plat_can_use_pbha_stage1(const struct arm64_cpu_capabilities *cap,
> set_bit(val, &arm64_pbha_perf_only_values);
> }
>
> + /*
> + * for stage2 the values are collapsed back to 4 bits that can only
> + * enable values in the arm64_pbha_perf_only_values mask.
> + */
> + for (i = 0 ; i < num_perf_only_vals; i++) {
> + val = perf_only_vals[i];
> + if (val > 0xf)
> + continue;
> +
> + stage2_test_pbha_value(val);
> + }
> +
> done:
> of_node_put(cpus);
> + dt_check_done = true;
>
> +out_unlock:
> + spin_unlock(&pbha_dt_lock);
> return !!pbha_stage1_enable_bits;
> }
>
> @@ -1743,6 +1801,47 @@ static void cpu_enable_pbha(struct arm64_cpu_capabilities const *cap)
> isb();
> local_flush_tlb_all();
> }
> +
> +/*
> + * PBHA's behaviour is implementation defined, as is the way it combines
> + * stage1 and stage2 attributes. If the kernel supports KVM and was booted
> + * at EL2, only these CPUs can allow PBHA in a guest, as KVM knows how the PBHA
> + * bits are combined. This prevents the host from being affected by some
> + * implementation defined behaviour triggered by the guest.
> + *
> + * The TRMs for these CPUs describe stage2 as overriding stage1.
> + */
> +static const struct midr_range pbha_stage2_wins[] = {
> + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
> + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
> + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
> + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
> + {},
> +};
> +
> +static bool plat_can_use_pbha_stage2(const struct arm64_cpu_capabilities *cap,
> + int scope)
> +{
> + /* Booted at EL2? */
> + if (!is_hyp_mode_available() && !is_kernel_in_hyp_mode())
> + return false;
> +
> + if (!is_midr_in_range_list(read_cpuid_id(), cap->midr_range_list))
> + return false;
> +
> + /*
> + * Calls with scope == SCOPE_LOCAL_CPU only need to test whether this
> + * CPU has the feature. A later 'system' scope call will check for a
> + * firmware description.
> + */
> + if (scope == SCOPE_LOCAL_CPU)
> + return true;
> +
> + if (!__system_matches_cap(ARM64_HAS_PBHA_STAGE1))
> + return false;
> +
> + return !!arm64_pbha_stage2_safe_bits;
> +}
> #endif /* CONFIG_ARM64_PBHA */
>
> #ifdef CONFIG_ARM64_AMU_EXTN
> @@ -2418,6 +2517,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .min_field_value = 2,
> .cpu_enable = cpu_enable_pbha,
> },
> + {
> + .capability = ARM64_HAS_PBHA_STAGE2,
> + .type = ARM64_CPUCAP_SYSTEM_FEATURE,
> + .matches = plat_can_use_pbha_stage2,
> + .midr_range_list = pbha_stage2_wins,
> + },
> #endif
> {},
> };
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index c96a9a0043bf..d6abdc53f4d9 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -134,6 +134,9 @@ KVM_NVHE_ALIAS(__hyp_rodata_end);
> /* pKVM static key */
> KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
>
> +/* PBHA bits for stage2 */
> +KVM_NVHE_ALIAS(arm64_pbha_stage2_safe_bits);
I'm not totally keen on this, see below.
> +
> #endif /* CONFIG_KVM */
>
> #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 7bd90ea1c61f..c0bfef55a1ff 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -520,7 +520,7 @@ struct stage2_map_data {
> u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
> {
> u64 vtcr = VTCR_EL2_FLAGS;
> - u8 lvls;
> + u8 lvls, pbha = 0xf;
>
> vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
> vtcr |= VTCR_EL2_T0SZ(phys_shift);
> @@ -545,9 +545,13 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
> * value will always be 0, which is defined as the safe default
> * setting. On Cortex cores, enabling PBHA for stage2 effectively
> * disables it for stage1.
> + * When the HAS_PBHA_STAGE2 feature is supported, clear the 'safe'
> + * bits to allow the guest's stage1 to use these bits.
> */
> + if (cpus_have_final_cap(ARM64_HAS_PBHA_STAGE2))
> + pbha = pbha ^ arm64_pbha_stage2_safe_bits;
> if (cpus_have_final_cap(ARM64_HAS_PBHA))
> - vtcr = FIELD_PREP(VTCR_EL2_PBHA_MASK, 0xf);
> + vtcr = FIELD_PREP(VTCR_EL2_PBHA_MASK, pbha);
ORing bug aside, this isn't great in the protected case, where we
ultimately don't want to trust a value that is under EL1 control for
page tables.
I'd suggest reusing the hack we have for things like kimage_voffset,
where we generate the value as an immediate that gets inlined.
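For the sake of illustration, something along these lines (a rough,
untested sketch modelled on kvm_get_kimage_voffset() and generate_mov_q()
in arch/arm64/kvm/va_layout.c; kvm_get_pbha_safe_bits() and
pbha_stage2_safe_bits() are made-up names):

	/* In arch/arm64/kvm/va_layout.c, next to kvm_get_kimage_voffset(): */
	void kvm_get_pbha_safe_bits(struct alt_instr *alt,
				    __le32 *origptr, __le32 *updptr, int nr_inst)
	{
		/* Encode arm64_pbha_stage2_safe_bits as movz/movk immediates */
		generate_mov_q(arm64_pbha_stage2_safe_bits, origptr, updptr, nr_inst);
	}

	/* At the use site, instead of reading the variable: */
	static __always_inline u64 pbha_stage2_safe_bits(void)
	{
		u64 val;

		asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
					    "movk %0, #0, lsl #16\n"
					    "movk %0, #0, lsl #32\n"
					    "movk %0, #0, lsl #48\n",
					    kvm_get_pbha_safe_bits)
			     : "=r" (val));

		return val;
	}

That way the value is baked into the text as immediates when the
alternatives are applied, rather than loaded at runtime from a variable
that EL1 controls (and it removes the need for the KVM_NVHE_ALIAS above).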
Thanks,
M.
--
Without deviation from the norm, progress is not possible.