[PATCH v7 20/45] arm64: RME: Allow populating initial contents
Gavin Shan
gshan at redhat.com
Mon Mar 3 21:09:59 PST 2025
On 2/14/25 2:14 AM, Steven Price wrote:
> The VMM needs to populate the realm with some data before starting (e.g.
> a kernel and initrd). This is measured by the RMM and used as part of
> the attestation later on.
>
> Co-developed-by: Suzuki K Poulose <suzuki.poulose at arm.com>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose at arm.com>
> Signed-off-by: Steven Price <steven.price at arm.com>
> ---
> Changes since v6:
> * Handle host potentially having a larger page size than the RMM
> granule.
> * Drop historic "par" (protected address range) from
> populate_par_region() - it doesn't exist within the current
> architecture.
> * Add a cond_resched() call in kvm_populate_realm().
> Changes since v5:
> * Refactor to use PFNs rather than tracking struct page in
> realm_create_protected_data_page().
> * Pull changes from a later patch (in the v5 series) for accessing
> pages from a guest memfd.
> * Do the populate in chunks to avoid holding locks for too long and
> triggering RCU stall warnings.
> ---
> arch/arm64/kvm/rme.c | 234 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 234 insertions(+)
>
With the following comments addressed:
Reviewed-by: Gavin Shan <gshan at redhat.com>
> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index f965869e9ef7..7880894db722 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -624,6 +624,228 @@ void kvm_realm_unmap_range(struct kvm *kvm, unsigned long start, u64 size,
> realm_unmap_private_range(kvm, start, end);
> }
>
> +static int realm_create_protected_data_granule(struct realm *realm,
> + unsigned long ipa,
> + phys_addr_t dst_phys,
> + phys_addr_t src_phys,
> + unsigned long flags)
> +{
> + phys_addr_t rd = virt_to_phys(realm->rd);
> + int ret;
> +
> + if (rmi_granule_delegate(dst_phys))
> + return -ENXIO;
> +
> + ret = rmi_data_create(rd, dst_phys, ipa, src_phys, flags);
> + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
> + /* Create missing RTTs and retry */
> + int level = RMI_RETURN_INDEX(ret);
> +
> + WARN_ON(level == RMM_RTT_MAX_LEVEL);
> +
> + ret = realm_create_rtt_levels(realm, ipa, level,
> + RMM_RTT_MAX_LEVEL, NULL);
> + if (ret)
> + return -EIO;
> +
> + ret = rmi_data_create(rd, dst_phys, ipa, src_phys, flags);
> + }
> + if (ret)
> + return -EIO;
> +
> + return 0;
> +}
> +
> +static int realm_create_protected_data_page(struct realm *realm,
> + unsigned long ipa,
> + kvm_pfn_t dst_pfn,
> + kvm_pfn_t src_pfn,
> + unsigned long flags)
> +{
> + unsigned long rd = virt_to_phys(realm->rd);
> + phys_addr_t dst_phys, src_phys;
> + bool undelegate_failed = false;
> + int ret, offset;
> +
> + dst_phys = __pfn_to_phys(dst_pfn);
> + src_phys = __pfn_to_phys(src_pfn);
> +
> + for (offset = 0; offset < PAGE_SIZE; offset += RMM_PAGE_SIZE) {
> + ret = realm_create_protected_data_granule(realm,
> + ipa,
> + dst_phys,
> + src_phys,
> + flags);
> + if (ret)
> + goto err;
> +
> + ipa += RMM_PAGE_SIZE;
> + dst_phys += RMM_PAGE_SIZE;
> + src_phys += RMM_PAGE_SIZE;
> + }
> +
> + return 0;
> +
> +err:
> + if (ret == -EIO) {
> + /* current offset needs undelegating */
> + if (WARN_ON(rmi_granule_undelegate(dst_phys)))
> + undelegate_failed = true;
> + }
> + while (offset > 0) {
> + ipa -= RMM_PAGE_SIZE;
> + offset -= RMM_PAGE_SIZE;
> + dst_phys -= RMM_PAGE_SIZE;
> +
> + rmi_data_destroy(rd, ipa, NULL, NULL);
> +
> + if (WARN_ON(rmi_granule_undelegate(dst_phys)))
> + undelegate_failed = true;
> + }
> +
> + if (undelegate_failed) {
> + /*
> + * A granule could not be undelegated,
> + * so the page has to be leaked
> + */
> + get_page(pfn_to_page(dst_pfn));
> + }
> +
> + return -ENXIO;
> +}
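
The per-granule loop above is what covers the changelog item about the host
PAGE_SIZE possibly being larger than the RMM granule. Just to illustrate the
fan-out, here is a tiny standalone sketch (not kernel code; a 64KiB host
PAGE_SIZE, a 4KiB RMM_PAGE_SIZE and the IPA value are assumptions picked for
the example):

	#include <stdio.h>

	#define HOST_PAGE_SIZE	(64 * 1024)	/* assumed host PAGE_SIZE */
	#define RMM_GRANULE	(4 * 1024)	/* assumed RMM_PAGE_SIZE  */

	int main(void)
	{
		unsigned long ipa = 0x80000000UL;	/* hypothetical IPA */
		unsigned long offset;

		/* One host page becomes 16 delegate + RMI_DATA_CREATE steps */
		for (offset = 0; offset < HOST_PAGE_SIZE; offset += RMM_GRANULE)
			printf("granule %2lu -> IPA 0x%lx\n",
			       offset / RMM_GRANULE, ipa + offset);

		return 0;
	}
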
> +
> +static int populate_region(struct kvm *kvm,
> + phys_addr_t ipa_base,
> + phys_addr_t ipa_end,
> + unsigned long data_flags)
> +{
> + struct realm *realm = &kvm->arch.realm;
> + struct kvm_memory_slot *memslot;
> + gfn_t base_gfn, end_gfn;
> + int idx;
> + phys_addr_t ipa;
> + int ret = 0;
> +
> + base_gfn = gpa_to_gfn(ipa_base);
> + end_gfn = gpa_to_gfn(ipa_end);
> +
> + idx = srcu_read_lock(&kvm->srcu);
> + memslot = gfn_to_memslot(kvm, base_gfn);
> + if (!memslot) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + /* We require the region to be contained within a single memslot */
> + if (memslot->base_gfn + memslot->npages < end_gfn) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (!kvm_slot_can_be_private(memslot)) {
> + ret = -EINVAL;
ret = -EPERM;
> + goto out;
> + }
> +
> + write_lock(&kvm->mmu_lock);
> +
> + ipa = ALIGN_DOWN(ipa_base, PAGE_SIZE);
The alignment operation is unnecessary since the base/size are ensured
to be aligned to PAGE_SIZE by the caller (kvm_populate_realm()).
> + while (ipa < ipa_end) {
> + struct vm_area_struct *vma;
> + unsigned long hva;
> + struct page *page;
> + bool writeable;
> + kvm_pfn_t pfn;
> + kvm_pfn_t priv_pfn;
> + struct page *gmem_page;
> +
> + hva = gfn_to_hva_memslot(memslot, gpa_to_gfn(ipa));
> + vma = vma_lookup(current->mm, hva);
> + if (!vma) {
> + ret = -EFAULT;
> + break;
> + }
> +
> + pfn = __kvm_faultin_pfn(memslot, gpa_to_gfn(ipa), FOLL_WRITE,
> + &writeable, &page);
> +
> + if (is_error_pfn(pfn)) {
> + ret = -EFAULT;
> + break;
> + }
> +
> + ret = kvm_gmem_get_pfn(kvm, memslot,
> + ipa >> PAGE_SHIFT,
> + &priv_pfn, &gmem_page, NULL);
> + if (ret)
> + break;
> +
> + ret = realm_create_protected_data_page(realm, ipa,
> + priv_pfn,
> + pfn,
> + data_flags);
> +
> + kvm_release_faultin_page(kvm, page, false, false);
> +
> + if (ret)
> + break;
> +
> + ipa += PAGE_SIZE;
> + }
> +
> + write_unlock(&kvm->mmu_lock);
> +
> +out:
> + srcu_read_unlock(&kvm->srcu, idx);
> + return ret;
> +}
> +
> +static int kvm_populate_realm(struct kvm *kvm,
> + struct arm_rme_populate_realm *args)
> +{
> + phys_addr_t ipa_base, ipa_end;
> + unsigned long data_flags = 0;
> +
> + if (kvm_realm_state(kvm) != REALM_STATE_NEW)
> + return -EINVAL;
return -EPERM;
> +
> + if (!IS_ALIGNED(args->base, PAGE_SIZE) ||
> + !IS_ALIGNED(args->size, PAGE_SIZE))
> + return -EINVAL;
> +
> + if (args->flags & ~RMI_MEASURE_CONTENT)
> + return -EINVAL;
It would be cleaner to combine those checks:

	if (!IS_ALIGNED(args->base, PAGE_SIZE) ||
	    !IS_ALIGNED(args->size, PAGE_SIZE) ||
	    (args->flags & ~RMI_MEASURE_CONTENT))
		return -EINVAL;
> +
> + ipa_base = args->base;
> + ipa_end = ipa_base + args->size;
> +
> + if (ipa_end < ipa_base)
> + return -EINVAL;
> +
> + if (args->flags & RMI_MEASURE_CONTENT)
> + data_flags |= RMI_MEASURE_CONTENT;
> +
> + /*
> + * Perform the populate in parts to ensure locks are not held for too
> + * long
> + */
s/populate/population ?
> + while (ipa_base < ipa_end) {
> + phys_addr_t end = min(ipa_end, ipa_base + SZ_2M);
> +
> + int ret = populate_region(kvm, ipa_base, end,
> + args->flags);
> +
> + if (ret)
> + return ret;
> +
> + ipa_base = end;
> +
> + cond_resched();
> + }
> +
> + return 0;
> +}
> +
> static int realm_set_ipa_state(struct kvm_vcpu *vcpu,
> unsigned long start,
> unsigned long end,
> @@ -873,6 +1095,18 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> r = kvm_init_ipa_range_realm(kvm, &args);
> break;
> }
> + case KVM_CAP_ARM_RME_POPULATE_REALM: {
> + struct arm_rme_populate_realm args;
> + void __user *argp = u64_to_user_ptr(cap->args[1]);
> +
> + if (copy_from_user(&args, argp, sizeof(args))) {
> + r = -EFAULT;
> + break;
> + }
> +
> + r = kvm_populate_realm(kvm, &args);
> + break;
> + }
> default:
> r = -EINVAL;
> break;
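
As a closing note, the userspace flow for this hunk would look roughly like
the sketch below. It is only an illustration: the KVM_CAP_ARM_RME/args[0]
plumbing and the exact struct layout are not visible in this hunk, so treat
those as assumptions; base/size/flags and RMI_MEASURE_CONTENT are as used in
the code above.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int populate_realm_image(int vm_fd, uint64_t ipa, uint64_t size)
	{
		struct arm_rme_populate_realm populate = {
			.base  = ipa,			/* must be PAGE_SIZE aligned */
			.size  = size,			/* must be PAGE_SIZE aligned */
			.flags = RMI_MEASURE_CONTENT,	/* measure the contents too */
		};
		struct kvm_enable_cap cap;

		memset(&cap, 0, sizeof(cap));
		cap.cap = KVM_CAP_ARM_RME;			/* assumed top-level cap */
		cap.args[0] = KVM_CAP_ARM_RME_POPULATE_REALM;	/* assumed sub-command slot */
		cap.args[1] = (uint64_t)&populate;

		/* Only valid while the realm is still REALM_STATE_NEW */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}
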
Thanks,
Gavin