[PATCH v3 20/21] KVM: arm64: Restrict EL2 stage-1 changes in protected mode

Fuad Tabba tabba at google.com
Tue Aug 3 01:22:03 PDT 2021


Hi Quentin,

> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 0ccea58df7e0..1b67f562b6fc 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -338,6 +338,95 @@ static int host_stage2_idmap(u64 addr)
>         return ret;
>  }
>
> +static inline bool check_prot(enum kvm_pgtable_prot prot,
> +                             enum kvm_pgtable_prot required,
> +                             enum kvm_pgtable_prot denied)
> +{
> +       return (prot & (required | denied)) == required;
> +}
> +
> +int __pkvm_host_share_hyp(u64 pfn)
> +{
> +       phys_addr_t addr = hyp_pfn_to_phys(pfn);
> +       enum kvm_pgtable_prot prot, cur;
> +       void *virt = __hyp_va(addr);
> +       enum pkvm_page_state state;
> +       kvm_pte_t pte;
> +       u32 level;
> +       int ret;
> +
> +       if (!range_is_memory(addr, addr + PAGE_SIZE))
> +               return -EINVAL;
> +
> +       hyp_spin_lock(&host_kvm.lock);
> +       hyp_spin_lock(&pkvm_pgd_lock);
> +
> +       ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
> +       if (ret)
> +               goto unlock;
> +       if (!pte)
> +               goto map_shared;

Should this also check kvm_pte_valid()? Or is a valid PTE guaranteed
to always be the case here, or implicitly handled later?
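
For reference, a minimal sketch of the check I have in mind (the
placement and the error code are my guesses, assuming kvm_pte_valid()
is usable from here):

        ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
        if (ret)
                goto unlock;
        if (!pte)
                goto map_shared;
        /* Non-zero but invalid PTE, e.g. an ownership annotation? */
        if (!kvm_pte_valid(pte)) {
                ret = -EPERM;
                goto unlock;
        }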

> +
> +       /*
> +        * Check attributes in the host stage-2 PTE. We need the page to be:
> +        *  - mapped RWX as we're sharing memory;
> +        *  - not borrowed, as that implies absence of ownership.
> +        * Otherwise, we can't let it go through.
> +        */
> +       cur = kvm_pgtable_stage2_pte_prot(pte);
> +       prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
> +       if (!check_prot(cur, KVM_PGTABLE_PROT_RWX, prot)) {
> +               ret = -EPERM;
> +               goto unlock;
> +       }
> +
> +       state = pkvm_getstate(cur);
> +       if (state == PKVM_PAGE_OWNED)
> +               goto map_shared;
> +
> +       /*
> +        * Tolerate double-sharing the same page, but this requires
> +        * cross-checking the hypervisor stage-1.
> +        */
> +       if (state != PKVM_PAGE_SHARED_OWNED) {
> +               ret = -EPERM;
> +               goto unlock;
> +       }
> +
> +       ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, &level);
> +       if (ret)
> +               goto unlock;
> +
> +       /*
> +        * If the page has been shared with the hypervisor, it must be
> +        * SHARED_BORROWED already.
> +        */

This comment confused me at first, but then I realized it's referring
to the page from the hyp's point of view. Could you add something to
the comment to that effect?
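
Something along these lines, perhaps:

        /*
         * If the page has already been shared with the hypervisor, it
         * must appear as SHARED_BORROWED in the hypervisor's stage-1.
         */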

It might also be easier to follow if the variables were annotated to
specify whether cur, state, and prot are the host's or the hyp's
(rather than reusing the same ones for both); see the sketch below.
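
For instance (just a sketch, the names are only suggestions):

        enum kvm_pgtable_prot host_prot, hyp_prot, expected;

        host_prot = kvm_pgtable_stage2_pte_prot(pte);
        ...
        hyp_prot = kvm_pgtable_hyp_pte_prot(pte);
        expected = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
        if (!check_prot(hyp_prot, expected, ~expected))
                ret = -EPERM;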

> +       cur = kvm_pgtable_hyp_pte_prot(pte);
> +       prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
> +       if (!check_prot(cur, prot, ~prot))
> +               ret = -EPERM;
> +       goto unlock;
> +
> +map_shared:
> +       /*
> +        * If the page is not yet shared, adjust mappings in both page-tables
> +        * while both locks are held.
> +        */
> +       prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
> +       ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
> +       BUG_ON(ret);
> +
> +       prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED);
> +       ret = host_stage2_idmap_locked(addr, addr + PAGE_SIZE, prot);
> +       BUG_ON(ret);
> +
> +unlock:
> +       hyp_spin_unlock(&pkvm_pgd_lock);
> +       hyp_spin_unlock(&host_kvm.lock);
> +
> +       return ret;
> +}
> +
>  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  {
>         struct kvm_vcpu_fault_info fault;
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 0625bf2353c2..cbab146cda6a 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -259,10 +259,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size,
>  {
>         int err;
>
> -       if (!kvm_host_owns_hyp_mappings()) {
> -               return kvm_call_hyp_nvhe(__pkvm_create_mappings,
> -                                        start, size, phys, prot);
> -       }
> +       if (WARN_ON(!kvm_host_owns_hyp_mappings()))
> +               return -EINVAL;
>
>         mutex_lock(&kvm_hyp_pgd_mutex);
>         err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
> @@ -282,6 +280,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
>         }
>  }
>
> +static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
> +{
> +       phys_addr_t addr;
> +       int ret;
> +
> +       for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
> +               ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
> +                                       __phys_to_pfn(addr));

I guess we don't expect this to happen often, but I wonder if it would
be better to have the looping in the hyp call rather than here, to
reduce the number of hyp calls when sharing.
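
For example, with a hypothetical nr_pages argument added to the
__pkvm_host_share_hyp hypercall (the hyp side would then do the loop
once, under a single pair of lock acquisitions), the host side could
collapse to something like:

static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
{
        u64 pfn = __phys_to_pfn(ALIGN_DOWN(start, PAGE_SIZE));
        u64 nr_pages = __phys_to_pfn(PAGE_ALIGN(end)) - pfn;

        /* One hypercall for the whole range instead of one per page. */
        return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, nr_pages);
}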

Thanks,
/fuad

> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
>  /**
>   * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
>   * @from:      The virtual kernel start address of the range
> @@ -302,6 +315,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
>         if (is_kernel_in_hyp_mode())
>                 return 0;
>
> +       if (!kvm_host_owns_hyp_mappings()) {
> +               if (WARN_ON(prot != PAGE_HYP))
> +                       return -EPERM;
> +               return pkvm_share_hyp(kvm_kaddr_to_phys(from),
> +                                     kvm_kaddr_to_phys(to));
> +       }
> +
>         start = start & PAGE_MASK;
>         end = PAGE_ALIGN(end);
>
> --
> 2.32.0.432.gabb21c7263-goog
>


