[PATCH] irqchip/gic-v3-its: Mark some in-memory data structures as 'decrypted'

Marc Zyngier maz at kernel.org
Thu Dec 9 01:41:23 PST 2021


On Wed, 08 Dec 2021 15:59:16 +0000,
Will Deacon <will at kernel.org> wrote:
> 
> The GICv3 ITS driver allocates memory for its tables using alloc_pages()
> and performs explicit cache maintenance if necessary. On systems such
> as those running pKVM, where the memory encryption API is implemented,
> memory shared with the ITS must first be transitioned to the "decrypted"
> state, as it would be if allocated via the DMA API.
> 
> Allow pKVM guests to interact with an ITS emulation by ensuring that the
> shared pages are decrypted at the point of allocation and encrypted
> again upon free().
> 
> Cc: Thomas Gleixner <tglx at linutronix.de>
> Cc: Marc Zyngier <maz at kernel.org>
> Signed-off-by: Will Deacon <will at kernel.org>
> ---
> 
> Although pKVM doesn't yet expose share/unshare hypercalls to the guest,
> this change is agnostic of the hypervisor and could be queued
> independently as it has no functional impact when the memory encryption
> API is not implemented.
> 
>  drivers/irqchip/irq-gic-v3-its.c | 40 ++++++++++++++++++++++++++------
>  1 file changed, 33 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index eb0882d15366..4559b8dfb9bc 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -27,6 +27,7 @@
>  #include <linux/of_pci.h>
>  #include <linux/of_platform.h>
>  #include <linux/percpu.h>
> +#include <linux/set_memory.h>
>  #include <linux/slab.h>
>  #include <linux/syscore_ops.h>
>  
> @@ -2166,6 +2167,7 @@ static void gic_reset_prop_table(void *va)
>  
>  	/* Make sure the GIC will observe the written configuration */
>  	gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ);
> +	set_memory_decrypted((unsigned long)va, LPI_PROPBASE_SZ >> PAGE_SHIFT);
>  }
>  
>  static struct page *its_allocate_prop_table(gfp_t gfp_flags)
> @@ -2183,8 +2185,10 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
>  
>  static void its_free_prop_table(struct page *prop_page)
>  {
> -	free_pages((unsigned long)page_address(prop_page),
> -		   get_order(LPI_PROPBASE_SZ));
> +	unsigned long va = (unsigned long)page_address(prop_page);
> +
> +	set_memory_encrypted(va, LPI_PROPBASE_SZ >> PAGE_SHIFT);
> +	free_pages(va, get_order(LPI_PROPBASE_SZ));
>  }
>  
>  static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
> @@ -2377,6 +2381,8 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
>  		return -ENXIO;
>  	}
>  
> +	set_memory_decrypted((unsigned long)base,
> +			     PAGE_ORDER_TO_SIZE(order) >> PAGE_SHIFT);
>  	baser->order = order;
>  	baser->base = base;
>  	baser->psz = psz;
> @@ -2512,8 +2518,12 @@ static void its_free_tables(struct its_node *its)
>  
>  	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
>  		if (its->tables[i].base) {
> -			free_pages((unsigned long)its->tables[i].base,
> -				   its->tables[i].order);
> +			unsigned long base = (unsigned long)its->tables[i].base;
> +			u32 order = its->tables[i].order;
> +			u32 npages = PAGE_ORDER_TO_SIZE(order) >> PAGE_SHIFT;
> +
> +			set_memory_encrypted(base, npages);
> +			free_pages(base, order);
>  			its->tables[i].base = NULL;
>  		}
>  	}
> @@ -2934,6 +2944,7 @@ static int its_alloc_collections(struct its_node *its)
>  static struct page *its_allocate_pending_table(gfp_t gfp_flags)
>  {
>  	struct page *pend_page;
> +	void *va;
>  
>  	pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
>  				get_order(LPI_PENDBASE_SZ));
> @@ -2941,14 +2952,19 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
>  		return NULL;
>  
>  	/* Make sure the GIC will observe the zero-ed page */
> -	gic_flush_dcache_to_poc(page_address(pend_page), LPI_PENDBASE_SZ);
> +	va = page_address(pend_page);
> +	gic_flush_dcache_to_poc(va, LPI_PENDBASE_SZ);
> +	set_memory_decrypted((unsigned long)va, LPI_PENDBASE_SZ >> PAGE_SHIFT);
>  
>  	return pend_page;
>  }
>  
>  static void its_free_pending_table(struct page *pt)
>  {
> -	free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
> +	unsigned long va = (unsigned long)page_address(pt);
> +
> +	set_memory_encrypted(va, LPI_PENDBASE_SZ >> PAGE_SHIFT);
> +	free_pages(va, get_order(LPI_PENDBASE_SZ));
>  }
>  
>  /*
> @@ -3268,14 +3284,20 @@ static bool its_alloc_table_entry(struct its_node *its,
>  
>  	/* Allocate memory for 2nd level table */
>  	if (!table[idx]) {
> +		void *l2addr;
> +
>  		page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO,
>  					get_order(baser->psz));
>  		if (!page)
>  			return false;
>  
> +		l2addr = page_address(page);
> +		set_memory_decrypted((unsigned long)l2addr,
> +				     baser->psz >> PAGE_SHIFT);
> +
>  		/* Flush Lvl2 table to PoC if hw doesn't support coherency */
>  		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
> -			gic_flush_dcache_to_poc(page_address(page), baser->psz);
> +			gic_flush_dcache_to_poc(l2addr, baser->psz);
>  
>  		table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID);
>  
> @@ -5043,6 +5065,8 @@ static int __init its_probe_one(struct resource *res,
>  	its->fwnode_handle = handle;
>  	its->get_msi_base = its_irq_get_msi_base;
>  	its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP;
> +	set_memory_decrypted((unsigned long)its->cmd_base,
> +			     ITS_CMD_QUEUE_SZ >> PAGE_SHIFT);
>  
>  	its_enable_quirks(its);
>  
> @@ -5099,6 +5123,8 @@ static int __init its_probe_one(struct resource *res,
>  out_free_tables:
>  	its_free_tables(its);
>  out_free_cmd:
> +	set_memory_encrypted((unsigned long)its->cmd_base,
> +			     ITS_CMD_QUEUE_SZ >> PAGE_SHIFT);
>  	free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ));
>  out_unmap_sgir:
>  	if (its->sgir_base)

This misses the subtly hidden per-endpoint ITT allocation (see
its_create_device()), which is the only one that is not a full page
allocation.

You could of course upgrade it to a full page, but this is going to
waste a lot of memory on systems with large page sizes, lots of
devices, and only few interrupts per device.

I guess that this would be acceptable on systems such as those
currently targeted by pKVM, but you'd need to make it a runtime
decision. Another possibility would be to build a custom allocator for
this, but this feels like a massive undertaking.

Maybe that's the push we needed to switch the ITS driver to be a
full-blown platform driver instead of a side hack.

	M.

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list