[PATCH 1/1] iommu/arm-smmu-v3: add support for BBML

Fri Jan 22 08:00:44 EST 2021

On 2021-01-22 12:51, Will Deacon wrote:
> On Thu, Nov 26, 2020 at 11:42:30AM +0800, Zhen Lei wrote:
>> When changing from a set of pages/smaller blocks to a larger block for an
>> address, the software should follow the sequence of BBML processing.
>>
>> When changing from a block to a set of pages/smaller blocks for an
>> address, there's no need to use nT bit. If an address in the large block
>> is accessed before page table switching, the TLB caches the large block
>> mapping. After the page table is switched and before TLB invalidation
>> finished, new access requests are still based on large block mapping.
>> After the block or page is invalidated, the system reads the small block
>> or page mapping from the memory; If the address in the large block is not
>> accessed before page table switching, the TLB has no cache. After the
>> page table is switched, a new access is initiated to read the small block
>> or page mapping from the memory.
>>
>> Signed-off-by: Zhen Lei <thunder.leizhen at huawei.com>
>> ---
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  2 +
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 +
>>   drivers/iommu/io-pgtable-arm.c              | 46 ++++++++++++++++-----
>>   include/linux/io-pgtable.h                  |  1 +
>>   4 files changed, 40 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index e634bbe60573..14a1a11565fb 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -1977,6 +1977,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
>>   		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
>>   		.tlb		= &arm_smmu_flush_ops,
>>   		.iommu_dev	= smmu->dev,
>> +		.bbml		= smmu->bbml,
>>   	};
>>   
>>   	if (smmu_domain->non_strict)
>> @@ -3291,6 +3292,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>>   
>>   	/* IDR3 */
>>   	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
>> +	smmu->bbml = FIELD_GET(IDR3_BBML, reg);
>>   	if (FIELD_GET(IDR3_RIL, reg))
>>   		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
>>   
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> index d4b7f40ccb02..aa7eb460fa09 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> @@ -51,6 +51,7 @@
>>   #define IDR1_SIDSIZE			GENMASK(5, 0)
>>   
>>   #define ARM_SMMU_IDR3			0xc
>> +#define IDR3_BBML			GENMASK(12, 11)
>>   #define IDR3_RIL			(1 << 10)
>>   
>>   #define ARM_SMMU_IDR5			0x14
>> @@ -617,6 +618,7 @@ struct arm_smmu_device {
>>   
>>   	int				gerr_irq;
>>   	int				combined_irq;
>> +	int				bbml;
>>   
>>   	unsigned long			ias; /* IPA */
>>   	unsigned long			oas; /* PA */
>> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
>> index a7a9bc08dcd1..341581337ad0 100644
>> --- a/drivers/iommu/io-pgtable-arm.c
>> +++ b/drivers/iommu/io-pgtable-arm.c
>> @@ -72,6 +72,7 @@
>>   
>>   #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>>   #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
>> +#define ARM_LPAE_PTE_nT			(((arm_lpae_iopte)1) << 16)
>>   #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>>   #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>>   #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
>> @@ -255,7 +256,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
>>   
>>   static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>>   				phys_addr_t paddr, arm_lpae_iopte prot,
>> -				int lvl, arm_lpae_iopte *ptep)
>> +				int lvl, arm_lpae_iopte *ptep, arm_lpae_iopte nT)
>>   {
>>   	arm_lpae_iopte pte = prot;
>>   
>> @@ -265,37 +266,60 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>>   		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
>>   
>>   	pte |= paddr_to_iopte(paddr, data);
>> +	pte |= nT;
>>   
>>   	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
>>   }
>>   
>> +static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
>> +				    arm_lpae_iopte *ptep);
>>   static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>>   			     unsigned long iova, phys_addr_t paddr,
>>   			     arm_lpae_iopte prot, int lvl,
>>   			     arm_lpae_iopte *ptep)
>>   {
>>   	arm_lpae_iopte pte = *ptep;
>> +	struct io_pgtable_cfg *cfg = &data->iop.cfg;
>>   
>>   	if (iopte_leaf(pte, lvl, data->iop.fmt)) {
>>   		/* We require an unmap first */
>>   		WARN_ON(!selftest_running);
>>   		return -EEXIST;
>>   	} else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
>> -		/*
>> -		 * We need to unmap and free the old table before
>> -		 * overwriting it with a block entry.
>> -		 */
>>   		arm_lpae_iopte *tblp;
>> +		struct io_pgtable *iop = &data->iop;
>>   		size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
>>   
>> -		tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
>> -		if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
>> -			WARN_ON(1);
>> -			return -EINVAL;
>> +		switch (cfg->bbml) {
>> +		case 0:
>> +			/*
>> +			 * We need to unmap and free the old table before
>> +			 * overwriting it with a block entry.
>> +			 */
>> +			tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
>> +			if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
>> +				WARN_ON(1);
>> +				return -EINVAL;
>> +			}
>> +			break;
>> +		case 1:
>> +			__arm_lpae_init_pte(data, paddr, prot, lvl, ptep, ARM_LPAE_PTE_nT);
>> +
>> +			io_pgtable_tlb_flush_walk(iop, iova, sz, ARM_LPAE_GRANULE(data));
>> +			tblp = iopte_deref(pte, data);
>> +			__arm_lpae_free_pgtable(data, lvl + 1, tblp);
>> +			break;
>> +		case 2:
>> +			__arm_lpae_init_pte(data, paddr, prot, lvl, ptep, 0);
>> +
>> +			io_pgtable_tlb_flush_walk(iop, iova, sz, ARM_LPAE_GRANULE(data));
>> +			tblp = iopte_deref(pte, data);
>> +			__arm_lpae_free_pgtable(data, lvl + 1, tblp);
>> +			return 0;
> 
> Sorry, but I really don't understand what you're trying to do here. The old
> code uses BBM for the table -> block path so we don't need anything extra
> here. The dodgy case is when we unmap part of a block, and end up installing
> a table via arm_lpae_split_blk_unmap(). We can't use BBM there because there
> could be ongoing DMA to parts of the block mapping that we want to remain in
> place.
> 
> Are you seeing a problem in practice?

Right, I was under the assumption that we could ignore BBML because we 
should never have a legitimate reason to split blocks. I'm certainly not 
keen on piling any more complexity into split_blk_unmap, because the 
IOMMU API clearly doesn't have a well-defined behaviour for that case 
anyway - some other drivers will just unmap the entire block, and IIRC 
there was a hint somewhere in VFIO that it might actually expect that 
behaviour.

Robin.