[PATCH 3/6] iommu/arm-smmu-qcom: Add Qualcomm TBU driver

Georgi Djakov quic_c_gdjako at quicinc.com
Fri Nov 17 18:26:43 PST 2023


On 10/22/2023 12:05 AM, Bjorn Andersson wrote:
> On Wed, Oct 18, 2023 at 07:19:20PM -0700, Georgi Djakov wrote:
>> Add driver for the Qualcomm implementation of the ARM MMU-500 TBU.
>> The driver will enable the resources needed by the TBU and will
>> configure the registers for some debug features like checking if
>> there are any pending transactions, capturing transactions and
>> running ATOS (Address Translation Operations). ATOS/eCATS are used
>> to manually trigger an address translation of IOVA to physical
>> address by the SMMU hardware.
> 
> I still don't think this commit message clearly enough describe the
> problem you're trying to solve.
> 
> Not until I had read the Kconfig help text did I pay attention to the
> significance of the words "some debug features" in the middle of the
> paragraph.
> 
> 
> Please describe your changes in accordance with [1], i.e. clearly
> describe the problem you're trying to solve, then discuss the technical
> solution in the patch.

Thanks Bjorn, I'll try to improve it! 

> [1] https://docs.kernel.org/process/submitting-patches.html#describe-your-changes
> 
> [..]
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> [..]
>> +#ifdef CONFIG_ARM_SMMU_QCOM_TBU
>> +
>> +struct qsmmuv500_tbu {
>> +	struct device *dev;
>> +	struct arm_smmu_device *smmu;
>> +	u32 sid_range[2];
>> +	struct list_head list;
>> +	struct clk *clk;
>> +	struct icc_path	*path;
>> +	void __iomem *base;
>> +	spinlock_t halt_lock; /* protects halt count */
> 
> But in particular it makes sure that multiple halt or resume can't
> execute concurrently.

Exactly. Will mention it. 

>> +	int halt_count;
>> +};
>> +
>> +static DEFINE_SPINLOCK(ecats_lock);
>> +
>> +static struct qsmmuv500_tbu *qsmmuv500_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
>> +{
>> +	struct qsmmuv500_tbu *tbu = NULL;
>> +	u32 start, end;
>> +
>> +	mutex_lock(&qsmmu->tbu_list_lock);
>> +
>> +	list_for_each_entry(tbu, &qsmmu->tbu_list, list) {
>> +		start = tbu->sid_range[0];
>> +		end = start + tbu->sid_range[1];
>> +
>> +		if (start <= sid && sid < end)
>> +			break;
>> +	}
>> +
>> +	mutex_unlock(&qsmmu->tbu_list_lock);
>> +
>> +	return tbu;
>> +}
>> +
>> +static int qsmmuv500_tbu_halt(struct qsmmuv500_tbu *tbu, struct arm_smmu_domain *smmu_domain)
>> +{
>> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +	int ret = 0, idx = smmu_domain->cfg.cbndx;
>> +	unsigned long flags;
>> +	u32 val, fsr, status;
>> +
>> +	spin_lock_irqsave(&tbu->halt_lock, flags);
> 
> Does this really need to run with interrupts disabled?

This is being executed in threaded irq context. 

>> +	if (tbu->halt_count) {
>> +		tbu->halt_count++;
>> +		goto out;
>> +	}
>> +
> [..]
>> +static phys_addr_t qsmmuv500_iova_to_phys(struct arm_smmu_domain *smmu_domain,
>> +					  dma_addr_t iova, u32 sid)
>> +{
> [..]
>> +	/* Only one concurrent atos operation */
>> +	spin_lock_irqsave(&ecats_lock, flags);
> 
> Does this require interrupts to be disabled?

This also runs in the irq handler during context fault.

>> +
>> +	/*
>> +	 * After a failed translation, the next successful translation will
>> +	 * incorrectly be reported as a failure.
> 
> "So if the ECATS translation fails, attempt the lookup more time."
> 
>> +	 */
>> +	do {
>> +		phys = qsmmuv500_tbu_trigger_atos(smmu_domain, tbu, iova, sid);
>> +
>> +		fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
>> +		if (fsr & ARM_SMMU_FSR_FAULT) {
>> +			/* Clear pending interrupts */
>> +			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
>> +			/*
>> +			 * Barrier required to ensure that the FSR is cleared
>> +			 * before resuming SMMU operation.
>> +			 */
> 
> Better be clear on what this actually does, for future readers' sake:
> 
> 	 /* Ensure that FSR and RESUME operations aren't reordered. */
> 
> But is this really necessary, the two writes are for the same device,
> can they still be reordered?

Right, these are to the same endpoint. It can be dropped. 

>> +			wmb();
>> +
>> +			if (fsr & ARM_SMMU_FSR_SS)
>> +				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
>> +						  ARM_SMMU_RESUME_TERMINATE);
>> +		}
>> +	} while (!phys && needs_redo++ < 2);
> 
> "needs_redo" sounds like a boolean to me. I think "attempt" would be a
> better fit here.
> 

Ok.

>> +
>> +	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
>> +	spin_unlock_irqrestore(&ecats_lock, flags);
>> +	qsmmuv500_tbu_resume(tbu);
>> +
>> +	/* Read to complete prior write transcations */
>> +	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
>> +
>> +	/* Wait for read to complete */
> 
> That's not what rmb() does. You don't need to do anything here,
> readl_relaxed() returns when the read is done.

Ack.

>> +	rmb();
>> +
>> +disable_clk:
>> +	clk_disable_unprepare(tbu->clk);
>> +disable_icc:
>> +	icc_set_bw(tbu->path, 0, 0);
>> +
>> +	return phys;
>> +}
>> +#endif
>> +
>>  static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
>>  				int sync, int status)
>>  {
>> @@ -588,3 +895,80 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
>>  
>>  	return smmu;
>>  }
>> +
>> +#ifdef CONFIG_ARM_SMMU_QCOM_TBU
>> +
>> +static const struct of_device_id qsmmuv500_tbu_of_match[] = {
>> +	{ .compatible = "qcom,qsmmuv500-tbu" },
>> +	{ }
>> +};
> 
> Place this below the remove function, as most other drivers do.

Ack.

>> +
>> +static int qsmmuv500_tbu_probe(struct platform_device *pdev)
>> +{
> [..]
>> +	mutex_lock(&qsmmu->tbu_list_lock);
>> +	list_add_tail(&tbu->list, &qsmmu->tbu_list);
> 
> "tbu" is devres allocated, but you don't pull it off the list (or
> synchronize) during remove.

Right, but I'll just make this a builtin.

>> +	mutex_unlock(&qsmmu->tbu_list_lock);
>> +
>> +	dev_set_drvdata(dev, tbu);
>> +
>> +	return 0;
>> +}
>> +
>> +static void qsmmuv500_tbu_remove(struct platform_device *pdev)
>> +{
>> +	struct qsmmuv500_tbu *tbu = dev_get_drvdata(&pdev->dev);
>> +
>> +	clk_disable_unprepare(tbu->clk);
> 
> This isn't balanced.
> 
>> +	clk_put(tbu->clk);
>> +	icc_put(tbu->path);
>> +}
>> +
>> +static struct platform_driver qsmmuv500_tbu_driver = {
>> +	.driver = {
>> +		.name           = "qsmmuv500-tbu",
>> +		.of_match_table = of_match_ptr(qsmmuv500_tbu_of_match),
> 
> Won't of_match_ptr() result in a build warning if built without
> CONFIG_OF?

Will drop.

>> +	},
>> +	.probe  = qsmmuv500_tbu_probe,
>> +	.remove_new = qsmmuv500_tbu_remove,
>> +};
>> +module_platform_driver(qsmmuv500_tbu_driver);
> 
> This file acts as a library for the arm-smmu driver today, adding a
> platform_driver here makes it look like this is a separate driver.
> 
> Which makes me wonder, why is this a separate driver? Why not just
> loop over the subnodes and build the tbu_list in qcom_smmu_impl_init()?
> 

I am using the platform framework in order to get a more compact code
for this optional driver, but it adds some overhead.. I'll start with
moving all the TBU stuff into a separate file and will try meanwhile
your suggestion..

Thanks,
Georgi 



More information about the linux-arm-kernel mailing list