[PATCH 4/4] iommu/arm-smmu-v3: Remove cmpxchg() in arm_smmu_cmdq_issue_cmdlist()

kernel test robot lkp at intel.com
Mon Jun 22 21:07:34 EDT 2020


Hi John,

I love your patch! Perhaps something to improve:

[auto build test WARNING on iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use  as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/John-Garry/iommu-arm-smmu-v3-Improve-cmdq-lock-efficiency/20200623-013438
base:   https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config: arm64-randconfig-c024-20200622 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp at intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

In file included from include/linux/bits.h:23,
from include/linux/ioport.h:15,
from include/linux/acpi.h:12,
from drivers/iommu/arm-smmu-v3.c:12:
drivers/iommu/arm-smmu-v3.c: In function 'arm_smmu_cmdq_issue_cmdlist':
include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
|                            ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
|                                                              ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
|   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
|                  ^~~~~~~
include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
|                                        ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
|                                                              ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
|   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
|                  ^~~~~~~

vim +/GENMASK +1404 drivers/iommu/arm-smmu-v3.c

  1369	
  1370	/*
  1371	 * This is the actual insertion function, and provides the following
  1372	 * ordering guarantees to callers:
  1373	 *
  1374	 * - There is a dma_wmb() before publishing any commands to the queue.
  1375	 *   This can be relied upon to order prior writes to data structures
  1376	 *   in memory (such as a CD or an STE) before the command.
  1377	 *
  1378	 * - On completion of a CMD_SYNC, there is a control dependency.
  1379	 *   This can be relied upon to order subsequent writes to memory (e.g.
  1380	 *   freeing an IOVA) after completion of the CMD_SYNC.
  1381	 *
  1382	 * - Command insertion is totally ordered, so if two CPUs each race to
  1383	 *   insert their own list of commands then all of the commands from one
  1384	 *   CPU will appear before any of the commands from the other CPU.
  1385	 *
  1386	 * - A CMD_SYNC is always inserted, which ensures we limit the prod pointer
  1387	 *   for when the cmdq is full, such that we don't wrap more than twice.
  1388	 *   It also makes it easy for the owner to know by how many to increment the
  1389	 *   cmdq lock.
  1390	 */
  1391	static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
  1392					       u64 *cmds, int n)
  1393	{
  1394		u64 cmd_sync[CMDQ_ENT_DWORDS];
  1395		const int sync = 1;
  1396		u32 prod;
  1397		unsigned long flags;
  1398		bool owner;
  1399		struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
  1400		struct arm_smmu_ll_queue llq = {
  1401			.max_n_shift = cmdq->q.llq.max_n_shift,
  1402		}, head = llq, space = llq;
  1403		u32 owner_val = 1 << cmdq->q.llq.owner_count_shift;
> 1404		u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
  1405		u32 owner_mask = GENMASK(30, cmdq->q.llq.owner_count_shift);
  1406		int ret = 0;
  1407	
  1408		/* 1. Allocate some space in the queue */
  1409		local_irq_save(flags);
  1410	
  1411		prod = atomic_fetch_add(n + sync + owner_val,
  1412					&cmdq->q.llq.atomic.prod);
  1413	
  1414		owner = !(prod & owner_mask);
  1415		llq.prod = prod_mask & prod;
  1416		head.prod = queue_inc_prod_n(&llq, n + sync);
  1417	
  1418		/*
  1419		 * Ensure it's safe to write the entries. For this, we need to ensure
  1420		 * that there is space in the queue from our prod pointer.
  1421		 */
  1422		space.cons = READ_ONCE(cmdq->q.llq.cons);
  1423		space.prod = llq.prod;
  1424		while (!queue_has_space(&space, n + sync)) {
  1425			if (arm_smmu_cmdq_poll_until_not_full(smmu, &space))
  1426				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
  1427	
  1428			space.prod = llq.prod;
  1429		}
  1430	
  1431		/*
  1432		 * 2. Write our commands into the queue
  1433		 * Dependency ordering from the space-checking while loop, above.
  1434		 */
  1435		arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
  1436	
  1437		prod = queue_inc_prod_n(&llq, n);
  1438		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
  1439		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
  1440	
  1441		/* 3. Mark our slots as valid, ensuring commands are visible first */
  1442		dma_wmb();
  1443		arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
  1444	
  1445		/* 4. If we are the owner, take control of the SMMU hardware */
  1446		if (owner) {
  1447			int owner_count;
  1448			u32 prod_tmp;
  1449	
  1450			/* a. Wait for previous owner to finish */
  1451			atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
  1452	
  1453			/* b. Stop gathering work by clearing the owned mask */
  1454			prod_tmp = atomic_fetch_andnot_relaxed(~prod_mask,
  1455							       &cmdq->q.llq.atomic.prod);
  1456			prod = prod_tmp & prod_mask;
  1457			owner_count = prod_tmp & owner_mask;
  1458			owner_count >>= cmdq->q.llq.owner_count_shift;
  1459	
  1460			/*
  1461			 * c. Wait for any gathered work to be written to the queue.
  1462			 * Note that we read our own entries so that we have the control
  1463			 * dependency required by (d).
  1464			 */
  1465			arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
  1466	
  1467			/*
  1468			 * In order to determine completion of the CMD_SYNCs, we must
  1469			 * ensure that the queue can't wrap twice without us noticing.
  1470			 * We achieve that by taking the cmdq lock as shared before
  1471			 * progressing the prod pointer.
  1472			 * The owner does this for all the non-owners it has gathered.
  1473			 * Otherwise, some non-owner(s) may lock the cmdq, blocking
  1474			 * cons being updating. This could be when the cmdq has just
  1475			 * become full. In this case, other sibling non-owners could be
  1476			 * blocked from progressing, leading to deadlock.
  1477			 */
  1478			arm_smmu_cmdq_shared_lock(cmdq, owner_count);
  1479	
  1480			/*
  1481			 * d. Advance the hardware prod pointer
  1482			 * Control dependency ordering from the entries becoming valid.
  1483			 */
  1484			writel_relaxed(prod, cmdq->q.prod_reg);
  1485	
  1486			/*
  1487			 * e. Tell the next owner we're done
  1488			 * Make sure we've updated the hardware first, so that we don't
  1489			 * race to update prod and potentially move it backwards.
  1490			 */
  1491			atomic_set_release(&cmdq->owner_prod, prod);
  1492		}
  1493	
  1494		/* 5. Since we always insert a CMD_SYNC, we must wait for it to complete */
  1495		llq.prod = queue_inc_prod_n(&llq, n);
  1496		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
  1497		if (ret)
  1498			dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
  1499					    llq.prod, readl_relaxed(cmdq->q.prod_reg),
  1500					    readl_relaxed(cmdq->q.cons_reg));
  1501	
  1502		/*
  1503		 * Try to unlock the cmdq lock. This will fail if we're the last reader,
  1504		 * in which case we can safely update cmdq->q.llq.cons
  1505		 */
  1506		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
  1507			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
  1508			arm_smmu_cmdq_shared_unlock(cmdq);
  1509		}
  1510	
  1511		local_irq_restore(flags);
  1512		return ret;
  1513	}
  1514	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
-------------- next part --------------
A non-text attachment was scrubbed...
Name: .config.gz
Type: application/gzip
Size: 30901 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20200623/3bd79709/attachment-0001.gz>


More information about the linux-arm-kernel mailing list