[PATCH v9 0/9] Make the SMMUv3 CD logic match the new STE design (part 2a/3)

Jason Gunthorpe jgg at nvidia.com
Tue Apr 30 17:02:06 PDT 2024


On Tue, Apr 30, 2024 at 02:11:35PM -0700, Nicolin Chen wrote:
> On Tue, Apr 30, 2024 at 02:21:32PM -0300, Jason Gunthorpe wrote:
> > v9:
> >  - Remove arm_smmu_clean_cd_entry() and instead zero the required CD bits
> >    inside arm_smmu_write_ctx_desc()
> 
> SVA sanity works well with v9 as my previous test on v7.

Yeah, very little changed in the main code after everything is
applied. Most of the edits were adjusting interior patches

Here is the diff from v7 to v9.

Thanks,
Jason

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 2e597102baf6e5..f872aeccd82041 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -411,8 +411,9 @@ config ARM_SMMU_V3_SVA
 	  and PRI.
 
 config ARM_SMMU_V3_KUNIT_TEST
-	tristate "KUnit tests for arm-smmu-v3 driver"  if !KUNIT_ALL_TESTS
+	bool "KUnit tests for arm-smmu-v3 driver"  if !KUNIT_ALL_TESTS
 	depends on KUNIT
+	depends on ARM_SMMU_V3_SVA
 	default KUNIT_ALL_TESTS
 	help
 	  Enable this option to unit-test arm-smmu-v3 driver functions.
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 014a997753a8a2..0b97054b3929b7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -2,6 +2,5 @@
 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
 arm_smmu_v3-objs-y += arm-smmu-v3.o
 arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
 arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
-
-obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index f56a2d38012b5c..34a977a0767d46 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -8,6 +8,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/sched/mm.h>
 #include <linux/slab.h>
+#include <kunit/visibility.h>
 
 #include "arm-smmu-v3.h"
 #include "../../io-pgtable-arm.h"
@@ -120,6 +121,7 @@ static u64 page_size_to_cd(void)
 	return ARM_LPAE_TCR_TG0_4K;
 }
 
+VISIBLE_IF_KUNIT
 void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 			  struct arm_smmu_master *master, struct mm_struct *mm,
 			  u16 asid)
@@ -172,8 +174,7 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 		 * disable is permitted. This speeds up cleanup for an unclean
 		 * exit if the device is still doing a lot of DMA.
 		 */
-		if (master->stall_enabled &&
-		    !(master->smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
+		if (!(master->smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			target->data[0] &=
 				cpu_to_le64(~(CTXDESC_CD_0_S | CTXDESC_CD_0_R));
 	}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
index 14c8e40712a70e..417804392ff089 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
@@ -26,6 +26,9 @@ static struct arm_smmu_ste abort_ste;
 static struct arm_smmu_device smmu = {
 	.features = ARM_SMMU_FEAT_STALLS | ARM_SMMU_FEAT_ATTR_TYPES_OVR
 };
+static struct mm_struct sva_mm = {
+	.pgd = (void *)0xdaedbeefdeadbeefULL,
+};
 
 static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry,
 						const __le64 *used_bits,
@@ -61,7 +64,7 @@ arm_smmu_test_writer_record_syncs(struct arm_smmu_entry_writer *writer)
 			     false);
 
 	test_writer->num_syncs += 1;
-	if (!(test_writer->entry[0] & writer->ops->v_bit)) {
+	if (!test_writer->entry[0]) {
 		test_writer->invalid_entry_written = true;
 	} else {
 		/*
@@ -96,13 +99,11 @@ arm_smmu_v3_test_debug_print_used_bits(struct arm_smmu_entry_writer *writer,
 }
 
 static const struct arm_smmu_entry_writer_ops test_ste_ops = {
-	.v_bit = cpu_to_le64(STRTAB_STE_0_V),
 	.sync = arm_smmu_test_writer_record_syncs,
 	.get_used = arm_smmu_get_ste_used,
 };
 
 static const struct arm_smmu_entry_writer_ops test_cd_ops = {
-	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
 	.sync = arm_smmu_test_writer_record_syncs,
 	.get_used = arm_smmu_get_cd_used,
 };
@@ -392,11 +393,8 @@ static void arm_smmu_test_make_sva_cd(struct arm_smmu_cd *cd, unsigned int asid)
 	struct arm_smmu_master master = {
 		.smmu = &smmu,
 	};
-	struct mm_struct mm = {
-		.pgd = (void *)0xdaedbeefdeadbeefULL,
-	};
 
-	arm_smmu_make_sva_cd(cd, &master, &mm, asid);
+	arm_smmu_make_sva_cd(cd, &master, &sva_mm, asid);
 }
 
 static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3ffaa3b34b44bf..15bad76cf84a61 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
 #include <linux/platform_device.h>
+#include <kunit/visibility.h>
 
 #include "arm-smmu-v3.h"
 #include "../../dma-iommu.h"
@@ -968,6 +969,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
  * would be nice if this was complete according to the spec, but minimally it
  * has to capture the bits this driver uses.
  */
+VISIBLE_IF_KUNIT
 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
 {
 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
@@ -1090,6 +1092,7 @@ static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
  * V=0 process. This relies on the IGNORED behavior described in the
  * specification.
  */
+VISIBLE_IF_KUNIT
 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
 			  const __le64 *target)
 {
@@ -1124,7 +1127,7 @@ void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
 		 * requires a breaking update, zero the V bit, write all qwords
 		 * but 0, then set qword 0
 		 */
-		unused_update[0] = entry[0] & (~writer->ops->v_bit);
+		unused_update[0] = 0;
 		entry_set(writer, entry, unused_update, 0, 1);
 		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
 		entry_set(writer, entry, target, 0, 1);
@@ -1221,7 +1224,7 @@ struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
 	}
 
 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
-		unsigned int idx = ssid >> CTXDESC_SPLIT;
+		unsigned int idx = ssid / CTXDESC_L2_ENTRIES;
 		struct arm_smmu_l1_ctx_desc *l1_desc;
 
 		l1_desc = &cd_table->l1_desc[idx];
@@ -1245,6 +1248,7 @@ struct arm_smmu_cd_writer {
 	unsigned int ssid;
 };
 
+VISIBLE_IF_KUNIT
 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
 {
 	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
@@ -1252,7 +1256,10 @@ void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
 		return;
 	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
 
-	/* EPD0 means T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED */
+	/*
+	 * If EPD0 is set by the make function it means
+	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
+	 */
 	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
 		used_bits[0] &= ~cpu_to_le64(
 			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
@@ -1273,7 +1280,6 @@ static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
 	.sync = arm_smmu_cd_writer_sync_entry,
 	.get_used = arm_smmu_get_cd_used,
-	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
 };
 
 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
@@ -1472,7 +1478,6 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
 	.sync = arm_smmu_ste_writer_sync_entry,
 	.get_used = arm_smmu_get_ste_used,
-	.v_bit = cpu_to_le64(STRTAB_STE_0_V),
 };
 
 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
@@ -1502,6 +1507,7 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
 	}
 }
 
+VISIBLE_IF_KUNIT
 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
 {
 	memset(target, 0, sizeof(*target));
@@ -1510,6 +1516,7 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
 }
 
+VISIBLE_IF_KUNIT
 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
 			      struct arm_smmu_ste *target)
 {
@@ -1523,6 +1530,7 @@ void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
 							 STRTAB_STE_1_SHCFG_INCOMING));
 }
 
+VISIBLE_IF_KUNIT
 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
 			       struct arm_smmu_master *master)
 {
@@ -1573,6 +1581,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
 	}
 }
 
+VISIBLE_IF_KUNIT
 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
 				 struct arm_smmu_master *master,
 				 struct arm_smmu_domain *smmu_domain)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 0455498d24c730..1242a086c9f948 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -275,8 +275,7 @@ struct arm_smmu_ste {
  * 2lvl: at most 1024 L1 entries,
  *       1024 lazy entries per table.
  */
-#define CTXDESC_SPLIT			10
-#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
+#define CTXDESC_L2_ENTRIES		1024
 
 #define CTXDESC_L1_DESC_DWORDS		1
 #define CTXDESC_L1_DESC_V		(1UL << 0)
@@ -745,16 +744,15 @@ struct arm_smmu_entry_writer {
 };
 
 struct arm_smmu_entry_writer_ops {
-	__le64 v_bit;
 	void (*get_used)(const __le64 *entry, __le64 *used);
 	void (*sync)(struct arm_smmu_entry_writer *writer);
 };
 
+#if IS_ENABLED(CONFIG_KUNIT)
 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits);
-void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits);
 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur,
 			  const __le64 *target);
-
+void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits);
 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target);
 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
 			      struct arm_smmu_ste *target);
@@ -766,6 +764,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
 void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 			  struct arm_smmu_master *master, struct mm_struct *mm,
 			  u16 asid);
+#endif
 
 static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {



More information about the linux-arm-kernel mailing list