[PATCH v2 2/4] iommu/io-pgtable-arm: Add read_and_clear_dirty() support
Shameer Kolothum
shameerali.kolothum.thodi at huawei.com
Thu Feb 22 01:49:21 PST 2024
The .read_and_clear_dirty() IOMMU domain op takes care of reading the dirty
bits (i.e. the PTE has DBM set and AP[2] clear) and marshalling them into a
bitmap of a given page size.
While reading the dirty bits we also set the PTE AP[2] bit to mark it as
writable-clean, unless IOMMU_DIRTY_NO_CLEAR was passed in the
read_and_clear_dirty() flags.
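For reference, the PTE dirty-state encoding this relies on can be sketched
as below. This is illustrative only and not part of the patch; the helper
names are made up, but the bit positions match the defines added further
down:

  /* Illustrative sketch, not part of the patch: stage-1 LPAE PTE
   * dirty state with HTTU. Helper names are hypothetical. */
  #define PTE_DBM        (1ULL << 51)  /* Dirty Bit Modifier */
  #define PTE_AP_RDONLY  (1ULL << 7)   /* AP[2]: 1 == read-only */

  /* Writable-dirty: DBM set and AP[2] clear, i.e. the SMMU cleared
   * AP[2] on the first DMA write to the page. */
  static inline bool pte_writable_dirty(u64 pte)
  {
          return (pte & (PTE_DBM | PTE_AP_RDONLY)) == PTE_DBM;
  }

  /* Back to writable-clean: set AP[2] again, so the next DMA write
   * clears it and marks the page dirty once more. */
  static inline void pte_mk_writable_clean(u64 *ptep)
  {
          *ptep |= PTE_AP_RDONLY;
  }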
Structure it in a way that the IOPTE walker is generic: a function pointer
passed into the walk decides what to do on a per-PTE basis.
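As an example of how this op gets consumed, an IOMMU driver's iommu_domain
dirty-tracking op would forward to it roughly as below. Only
ops->read_and_clear_dirty comes from this patch; the domain wrapper and
its field names are assumptions for illustration:

  /* Sketch of a hypothetical caller; the domain type and its
   * pgtbl_ops field are assumed, not part of this patch. */
  static int my_read_and_clear_dirty(struct iommu_domain *domain,
                                     unsigned long iova, size_t size,
                                     unsigned long flags,
                                     struct iommu_dirty_bitmap *dirty)
  {
          struct my_domain *d = to_my_domain(domain);  /* hypothetical */
          struct io_pgtable_ops *ops = d->pgtbl_ops;   /* hypothetical */

          if (!ops || !ops->read_and_clear_dirty)
                  return -ENODEV;

          return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
  }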
Signed-off-by: Joao Martins <joao.m.martins at oracle.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
---
drivers/iommu/io-pgtable-arm.c | 128 ++++++++++++++++++++++++++++++++-
1 file changed, 127 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f7828a7aad41..1ce7b7a3c1e8 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -75,6 +75,7 @@
 #define ARM_LPAE_PTE_NSTABLE            (((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN                 (((arm_lpae_iopte)3) << 53)
+#define ARM_LPAE_PTE_DBM                (((arm_lpae_iopte)1) << 51)
 #define ARM_LPAE_PTE_AF                 (((arm_lpae_iopte)1) << 10)
 #define ARM_LPAE_PTE_SH_NS              (((arm_lpae_iopte)0) << 8)
 #define ARM_LPAE_PTE_SH_OS              (((arm_lpae_iopte)2) << 8)
@@ -84,7 +85,7 @@
 #define ARM_LPAE_PTE_ATTR_LO_MASK       (((arm_lpae_iopte)0x3ff) << 2)
 /* Ignore the contiguous bit for block splitting */
-#define ARM_LPAE_PTE_ATTR_HI_MASK       (((arm_lpae_iopte)6) << 52)
+#define ARM_LPAE_PTE_ATTR_HI_MASK       (((arm_lpae_iopte)0xd) << 51)
 #define ARM_LPAE_PTE_ATTR_MASK          (ARM_LPAE_PTE_ATTR_LO_MASK |   \
                                          ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
@@ -93,6 +94,9 @@
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV          (((arm_lpae_iopte)1) << 6)
 #define ARM_LPAE_PTE_AP_RDONLY          (((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RDONLY_BIT      7
+#define ARM_LPAE_PTE_AP_WRITABLE        (ARM_LPAE_PTE_AP_RDONLY | \
+                                         ARM_LPAE_PTE_DBM)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT     2
 #define ARM_LPAE_PTE_nG                 (((arm_lpae_iopte)1) << 11)
@@ -729,6 +733,127 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
         return iopte_to_paddr(pte, data) | iova;
 }
 
+struct arm_lpae_iopte_read_dirty {
+        unsigned long flags;
+        struct iommu_dirty_bitmap *dirty;
+};
+
+typedef int (*io_pgtable_visit_fn_t)(unsigned long iova, size_t size,
+                                     arm_lpae_iopte *ptep, void *opaque);
+
+struct io_pgtable_walk_data {
+        const io_pgtable_visit_fn_t fn;
+        void *opaque;
+        u64 addr;
+        const u64 end;
+};
+
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+                                 struct io_pgtable_walk_data *walk_data,
+                                 arm_lpae_iopte *ptep,
+                                 int lvl);
+
+static int __arm_lpae_read_and_clear_dirty(unsigned long iova, size_t size,
+                                           arm_lpae_iopte *ptep, void *opaque)
+{
+        struct arm_lpae_iopte_read_dirty *arg = opaque;
+        struct iommu_dirty_bitmap *dirty = arg->dirty;
+        arm_lpae_iopte pte;
+
+        pte = READ_ONCE(*ptep);
+        if (WARN_ON(!pte))
+                return -EINVAL;
+
+        /* Writable-clean (both DBM and AP[2] set): nothing to report */
+        if ((pte & ARM_LPAE_PTE_AP_WRITABLE) == ARM_LPAE_PTE_AP_WRITABLE)
+                return 0;
+
+        iommu_dirty_bitmap_record(dirty, iova, size);
+        /* Mark the PTE writable-clean again unless asked not to */
+        if (!(arg->flags & IOMMU_DIRTY_NO_CLEAR))
+                set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)ptep);
+        return 0;
+}
+
+static int io_pgtable_visit(struct arm_lpae_io_pgtable *data,
+                            struct io_pgtable_walk_data *walk_data,
+                            arm_lpae_iopte *ptep, int lvl)
+{
+        struct io_pgtable *iop = &data->iop;
+        arm_lpae_iopte pte = READ_ONCE(*ptep);
+
+        if (WARN_ON(!pte))
+                return -EINVAL;
+
+        if (iopte_leaf(pte, lvl, iop->fmt)) {
+                size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+                int ret;
+
+                ret = walk_data->fn(walk_data->addr, size, ptep,
+                                    walk_data->opaque);
+                if (ret)
+                        return ret;
+
+                walk_data->addr += size;
+                return 0;
+        }
+
+        ptep = iopte_deref(pte, data);
+        return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1);
+}
+
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+                                 struct io_pgtable_walk_data *walk_data,
+                                 arm_lpae_iopte *ptep,
+                                 int lvl)
+{
+        u32 idx;
+        int max_entries, ret;
+
+        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+                return -EINVAL;
+
+        if (lvl == data->start_level)
+                max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
+        else
+                max_entries = ARM_LPAE_PTES_PER_TABLE(data);
+
+        for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
+             (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
+                arm_lpae_iopte *pteref = &ptep[idx];
+
+                ret = io_pgtable_visit(data, walk_data, pteref, lvl);
+                if (ret)
+                        return ret;
+        }
+
+        return 0;
+}
+
+static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
+                                         unsigned long iova, size_t size,
+                                         unsigned long flags,
+                                         struct iommu_dirty_bitmap *dirty)
+{
+        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+        struct io_pgtable_cfg *cfg = &data->iop.cfg;
+        struct arm_lpae_iopte_read_dirty arg = {
+                .flags = flags, .dirty = dirty,
+        };
+        struct io_pgtable_walk_data walk_data = {
+                .fn = __arm_lpae_read_and_clear_dirty,
+                .opaque = &arg,
+                .addr = iova,
+                .end = iova + size - 1,
+        };
+        arm_lpae_iopte *ptep = data->pgd;
+        int lvl = data->start_level;
+        long iaext = (s64)iova >> cfg->ias;
+
+        if (WARN_ON(!size))
+                return -EINVAL;
+
+        if (WARN_ON(iaext))
+                return -EINVAL;
+
+        if (data->iop.fmt != ARM_64_LPAE_S1)
+                return -EINVAL;
+
+        return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl);
+}
+
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
unsigned long granule, page_sizes;
@@ -807,6 +932,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
                 .map_pages      = arm_lpae_map_pages,
                 .unmap_pages    = arm_lpae_unmap_pages,
                 .iova_to_phys   = arm_lpae_iova_to_phys,
+                .read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
         };
return data;
--
2.34.1