[PATCH 5/5] iommu/io-pgtable: Add ARMv7 short descriptor support
Yong Wu
yong.wu at mediatek.com
Tue Dec 8 00:58:11 PST 2015
Hi Robin,

Thanks very much for the rewrite; it looks much cleaner.
It works well in my selftest, so I will push it to our Chrome branch
and ask our video/display people to test it further.
Only a few small comments below.
On Fri, 2015-12-04 at 17:53 +0000, Robin Murphy wrote:
> Add a nearly-complete ARMv7 short descriptor implementation, omitting
> only a few legacy and CPU-centric aspects which shouldn't be necessary
> for IOMMU API use anyway.
>
> Signed-off-by: Yong Wu <yong.wu at mediatek.com>
> Signed-off-by: Robin Murphy <robin.murphy at arm.com>
> ---
[...]
> +/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
> +#define ARM_V7S_PTE_TYPE_TABLE 0x1
> +#define ARM_V7S_PTE_TYPE_PAGE 0x2
> +#define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1
From the spec, this is the Large page type. Should we add a comment
like /* Large page */ here for readability, and /* Supersection */ for
CONT_SECTION too?
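i.e. something like this, just to illustrate against the existing defines:

#define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1	/* Large page */
...
#define ARM_V7S_CONT_SECTION		BIT(18)	/* Supersection */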
> +
> +#define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0)
> +#define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl < 2 && ((pte) & ARM_V7S_PTE_TYPE_TABLE))
> +
> +/* Page table bits */
> +#define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl)))
> +#define ARM_V7S_ATTR_B BIT(2)
> +#define ARM_V7S_ATTR_C BIT(3)
> +#define ARM_V7S_ATTR_NS_TABLE BIT(3)
> +#define ARM_V7S_ATTR_NS_SECTION BIT(19)
> +
> +#define ARM_V7S_CONT_SECTION BIT(18)
> +#define ARM_V7S_CONT_PAGE_XN_SHIFT 15
> +
[...]
> +static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
> + struct arm_v7s_io_pgtable *data)
> +{
> + struct device *dev = data->iop.cfg.iommu_dev;
> + dma_addr_t dma;
> + size_t size = ARM_V7S_TABLE_SIZE(lvl);
> + void *table = NULL;
> +
> + if (lvl == 1)
> + table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
> + else if (lvl == 2)
> + table = kmem_cache_zalloc(data->l2_tables, gfp);
> + if (table && !selftest_running) {
> + dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
> + if (dma_mapping_error(dev, dma))
> + goto out_free;
> + /*
> + * We depend on the IOMMU being able to work with any physical
> + * address directly, so if the DMA layer suggests otherwise by
> + * translating or truncating them, that bodes very badly...
> + */
> + if (dma != virt_to_phys(table))
> + goto out_unmap;
> + }
There is something special when we use kmem_cache here: we store the
physical address in the pagetable and later get the virtual address back
via phys_to_virt in order to free it.
That is different from the normal case of keeping and freeing the virtual
address, so kmemleak cannot see a reference to the object.
Do we need to add kmemleak_ignore() here?
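Just to show where I mean, a rough sketch (assuming only the level-2 tables
from the slab cache need it, plus #include <linux/kmemleak.h>):

	else if (lvl == 2) {
		table = kmem_cache_zalloc(data->l2_tables, gfp);
		/*
		 * Only the physical address goes into the pagetable, so
		 * tell kmemleak not to report the object as leaked.
		 */
		if (table)
			kmemleak_ignore(table);
	}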
> + return table;
> +
> +out_unmap:
> + dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
> + dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
> +out_free:
> + if (lvl == 1)
> + free_pages((unsigned long)table, get_order(size));
> + else
> + kmem_cache_free(data->l2_tables, table);
> + return NULL;
> +}
> +
[...]
> +static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
> + unsigned long iova, phys_addr_t paddr, int prot,
> + int lvl, int num_entries, arm_v7s_iopte *ptep)
> +{
> + struct io_pgtable_cfg *cfg = &data->iop.cfg;
> + arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
> + int i;
> +
> + for (i = 0; i < num_entries; i++)
> + if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) {
> + /*
> + * We need to unmap and free the old table before
> + * overwriting it with a block entry.
> + */
> + arm_v7s_iopte *tblp;
> + size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
> +
> + tblp = ptep + i - ARM_V7S_LVL_IDX(iova, lvl);
> + if (WARN_ON(__arm_v7s_unmap(data, iova, sz, lvl, tblp)
> + != sz))
> + return -EINVAL;
This looks like it comes from Will's "iommu/io-pgtable-arm: Unmap and free
table when overwriting with block".
But if we have IO_PGTABLE_QUIRK_TLBI_ON_MAP, condition (1) in that commit's
comment doesn't apply, so we don't need to care whether the existing entry
is a pagetable or not; could we always return -EEXIST here?
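i.e. for the quirk case, something along these lines (only a sketch of what
I mean):

	for (i = 0; i < num_entries; i++)
		if (ptep[i]) {
			/*
			 * With IO_PGTABLE_QUIRK_TLBI_ON_MAP we never have to
			 * replace an existing table, so any non-empty entry
			 * means the caller must unmap first.
			 */
			WARN_ON(!selftest_running);
			return -EEXIST;
		}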
> + } else if (ptep[i]) {
> + /* We require an unmap first */
> + WARN_ON(!selftest_running);
> + return -EEXIST;
> + }
> +
[...]
> +static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
> + unsigned long iova, size_t size, int lvl,
> + arm_v7s_iopte *ptep)
> +{
> + arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
> + struct io_pgtable_cfg *cfg = &data->iop.cfg;
> + const struct iommu_gather_ops *tlb = cfg->tlb;
> + void *cookie = data->iop.cookie;
> + int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
> +
> + /* Something went horribly wrong and we ran out of page table */
> + if (WARN_ON(lvl > 2))
> + return 0;
> +
> + idx = ARM_V7S_LVL_IDX(iova, lvl);
> + ptep += idx;
> + do {
> + if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i])))
> + return 0;
> + pte[i] = ptep[i];
> + } while (++i < num_entries);
> +
> + /*
> + * If we've hit a contiguous 'large page' entry at this level, it
> + * needs splitting first, unless we're unmapping the whole lot.
> + */
> + if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl))
> + arm_v7s_split_cont(data, iova, idx, lvl, ptep);
> +
> + /* If the size matches this level, we're in the right place */
> + if (num_entries) {
> + size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);
> +
> + __arm_v7s_set_pte(ptep, 0, num_entries, cfg);
> +
> + for (i = 0; i < num_entries; i++) {
> + if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
> + /* Also flush any partial walks */
> + tlb->tlb_add_flush(iova, blk_size,
> + ARM_V7S_BLOCK_SIZE(2),
> + false, cookie);
> + tlb->tlb_sync(cookie);
The MTK IOMMU driver gets a warning here in my test.
There is a tlb_sync here, and another one in arm_v7s_unmap, so the flow is:

	tlb->tlb_add_flush(xxx)
	tlb->tlb_sync()
	tlb->tlb_sync()

Our tlb_sync implementation looks like this:
static void mtk_iommu_tlb_sync(void *cookie)
{
	struct mtk_iommu_data *data = cookie;
	int ret;
	u32 tmp;

	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp,
					tmp != 0, 10, 100000);
	if (ret) {
		dev_warn(data->dev,
			 "Partial TLB flush timed out, falling back to full flush\n");
		mtk_iommu_tlb_flush_all(cookie);
	}
	/* Clear the CPE status */
	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
}
At the end of this function we have to write 0 to clear the CPE status so
that the hardware can report it again next time.
So if we call tlb_sync twice in a row, the second call times out and we see
this log:

	Partial TLB flush timed out, falling back to full flush

I don't know whether this is a peculiarity of our hardware; is a
back-to-back sync like this OK for arm-smmu.c?
Do you have any suggestion about this?
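One idea on our side (just a sketch; "tlb_flush_active" would be a new flag
in struct mtk_iommu_data, set in our tlb_add_flush callback) is to skip the
poll when nothing has been queued since the last sync:

static void mtk_iommu_tlb_sync(void *cookie)
{
	struct mtk_iommu_data *data = cookie;
	int ret;
	u32 tmp;

	/* Avoid timing out when sync is called with nothing to wait for */
	if (!data->tlb_flush_active)
		return;

	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp,
					tmp != 0, 10, 100000);
	if (ret) {
		dev_warn(data->dev,
			 "Partial TLB flush timed out, falling back to full flush\n");
		mtk_iommu_tlb_flush_all(cookie);
	}
	/* Clear the CPE status */
	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
	data->tlb_flush_active = false;
}

That would make the second, redundant sync harmless without changing the
io-pgtable code, but I'm not sure it is the right place to fix it.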
> + ptep = iopte_deref(pte[i], lvl);
> + __arm_v7s_free_table(ptep, lvl + 1, data);
> + } else {
> + tlb->tlb_add_flush(iova, blk_size, blk_size,
> + true, cookie);
> + }
> + iova += blk_size;
> + }
> + return size;
> + } else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
> + /*
> + * Insert a table at the next level to map the old region,
> + * minus the part we want to unmap
> + */
> + return arm_v7s_split_blk_unmap(data, iova, size, ptep);
> + }
> +
> + /* Keep on walkin' */
> + ptep = iopte_deref(pte[0], lvl);
> + return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
> +}
> +
> +static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
> + size_t size)
> +{
> + size_t unmapped;
> + struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + struct io_pgtable *iop = &data->iop;
> +
> + unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd);
> + if (unmapped)
> + iop->cfg.tlb->tlb_sync(iop->cookie);
> +
> + return unmapped;
> +}
> +
> +static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
> + unsigned long iova)
> +{
> + struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + arm_v7s_iopte *ptep = data->pgd, pte = ARM_V7S_PTE_TYPE_TABLE;
> + int lvl = 0;
> + u32 mask;
> +
> + while (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
> + pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
> + ptep = iopte_deref(pte, lvl);
> + }
Since we always want to enter this loop at least once, could we use a
do {} while instead? Then we wouldn't need to initialise pte.
Also, the valid levels in this file are 1 and 2, but here "lvl" is
initialised to 0. Should we add an enum for the levels to make this safer
and more readable?
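i.e. something like:

	arm_v7s_iopte *ptep = data->pgd, pte;
	int lvl = 0;

	do {
		pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
		ptep = iopte_deref(pte, lvl);
	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));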
> + if (!ARM_V7S_PTE_IS_VALID(pte))
> + return 0;
> +
> + mask = ARM_V7S_LVL_MASK(lvl);
> + if (arm_v7s_pte_is_cont(pte, lvl))
> + mask *= ARM_V7S_CONT_PAGES;
> + return (pte & mask) | (iova & ~mask);
> +}
> +
[...]