[PATCH 7/8] swiotlb: respect min_align_mask

Christoph Hellwig hch at lst.de
Thu Feb 4 14:30:34 EST 2021


Respect the min_align_mask in struct device_dma_parameters in swiotlb.

There are two parts to it (both sketched in the example below):
 1) for the low bits of the alignment inside the IO TLB slot, just
    extend the size of the allocation and leave the start of the slot
    empty
 2) for the high bits, ensure we find a slot that matches the high bits
    of the alignment, to avoid wasting too much memory

Based on an earlier patch from Jianxiong Gao <jxgao at google.com>.
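
For context, a driver would opt in via dma_set_min_align_mask(), added
earlier in this series.  A minimal, hypothetical probe fragment (the
example_probe name and the 4 KiB mask value are assumptions for
illustration, not taken from this patch):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int example_probe(struct device *dev)
{
	/*
	 * Hypothetical: the device requires the low 12 address bits of
	 * the original buffer to survive bouncing, e.g. because it
	 * derives in-page offsets from the DMA address.
	 */
	dma_set_min_align_mask(dev, SZ_4K - 1);
	return 0;
}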

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 kernel/dma/swiotlb.c | 49 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 6a2439826a1ba4..ab3192142b9906 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -468,6 +468,18 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 	}
 }
 
+/*
+ * Return the offset into an IO TLB slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+	unsigned int min_align_mask = dma_get_min_align_mask(dev);
+
+	if (!min_align_mask)
+		return 0;
+	return addr & min_align_mask & ((1 << IO_TLB_SHIFT) - 1);
+}
+
 /*
  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
  */
@@ -478,6 +490,16 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
 	return nr_slots(boundary_mask + 1);
 }
 
+static inline bool check_alignment(phys_addr_t orig_addr,
+		dma_addr_t tbl_dma_addr, unsigned int index,
+		unsigned int min_align_mask)
+{
+	if (!min_align_mask)
+		return true;
+	return ((tbl_dma_addr + (index << IO_TLB_SHIFT)) & min_align_mask) ==
+		(orig_addr & min_align_mask);
+}
+
 static unsigned int wrap_index(unsigned int index)
 {
 	if (index >= io_tlb_nslabs)
@@ -489,9 +511,11 @@ static unsigned int wrap_index(unsigned int index)
  * Find a suitable number of IO TLB entries that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int find_slots(struct device *dev, size_t alloc_size,
-		dma_addr_t tbl_dma_addr)
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+		size_t alloc_size, dma_addr_t tbl_dma_addr)
 {
+	unsigned int min_align_mask = dma_get_min_align_mask(dev) &
+			~((1 << IO_TLB_SHIFT) - 1);
 	unsigned int max_slots = get_max_slots(dma_get_seg_boundary(dev));
 	unsigned int nslots = nr_slots(alloc_size), stride = 1;
 	unsigned int index, wrap, count = 0, i;
@@ -503,7 +527,9 @@ static int find_slots(struct device *dev, size_t alloc_size,
 	 * For mappings greater than or equal to a page, we limit the stride
 	 * (and hence alignment) to a page size.
 	 */
-	if (alloc_size >= PAGE_SIZE)
+	if (min_align_mask)
+		stride = (min_align_mask + 1) >> IO_TLB_SHIFT;
+	else if (alloc_size >= PAGE_SIZE)
 		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
 
 	spin_lock_irqsave(&io_tlb_lock, flags);
@@ -512,6 +538,12 @@ static int find_slots(struct device *dev, size_t alloc_size,
 
 	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
 	do {
+		if (!check_alignment(orig_addr, tbl_dma_addr, index,
+				     min_align_mask)) {
+			index = wrap_index(index + 1);
+			continue;
+		}
+
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
 		 * contiguous buffers, we allocate the buffers from that slot
@@ -557,6 +589,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 {
 	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, io_tlb_start) &
 			dma_get_seg_boundary(dev);
+	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned int index, i;
 	phys_addr_t tlb_addr;
 
@@ -572,7 +605,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
-	index = find_slots(dev, alloc_size, tbl_dma_addr);
+	alloc_size += offset;
+	index = find_slots(dev, orig_addr, alloc_size, tbl_dma_addr);
 	if (index == -1) {
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
@@ -589,7 +623,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	for (i = 0; i < nr_slots(alloc_size); i++)
 		io_tlb_orig_addr[index + i] = orig_addr + (i << IO_TLB_SHIFT);
 
-	tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+	tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT) + offset;
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -604,8 +638,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long flags;
-	int i, count, nslots = nr_slots(alloc_size);
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+	int i, count, nslots = nr_slots(alloc_size + offset);
+	int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	/*
-- 
2.29.2