@@ -454,6 +454,14 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
+
/*
* Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
*/
@@ -475,24 +483,29 @@ static unsigned int wrap_index(unsigned int index)
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
*/
-static int find_slots(struct device *dev, size_t alloc_size)
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size)
{
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
- unsigned int nslots = nr_slots(alloc_size), stride = 1;
+ unsigned int iotlb_align_mask =
+ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ unsigned int nslots = nr_slots(alloc_size), stride;
unsigned int index, wrap, count = 0, i;
unsigned long flags;
BUG_ON(!nslots);
/*
- * For mappings greater than or equal to a page, we limit the stride
- * (and hence alignment) to a page size.
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we found an aligned one. For allocations of
+ * PAGE_SIZE or larger only look for page aligned allocations.
*/
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
- stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
spin_lock_irqsave(&io_tlb_lock, flags);
if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
@@ -500,6 +513,12 @@ static int find_slots(struct device *dev, size_t alloc_size)
index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
do {
+ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask)) {
+ index = wrap_index(index + 1);
+ continue;
+ }
+
/*
* If we find a slot that indicates we have 'nslots' number of
* contiguous buffers, we allocate the buffers from that slot
@@ -543,6 +562,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned int index, i;
phys_addr_t tlb_addr;
@@ -558,7 +578,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
- index = find_slots(dev, alloc_size);
+ index = find_slots(dev, orig_addr, alloc_size + offset);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -572,10 +592,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nr_slots(alloc_size); i++)
+ for (i = 0; i < nr_slots(alloc_size + offset); i++)
io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
- tlb_addr = slot_addr(io_tlb_start, index);
+ tlb_addr = slot_addr(io_tlb_start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -590,8 +610,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long flags;
- int i, count, nslots = nr_slots(alloc_size);
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+ int i, count, nslots = nr_slots(alloc_size + offset);
+ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = io_tlb_orig_addr[index];
/*