[PATCH 2/3 v2] net/macb: change RX path for GEM

Nicolas Ferre nicolas.ferre at atmel.com
Mon Dec 17 08:01:58 EST 2012


GEM is able to adapt its DMA buffer size, so change
the RX path to take advantage of this possibility and
remove all kind of memcpy in this path.
This modification introduces function pointers for managing
differences between MACB and GEM adapter type.

Signed-off-by: Nicolas Ferre <nicolas.ferre at atmel.com>
---
 drivers/net/ethernet/cadence/macb.c | 308 ++++++++++++++++++++++++++++++------
 drivers/net/ethernet/cadence/macb.h |  13 ++
 2 files changed, 272 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 50f8669..16ec751 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -33,7 +33,6 @@
 #include "macb.h"
 
 #define MACB_RX_BUFFER_SIZE	128
-#define GEM_RX_BUFFER_SIZE	2048
 #define RX_BUFFER_MULTIPLE	64  /* bytes */
 #define RX_RING_SIZE		512 /* must be power of 2 */
 #define RX_RING_BYTES		(sizeof(struct macb_dma_desc) * RX_RING_SIZE)
@@ -527,6 +526,155 @@ static void macb_tx_interrupt(struct macb *bp)
 		netif_wake_queue(bp->dev);
 }
 
+static void gem_rx_refill(struct macb *bp)
+{
+	unsigned int		entry;
+	struct sk_buff		*skb;
+	struct macb_dma_desc	*desc;
+	dma_addr_t		paddr;
+
+	while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, RX_RING_SIZE) > 0) {
+		u32 addr, ctrl;
+
+		entry = macb_rx_ring_wrap(bp->rx_prepared_head);
+		desc = &bp->rx_ring[entry];
+
+		/* Make hw descriptor updates visible to CPU */
+		rmb();
+
+		addr = desc->addr;
+		ctrl = desc->ctrl;
+		bp->rx_prepared_head++;
+
+		if ((addr & MACB_BIT(RX_USED)))
+			continue;
+
+		if (bp->rx_skbuff[entry] == NULL) {
+			/* allocate sk_buff for this free entry in ring */
+			skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
+			if (unlikely(skb == NULL)) {
+				netdev_err(bp->dev,
+					   "Unable to allocate sk_buff\n");
+				break;
+			}
+			bp->rx_skbuff[entry] = skb;
+
+			/* now fill corresponding descriptor entry */
+			paddr = dma_map_single(&bp->pdev->dev, skb->data,
+					       bp->rx_buffer_size, DMA_FROM_DEVICE);
+
+			if (entry == RX_RING_SIZE - 1)
+				paddr |= MACB_BIT(RX_WRAP);
+			bp->rx_ring[entry].addr = paddr;
+			bp->rx_ring[entry].ctrl = 0;
+
+			/* properly align Ethernet header */
+			skb_reserve(skb, NET_IP_ALIGN);
+		}
+	}
+
+	/* Make descriptor updates visible to hardware */
+	wmb();
+
+	netdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n",
+		   bp->rx_prepared_head, bp->rx_tail);
+}
+
+/* Mark DMA descriptors from begin up to and not including end as unused */
+static void discard_partial_frame(struct macb *bp, unsigned int begin,
+				  unsigned int end)
+{
+	unsigned int frag;
+
+	for (frag = begin; frag != end; frag++) {
+		struct macb_dma_desc *desc = macb_rx_desc(bp, frag);
+		desc->addr &= ~MACB_BIT(RX_USED);
+	}
+
+	/* Make descriptor updates visible to hardware */
+	wmb();
+
+	/*
+	 * When this happens, the hardware stats registers for
+	 * whatever caused this is updated, so we don't have to record
+	 * anything.
+	 */
+}
+
+static int gem_rx(struct macb *bp, int budget)
+{
+	unsigned int		len;
+	unsigned int		entry;
+	struct sk_buff		*skb;
+	struct macb_dma_desc	*desc;
+	int			count = 0;
+
+	while (count < budget) {
+		u32 addr, ctrl;
+
+		entry = macb_rx_ring_wrap(bp->rx_tail);
+		desc = &bp->rx_ring[entry];
+
+		/* Make hw descriptor updates visible to CPU */
+		rmb();
+
+		addr = desc->addr;
+		ctrl = desc->ctrl;
+
+		if (!(addr & MACB_BIT(RX_USED)))
+			break;
+
+		desc->addr &= ~MACB_BIT(RX_USED);
+		bp->rx_tail++;
+		count++;
+
+		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
+			netdev_err(bp->dev,
+				   "not whole frame pointed by descriptor\n");
+			bp->stats.rx_dropped++;
+			break;
+		}
+		skb = bp->rx_skbuff[entry];
+		if (unlikely(!skb)) {
+			netdev_err(bp->dev,
+				   "inconsistent Rx descriptor chain\n");
+			bp->stats.rx_dropped++;
+			break;
+		}
+		/* now everything is ready for receiving packet */
+		bp->rx_skbuff[entry] = NULL;
+		len = MACB_BFEXT(RX_FRMLEN, ctrl);
+
+		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
+
+		skb_put(skb, len);
+		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr));
+		dma_unmap_single(&bp->pdev->dev, addr,
+				 len, DMA_FROM_DEVICE);
+
+		skb->protocol = eth_type_trans(skb, bp->dev);
+		skb_checksum_none_assert(skb);
+
+		bp->stats.rx_packets++;
+		bp->stats.rx_bytes += skb->len;
+
+#if defined(DEBUG) && defined(VERBOSE_DEBUG)
+		netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+			    skb->len, skb->csum);
+		print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1,
+			       skb->mac_header, 16, true);
+		print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_ADDRESS, 16, 1,
+			       skb->data, 32, true);
+#endif
+
+		netif_receive_skb(skb);
+	}
+
+	gem_rx_refill(bp);
+
+	return count;
+}
+
 static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 			 unsigned int last_frag)
 {
@@ -605,27 +753,6 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	return 0;
 }
 
-/* Mark DMA descriptors from begin up to and not including end as unused */
-static void discard_partial_frame(struct macb *bp, unsigned int begin,
-				  unsigned int end)
-{
-	unsigned int frag;
-
-	for (frag = begin; frag != end; frag++) {
-		struct macb_dma_desc *desc = macb_rx_desc(bp, frag);
-		desc->addr &= ~MACB_BIT(RX_USED);
-	}
-
-	/* Make descriptor updates visible to hardware */
-	wmb();
-
-	/*
-	 * When this happens, the hardware stats registers for
-	 * whatever caused this is updated, so we don't have to record
-	 * anything.
-	 */
-}
-
 static int macb_rx(struct macb *bp, int budget)
 {
 	int received = 0;
@@ -686,7 +813,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
 	netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
 		   (unsigned long)status, budget);
 
-	work_done = macb_rx(bp, budget);
+	work_done = bp->macbgem_ops.mog_rx(bp, budget);
 	if (work_done < budget) {
 		napi_complete(napi);
 
@@ -862,29 +989,63 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void macb_init_rx_buffer_size(struct macb *bp)
+static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 {
 	if (!macb_is_gem(bp)) {
 		bp->rx_buffer_size = MACB_RX_BUFFER_SIZE;
 	} else {
-		bp->rx_buffer_size = GEM_RX_BUFFER_SIZE;
+		bp->rx_buffer_size = size;
 
-		if (bp->rx_buffer_size > PAGE_SIZE) {
-			netdev_warn(bp->dev,
-				    "RX buffer cannot be bigger than PAGE_SIZE, shrinking\n");
-			bp->rx_buffer_size = PAGE_SIZE;
-		}
 		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
-			netdev_warn(bp->dev,
-				    "RX buffer must be multiple of %d bytes, shrinking\n",
+			netdev_dbg(bp->dev,
+				    "RX buffer must be multiple of %d bytes, expanding\n",
 				    RX_BUFFER_MULTIPLE);
 			bp->rx_buffer_size =
-				rounddown(bp->rx_buffer_size, RX_BUFFER_MULTIPLE);
+				roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE);
 		}
-		bp->rx_buffer_size = max(RX_BUFFER_MULTIPLE, GEM_RX_BUFFER_SIZE);
 	}
+
+	netdev_dbg(bp->dev, "mtu [%d] rx_buffer_size [%d]\n",
+		   bp->dev->mtu, bp->rx_buffer_size);
 }
 
+static void gem_free_rx_buffers(struct macb *bp)
+{
+	struct sk_buff		*skb;
+	struct macb_dma_desc	*desc;
+	dma_addr_t		addr;
+	int i;
+
+	if (!bp->rx_skbuff)
+		return;
+
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		skb = bp->rx_skbuff[i];
+
+		if (skb == NULL)
+			continue;
+
+		desc = &bp->rx_ring[i];
+		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+		dma_unmap_single(&bp->pdev->dev, addr, skb->len,
+				 DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		skb = NULL;
+	}
+
+	kfree(bp->rx_skbuff);
+	bp->rx_skbuff = NULL;
+}
+
+static void macb_free_rx_buffers(struct macb *bp)
+{
+	if (bp->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
+				  RX_RING_SIZE * bp->rx_buffer_size,
+				  bp->rx_buffers, bp->rx_buffers_dma);
+		bp->rx_buffers = NULL;
+	}
+}
 
 static void macb_free_consistent(struct macb *bp)
 {
@@ -892,6 +1053,7 @@ static void macb_free_consistent(struct macb *bp)
 		kfree(bp->tx_skb);
 		bp->tx_skb = NULL;
 	}
+	bp->macbgem_ops.mog_free_rx_buffers(bp);
 	if (bp->rx_ring) {
 		dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES,
 				  bp->rx_ring, bp->rx_ring_dma);
@@ -902,12 +1064,37 @@ static void macb_free_consistent(struct macb *bp)
 				  bp->tx_ring, bp->tx_ring_dma);
 		bp->tx_ring = NULL;
 	}
-	if (bp->rx_buffers) {
-		dma_free_coherent(&bp->pdev->dev,
-				  RX_RING_SIZE * bp->rx_buffer_size,
-				  bp->rx_buffers, bp->rx_buffers_dma);
-		bp->rx_buffers = NULL;
-	}
+}
+
+static int gem_alloc_rx_buffers(struct macb *bp)
+{
+	int size;
+
+	size = RX_RING_SIZE * sizeof(struct sk_buff *);
+	bp->rx_skbuff = kzalloc(size, GFP_KERNEL);
+	if (!bp->rx_skbuff)
+		return -ENOMEM;
+	else
+		netdev_dbg(bp->dev,
+			   "Allocated %d RX struct sk_buff entries at %p\n",
+			   RX_RING_SIZE, bp->rx_skbuff);
+	return 0;
+}
+
+static int macb_alloc_rx_buffers(struct macb *bp)
+{
+	int size;
+
+	size = RX_RING_SIZE * bp->rx_buffer_size;
+	bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
+					    &bp->rx_buffers_dma, GFP_KERNEL);
+	if (!bp->rx_buffers)
+		return -ENOMEM;
+	else
+		netdev_dbg(bp->dev,
+			   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
+			   size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers);
+	return 0;
 }
 
 static int macb_alloc_consistent(struct macb *bp)
@@ -937,14 +1124,8 @@ static int macb_alloc_consistent(struct macb *bp)
 		   "Allocated TX ring of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)bp->tx_ring_dma, bp->tx_ring);
 
-	size = RX_RING_SIZE * bp->rx_buffer_size;
-	bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
-					    &bp->rx_buffers_dma, GFP_KERNEL);
-	if (!bp->rx_buffers)
+	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
 		goto out_err;
-	netdev_dbg(bp->dev,
-		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
-		   size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers);
 
 	return 0;
 
@@ -953,6 +1134,21 @@ out_err:
 	return -ENOMEM;
 }
 
+static void gem_init_rings(struct macb *bp)
+{
+	int i;
+
+	for (i = 0; i < TX_RING_SIZE; i++) {
+		bp->tx_ring[i].addr = 0;
+		bp->tx_ring[i].ctrl = MACB_BIT(TX_USED);
+	}
+	bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
+
+	bp->rx_tail = bp->rx_prepared_head = bp->tx_head = bp->tx_tail = 0;
+
+	gem_rx_refill(bp);
+}
+
 static void macb_init_rings(struct macb *bp)
 {
 	int i;
@@ -1236,6 +1432,7 @@ EXPORT_SYMBOL_GPL(macb_set_rx_mode);
 static int macb_open(struct net_device *dev)
 {
 	struct macb *bp = netdev_priv(dev);
+	size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
 	int err;
 
 	netdev_dbg(bp->dev, "open\n");
@@ -1248,7 +1445,7 @@ static int macb_open(struct net_device *dev)
 		return -EAGAIN;
 
 	/* RX buffers initialization */
-	macb_init_rx_buffer_size(bp);
+	macb_init_rx_buffer_size(bp, bufsz);
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
@@ -1259,7 +1456,7 @@ static int macb_open(struct net_device *dev)
 
 	napi_enable(&bp->napi);
 
-	macb_init_rings(bp);
+	bp->macbgem_ops.mog_init_rings(bp);
 	macb_init_hw(bp);
 
 	/* schedule a link state check */
@@ -1611,6 +1808,19 @@ static int __init macb_probe(struct platform_device *pdev)
 
 	dev->base_addr = regs->start;
 
+	/* setup appropriated routines according to adapter type */
+	if (macb_is_gem(bp)) {
+		bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers;
+		bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
+		bp->macbgem_ops.mog_init_rings = gem_init_rings;
+		bp->macbgem_ops.mog_rx = gem_rx;
+	} else {
+		bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers;
+		bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers;
+		bp->macbgem_ops.mog_init_rings = macb_init_rings;
+		bp->macbgem_ops.mog_rx = macb_rx;
+	}
+
 	/* Set MII management clock divider */
 	config = macb_mdc_clk_div(bp);
 	config |= macb_dbw(bp);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2681455..cef146f 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -538,11 +538,22 @@ struct gem_stats {
 	u32	rx_udp_checksum_errors;
 };
 
+struct macb;
+
+struct macb_or_gem_ops {
+	int	(*mog_alloc_rx_buffers)(struct macb *bp);
+	void	(*mog_free_rx_buffers)(struct macb *bp);
+	void	(*mog_init_rings)(struct macb *bp);
+	int	(*mog_rx)(struct macb *bp, int budget);
+};
+
 struct macb {
 	void __iomem		*regs;
 
 	unsigned int		rx_tail;
+	unsigned int		rx_prepared_head;
 	struct macb_dma_desc	*rx_ring;
+	struct sk_buff		**rx_skbuff;
 	void			*rx_buffers;
 	size_t			rx_buffer_size;
 
@@ -567,6 +578,8 @@ struct macb {
 	dma_addr_t		tx_ring_dma;
 	dma_addr_t		rx_buffers_dma;
 
+	struct macb_or_gem_ops	macbgem_ops;
+
 	struct mii_bus		*mii_bus;
 	struct phy_device	*phy_dev;
 	unsigned int 		link;
-- 
1.8.0




More information about the linux-arm-kernel mailing list