[EXT] [PATCH net-next V3 3/3] net: axienet: Introduce dmaengine support

Subbaraya Sundeep Bhatta sbhatta at marvell.com
Wed May 10 06:44:09 PDT 2023


Hi,

>-----Original Message-----
>From: Sarath Babu Naidu Gaddam <sarath.babu.naidu.gaddam at amd.com>
>Sent: Wednesday, May 10, 2023 2:21 PM
>To: davem at davemloft.net; edumazet at google.com; kuba at kernel.org;
>pabeni at redhat.com; robh+dt at kernel.org; krzysztof.kozlowski+dt at linaro.org
>Cc: linux at armlinux.org.uk; michal.simek at amd.com;
>radhey.shyam.pandey at amd.com; netdev at vger.kernel.org;
>devicetree at vger.kernel.org; linux-arm-kernel at lists.infradead.org; linux-
>kernel at vger.kernel.org; anirudha.sarangi at amd.com;
>harini.katakam at amd.com; sarath.babu.naidu.gaddam at amd.com;
>git at amd.com
>Subject: [PATCH net-next V3 3/3] net: axienet: Introduce dmaengine
>support
>
>Add dmaengine framework to communicate with the xilinx DMAengine
>driver(AXIDMA).
>
>Axi ethernet driver uses separate channels for transmit and receive.
>Add support for these channels to handle TX and RX with skb and
>appropriate callbacks. Also add axi ethernet core interrupt for
>dmaengine framework support.
>
>The dmaengine framework was extended for metadata API support during the
>axidma RFC[1] discussion. However it still needs further enhancements to
>make it well suited for ethernet usecases. The ethernet features i.e
>ethtool set/get of DMA IP properties, ndo_poll_controller, trigger
>reset of DMA IP from ethernet are not supported (mentioned in TODO)
>and it requires follow-up discussion and dma framework enhancement.
>
>[1]: https://urldefense.proofpoint.com/v2/url?u=https-
>3A__lore.kernel.org_lkml_1522665546-2D10035-2D1-2Dgit-2Dsend-2Demail-
>2D&d=DwIDAg&c=nKjWec2b6R0mOyPaz7xtfQ&r=wYboOaw70DU5hRM5HDwOR
>Jx_MfD-hXXKii2eobNikgU&m=qvFFGXjULUP3IPFOil5aKySdkumrp8V0TYK4kQ-
>yzsWPmRolFaQvfKMhaR11_dZv&s=brEWb1Til18VCodb-H4tST0HOBXKIJtL-
>2ztGmMZz_8&e=
>radheys at xilinx.com
>
>Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey at xilinx.com>
>Signed-off-by: Sarath Babu Naidu Gaddam
><sarath.babu.naidu.gaddam at amd.com>
>---
>Performance numbers(Mbps):
>
>              | TCP | UDP |
>         -----------------
>         | Tx | 920 | 800 |
>         -----------------
>         | Rx | 620 | 910 |
>
>Changes in V3:
>1) New patch for dmaengine framework support.
>---
> drivers/net/ethernet/xilinx/xilinx_axienet.h  |   6 +
> .../net/ethernet/xilinx/xilinx_axienet_main.c | 331 +++++++++++++++++-
> 2 files changed, 335 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h
>b/drivers/net/ethernet/xilinx/xilinx_axienet.h
>index 10917d997d27..fbe00c5390d5 100644
>--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
>+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
>@@ -436,6 +436,9 @@ struct axidma_bd {
>  * @coalesce_count_tx:	Store the irq coalesce on TX side.
>  * @coalesce_usec_tx:	IRQ coalesce delay for TX
>  * @has_dmas:	flag to check dmaengine framework usage.
>+ * @tx_chan:	TX DMA channel.
>+ * @rx_chan:	RX DMA channel.
>+ * @skb_cache:	Custom skb slab allocator
>  */
> struct axienet_local {
> 	struct net_device *ndev;
>@@ -501,6 +504,9 @@ struct axienet_local {
> 	u32 coalesce_count_tx;
> 	u32 coalesce_usec_tx;
> 	u8  has_dmas;
>+	struct dma_chan *tx_chan;
>+	struct dma_chan *rx_chan;
>+	struct kmem_cache *skb_cache;
> };
>
> /**
>diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>index 8678fc09245a..662c77ff0e99 100644
>--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>@@ -37,6 +37,9 @@
> #include <linux/phy.h>
> #include <linux/mii.h>
> #include <linux/ethtool.h>
>+#include <linux/dmaengine.h>
>+#include <linux/dma-mapping.h>
>+#include <linux/dma/xilinx_dma.h>
>
> #include "xilinx_axienet.h"
>
>@@ -46,6 +49,9 @@
> #define TX_BD_NUM_MIN			(MAX_SKB_FRAGS + 1)
> #define TX_BD_NUM_MAX			4096
> #define RX_BD_NUM_MAX			4096
>+#define DMA_NUM_APP_WORDS		5
>+#define LEN_APP				4
>+#define RX_BUF_NUM_DEFAULT		128
>
> /* Must be shorter than length of ethtool_drvinfo.driver field to fit */
> #define DRIVER_NAME		"xaxienet"
>@@ -56,6 +62,16 @@
>
> #define AXIENET_USE_DMA(lp) ((lp)->has_dmas)
>
>+struct axi_skbuff {
>+	struct scatterlist sgl[MAX_SKB_FRAGS + 1];
>+	struct dma_async_tx_descriptor *desc;
>+	dma_addr_t dma_address;
>+	struct sk_buff *skb;
>+	int sg_len;
>+} __packed;
>+
>+static int axienet_rx_submit_desc(struct net_device *ndev);
>+
> /* Match table for of_platform binding */
> static const struct of_device_id axienet_of_match[] = {
> 	{ .compatible = "xlnx,axi-ethernet-1.00.a", },
>@@ -728,6 +744,108 @@ static inline int axienet_check_tx_bd_space(struct
>axienet_local *lp,
> 	return 0;
> }
>
>+/**
>+ * axienet_dma_tx_cb - DMA engine callback for TX channel.
>+ * @data:       Pointer to the axi_skbuff structure
>+ * @result:     error reporting through dmaengine_result.
>+ * This function is called by dmaengine driver for TX channel to notify
>+ * that the transmit is done.
>+ */
>+static void axienet_dma_tx_cb(void *data, const struct dmaengine_result
>*result)
>+{
>+	struct axi_skbuff *axi_skb = data;
>+
>+	struct net_device *netdev = axi_skb->skb->dev;
>+	struct axienet_local *lp = netdev_priv(netdev);
>+
>+	u64_stats_update_begin(&lp->tx_stat_sync);
>+	u64_stats_add(&lp->tx_bytes, axi_skb->skb->len);
>+	u64_stats_add(&lp->tx_packets, 1);
>+	u64_stats_update_end(&lp->tx_stat_sync);
>+
>+	dma_unmap_sg(lp->dev, axi_skb->sgl, axi_skb->sg_len,
>DMA_MEM_TO_DEV);
>+	dev_kfree_skb_any(axi_skb->skb);
>+	kmem_cache_free(lp->skb_cache, axi_skb);
>+}
>+
>+/**
>+ * axienet_start_xmit_dmaengine - Starts the transmission.
>+ * @skb:        sk_buff pointer that contains data to be Txed.
>+ * @ndev:       Pointer to net_device structure.
>+ *
>+ * Return: NETDEV_TX_OK, on success
>+ *          NETDEV_TX_BUSY, if any memory failure or SG error.
>+ *
>+ * This function is invoked from xmit to initiate transmission. The
>+ * function sets the skbs , call back API, SG etc.
>+ * Additionally if checksum offloading is supported,
>+ * it populates AXI Stream Control fields with appropriate values.
>+ */
>+static netdev_tx_t
>+axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev)
>+{
>+	struct dma_async_tx_descriptor *dma_tx_desc = NULL;
>+	struct axienet_local *lp = netdev_priv(ndev);
>+	u32 app[DMA_NUM_APP_WORDS] = {0};
>+	struct axi_skbuff *axi_skb;
>+	u32 csum_start_off;
>+	u32 csum_index_off;
>+	int sg_len;
>+	int ret;
>+
>+	sg_len = skb_shinfo(skb)->nr_frags + 1;
>+	axi_skb = kmem_cache_zalloc(lp->skb_cache, GFP_KERNEL);
>+	if (!axi_skb)
>+		return NETDEV_TX_BUSY;
>+
>+	sg_init_table(axi_skb->sgl, sg_len);
>+	ret = skb_to_sgvec(skb, axi_skb->sgl, 0, skb->len);
>+	if (unlikely(ret < 0))
>+		goto xmit_error_skb_sgvec;
>+
>+	ret = dma_map_sg(lp->dev, axi_skb->sgl, sg_len, DMA_TO_DEVICE);
>+	if (ret == 0)
>+		goto xmit_error_skb_sgvec;
>+
>+	/*Fill up app fields for checksum */
>+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
>+		if (lp->features & XAE_FEATURE_FULL_TX_CSUM) {
>+			/* Tx Full Checksum Offload Enabled */
>+			app[0] |= 2;
>+		} else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) {
>+			csum_start_off = skb_transport_offset(skb);
>+			csum_index_off = csum_start_off + skb->csum_offset;
>+			/* Tx Partial Checksum Offload Enabled */
>+			app[0] |= 1;
>+			app[1] = (csum_start_off << 16) | csum_index_off;
>+		}
>+	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
>+		app[0] |= 2; /* Tx Full Checksum Offload Enabled */
>+	}
>+
>+	dma_tx_desc = lp->tx_chan->device->device_prep_slave_sg(lp->tx_chan,
>axi_skb->sgl,
>+			sg_len, DMA_MEM_TO_DEV,
>+			DMA_PREP_INTERRUPT, (void *)app);
>+
>+	if (!dma_tx_desc)
>+		goto xmit_error_prep;
>+
>+	axi_skb->skb = skb;
>+	axi_skb->sg_len = sg_len;
>+	dma_tx_desc->callback_param =  axi_skb;
>+	dma_tx_desc->callback_result = axienet_dma_tx_cb;
>+	dmaengine_submit(dma_tx_desc);
>+	dma_async_issue_pending(lp->tx_chan);
>+
>+	return NETDEV_TX_OK;
>+
>+xmit_error_prep:
>+	dma_unmap_sg(lp->dev, axi_skb->sgl, sg_len, DMA_TO_DEVICE);
>+xmit_error_skb_sgvec:
>+	kmem_cache_free(lp->skb_cache, axi_skb);
>+	return NETDEV_TX_BUSY;
>+}
>+
> /**
>  * axienet_tx_poll - Invoked once a transmit is completed by the
>  * Axi DMA Tx channel.
>@@ -912,7 +1030,42 @@ axienet_start_xmit(struct sk_buff *skb, struct
>net_device *ndev)
> 	if (!AXIENET_USE_DMA(lp))
> 		return axienet_start_xmit_legacy(skb, ndev);
> 	else
>-		return NETDEV_TX_BUSY;
>+		return axienet_start_xmit_dmaengine(skb, ndev);

You can avoid this if else by
 if (AXIENET_USE_DMA(lp))
         return axienet_start_xmit_dmaengine(skb, ndev);

and no need of defining axienet_start_xmit_legacy function in patch 2/3.
_legacy may not be correct since you support both in-built dma or with dma engine just by
turning on/off dt properties. Also does this driver roll back to using in-built dma if DMA_ENGINE
is not compiled in? 

>+}
>+
>+/**
>+ * axienet_dma_rx_cb - DMA engine callback for RX channel.
>+ * @data:       Pointer to the axi_skbuff structure
>+ * @result:     error reporting through dmaengine_result.
>+ * This function is called by dmaengine driver for RX channel to notify
>+ * that the packet is received.
>+ */
>+static void axienet_dma_rx_cb(void *data, const struct dmaengine_result
>*result)
>+{
>+	struct axi_skbuff *axi_skb = data;
>+	struct sk_buff *skb = axi_skb->skb;
>+	struct net_device *netdev = skb->dev;
>+	struct axienet_local *lp = netdev_priv(netdev);
>+	size_t meta_len, meta_max_len, rx_len;
>+	u32 *app;
>+
>+	app  = dmaengine_desc_get_metadata_ptr(axi_skb->desc, &meta_len,
>&meta_max_len);
>+	dma_unmap_single(lp->dev, axi_skb->dma_address, lp->max_frm_size,
>+			 DMA_FROM_DEVICE);
>+	/* TODO: Derive app word index programmatically */
>+	rx_len = (app[LEN_APP] & 0xFFFF);
>+	skb_put(skb, rx_len);
>+	skb->protocol = eth_type_trans(skb, netdev);
>+	skb->ip_summed = CHECKSUM_NONE;
>+
>+	netif_rx(skb);
>+	kmem_cache_free(lp->skb_cache, axi_skb);
>+	u64_stats_update_begin(&lp->rx_stat_sync);
>+	u64_stats_add(&lp->rx_packets, 1);
>+	u64_stats_add(&lp->rx_bytes, rx_len);
>+	u64_stats_update_end(&lp->rx_stat_sync);
>+	axienet_rx_submit_desc(netdev);
>+	dma_async_issue_pending(lp->rx_chan);
> }
>
> /**
>@@ -1148,6 +1301,134 @@ static irqreturn_t axienet_eth_irq(int irq, void
>*_ndev)
>
> static void axienet_dma_err_handler(struct work_struct *work);
>
>+/**
>+ * axienet_rx_submit_desc - Submit the descriptors with required data
>+ * like call backup API, skb buffer.. etc to dmaengine.
>+ *
>+ * @ndev:	net_device pointer
>+ *
>+ *Return: 0, on success.
>+ *          non-zero error value on failure
>+ */
>+static int axienet_rx_submit_desc(struct net_device *ndev)
>+{
>+	struct dma_async_tx_descriptor *dma_rx_desc = NULL;
>+	struct axienet_local *lp = netdev_priv(ndev);
>+	struct axi_skbuff *axi_skb;
>+	struct sk_buff *skb;
>+	dma_addr_t addr;
>+	int ret;
>+
>+	axi_skb = kmem_cache_alloc(lp->skb_cache, GFP_KERNEL);
>+
>+	if (!axi_skb)
>+		return -ENOMEM;
>+	skb = netdev_alloc_skb(ndev, lp->max_frm_size);
>+	if (!skb) {
>+		ret = -ENOMEM;
>+		goto rx_bd_init_skb;
>+	}
>+
>+	sg_init_table(axi_skb->sgl, 1);
>+	addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size,
>DMA_FROM_DEVICE);
>+	sg_dma_address(axi_skb->sgl) = addr;
>+	sg_dma_len(axi_skb->sgl) = lp->max_frm_size;
>+	dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, axi_skb->sgl,
>+					      1, DMA_DEV_TO_MEM,
>+					      DMA_PREP_INTERRUPT);
>+	if (!dma_rx_desc) {
>+		ret = -EINVAL;
>+		goto rx_bd_init_prep_sg;
>+	}
>+
>+	axi_skb->skb = skb;
>+	axi_skb->dma_address = sg_dma_address(axi_skb->sgl);
>+	axi_skb->desc = dma_rx_desc;
>+	dma_rx_desc->callback_param =  axi_skb;
>+	dma_rx_desc->callback_result = axienet_dma_rx_cb;
>+	dmaengine_submit(dma_rx_desc);
>+
>+	return 0;
>+
>+rx_bd_init_prep_sg:
>+	dma_unmap_single(lp->dev, addr, lp->max_frm_size,
>DMA_FROM_DEVICE);
>+	dev_kfree_skb(skb);
>+rx_bd_init_skb:
>+	kmem_cache_free(lp->skb_cache, axi_skb);
>+	return ret;
>+}
>+
>+/**
>+ * axienet_setup_dma_chan - request the dma channels.
>+ * @ndev:       Pointer to net_device structure
>+ *
>+ * Return: 0, on success.
>+ *          non-zero error value on failure
>+ *
>+ * This function requests the TX and RX channels. It also submits the
>+ * allocated skb buffers and call back APIs to dmaengine.
>+ *
>+ */
>+static int axienet_setup_dma_chan(struct net_device *ndev)
>+{
>+	struct axienet_local *lp = netdev_priv(ndev);
>+	int i, ret;
>+
>+	lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
>+	if (IS_ERR(lp->tx_chan)) {
>+		ret = PTR_ERR(lp->tx_chan);
>+		if (ret != -EPROBE_DEFER)
>+			netdev_err(ndev, "No Ethernet DMA (TX) channel
>found\n");
>+		return ret;
>+	}
>+
>+	lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0");
>+	if (IS_ERR(lp->rx_chan)) {
>+		ret = PTR_ERR(lp->rx_chan);
>+		if (ret != -EPROBE_DEFER)
>+			netdev_err(ndev, "No Ethernet DMA (RX) channel
>found\n");
>+		goto err_dma_request_rx;
>+	}
>+	lp->skb_cache = kmem_cache_create("ethernet", sizeof(struct
>axi_skbuff),
>+					  0, 0, NULL);

I do not see kmem_cache_destroy?

Thanks,
Sundeep

>+	if (!lp->skb_cache) {
>+		ret =  -ENOMEM;
>+		goto err_kmem;
>+	}
>+	/* TODO: Instead of BD_NUM_DEFAULT use runtime support*/
>+	for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
>+		axienet_rx_submit_desc(ndev);
>+	dma_async_issue_pending(lp->rx_chan);
>+
>+	return 0;
>+err_kmem:
>+	dma_release_channel(lp->rx_chan);
>+err_dma_request_rx:
>+	dma_release_channel(lp->tx_chan);
>+	return ret;
>+}
>+
>+/**
>+ * axienet_init_dmaengine - init the dmaengine code.
>+ * @ndev:       Pointer to net_device structure
>+ *
>+ * Return: 0, on success.
>+ *          non-zero error value on failure
>+ *
>+ * This is the dmaengine initialization code.
>+ */
>+static inline int axienet_init_dmaengine(struct net_device *ndev)
>+{
>+	int ret;
>+
>+	ret = axienet_setup_dma_chan(ndev);
>+
>+	if (ret < 0)
>+		return ret;
>+
>+	return 0;
>+}
>+
> /**
>  * axienet_init_legacy_dma - init the dma legacy code.
>  * @ndev:       Pointer to net_device structure
>@@ -1239,7 +1520,20 @@ static int axienet_open(struct net_device *ndev)
>
> 	phylink_start(lp->phylink);
>
>-	if (!AXIENET_USE_DMA(lp)) {
>+	if (AXIENET_USE_DMA(lp)) {
>+		ret = axienet_init_dmaengine(ndev);
>+		if (ret < 0)
>+			goto error_code;
>+
>+		/* Enable interrupts for Axi Ethernet core (if defined) */
>+		if (lp->eth_irq > 0) {
>+			ret = request_irq(lp->eth_irq, axienet_eth_irq,
>IRQF_SHARED,
>+					  ndev->name, ndev);
>+			if (ret)
>+				goto error_code;
>+		}
>+
>+	} else {
> 		ret = axienet_init_legacy_dma(ndev);
> 		if (ret)
> 			goto error_code;
>@@ -1287,6 +1581,12 @@ static int axienet_stop(struct net_device *ndev)
> 		free_irq(lp->tx_irq, ndev);
> 		free_irq(lp->rx_irq, ndev);
> 		axienet_dma_bd_release(ndev);
>+	} else {
>+		dmaengine_terminate_all(lp->tx_chan);
>+		dmaengine_terminate_all(lp->rx_chan);
>+
>+		dma_release_channel(lp->rx_chan);
>+		dma_release_channel(lp->tx_chan);
> 	}
>
> 	axienet_iow(lp, XAE_IE_OFFSET, 0);
>@@ -2136,6 +2436,33 @@ static int axienet_probe(struct platform_device
>*pdev)
> 		}
> 		netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll);
> 		netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll);
>+	} else {
>+		struct xilinx_vdma_config cfg;
>+		struct dma_chan *tx_chan;
>+
>+		lp->eth_irq = platform_get_irq_optional(pdev, 0);
>+		tx_chan = dma_request_chan(lp->dev, "tx_chan0");
>+
>+		if (IS_ERR(tx_chan)) {
>+			ret = PTR_ERR(tx_chan);
>+			if (ret != -EPROBE_DEFER)
>+				dev_err(&pdev->dev, "No Ethernet DMA (TX)
>channel found\n");
>+			goto cleanup_clk;
>+		}
>+
>+		cfg.reset = 1;
>+		/* As name says VDMA but it has support for DMA channel
>reset*/
>+		ret = xilinx_vdma_channel_set_config(tx_chan, &cfg);
>+
>+		if (ret < 0) {
>+			dev_err(&pdev->dev, "Reset channel failed\n");
>+			dma_release_channel(tx_chan);
>+			goto cleanup_clk;
>+		} else {
>+			lp->has_dmas = 1;
>+		}
>+
>+		dma_release_channel(tx_chan);
> 	}
>
> 	/* Check for Ethernet core IRQ (optional) */
>--
>2.25.1
>




More information about the linux-arm-kernel mailing list