[PATCH v5 1/3] ARM: add CPPI 4.1 DMA support

Sat May 15 14:14:53 EDT 2010

Add support for Texas Instruments Communication Port Programming Interface 4.1
(CPPI 4.1) used on OMAP-L1x/DA8xx and AM35x.

At this moment, only the DMA controller and queue manager are supported.
Support for the buffer manager is lacking but these chips don't have it anyway.

Signed-off-by: Sergei Shtylyov <sshtylyov at ru.mvista.com>
Signed-off-by: Sekhar Nori <nsekhar at ti.com>

---
Changes since the previous version:
- moved everything from arch/arm/mach-davinci/ to arch/arm/common/;
- s/CONFIG_CPPI41/CONFIG_TI_CPPI41/, made that option invisible;
- added #include <linux/slab.h> for kzalloc();
- switched alloc_queue() and cppi41_queue_free() to using bit operations;
- replaced 'static' linking_ram[] by local variable in cppi41_queue_mgr_init();
- fixed pr_debug() in cppi41_dma_ctrlr_init() to print the real queue manager #.

 arch/arm/common/Kconfig                |    3 
 arch/arm/common/Makefile               |    1 
 arch/arm/common/cppi41.c               |  759 +++++++++++++++++++++++++++++++++
 arch/arm/include/asm/hardware/cppi41.h |  718 +++++++++++++++++++++++++++++++
 4 files changed, 1481 insertions(+)

Index: linux-davinci/arch/arm/common/Kconfig
===================================================================

--- linux-davinci.orig/arch/arm/common/Kconfig
+++ linux-davinci/arch/arm/common/Kconfig
@@ -38,5 +38,8 @@ config SHARP_PARAM
 config SHARP_SCOOP
 	bool
 
+config TI_CPPI41
+	bool
+
 config COMMON_CLKDEV
 	bool
Index: linux-davinci/arch/arm/common/Makefile
===================================================================
--- linux-davinci.orig/arch/arm/common/Makefile
+++ linux-davinci/arch/arm/common/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_TIMER_ACORN)	+= time-acorn.
 obj-$(CONFIG_SHARP_LOCOMO)	+= locomo.o
 obj-$(CONFIG_SHARP_PARAM)	+= sharpsl_param.o
 obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
+obj-$(CONFIG_TI_CPPI41) 	+= cppi41.o
 obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
Index: linux-davinci/arch/arm/common/cppi41.c
===================================================================
--- /dev/null
+++ linux-davinci/arch/arm/common/cppi41.c
@@ -0,0 +1,759 @@
+/*
+ * CPPI 4.1 support
+ *
+ * Copyright (C) 2008-2010 MontaVista Software, Inc. <source at mvista.com>
+ *
+ * Based on the PAL CPPI 4.1 implementation
+ * Copyright (C) 2007, Texas Instruments Inc. http://www.ti.com/
+ *
+ * This file contains the main implementation for CPPI 4.1 common peripherals,
+ * including the DMA Controllers and the Queue Managers.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+
+#undef	DEBUG
+
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/hardware/cppi41.h>
+
+/*
+ * Per-queue-manager bitmaps of the allocated queues.  Each bitmap is allocated
+ * by cppi41_queue_mgr_init() and updated by alloc_queue()/cppi41_queue_free().
+ */
+static unsigned long *allocated_queues[CPPI41_NUM_QUEUE_MGR];
+
+/*
+ * First 32 packet descriptors are reserved for unallocated memory regions.
+ * NOTE(review): the initializer below sets only element 0 to 32, any further
+ * elements are zero-initialized -- confirm this is intended should
+ * CPPI41_NUM_QUEUE_MGR ever exceed 1.
+ */
+static u32 next_desc_index[CPPI41_NUM_QUEUE_MGR] = { 1 << 5 };
+/* Next memory region number to hand out, per queue manager. */
+static u8  next_mem_rgn[CPPI41_NUM_QUEUE_MGR];
+
+/* Teardown descriptor bookkeeping, one entry per DMA block. */
+static struct {
+	size_t rgn_size;		/* size of the descriptor area, bytes */
+	void *virt_addr;		/* CPU address of the descriptor area */
+	dma_addr_t phys_addr;		/* DMA address of the descriptor area */
+	struct cppi41_queue_obj queue_obj; /* free teardown descriptor queue */
+	u8 mem_rgn;			/* queue manager memory region used */
+} dma_teardown[CPPI41_NUM_DMA_BLOCK];
+
+/******************** CPPI 4.1 Functions (External Interface) *****************/
+
+/*
+ * cppi41_queue_mgr_init - initialize a CPPI 4.1 queue manager
+ * @q_mgr:	queue manager number
+ * @rgn0_base:	value programmed as the linking RAM region 0 base address
+ * @rgn0_size:	value programmed as the linking RAM region 0 size
+ *
+ * Programs linking RAM region 0, allocates coherent memory for linking RAM
+ * region 1 and programs its base address, then allocates the zeroed bitmap
+ * that tracks which queues of this queue manager are allocated.
+ *
+ * Returns 0 on success, -EINVAL if @q_mgr is out of range, or -ENOMEM if
+ * either allocation fails.
+ */
+int __init cppi41_queue_mgr_init(u8 q_mgr, dma_addr_t rgn0_base, u16 rgn0_size)
+{
+	static struct {
+		void *virt_addr;
+		dma_addr_t phys_addr;
+	} linking_ram;
+	void __iomem *q_mgr_regs;
+	size_t rgn1_size;
+	void *ptr;
+
+	if (q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+
+	q_mgr_regs = cppi41_queue_mgr[q_mgr].q_mgr_rgn_base;
+
+	__raw_writel(rgn0_base, q_mgr_regs + QMGR_LINKING_RAM_RGN0_BASE_REG);
+	pr_debug("Linking RAM region 0 base @ %p, value: %x\n",
+		 q_mgr_regs + QMGR_LINKING_RAM_RGN0_BASE_REG,
+		 __raw_readl(q_mgr_regs + QMGR_LINKING_RAM_RGN0_BASE_REG));
+
+	__raw_writel(rgn0_size, q_mgr_regs + QMGR_LINKING_RAM_RGN0_SIZE_REG);
+	pr_debug("Linking RAM region 0 size @ %p, value: %x\n",
+		 q_mgr_regs + QMGR_LINKING_RAM_RGN0_SIZE_REG,
+		 __raw_readl(q_mgr_regs + QMGR_LINKING_RAM_RGN0_SIZE_REG));
+
+	/* Computed once: the very same size is needed on the error path. */
+	rgn1_size = 0x10000 - rgn0_size * 4;
+
+	ptr = dma_alloc_coherent(NULL, rgn1_size,
+				 &linking_ram.phys_addr, GFP_KERNEL | GFP_DMA);
+	if (ptr == NULL) {
+		pr_err("ERROR: %s: Unable to allocate linking RAM.\n",
+		       __func__);
+		return -ENOMEM;
+	}
+	linking_ram.virt_addr = ptr;
+
+	__raw_writel(linking_ram.phys_addr,
+		     q_mgr_regs + QMGR_LINKING_RAM_RGN1_BASE_REG);
+	pr_debug("Linking RAM region 1 base @ %p, value: %x\n",
+		 q_mgr_regs + QMGR_LINKING_RAM_RGN1_BASE_REG,
+		 __raw_readl(q_mgr_regs + QMGR_LINKING_RAM_RGN1_BASE_REG));
+
+	/*
+	 * BITS_TO_LONGS() yields a count of longs while kzalloc() takes bytes,
+	 * so scale it -- otherwise the bitmap is too small and the bit
+	 * operations in alloc_queue()/cppi41_queue_free() run off its end.
+	 */
+	ptr = kzalloc(BITS_TO_LONGS(cppi41_queue_mgr[q_mgr].num_queue) *
+		      sizeof(unsigned long), GFP_KERNEL);
+	if (ptr == NULL) {
+		pr_err("ERROR: %s: Unable to allocate queue bitmap.\n",
+		       __func__);
+		dma_free_coherent(NULL, rgn1_size,
+				  linking_ram.virt_addr, linking_ram.phys_addr);
+		return -ENOMEM;
+	}
+	allocated_queues[q_mgr] = ptr;
+
+	return 0;
+}
+
+/*
+ * cppi41_dma_ctrlr_init - initialize a CPPI 4.1 DMA controller block
+ * @dma_num:	DMA block number
+ * @q_mgr:	queue manager in which the teardown descriptor queue is allocated
+ * @num_order:	order of the number of teardown descriptors (1 << @num_order)
+ *
+ * Allocates the teardown descriptor queue and tells the DMA block about it,
+ * allocates the teardown descriptors and a queue manager memory region for
+ * them, and finally pushes every descriptor to the teardown descriptor queue.
+ *
+ * Returns 0 on success, in which case all the resources stay allocated;
+ * a negative error code otherwise, with everything allocated so far released.
+ */
+int __init cppi41_dma_ctrlr_init(u8 dma_num, u8 q_mgr, u8 num_order)
+{
+	const struct cppi41_dma_block *dma_block;
+	const size_t td_size = sizeof(struct cppi41_teardown_desc);
+	unsigned num_desc = 1 << num_order;
+	dma_addr_t td_addr;
+	unsigned i;
+	void *ptr;
+	int error;
+	u16 q_num;
+
+	if (dma_num >= cppi41_num_dma_block ||
+	    q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+
+	error = cppi41_queue_alloc(CPPI41_FREE_DESC_QUEUE |
+				   CPPI41_UNASSIGNED_QUEUE, q_mgr, &q_num);
+	if (error) {
+		pr_err("ERROR: %s: Unable to allocate teardown descriptor "
+		       "queue.\n", __func__);
+		return error;
+	}
+	pr_debug("Teardown descriptor queue %d in queue manager %d allocated\n",
+		 q_num, q_mgr);
+
+	/*
+	 * Tell the hardware about the Teardown descriptor
+	 * queue manager and queue number.
+	 */
+	dma_block = &cppi41_dma_block[dma_num];
+	__raw_writel((q_mgr << DMA_TD_DESC_QMGR_SHIFT) |
+		     (q_num << DMA_TD_DESC_QNUM_SHIFT),
+		     dma_block->global_ctrl_base +
+		     DMA_TEARDOWN_FREE_DESC_CTRL_REG);
+	pr_debug("Teardown free descriptor control @ %p, value: %x\n",
+		 dma_block->global_ctrl_base + DMA_TEARDOWN_FREE_DESC_CTRL_REG,
+		 __raw_readl(dma_block->global_ctrl_base +
+			     DMA_TEARDOWN_FREE_DESC_CTRL_REG));
+
+	dma_teardown[dma_num].rgn_size = num_desc * td_size;
+
+	/* Pre-allocate teardown descriptors. */
+	ptr = dma_alloc_coherent(NULL, dma_teardown[dma_num].rgn_size,
+				 &dma_teardown[dma_num].phys_addr,
+				 GFP_KERNEL | GFP_DMA);
+	if (ptr == NULL) {
+		pr_err("ERROR: %s: Unable to allocate teardown descriptors.\n",
+		       __func__);
+		error = -ENOMEM;
+		goto free_queue;
+	}
+	dma_teardown[dma_num].virt_addr = ptr;
+
+	error = cppi41_mem_rgn_alloc(q_mgr, dma_teardown[dma_num].phys_addr, 5,
+				     num_order, &dma_teardown[dma_num].mem_rgn);
+	if (error) {
+		pr_err("ERROR: %s: Unable to allocate queue manager memory "
+		       "region for teardown descriptors.\n", __func__);
+		goto free_mem;
+	}
+
+	/*
+	 * The queue was allocated in (and the hardware was pointed at) @q_mgr,
+	 * so the queue object must refer to that same queue manager.
+	 */
+	error = cppi41_queue_init(&dma_teardown[dma_num].queue_obj, q_mgr,
+				  q_num);
+	if (error) {
+		pr_err("ERROR: %s: Unable to initialize teardown free "
+		       "descriptor queue.\n", __func__);
+		goto free_rgn;
+	}
+
+	/*
+	 * Push all teardown descriptors to the free teardown queue
+	 * for the CPPI 4.1 system.
+	 */
+	td_addr = dma_teardown[dma_num].phys_addr;
+
+	for (i = 0; i < num_desc; i++) {
+		cppi41_queue_push(&dma_teardown[dma_num].queue_obj, td_addr,
+				  td_size, 0);
+		td_addr += td_size;
+	}
+
+	/* Success: do not fall through into the error unwinding below. */
+	return 0;
+
+free_rgn:
+	cppi41_mem_rgn_free(q_mgr, dma_teardown[dma_num].mem_rgn);
+free_mem:
+	dma_free_coherent(NULL, dma_teardown[dma_num].rgn_size,
+			  dma_teardown[dma_num].virt_addr,
+			  dma_teardown[dma_num].phys_addr);
+free_queue:
+	cppi41_queue_free(q_mgr, q_num);
+	return error;
+}
+
+/*
+ * cppi41_dma_sched_init - program and enable the scheduler of a DMA block
+ * @dma_num:	DMA block number
+ * @sched_tbl:	scheduler table, one byte per entry
+ * @tbl_size:	number of the table entries, 1 to 0x100
+ *
+ * Packs the table four entries per 32-bit word, the lowest numbered entry
+ * landing in the least significant byte, writes the words to the scheduler
+ * table registers, then writes the last entry number along with the enable
+ * bit to the scheduler control register.
+ *
+ * Returns 0 on success, -EINVAL on a bad argument.
+ */
+int __init cppi41_dma_sched_init(u8 dma_num, const u8 *sched_tbl, u16 tbl_size)
+{
+	const struct cppi41_dma_block *dma_block;
+	unsigned num_reg, word, entry = 0;
+	int byte;
+	u32 val;
+
+	if (dma_num >= cppi41_num_dma_block)
+		return -EINVAL;
+	if (sched_tbl == NULL || tbl_size == 0 || tbl_size > 0x100)
+		return -EINVAL;
+
+	/* Initialize the DMA scheduler. */
+	dma_block = &cppi41_dma_block[dma_num];
+	num_reg = (tbl_size + 3) / 4;
+	for (word = 0; word < num_reg; word++) {
+		val = 0;
+		/* Entries past the table end leave their byte zeroed. */
+		for (byte = 0; byte < 4; byte++) {
+			val >>= 8;
+			if (entry < tbl_size)
+				val |= sched_tbl[entry] << 24;
+			entry++;
+		}
+
+		__raw_writel(val, dma_block->sched_table_base +
+			     DMA_SCHED_TABLE_WORD_REG(word));
+		pr_debug("DMA scheduler table @ %p, value written: %x\n",
+			 dma_block->sched_table_base +
+			 DMA_SCHED_TABLE_WORD_REG(word), val);
+	}
+
+	__raw_writel((tbl_size - 1) << DMA_SCHED_LAST_ENTRY_SHIFT |
+		     DMA_SCHED_ENABLE_MASK,
+		     dma_block->sched_ctrl_base + DMA_SCHED_CTRL_REG);
+	pr_debug("DMA scheduler control @ %p, value: %x\n",
+		 dma_block->sched_ctrl_base + DMA_SCHED_CTRL_REG,
+		 __raw_readl(dma_block->sched_ctrl_base + DMA_SCHED_CTRL_REG));
+
+	return 0;
+}
+
+/*
+ * cppi41_mem_rgn_alloc - allocate a memory region within the queue manager
+ * @q_mgr:	queue manager number
+ * @rgn_addr:	address written to the region's base register; must be aligned
+ *		to (1 << @size_order)
+ * @size_order:	order stored (less 5) in the descriptor size field of the
+ *		region's control register; accepted range is 5 to 13
+ * @num_order:	order of the number of descriptors in the region, stored
+ *		(less 5) in the control register; accepted range is 5 to 12
+ * @mem_rgn:	where to store the number of the region allocated
+ *
+ * Takes the next unused region number and the next unused descriptor index of
+ * the queue manager, then programs the region's base and control registers.
+ *
+ * Returns 0 on success, -EINVAL if an argument is out of range or @rgn_addr
+ * is misaligned, -ENOSPC if no region is left or the descriptors would not
+ * fit below index 0x4000.
+ */
+int cppi41_mem_rgn_alloc(u8 q_mgr, dma_addr_t rgn_addr, u8 size_order,
+			 u8 num_order, u8 *mem_rgn)
+{
+	void __iomem *desc_mem_regs;
+	u32 num_desc = 1 << num_order, index, ctrl;
+	int rgn;
+
+	pr_debug("%s called with rgn_addr = %08x, size_order = %d, "
+		 "num_order = %d\n", __func__, rgn_addr, size_order, num_order);
+
+	if (q_mgr >= cppi41_num_queue_mgr ||
+	    size_order < 5 || size_order > 13 ||
+	    num_order  < 5 || num_order  > 12 ||
+	    (rgn_addr & ((1 << size_order) - 1)))
+		return -EINVAL;
+
+	rgn = next_mem_rgn[q_mgr];
+	index = next_desc_index[q_mgr];
+	if (rgn >= CPPI41_MAX_MEM_RGN || index + num_desc > 0x4000)
+		return -ENOSPC;
+
+	/* Reserve the region number and the descriptor index range. */
+	next_mem_rgn[q_mgr] = rgn + 1;
+	next_desc_index[q_mgr] = index + num_desc;
+
+	desc_mem_regs = cppi41_queue_mgr[q_mgr].desc_mem_rgn_base;
+
+	/* Write the base register */
+	__raw_writel(rgn_addr, desc_mem_regs + QMGR_MEM_RGN_BASE_REG(rgn));
+	pr_debug("Descriptor region base @ %p, value: %x\n",
+		 desc_mem_regs + QMGR_MEM_RGN_BASE_REG(rgn),
+		 __raw_readl(desc_mem_regs + QMGR_MEM_RGN_BASE_REG(rgn)));
+
+	/* Write the control register */
+	ctrl = ((index << QMGR_MEM_RGN_INDEX_SHIFT) &
+		QMGR_MEM_RGN_INDEX_MASK) |
+	       (((size_order - 5) << QMGR_MEM_RGN_DESC_SIZE_SHIFT) &
+		QMGR_MEM_RGN_DESC_SIZE_MASK) |
+	       (((num_order - 5) << QMGR_MEM_RGN_SIZE_SHIFT) &
+		QMGR_MEM_RGN_SIZE_MASK);
+	__raw_writel(ctrl, desc_mem_regs + QMGR_MEM_RGN_CTRL_REG(rgn));
+	pr_debug("Descriptor region control @ %p, value: %x\n",
+		 desc_mem_regs + QMGR_MEM_RGN_CTRL_REG(rgn),
+		 __raw_readl(desc_mem_regs + QMGR_MEM_RGN_CTRL_REG(rgn)));
+
+	*mem_rgn = rgn;
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_mem_rgn_alloc);
+
+/*
+ * cppi41_mem_rgn_free - free the memory region within the queue manager
+ * @q_mgr:	queue manager number
+ * @mem_rgn:	number of the memory region to free
+ *
+ * Zeroes the base and control registers of the region.  The region number
+ * and the descriptor indices it used are not handed out again.
+ *
+ * Returns 0 on success, -EINVAL if an argument is out of range, -ENOENT if
+ * the region's base register already reads as zero.
+ */
+int cppi41_mem_rgn_free(u8 q_mgr, u8 mem_rgn)
+{
+	void __iomem *regs;
+
+	pr_debug("%s called.\n", __func__);
+
+	if (q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+	if (mem_rgn >= next_mem_rgn[q_mgr])
+		return -EINVAL;
+
+	regs = cppi41_queue_mgr[q_mgr].desc_mem_rgn_base;
+
+	/* A zero base register means the region is not in use. */
+	if (!__raw_readl(regs + QMGR_MEM_RGN_BASE_REG(mem_rgn)))
+		return -ENOENT;
+
+	__raw_writel(0, regs + QMGR_MEM_RGN_BASE_REG(mem_rgn));
+	__raw_writel(0, regs + QMGR_MEM_RGN_CTRL_REG(mem_rgn));
+
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_mem_rgn_free);
+
+/*
+ * cppi41_tx_ch_init - initialize a CPPI 4.1 Tx channel object
+ * @tx_ch_obj:	channel object to fill in
+ * @dma_num:	DMA block number
+ * @ch_num:	Tx channel number within the DMA block
+ *
+ * Range-checks the DMA block and channel numbers, then records the address
+ * of the channel's global configuration register along with the value
+ * currently read from it.
+ *
+ * Returns 0 on success, -EINVAL if either number is out of range.
+ */
+int cppi41_tx_ch_init(struct cppi41_dma_ch_obj *tx_ch_obj,
+		      u8 dma_num, u8 ch_num)
+{
+	if (dma_num >= cppi41_num_dma_block)
+		return -EINVAL;
+	if (ch_num >= cppi41_dma_block[dma_num].num_tx_ch)
+		return -EINVAL;
+
+	/* Remember where the register lives and what it holds right now. */
+	tx_ch_obj->base_addr = cppi41_dma_block[dma_num].ch_ctrl_stat_base +
+			       DMA_CH_TX_GLOBAL_CFG_REG(ch_num);
+	tx_ch_obj->global_cfg = __raw_readl(tx_ch_obj->base_addr);
+
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_tx_ch_init);
+
+/*
+ * cppi41_rx_ch_init - initialize a CPPI 4.1 Rx channel object
+ * @rx_ch_obj:	channel object to fill in
+ * @dma_num:	DMA block number
+ * @ch_num:	Rx channel number within the DMA block
+ *
+ * Range-checks the DMA block and channel numbers, then records the address
+ * of the channel's global configuration register along with the value
+ * currently read from it.
+ *
+ * Returns 0 on success, -EINVAL if either number is out of range.
+ */
+int cppi41_rx_ch_init(struct cppi41_dma_ch_obj *rx_ch_obj,
+		      u8 dma_num, u8 ch_num)
+{
+	if (dma_num >= cppi41_num_dma_block)
+		return -EINVAL;
+	if (ch_num >= cppi41_dma_block[dma_num].num_rx_ch)
+		return -EINVAL;
+
+	/* Remember where the register lives and what it holds right now. */
+	rx_ch_obj->base_addr = cppi41_dma_block[dma_num].ch_ctrl_stat_base +
+			       DMA_CH_RX_GLOBAL_CFG_REG(ch_num);
+	rx_ch_obj->global_cfg = __raw_readl(rx_ch_obj->base_addr);
+
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_rx_ch_init);
+
+/*
+ * We have to cache the last written Rx/Tx channel global configuration register
+ * value due to its bits other than enable/teardown being write-only. Yet there
+ * is a caveat related to caching the enable bit: this bit may be automatically
+ * cleared as a result of teardown, so we can't trust its cached value!
+ * When modifying the write only register fields, we're making use of the fact
+ * that they read back as zeros, and not clearing them explicitly...
+ */
+
+/*
+ * cppi41_dma_ch_default_queue - set CPPI 4.1 channel default completion queue
+ * @dma_ch_obj:	channel object holding the register address and cached value
+ * @q_mgr:	queue manager number of the default completion queue
+ * @q_num:	queue number of the default completion queue
+ *
+ * Starts from the cached global configuration value, replaces its default
+ * queue manager/number fields, ORs in what the register currently reads as,
+ * then caches the result and writes it to the register.
+ */
+void cppi41_dma_ch_default_queue(struct cppi41_dma_ch_obj *dma_ch_obj,
+				 u8 q_mgr, u16 q_num)
+{
+	u32 val = dma_ch_obj->global_cfg;
+
+	/* Clear the fields to be modified. */
+	val &= ~(DMA_CH_TX_DEFAULT_QMGR_MASK | DMA_CH_TX_DEFAULT_QNUM_MASK |
+		 DMA_CH_TX_ENABLE_MASK);
+
+	/* Set the default completion queue. */
+	val |= ((q_mgr << DMA_CH_TX_DEFAULT_QMGR_SHIFT) &
+		DMA_CH_TX_DEFAULT_QMGR_MASK) |
+	       ((q_num << DMA_CH_TX_DEFAULT_QNUM_SHIFT) &
+		DMA_CH_TX_DEFAULT_QNUM_MASK);
+
+	/*
+	 * Get the current state of the enable bit: the cached copy of it was
+	 * cleared above, so whatever the register reads back is what counts.
+	 */
+	dma_ch_obj->global_cfg = val |= __raw_readl(dma_ch_obj->base_addr);
+	__raw_writel(val, dma_ch_obj->base_addr);
+	pr_debug("Channel global configuration @ %p, value written: %x, "
+		 "value read: %x\n", dma_ch_obj->base_addr, val,
+		 __raw_readl(dma_ch_obj->base_addr));
+
+}
+EXPORT_SYMBOL(cppi41_dma_ch_default_queue);
+
+/*
+ * cppi41_rx_ch_configure - configure CPPI 4.1 Rx channel
+ * @rx_ch_obj:	Rx channel object holding the global configuration register
+ *		address
+ * @cfg:	Rx channel configuration to apply
+ *
+ * ORs the SOP offset, default descriptor type, starvation handling and
+ * default Rx queue fields into the value read from the channel's global
+ * configuration register, caches the result and writes it back.  Then,
+ * depending on @cfg->default_desc_type, programs the embedded, host, or
+ * monolithic packet configuration register(s); any other descriptor type
+ * leaves those registers untouched.
+ */
+void cppi41_rx_ch_configure(struct cppi41_dma_ch_obj *rx_ch_obj,
+			    struct cppi41_rx_ch_cfg  *cfg)
+{
+	void __iomem *base = rx_ch_obj->base_addr;
+	u32 val = __raw_readl(rx_ch_obj->base_addr);
+
+	/*
+	 * The fields are only ORed in, not cleared first: the write-only
+	 * fields of this register are expected to read back as zeros.
+	 */
+	val |= ((cfg->sop_offset << DMA_CH_RX_SOP_OFFSET_SHIFT) &
+		DMA_CH_RX_SOP_OFFSET_MASK) |
+	       ((cfg->default_desc_type << DMA_CH_RX_DEFAULT_DESC_TYPE_SHIFT) &
+		DMA_CH_RX_DEFAULT_DESC_TYPE_MASK) |
+	       ((cfg->retry_starved << DMA_CH_RX_ERROR_HANDLING_SHIFT) &
+		DMA_CH_RX_ERROR_HANDLING_MASK) |
+	       ((cfg->rx_queue.q_mgr << DMA_CH_RX_DEFAULT_RQ_QMGR_SHIFT) &
+		DMA_CH_RX_DEFAULT_RQ_QMGR_MASK) |
+	       ((cfg->rx_queue.q_num << DMA_CH_RX_DEFAULT_RQ_QNUM_SHIFT) &
+		DMA_CH_RX_DEFAULT_RQ_QNUM_MASK);
+
+	rx_ch_obj->global_cfg = val;
+	__raw_writel(val, base);
+	pr_debug("Rx channel global configuration @ %p, value written: %x, "
+		 "value read: %x\n", base, val, __raw_readl(base));
+
+	/*
+	 * Step back by the offset of channel 0's global configuration
+	 * register so that the channel 0 register macros used below can be
+	 * applied to this channel -- presumably this yields the start of
+	 * the channel's own register group (confirm against the header).
+	 */
+	base -= DMA_CH_RX_GLOBAL_CFG_REG(0);
+
+	/*
+	 * Set up the packet configuration register
+	 * based on the descriptor type...
+	 */
+	switch (cfg->default_desc_type) {
+	case DMA_CH_RX_DEFAULT_DESC_EMBED:
+		/* Register B: free descriptor queue and buffer slot setup. */
+		val = ((cfg->cfg.embed_pkt.fd_queue.q_mgr <<
+			DMA_CH_RX_EMBED_FDQ_QMGR_SHIFT) &
+		       DMA_CH_RX_EMBED_FDQ_QMGR_MASK) |
+		      ((cfg->cfg.embed_pkt.fd_queue.q_num <<
+			DMA_CH_RX_EMBED_FDQ_QNUM_SHIFT) &
+		       DMA_CH_RX_EMBED_FDQ_QNUM_MASK) |
+		      ((cfg->cfg.embed_pkt.num_buf_slot <<
+			DMA_CH_RX_EMBED_NUM_SLOT_SHIFT) &
+		       DMA_CH_RX_EMBED_NUM_SLOT_MASK) |
+		      ((cfg->cfg.embed_pkt.sop_slot_num <<
+			DMA_CH_RX_EMBED_SOP_SLOT_SHIFT) &
+		       DMA_CH_RX_EMBED_SOP_SLOT_MASK);
+
+		__raw_writel(val, base + DMA_CH_RX_EMBED_PKT_CFG_REG_B(0));
+		pr_debug("Rx channel embedded packet configuration B @ %p, "
+			 "value written: %x\n",
+			 base + DMA_CH_RX_EMBED_PKT_CFG_REG_B(0), val);
+
+		/* Register A: the four free buffer pools (pool and manager). */
+		val = ((cfg->cfg.embed_pkt.free_buf_pool[0].b_pool <<
+			DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(0)) &
+		       DMA_CH_RX_EMBED_FBP_PNUM_MASK(0)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[0].b_mgr <<
+			DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(0)) &
+		       DMA_CH_RX_EMBED_FBP_BMGR_MASK(0)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[1].b_pool <<
+			DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(1)) &
+		       DMA_CH_RX_EMBED_FBP_PNUM_MASK(1)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[1].b_mgr <<
+			DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(1)) &
+		       DMA_CH_RX_EMBED_FBP_BMGR_MASK(1)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[2].b_pool <<
+			DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(2)) &
+		       DMA_CH_RX_EMBED_FBP_PNUM_MASK(2)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[2].b_mgr <<
+			DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(2)) &
+		       DMA_CH_RX_EMBED_FBP_BMGR_MASK(2)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[3].b_pool <<
+			DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(3)) &
+		       DMA_CH_RX_EMBED_FBP_PNUM_MASK(3)) |
+		      ((cfg->cfg.embed_pkt.free_buf_pool[3].b_mgr <<
+			DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(3)) &
+		       DMA_CH_RX_EMBED_FBP_BMGR_MASK(3));
+
+		__raw_writel(val, base + DMA_CH_RX_EMBED_PKT_CFG_REG_A(0));
+		pr_debug("Rx channel embedded packet configuration A @ %p, "
+			 "value written: %x\n",
+			 base + DMA_CH_RX_EMBED_PKT_CFG_REG_A(0), val);
+		break;
+	case DMA_CH_RX_DEFAULT_DESC_HOST:
+		/* Register A: free descriptor/buffer queues 0 and 1. */
+		val = ((cfg->cfg.host_pkt.fdb_queue[0].q_num <<
+			DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(0)) &
+		       DMA_CH_RX_HOST_FDQ_QNUM_MASK(0)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[0].q_mgr <<
+			DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(0)) &
+		       DMA_CH_RX_HOST_FDQ_QMGR_MASK(0)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[1].q_num <<
+			DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(1)) &
+		       DMA_CH_RX_HOST_FDQ_QNUM_MASK(1)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[1].q_mgr <<
+			DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(1)) &
+		       DMA_CH_RX_HOST_FDQ_QMGR_MASK(1));
+
+		__raw_writel(val, base + DMA_CH_RX_HOST_PKT_CFG_REG_A(0));
+		pr_debug("Rx channel host packet configuration A @ %p, "
+			 "value written: %x\n",
+			 base + DMA_CH_RX_HOST_PKT_CFG_REG_A(0), val);
+
+		/* Register B: free descriptor/buffer queues 2 and 3. */
+		val = ((cfg->cfg.host_pkt.fdb_queue[2].q_num <<
+			DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(2)) &
+		       DMA_CH_RX_HOST_FDQ_QNUM_MASK(2)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[2].q_mgr <<
+			DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(2)) &
+		       DMA_CH_RX_HOST_FDQ_QMGR_MASK(2)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[3].q_num <<
+		       DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(3)) &
+		       DMA_CH_RX_HOST_FDQ_QNUM_MASK(3)) |
+		      ((cfg->cfg.host_pkt.fdb_queue[3].q_mgr <<
+			DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(3)) &
+		       DMA_CH_RX_HOST_FDQ_QMGR_MASK(3));
+
+		__raw_writel(val, base + DMA_CH_RX_HOST_PKT_CFG_REG_B(0));
+		pr_debug("Rx channel host packet configuration B @ %p, "
+			 "value written: %x\n",
+			 base + DMA_CH_RX_HOST_PKT_CFG_REG_B(0), val);
+		break;
+	case DMA_CH_RX_DEFAULT_DESC_MONO:
+		/* Single register: free descriptor queue and SOP offset. */
+		val = ((cfg->cfg.mono_pkt.fd_queue.q_num <<
+			DMA_CH_RX_MONO_FDQ_QNUM_SHIFT) &
+		       DMA_CH_RX_MONO_FDQ_QNUM_MASK) |
+		      ((cfg->cfg.mono_pkt.fd_queue.q_mgr <<
+			DMA_CH_RX_MONO_FDQ_QMGR_SHIFT) &
+		       DMA_CH_RX_MONO_FDQ_QMGR_MASK) |
+		      ((cfg->cfg.mono_pkt.sop_offset <<
+			DMA_CH_RX_MONO_SOP_OFFSET_SHIFT) &
+		       DMA_CH_RX_MONO_SOP_OFFSET_MASK);
+
+		__raw_writel(val, base + DMA_CH_RX_MONO_PKT_CFG_REG(0));
+		pr_debug("Rx channel monolithic packet configuration @ %p, "
+			 "value written: %x\n",
+			 base + DMA_CH_RX_MONO_PKT_CFG_REG(0), val);
+		break;
+	}
+}
+EXPORT_SYMBOL(cppi41_rx_ch_configure);
+
+/*
+ * cppi41_dma_ch_teardown - teardown a given Tx/Rx channel
+ * @dma_ch_obj:	channel object holding the register address and cached value
+ *
+ * Combines the value read from the global configuration register with the
+ * cached value (less its enable bit), sets the teardown bit, then caches
+ * the result and writes it to the register.
+ */
+void cppi41_dma_ch_teardown(struct cppi41_dma_ch_obj *dma_ch_obj)
+{
+	void __iomem *reg = dma_ch_obj->base_addr;
+	u32 cfg;
+
+	/* The enable bit comes from the hardware, the rest from the cache. */
+	cfg  = __raw_readl(reg);
+	cfg |= dma_ch_obj->global_cfg & ~DMA_CH_TX_ENABLE_MASK;
+
+	/* Initiate channel teardown. */
+	cfg |= DMA_CH_TX_TEARDOWN_MASK;
+	dma_ch_obj->global_cfg = cfg;
+	__raw_writel(cfg, reg);
+	pr_debug("Tear down channel @ %p, value written: %x, value read: %x\n",
+		 reg, cfg, __raw_readl(reg));
+}
+EXPORT_SYMBOL(cppi41_dma_ch_teardown);
+
+/*
+ * cppi41_dma_ch_enable - enable Tx/Rx DMA channel in hardware
+ * @dma_ch_obj:	channel object holding the register address and cached value
+ *
+ * Makes the channel ready for data transmission/reception: sets the enable
+ * bit and clears the teardown bit in the cached global configuration value,
+ * then writes that value to the register.
+ */
+void cppi41_dma_ch_enable(struct cppi41_dma_ch_obj *dma_ch_obj)
+{
+	void __iomem *reg = dma_ch_obj->base_addr;
+	u32 cfg = dma_ch_obj->global_cfg;
+
+	cfg |= DMA_CH_TX_ENABLE_MASK;
+	/* Teardown bit remains set after completion, so clear it now... */
+	cfg &= ~DMA_CH_TX_TEARDOWN_MASK;
+
+	dma_ch_obj->global_cfg = cfg;
+	__raw_writel(cfg, reg);
+	pr_debug("Enable channel @ %p, value written: %x, value read: %x\n",
+		 reg, cfg, __raw_readl(reg));
+}
+EXPORT_SYMBOL(cppi41_dma_ch_enable);
+
+/*
+ * cppi41_dma_ch_disable - disable Tx/Rx DMA channel in hardware
+ * @dma_ch_obj:	channel object holding the register address and cached value
+ *
+ * Clears the enable bit in the cached global configuration value and writes
+ * that value to the register.
+ */
+void cppi41_dma_ch_disable(struct cppi41_dma_ch_obj *dma_ch_obj)
+{
+	void __iomem *reg = dma_ch_obj->base_addr;
+	u32 cfg = dma_ch_obj->global_cfg & ~DMA_CH_TX_ENABLE_MASK;
+
+	dma_ch_obj->global_cfg = cfg;
+	__raw_writel(cfg, reg);
+	pr_debug("Disable channel @ %p, value written: %x, value read: %x\n",
+		 reg, cfg, __raw_readl(reg));
+}
+EXPORT_SYMBOL(cppi41_dma_ch_disable);
+
+/**
+ * alloc_queue - allocate a queue in the given range
+ * @allocated:	pointer to the bitmap of the allocated queues
+ * @excluded:	pointer to the bitmap of the queues excluded from allocation
+ *		(optional, may be NULL)
+ * @start:	starting queue number
+ * @count:	number of queues available
+ *
+ * Scans @allocated for a clear bit in the range [@start, @start + @count),
+ * skipping the queues set in @excluded, and marks the queue found allocated.
+ *
+ * Returns queue number on success, -ENOSPC otherwise.
+ */
+static int alloc_queue(unsigned long *allocated, const unsigned long *excluded,
+		       unsigned start, unsigned count)
+{
+	unsigned n, end = start + count;
+
+	do {
+		n = find_next_zero_bit(allocated, end, start);
+		if (n >= end)
+			return -ENOSPC;
+		/* Resume after this queue should it turn out unusable. */
+		start = n + 1;
+		/*
+		 * @excluded must only be looked at when supplied: callers
+		 * pass NULL if there is nothing to exclude.  A queue that
+		 * got grabbed between the scan and test_and_set_bit() is
+		 * skipped too.
+		 */
+	} while ((excluded != NULL && test_bit(n, excluded)) ||
+		 test_and_set_bit(n, allocated));
+
+	return n;
+}
+
+/*
+ * cppi41_queue_alloc - allocate a queue of a given type in the queue manager
+ * @type:	mask of the acceptable queue types
+ * @q_mgr:	queue manager number
+ * @q_num:	where to store the number of the queue allocated
+ *
+ * The requested types not supported by the queue manager are ignored.  The
+ * remaining ones are tried in a fixed order: free descriptor queue, free
+ * descriptor/buffer queue, unassigned queue.
+ *
+ * Returns 0 on success, -EINVAL if @q_mgr is out of range, -ENOSPC if no
+ * queue could be allocated.
+ */
+int cppi41_queue_alloc(u8 type, u8 q_mgr, u16 *q_num)
+{
+	int queue = -ENOSPC;
+	u8 wanted;
+
+	if (q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+
+	/* Mask out the unsupported queue types */
+	wanted = type & cppi41_queue_mgr[q_mgr].queue_types;
+
+	/* Free descriptor queues come first... */
+	if (wanted & CPPI41_FREE_DESC_QUEUE)
+		queue = alloc_queue(allocated_queues[q_mgr], NULL,
+				    cppi41_queue_mgr[q_mgr].base_fdq_num, 16);
+
+	/* ...then free descriptor/buffer queues... */
+	if (queue < 0 && (wanted & CPPI41_FREE_DESC_BUF_QUEUE))
+		queue = alloc_queue(allocated_queues[q_mgr], NULL,
+				    cppi41_queue_mgr[q_mgr].base_fdbq_num, 16);
+
+	/* ...and unassigned queues come last. */
+	if (queue < 0 && (wanted & CPPI41_UNASSIGNED_QUEUE))
+		queue = alloc_queue(allocated_queues[q_mgr],
+				    cppi41_queue_mgr[q_mgr].assigned, 0,
+				    cppi41_queue_mgr[q_mgr].num_queue);
+
+	if (queue >= 0) {
+		/* Return the queue allocated */
+		*q_num = queue;
+		return 0;
+	}
+
+	return queue;
+}
+EXPORT_SYMBOL(cppi41_queue_alloc);
+
+/*
+ * cppi41_queue_free - free the given queue in the queue manager
+ * @q_mgr:	queue manager number
+ * @q_num:	queue number
+ *
+ * Returns 0 on success, -EINVAL if an argument is out of range or the queue
+ * was not marked as allocated.
+ */
+int cppi41_queue_free(u8 q_mgr, u16 q_num)
+{
+	if (q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+	if (q_num >= cppi41_queue_mgr[q_mgr].num_queue)
+		return -EINVAL;
+
+	/* Freeing a queue that is not allocated is an error as well. */
+	return test_and_clear_bit(q_num, allocated_queues[q_mgr]) ?
+	       0 : -EINVAL;
+}
+EXPORT_SYMBOL(cppi41_queue_free);
+
+/*
+ * cppi41_queue_init - initialize a CPPI 4.1 queue object
+ * @queue_obj:	queue object to fill in
+ * @q_mgr:	queue manager number
+ * @q_num:	queue number
+ *
+ * Records the address of the queue's status register A within the queue
+ * management region of the given queue manager.
+ *
+ * Returns 0 on success, -EINVAL if either number is out of range.
+ */
+int cppi41_queue_init(struct cppi41_queue_obj *queue_obj, u8 q_mgr, u16 q_num)
+{
+	if (q_mgr >= cppi41_num_queue_mgr)
+		return -EINVAL;
+	if (q_num >= cppi41_queue_mgr[q_mgr].num_queue)
+		return -EINVAL;
+
+	queue_obj->base_addr = cppi41_queue_mgr[q_mgr].q_mgmt_rgn_base +
+			       QMGR_QUEUE_STATUS_REG_A(q_num);
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_queue_init);
+
+/*
+ * cppi41_queue_push - push a descriptor into the given queue
+ * @queue_obj:	queue object holding the queue's register base address
+ * @desc_addr:	address of the descriptor, masked with
+ *		QMGR_QUEUE_DESC_PTR_MASK before being written
+ * @desc_size:	descriptor size; encoded from (@desc_size - 24) into the
+ *		descriptor size field
+ * @pkt_size:	packet size; currently unused as the only code reading it
+ *		is compiled out
+ *
+ * Builds the descriptor size/pointer word and writes it to the queue's
+ * register D.
+ */
+void cppi41_queue_push(const struct cppi41_queue_obj *queue_obj, u32 desc_addr,
+		       u32 desc_size, u32 pkt_size)
+{
+	u32 val;
+
+	/*
+	 * Write to the tail of the queue.
+	 * TODO: Can't think of a reason why a queue to head may be required.
+	 * If it is, the API may have to be extended.
+	 */
+#if 0
+	/*
+	 * Also, can't understand why packet size is required to queue up a
+	 * descriptor. The spec says packet size *must* be written prior to
+	 * the packet write operation.
+	 */
+	if (pkt_size)
+		val = (pkt_size << QMGR_QUEUE_PKT_SIZE_SHIFT) &
+		      QMGR_QUEUE_PKT_SIZE_MASK;
+	__raw_writel(val, queue_obj->base_addr + QMGR_QUEUE_REG_C(0));
+#endif
+
+	/*
+	 * NOTE(review): presumably the size field counts 4-byte units above
+	 * the 24-byte minimum, hence the shift by (2 - field shift) -- confirm
+	 * against the queue manager specification.
+	 */
+	val = (((desc_size - 24) >> (2 - QMGR_QUEUE_DESC_SIZE_SHIFT)) &
+	       QMGR_QUEUE_DESC_SIZE_MASK) |
+	      (desc_addr & QMGR_QUEUE_DESC_PTR_MASK);
+
+	pr_debug("Pushing value %x to queue @ %p\n", val, queue_obj->base_addr);
+
+	__raw_writel(val, queue_obj->base_addr + QMGR_QUEUE_REG_D(0));
+}
+EXPORT_SYMBOL(cppi41_queue_push);
+
+/*
+ * cppi41_queue_pop - pop a descriptor from a given queue
+ * @queue_obj:	queue object holding the queue's register base address
+ *
+ * Reads the queue's register D and returns the descriptor pointer part of
+ * the value read.
+ */
+unsigned long cppi41_queue_pop(const struct cppi41_queue_obj *queue_obj)
+{
+	u32 reg_d;
+
+	reg_d = __raw_readl(queue_obj->base_addr + QMGR_QUEUE_REG_D(0));
+	pr_debug("Popping value %x from queue @ %p\n",
+		 reg_d, queue_obj->base_addr);
+
+	/* Only the descriptor pointer bits are of interest to the caller. */
+	return reg_d & QMGR_QUEUE_DESC_PTR_MASK;
+}
+EXPORT_SYMBOL(cppi41_queue_pop);
+
+/*
+ * cppi41_get_teardown_info - extract information from a teardown descriptor
+ * @addr:	DMA address of the descriptor
+ * @info:	where to store the teardown information word
+ *
+ * Finds the DMA block whose teardown descriptor area contains @addr, checks
+ * that the descriptor is of the teardown type, stores its teardown
+ * information word (with the DMA block number ORed in) to @info, then pushes
+ * the descriptor back to that DMA block's teardown descriptor queue.
+ *
+ * Returns 0 on success, -EINVAL if @addr lies within no teardown descriptor
+ * area or the descriptor is not a teardown one.
+ */
+int cppi41_get_teardown_info(unsigned long addr, u32 *info)
+{
+	struct cppi41_teardown_desc *desc;
+	int dma_num;
+
+	/* Look for the DMA block owning this descriptor. */
+	for (dma_num = 0; dma_num < cppi41_num_dma_block; dma_num++)
+		if (addr >= dma_teardown[dma_num].phys_addr &&
+		    addr <  dma_teardown[dma_num].phys_addr +
+			    dma_teardown[dma_num].rgn_size)
+			break;
+
+	if (dma_num == cppi41_num_dma_block)
+		return -EINVAL;
+
+	/* Translate the DMA address to the CPU address within the area. */
+	desc = addr - dma_teardown[dma_num].phys_addr +
+	       dma_teardown[dma_num].virt_addr;
+
+	if ((desc->teardown_info & CPPI41_DESC_TYPE_MASK) !=
+	    (CPPI41_DESC_TYPE_TEARDOWN << CPPI41_DESC_TYPE_SHIFT))
+		return -EINVAL;
+
+	*info = desc->teardown_info;
+#if 1
+	/* Hardware is not giving the current DMA number as of now. :-/ */
+	*info |= (dma_num << CPPI41_TEARDOWN_DMA_NUM_SHIFT) &
+		 CPPI41_TEARDOWN_DMA_NUM_MASK;
+#else
+	dma_num = (desc->teardown_info & CPPI41_TEARDOWN_DMA_NUM_MASK) >>
+		 CPPI41_TEARDOWN_DMA_NUM_SHIFT;
+#endif
+
+	/* Recycle the descriptor so that it can be used for teardown again. */
+	cppi41_queue_push(&dma_teardown[dma_num].queue_obj, addr,
+			  sizeof(struct cppi41_teardown_desc), 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(cppi41_get_teardown_info);
Index: linux-davinci/arch/arm/include/asm/hardware/cppi41.h
===================================================================
--- /dev/null
+++ linux-davinci/arch/arm/include/asm/hardware/cppi41.h
@@ -0,0 +1,718 @@
+/*
+ * CPPI 4.1 definitions
+ *
+ * Copyright (c) 2008-2010, MontaVista Software, Inc. <source at mvista.com>
+ *
+ * Based on the PAL CPPI 4.1 implementation
+ * Copyright (C) 2007, Texas Instruments, Inc. http://www.ti.com/
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/types.h>
+
+/*
+ * Queue Manager - Control Registers Region
+ */
+#define QMGR_REVISION_REG		0x00	/* Major and minor versions */
+						/* of the module */
+#define QMGR_QUEUE_DIVERSION_REG	0x08	/* Queue Diversion register */
+#define QMGR_FREE_DESC_BUF_STARVED_REG(n) (0x20 + ((n) << 2)) /* Free Desc./ */
+						/* Buffer Starvation Count */
+#define QMGR_FREE_DESC_STARVED_REG(n)	(0x30 + ((n) << 2)) /* Free Desc. */
+						/* Starvation Count */
+#define QMGR_LINKING_RAM_RGN0_BASE_REG	0x80	/* Linking RAM Region 0 Base */
+						/* Address */
+#define QMGR_LINKING_RAM_RGN0_SIZE_REG	0x84	/* Linking RAM Region 0 Size */
+#define QMGR_LINKING_RAM_RGN1_BASE_REG	0x88	/* Linking RAM Region 1 Base */
+						/* Address */
+#define QMGR_QUEUE_PENDING_REG(n)	(0x90 + ((n) << 2)) /* Pending status */
+						/* for all queues */
+
+/*
+ * Queue Manager - Memory Region Registers
+ */
+#define QMGR_MEM_RGN_BASE_REG(r)	(0x00 + ((r) << 4))
+#define QMGR_MEM_RGN_CTRL_REG(r)	(0x04 + ((r) << 4))
+
+/* Memory Region R Control Register bits */
+#define QMGR_MEM_RGN_INDEX_SHIFT	16
+#define QMGR_MEM_RGN_INDEX_MASK 	(0x3fff << QMGR_MEM_RGN_INDEX_SHIFT)
+#define QMGR_MEM_RGN_DESC_SIZE_SHIFT	8
+#define QMGR_MEM_RGN_DESC_SIZE_MASK	(0xf << QMGR_MEM_RGN_DESC_SIZE_SHIFT)
+#define QMGR_MEM_RGN_SIZE_SHIFT 	0
+#define QMGR_MEM_RGN_SIZE_MASK		(7 << QMGR_MEM_RGN_SIZE_SHIFT)
+
+/*
+ * Queue Manager - Queues Region
+ */
+#define QMGR_QUEUE_REG_A(n)		(0x00 + ((n) << 4))
+#define QMGR_QUEUE_REG_B(n)		(0x04 + ((n) << 4))
+#define QMGR_QUEUE_REG_C(n)		(0x08 + ((n) << 4))
+#define QMGR_QUEUE_REG_D(n)		(0x0C + ((n) << 4))
+
+/* Queue N Register C bits; HEAD_TAIL is bit 31, hence the unsigned 1U */
+#define QMGR_QUEUE_HEAD_TAIL_SHIFT	31
+#define QMGR_QUEUE_HEAD_TAIL_MASK	(1U << QMGR_QUEUE_HEAD_TAIL_SHIFT)
+#define QMGR_QUEUE_PKT_SIZE_SHIFT	0
+#define QMGR_QUEUE_PKT_SIZE_MASK	(0x3fff << QMGR_QUEUE_PKT_SIZE_SHIFT)
+/* Queue N Register D bits; DESC_PTR reaches bit 31, hence the U suffix */
+#define QMGR_QUEUE_DESC_PTR_SHIFT	5
+#define QMGR_QUEUE_DESC_PTR_MASK (0x7ffffffU << QMGR_QUEUE_DESC_PTR_SHIFT)
+#define QMGR_QUEUE_DESC_SIZE_SHIFT	0
+#define QMGR_QUEUE_DESC_SIZE_MASK	(0x1f << QMGR_QUEUE_DESC_SIZE_SHIFT)
+
+/*
+ * Queue Manager - Queue Status Region
+ */
+#define QMGR_QUEUE_STATUS_REG_A(n)	(0x00 + ((n) << 4))
+#define QMGR_QUEUE_STATUS_REG_B(n)	(0x04 + ((n) << 4))
+#define QMGR_QUEUE_STATUS_REG_C(n)	(0x08 + ((n) << 4))
+
+/*
+ * DMA Controller - Global Control Registers Region
+ */
+#define DMA_REVISION_REG		0x00	/* Major and minor versions */
+						/* of the module */
+#define DMA_TEARDOWN_FREE_DESC_CTRL_REG 0x04	/* Queue  manager and queue */
+						/* number for Teardown free */
+						/* descriptor queue */
+#define DMA_EMULATION_CTRL_REG		0x08	/* Emulation control register */
+
+/* Teardown Free Descriptor Queue Control Register bits */
+#define DMA_TD_DESC_QMGR_SHIFT		12
+#define DMA_TD_DESC_QMGR_MASK		(3 << DMA_TD_DESC_QMGR_SHIFT)
+#define DMA_TD_DESC_QNUM_SHIFT		0
+#define DMA_TD_DESC_QNUM_MASK		(0xfff << DMA_TD_DESC_QNUM_SHIFT)
+
+/*
+ * DMA Controller - Channel Control / Status Registers Region
+ */
+#define DMA_CH_TX_GLOBAL_CFG_REG(n)	 (0x00 + ((n) << 5))
+#define DMA_CH_RX_GLOBAL_CFG_REG(n)	 (0x08 + ((n) << 5))
+#define DMA_CH_RX_HOST_PKT_CFG_REG_A(n)  (0x0C + ((n) << 5))
+#define DMA_CH_RX_HOST_PKT_CFG_REG_B(n)  (0x10 + ((n) << 5))
+#define DMA_CH_RX_EMBED_PKT_CFG_REG_A(n) (0x14 + ((n) << 5))
+#define DMA_CH_RX_EMBED_PKT_CFG_REG_B(n) (0x18 + ((n) << 5))
+#define DMA_CH_RX_MONO_PKT_CFG_REG(n)	 (0x1C + ((n) << 5))
+
+/* Tx Channel N Global Configuration Register bits (bit 31 needs 1U) */
+#define DMA_CH_TX_ENABLE_SHIFT		31
+#define DMA_CH_TX_ENABLE_MASK		(1U << DMA_CH_TX_ENABLE_SHIFT)
+#define DMA_CH_TX_TEARDOWN_SHIFT	30
+#define DMA_CH_TX_TEARDOWN_MASK		(1 << DMA_CH_TX_TEARDOWN_SHIFT)
+#define DMA_CH_TX_DEFAULT_QMGR_SHIFT	12
+#define DMA_CH_TX_DEFAULT_QMGR_MASK	(3 << DMA_CH_TX_DEFAULT_QMGR_SHIFT)
+#define DMA_CH_TX_DEFAULT_QNUM_SHIFT	0
+#define DMA_CH_TX_DEFAULT_QNUM_MASK	(0xfff << DMA_CH_TX_DEFAULT_QNUM_SHIFT)
+
+/* Rx Channel N Global Configuration Register bits (bit 31 needs 1U) */
+#define DMA_CH_RX_ENABLE_SHIFT		31
+#define DMA_CH_RX_ENABLE_MASK		(1U << DMA_CH_RX_ENABLE_SHIFT)
+#define DMA_CH_RX_TEARDOWN_SHIFT	30
+#define DMA_CH_RX_TEARDOWN_MASK		(1 << DMA_CH_RX_TEARDOWN_SHIFT)
+#define DMA_CH_RX_ERROR_HANDLING_SHIFT	24
+#define DMA_CH_RX_ERROR_HANDLING_MASK	(1 << DMA_CH_RX_ERROR_HANDLING_SHIFT)
+#define DMA_CH_RX_SOP_OFFSET_SHIFT	16
+#define DMA_CH_RX_SOP_OFFSET_MASK	(0xff << DMA_CH_RX_SOP_OFFSET_SHIFT)
+#define DMA_CH_RX_DEFAULT_DESC_TYPE_SHIFT 14
+#define DMA_CH_RX_DEFAULT_DESC_TYPE_MASK  (3 << \
+					   DMA_CH_RX_DEFAULT_DESC_TYPE_SHIFT)
+#define DMA_CH_RX_DEFAULT_DESC_EMBED	0
+#define DMA_CH_RX_DEFAULT_DESC_HOST	1
+#define DMA_CH_RX_DEFAULT_DESC_MONO	2
+#define DMA_CH_RX_DEFAULT_RQ_QMGR_SHIFT 12
+#define DMA_CH_RX_DEFAULT_RQ_QMGR_MASK	(3 << DMA_CH_RX_DEFAULT_RQ_QMGR_SHIFT)
+#define DMA_CH_RX_DEFAULT_RQ_QNUM_SHIFT 0
+#define DMA_CH_RX_DEFAULT_RQ_QNUM_MASK	(0xfff << \
+					 DMA_CH_RX_DEFAULT_RQ_QNUM_SHIFT)
+
+/* Rx Channel N Host Packet Configuration Register A/B bits */
+#define DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(n) (12 + 16 * ((n) & 1))
+#define DMA_CH_RX_HOST_FDQ_QMGR_MASK(n)  (3 << DMA_CH_RX_HOST_FDQ_QMGR_SHIFT(n))
+#define DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(n) (0 + 16 * ((n) & 1))
+#define DMA_CH_RX_HOST_FDQ_QNUM_MASK(n)  (0xfff << \
+					  DMA_CH_RX_HOST_FDQ_QNUM_SHIFT(n))
+
+/* Rx Channel N Embedded Packet Cfg. Register A bits (n = 3 hits bit 31) */
+#define DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(n) (6 + 8 * (n))
+#define DMA_CH_RX_EMBED_FBP_BMGR_MASK(n)  (3U << \
+					   DMA_CH_RX_EMBED_FBP_BMGR_SHIFT(n))
+#define DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(n) (0 + 8 * (n))
+#define DMA_CH_RX_EMBED_FBP_PNUM_MASK(n)  (0x1f << \
+					   DMA_CH_RX_EMBED_FBP_PNUM_SHIFT(n))
+
+/* Rx Channel N Embedded Packet Configuration Register B bits */
+#define DMA_CH_RX_EMBED_NUM_SLOT_SHIFT	24
+#define DMA_CH_RX_EMBED_NUM_SLOT_MASK	(7 << DMA_CH_RX_EMBED_NUM_SLOT_SHIFT)
+#define DMA_CH_RX_EMBED_SOP_SLOT_SHIFT	16
+#define DMA_CH_RX_EMBED_SOP_SLOT_MASK	(7 << DMA_CH_RX_EMBED_SOP_SLOT_SHIFT)
+#define DMA_CH_RX_EMBED_FDQ_QMGR_SHIFT	12
+#define DMA_CH_RX_EMBED_FDQ_QMGR_MASK	(3 << DMA_CH_RX_EMBED_FDQ_QMGR_SHIFT)
+#define DMA_CH_RX_EMBED_FDQ_QNUM_SHIFT	0
+#define DMA_CH_RX_EMBED_FDQ_QNUM_MASK	(0xfff << \
+					 DMA_CH_RX_EMBED_FDQ_QNUM_SHIFT)
+
+/* Rx Channel N Monolithic Packet Configuration Register bits */
+#define DMA_CH_RX_MONO_SOP_OFFSET_SHIFT 16
+#define DMA_CH_RX_MONO_SOP_OFFSET_MASK	(0xff << \
+					 DMA_CH_RX_MONO_SOP_OFFSET_SHIFT)
+#define DMA_CH_RX_MONO_FDQ_QMGR_SHIFT	12
+#define DMA_CH_RX_MONO_FDQ_QMGR_MASK	(3 << DMA_CH_RX_MONO_FDQ_QMGR_SHIFT)
+#define DMA_CH_RX_MONO_FDQ_QNUM_SHIFT	0
+#define DMA_CH_RX_MONO_FDQ_QNUM_MASK	(0xfff << DMA_CH_RX_MONO_FDQ_QNUM_SHIFT)
+
+/*
+ * DMA Scheduler - Control Region
+ */
+#define DMA_SCHED_CTRL_REG		0x00
+
+/* DMA Scheduler Control Register bits (bit 31 needs 1U) */
+#define DMA_SCHED_ENABLE_SHIFT		31
+#define DMA_SCHED_ENABLE_MASK		(1U << DMA_SCHED_ENABLE_SHIFT)
+#define DMA_SCHED_LAST_ENTRY_SHIFT	0
+#define DMA_SCHED_LAST_ENTRY_MASK	(0xff << DMA_SCHED_LAST_ENTRY_SHIFT)
+
+/*
+ * DMA Scheduler - Table Region
+ */
+#define DMA_SCHED_TABLE_WORD_REG(n)	((n) << 2)
+
+/*
+ * CPPI 4.1 Host Packet Descriptor
+ */
+struct cppi41_host_pkt_desc {
+	u32 desc_info;		/* Descriptor type, protocol specific word */
+				/* count, packet length */
+	u32 tag_info;		/* Source tag (31:16), destination tag (15:0) */
+	u32 pkt_info;		/* Packet error state, type, protocol flags, */
+				/* return info, descriptor location */
+	u32 buf_len;		/* Number of valid data bytes in the buffer */
+	u32 buf_ptr;		/* Pointer to the buffer associated with */
+				/* this descriptor */
+	u32 next_desc_ptr;	/* Pointer to the next buffer descriptor */
+	u32 orig_buf_len;	/* Original buffer length */
+	u32 orig_buf_ptr;	/* Original buffer pointer */
+	u32 stk_comms_info[2];	/* Network stack private communications info */
+};
+
+/*
+ * CPPI 4.1 Host Buffer Descriptor
+ */
+struct cppi41_host_buf_desc {
+	u32 reserved[2];
+	u32 buf_recl_info;	/* Return info, descriptor location */
+	u32 buf_len;		/* Number of valid data bytes in the buffer */
+	u32 buf_ptr;		/* Pointer to the buffer associated with */
+				/* this descriptor */
+	u32 next_desc_ptr;	/* Pointer to the next buffer descriptor */
+	u32 orig_buf_len;	/* Original buffer length */
+	u32 orig_buf_ptr;	/* Original buffer pointer */
+};
+
+#define CPPI41_DESC_TYPE_SHIFT		27	/* desc_info word bits */
+#define CPPI41_DESC_TYPE_MASK		(0x1fU << CPPI41_DESC_TYPE_SHIFT)
+#define CPPI41_DESC_TYPE_HOST		16
+#define CPPI41_DESC_TYPE_MONOLITHIC	18
+#define CPPI41_DESC_TYPE_TEARDOWN	19
+#define CPPI41_PROT_VALID_WORD_CNT_SHIFT 22
+#define CPPI41_PROT_VALID_WORD_CNT_MASK (0x1f << CPPI41_PROT_VALID_WORD_CNT_SHIFT)
+#define CPPI41_PKT_LEN_SHIFT		0
+#define CPPI41_PKT_LEN_MASK		(0x1fffff << CPPI41_PKT_LEN_SHIFT)
+
+#define CPPI41_PKT_ERROR_SHIFT		31	/* bit 31: mask needs 1U */
+#define CPPI41_PKT_ERROR_MASK		(1U << CPPI41_PKT_ERROR_SHIFT)
+#define CPPI41_PKT_TYPE_SHIFT		26
+#define CPPI41_PKT_TYPE_MASK		(0x1f << CPPI41_PKT_TYPE_SHIFT)
+#define CPPI41_PKT_TYPE_ATM_AAL5	0
+#define CPPI41_PKT_TYPE_ATM_NULL_AAL	1
+#define CPPI41_PKT_TYPE_ATM_OAM		2
+#define CPPI41_PKT_TYPE_ATM_TRANSPARENT	3
+#define CPPI41_PKT_TYPE_EFM		4
+#define CPPI41_PKT_TYPE_USB		5
+#define CPPI41_PKT_TYPE_GENERIC		6
+#define CPPI41_PKT_TYPE_ETHERNET 	7
+#define CPPI41_RETURN_POLICY_SHIFT	15
+#define CPPI41_RETURN_POLICY_MASK 	(1 << CPPI41_RETURN_POLICY_SHIFT)
+#define CPPI41_RETURN_LINKED		0
+#define CPPI41_RETURN_UNLINKED		1
+#define CPPI41_ONCHIP_SHIFT		14
+#define CPPI41_ONCHIP_MASK		(1 << CPPI41_ONCHIP_SHIFT)
+#define CPPI41_RETURN_QMGR_SHIFT 	12
+#define CPPI41_RETURN_QMGR_MASK		(3 << CPPI41_RETURN_QMGR_SHIFT)
+#define CPPI41_RETURN_QNUM_SHIFT 	0
+#define CPPI41_RETURN_QNUM_MASK		(0xfff << CPPI41_RETURN_QNUM_SHIFT)
+
+#define CPPI41_SRC_TAG_PORT_NUM_SHIFT	27
+#define CPPI41_SRC_TAG_PORT_NUM_MASK	(0x1f << CPPI41_SRC_TAG_PORT_NUM_SHIFT)
+#define CPPI41_SRC_TAG_CH_NUM_SHIFT	21
+#define CPPI41_SRC_TAG_CH_NUM_MASK	(0x3f << CPPI41_SRC_TAG_CH_NUM_SHIFT)
+#define CPPI41_SRC_TAG_SUB_CH_NUM_SHIFT 16
+#define CPPI41_SRC_TAG_SUB_CH_NUM_MASK	(0x1f << \
+					CPPI41_SRC_TAG_SUB_CH_NUM_SHIFT)
+#define CPPI41_DEST_TAG_SHIFT		0
+#define CPPI41_DEST_TAG_MASK		(0xffff << CPPI41_DEST_TAG_SHIFT)
+
+/*
+ * CPPI 4.1 Teardown Descriptor
+ */
+struct cppi41_teardown_desc {
+	u32 teardown_info;	/* Teardown information */
+	u32 reserved[7];	/* 28 byte padding */
+};
+
+#define CPPI41_TEARDOWN_TX_RX_SHIFT	16
+#define CPPI41_TEARDOWN_TX_RX_MASK	(1 << CPPI41_TEARDOWN_TX_RX_SHIFT)
+#define CPPI41_TEARDOWN_DMA_NUM_SHIFT	10
+#define CPPI41_TEARDOWN_DMA_NUM_MASK	(0x3f << CPPI41_TEARDOWN_DMA_NUM_SHIFT)
+#define CPPI41_TEARDOWN_CHAN_NUM_SHIFT	0
+#define CPPI41_TEARDOWN_CHAN_NUM_MASK	(0x3f << CPPI41_TEARDOWN_CHAN_NUM_SHIFT)
+
+#define CPPI41_MAX_MEM_RGN		16
+
+/* CPPI 4.1 configuration for DA8xx */
+#define CPPI41_NUM_QUEUE_MGR		1	/* 4  max */
+#define CPPI41_NUM_DMA_BLOCK		4	/* 64 max */
+
+/**
+ * struct cppi41_queue - Queue Tuple
+ *
+ * The basic queue tuple in CPPI 4.1 used across all data structures
+ * where a definition of a queue is required.
+ */
+struct cppi41_queue {
+	u8  q_mgr;		/* The queue manager number */
+	u16 q_num;		/* The queue number */
+};
+
+/**
+ * struct cppi41_buf_pool - Buffer Pool Tuple
+ *
+ * The basic buffer pool tuple in CPPI 4.1 used across all data structures
+ * where a definition of a buffer pool is required.
+ */
+struct cppi41_buf_pool {
+	u8  b_mgr;		/* The buffer manager number */
+	u16 b_pool;		/* The buffer pool number */
+};
+
+/**
+ * struct cppi41_queue_mgr - Queue Manager information
+ *
+ * Contains the information about the queue manager which should be copied from
+ * the hardware spec as is.
+ */
+struct cppi41_queue_mgr {
+	void __iomem *q_mgr_rgn_base; /* Base address of the Control region. */
+	void __iomem *desc_mem_rgn_base; /* Base address of the descriptor */
+				/* memory region. */
+	void __iomem *q_mgmt_rgn_base; /* Base address of the queues region. */
+	void __iomem *q_stat_rgn_base; /* Base address of the queue status */
+				/* region. */
+	u16 num_queue;		/* Number of the queues supported. */
+	u8  queue_types; 	/* Bitmask of the supported queue types. */
+	u16 base_fdq_num;	/* The base free descriptor queue number. */
+				/* If present, there's always 16 such queues. */
+	u16 base_fdbq_num;	/* The base free descriptor/buffer queue */
+				/* number.  If present, there's always 16 */
+				/* such queues. */
+	const unsigned long *assigned; /* Pointer to the bitmask of the */
+				/* pre-assigned queues. */
+};
+
+/* Queue type flags */
+#define CPPI41_FREE_DESC_QUEUE		0x01
+#define CPPI41_FREE_DESC_BUF_QUEUE	0x02
+#define CPPI41_UNASSIGNED_QUEUE 	0x04
+
+/**
+ * struct cppi41_embed_pkt_cfg - Rx Channel Embedded packet configuration
+ *
+ * An instance of this structure forms part of the Rx channel information
+ * structure.
+ */
+struct cppi41_embed_pkt_cfg {
+	struct cppi41_queue fd_queue; /* Free Descriptor queue.*/
+	u8 num_buf_slot;	/* Number of buffer slots in the descriptor */
+	u8 sop_slot_num;	/* SOP buffer slot number. */
+	struct cppi41_buf_pool free_buf_pool[4]; /* Free Buffer pool. Element */
+				/* 0 used for the 1st Rx buffer, etc. */
+};
+
+/**
+ * struct cppi41_host_pkt_cfg - Rx Channel Host Packet Configuration
+ *
+ * An instance of this structure forms part of the Rx channel information
+ * structure.
+ */
+struct cppi41_host_pkt_cfg {
+	struct cppi41_queue fdb_queue[4]; /* Free Desc/Buffer queue. Element */
+				/* 0 used for 1st Rx buffer, etc. */
+};
+
+/**
+ * struct cppi41_mono_pkt_cfg - Rx Channel Monolithic Packet Configuration
+ *
+ * An instance of this structure forms part of the Rx channel information
+ * structure.
+ */
+struct cppi41_mono_pkt_cfg {
+	struct cppi41_queue fd_queue; /* Free descriptor queue */
+	u8 sop_offset;		/* Number of bytes to skip before writing */
+				/* payload */
+};
+
+enum cppi41_rx_desc_type {
+	cppi41_rx_embed_desc,
+	cppi41_rx_host_desc,
+	cppi41_rx_mono_desc,
+};
+
+/**
+ * struct cppi41_rx_ch_cfg - Rx Channel Configuration
+ *
+ * Must be allocated and filled by the caller of cppi41_rx_ch_configure().
+ *
+ * The same channel can be configured to receive different descriptor type
+ * packets (not simultaneously). When the Rx packets on a port need to be sent
+ * to the SR, the channel's default descriptor type is set to Embedded and the
+ * Rx completion queue is set to the queue which CPU polls for input packets.
+ * When in SR bypass mode, the same channel's default descriptor type will be
+ * set to Host and the Rx completion queue set to one of the queues which host
+ * can get interrupted on (via the Queuing proxy/accumulator). In this example,
+ * the embedded mode configuration fetches free descriptor from the Free
+ * descriptor queue (as defined by struct cppi41_embed_pkt_cfg) and host
+ * mode configuration fetches free descriptors/buffers from the free descriptor/
+ * buffer queue (as defined by struct cppi41_host_pkt_cfg).
+ *
+ * NOTE: There seems to be no separate configuration for teardown completion
+ * descriptor. The assumption is that rx_queue is used for this purpose as well.
+ */
+struct cppi41_rx_ch_cfg {
+	enum cppi41_rx_desc_type default_desc_type; /* Describes which queue */
+				/* configuration is used for the free */
+				/* descriptors and/or buffers */
+	u8 sop_offset;		/* Number of bytes to skip in SOP buffer */
+				/* before writing payload */
+	u8 retry_starved;	/* 0 = Drop packet on descriptor/buffer */
+				/* starvation, 1 = DMA retries FIFO block */
+				/* transfer at a later time */
+	struct cppi41_queue rx_queue; /* Rx complete packets queue */
+	union {
+		struct cppi41_host_pkt_cfg host_pkt; /* Host packet */
+				/* configuration. This defines where channel */
+				/* picks free descriptors from. */
+		struct cppi41_embed_pkt_cfg embed_pkt; /* Embedded packet */
+				/* configuration. This defines where */
+				/* the channel picks free descriptors and */
+				/* buffers from. */
+		struct cppi41_mono_pkt_cfg mono_pkt; /* Monolithic packet */
+				/* configuration. This defines where channel */
+				/* picks free descriptors from. */
+	} cfg;			/* Union of packet configuration structures */
+				/* to be filled in depending on the */
+				/* default_desc_type field. */
+};
+
+/**
+ * struct cppi41_tx_ch - Tx channel information
+ *
+ * NOTE: The queues that feed into the Tx channel are fixed at SoC design time.
+ */
+struct cppi41_tx_ch {
+	u8 port_num;		/* Port number. */
+	u8 ch_num;		/* Channel number within port. */
+	u8 sub_ch_num;		/* Sub-channel number within channel. */
+	u8 num_tx_queue;	/* Number of queues from which the channel */
+				/* can feed. */
+	struct cppi41_queue tx_queue[4]; /* List of queues from which the */
+				/* channel can feed. */
+};
+
+/**
+ * struct cppi41_dma_block - CPPI 4.1 DMA configuration
+ *
+ * Configuration information for CPPI DMA functionality. Includes the Global
+ * configuration, Channel configuration, and the Scheduler configuration.
+ */
+struct cppi41_dma_block {
+	void __iomem *global_ctrl_base; /* Base address of the Global Control */
+				/* registers. */
+	void __iomem *ch_ctrl_stat_base; /* Base address of the Channel */
+				/* Control/Status registers. */
+	void __iomem *sched_ctrl_base; /* Base address of the Scheduler */
+				/* Control register. */
+	void __iomem *sched_table_base; /* Base address of the Scheduler */
+				/* Table registers. */
+	u8 num_tx_ch;		/* Number of the Tx channels. */
+	u8 num_rx_ch;		/* Number of the Rx channels. */
+	const struct cppi41_tx_ch *tx_ch_info;
+};
+
+extern const struct cppi41_queue_mgr cppi41_queue_mgr[];
+extern const struct cppi41_dma_block cppi41_dma_block[];
+extern const u8 cppi41_num_queue_mgr;
+extern const u8 cppi41_num_dma_block;
+
+/**
+ * struct cppi41_dma_ch_obj - CPPI 4.1 DMA Channel object
+ */
+struct cppi41_dma_ch_obj {
+	void __iomem *base_addr; /* The address of the channel global */
+				/* configuration register */
+	u32 global_cfg;		/* Tx/Rx global configuration backed-up value */
+};
+
+/**
+ * struct cppi41_queue_obj - CPPI 4.1 queue object
+ */
+struct cppi41_queue_obj {
+	void __iomem *base_addr; /* The base address of the queue management */
+				/* registers */
+};
+
+/**
+ * cppi41_queue_mgr_init - CPPI 4.1 queue manager initialization.
+ * @q_mgr:	the queue manager to initialize
+ * @rgn0_base:	linking RAM region 0 physical address
+ * @rgn0_size:	linking RAM region 0 size in 32-bit words (0 to 0x3fff)
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int cppi41_queue_mgr_init(u8 q_mgr, dma_addr_t rgn0_base, u16 rgn0_size);
+
+/*
+ * CPPI 4.1 Queue Manager Memory Region Allocation and De-allocation APIs.
+ */
+
+/**
+ * cppi41_mem_rgn_alloc - CPPI 4.1 queue manager memory region allocation.
+ * @q_mgr:	the queue manager whose memory region to allocate
+ * @rgn_addr:	physical address of the memory region
+ * @size_order:	descriptor size as a power of two (between 5 and 13)
+ * @num_order:	number of descriptors as a power of two (between 5 and 12)
+ * @mem_rgn:	pointer to the index of the memory region allocated
+ *
+ * This function allocates a memory region within the queue manager
+ * consisting of descriptors of a particular size and number.
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int cppi41_mem_rgn_alloc(u8 q_mgr, dma_addr_t rgn_addr, u8 size_order,
+			 u8 num_order, u8 *mem_rgn);
+
+/**
+ * cppi41_mem_rgn_free - CPPI 4.1 queue manager memory region de-allocation.
+ * @q_mgr:	the queue manager whose memory region was allocated
+ * @mem_rgn:	index of the memory region
+ *
+ * This function frees the memory region allocated by cppi41_mem_rgn_alloc().
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+int cppi41_mem_rgn_free(u8 q_mgr, u8 mem_rgn);
+
+/**
+ * cppi41_dma_ctrlr_init - CPPI 4.1 DMA controller initialization.
+ * @dma_num:	number of the DMA block
+ * @q_mgr:	the queue manager in which to allocate the free teardown
+ *		descriptor queue
+ * @num_order:	number of teardown descriptors as a power of two (at least 5)
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int cppi41_dma_ctrlr_init(u8 dma_num, u8 q_mgr, u8 num_order);
+
+/**
+ * cppi41_dma_sched_init - CPPI 4.1 DMA scheduler initialization.
+ * @dma_num:	number of the DMA block
+ * @sched_tbl:	the DMA scheduler table
+ * @tbl_size:	number of entries in the DMA scheduler table
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int cppi41_dma_sched_init(u8 dma_num, const u8 *sched_tbl, u16 tbl_size);
+
+/*
+ * CPPI 4.1 DMA Channel Management APIs
+ */
+
+/**
+ * cppi41_tx_ch_init - initialize CPPI 4.1 transmit channel object
+ * @tx_ch_obj:	pointer to Tx channel object
+ * @dma_num:	DMA block to which this channel belongs
+ * @ch_num:	DMA channel number
+ *
+ * Returns 0 if valid Tx channel, -EINVAL otherwise.
+ */
+int cppi41_tx_ch_init(struct cppi41_dma_ch_obj *tx_ch_obj,
+		      u8 dma_num, u8 ch_num);
+
+/**
+ * cppi41_rx_ch_init - initialize CPPI 4.1 receive channel object
+ * @rx_ch_obj:	pointer to Rx channel object
+ * @dma_num:	DMA block to which this channel belongs
+ * @ch_num:	DMA channel number
+ *
+ * Returns 0 if valid Rx channel, -EINVAL otherwise.
+ */
+int cppi41_rx_ch_init(struct cppi41_dma_ch_obj *rx_ch_obj,
+		      u8 dma_num, u8 ch_num);
+
+/**
+ * cppi41_dma_ch_default_queue - set CPPI 4.1 channel default completion queue
+ * @dma_ch_obj: pointer to DMA channel object
+ * @q_mgr:	default queue manager
+ * @q_num:	default queue number
+ *
+ * This function configures the specified channel.  The caller is required to
+ * provide the default queue onto which the teardown descriptors will be queued.
+ */
+void cppi41_dma_ch_default_queue(struct cppi41_dma_ch_obj *dma_ch_obj,
+				 u8 q_mgr, u16 q_num);
+
+/**
+ * cppi41_rx_ch_configure - configure CPPI 4.1 receive channel
+ * @rx_ch_obj:	pointer to Rx channel object
+ * @cfg:	pointer to Rx channel configuration
+ *
+ * This function configures and opens the specified Rx channel.  The caller
+ * is required to provide channel configuration information by initializing
+ * a struct cppi41_rx_ch_cfg.
+ */
+void cppi41_rx_ch_configure(struct cppi41_dma_ch_obj *rx_ch_obj,
+			    struct cppi41_rx_ch_cfg  *cfg);
+
+/**
+ * cppi41_dma_ch_enable - enable CPPI 4.1 Tx/Rx DMA channel
+ * @dma_ch_obj:	pointer to DMA channel object
+ *
+ * This function enables the specified Tx/Rx channel.  The caller is required
+ * to provide a reference to a channel object initialized by an earlier call of
+ * cppi41_tx_ch_init() or cppi41_rx_ch_init().  After the successful completion
+ * of this function, the DMA channel will be active and ready to transfer data.
+ */
+void cppi41_dma_ch_enable(struct cppi41_dma_ch_obj *dma_ch_obj);
+
+/**
+ * cppi41_dma_ch_disable - disable CPPI 4.1 Tx/Rx DMA channel
+ * @dma_ch_obj:	pointer to DMA channel object
+ *
+ * This function disables the specified Tx/Rx channel.  The caller is required
+ * to provide a reference to a channel object initialized by an earlier call of
+ * cppi41_tx_ch_init() or cppi41_rx_ch_init().  After the successful completion
+ * of this function, the DMA channel will be deactivated.
+ */
+void cppi41_dma_ch_disable(struct cppi41_dma_ch_obj *dma_ch_obj);
+
+/**
+ * cppi41_dma_ch_teardown - tear down CPPI 4.1 DMA channel
+ * @dma_ch_obj:	pointer to DMA channel object
+ *
+ * This function triggers the teardown of the given DMA channel.
+ *
+ * ATTENTION: Channel disable should not be called before the teardown is
+ * completed as a disable will stop the DMA scheduling on the channel resulting
+ * in the teardown complete event not being registered at all.
+ *
+ * NOTE: A successful channel teardown event is reported via queueing of a
+ * teardown descriptor.
+ *
+ * This function just sets up for the teardown of the channel and returns. The
+ * caller must detect the channel teardown event to assume that the channel is
+ * disabled.
+ *
+ * See cppi41_get_teardown_info() for the teardown completion processing.
+ */
+void cppi41_dma_ch_teardown(struct cppi41_dma_ch_obj *dma_ch_obj);
+
+/*
+ * CPPI 4.1 Queue Allocation and De-allocation APIs.
+ */
+
+/**
+ * cppi41_queue_alloc - allocate CPPI 4.1 queue
+ * @type:	queue type bitmask
+ * @q_mgr:	queue manager
+ * @q_num:	pointer to the queue number
+ *
+ * Returns 0 if queue allocated, error otherwise.
+ */
+int cppi41_queue_alloc(u8 type, u8 q_mgr, u16 *q_num);
+
+/**
+ * cppi41_queue_free - de-allocate CPPI 4.1 queue
+ * @q_mgr:	queue manager
+ * @q_num:	queue number
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+int cppi41_queue_free(u8 q_mgr, u16 q_num);
+
+/*
+ *  CPPI 4.1 Queue Management APIs
+ */
+
+/**
+ * cppi41_queue_init - initialize CPPI 4.1 queue object
+ * @queue_obj:	pointer to the queue object
+ * @q_mgr:	queue manager
+ * @q_num:	queue number
+ *
+ * Returns 0 if valid queue, -EINVAL otherwise.
+ */
+int cppi41_queue_init(struct cppi41_queue_obj *queue_obj, u8 q_mgr, u16 q_num);
+
+/**
+ * cppi41_queue_push - push to CPPI 4.1 queue
+ * @queue_obj:	pointer to the queue object
+ * @desc_addr:	descriptor physical address
+ * @desc_size:	descriptor size
+ * @pkt_size:	packet size
+ *
+ * This function is called to queue a descriptor onto a queue.
+ * NOTE: the @pkt_size parameter is optional; pass 0 if it is not required.
+ */
+void cppi41_queue_push(const struct cppi41_queue_obj *queue_obj, u32 desc_addr,
+		       u32 desc_size, u32 pkt_size);
+
+/**
+ * cppi41_queue_pop - pop from CPPI 4.1 queue
+ * @queue_obj:	pointer to the queue object
+ *
+ * This function is called to pop a single descriptor from the queue.
+ *
+ * Returns a packet descriptor's physical address.
+ */
+unsigned long cppi41_queue_pop(const struct cppi41_queue_obj *queue_obj);
+
+/*
+ * CPPI 4.1 Miscellaneous APIs
+ */
+
+/**
+ * cppi41_get_teardown_info - CPPI 4.1 teardown completion processing function
+ *
+ * @addr:	physical address of teardown descriptor
+ * @info:	pointer to the teardown information word
+ *
+ * This function is called to complete the teardown processing on a channel
+ * and provides teardown information from the teardown descriptor passed to it.
+ * It also recycles the teardown descriptor back to the teardown descriptor
+ * queue.
+ *
+ * Returns 0 if valid descriptor, -EINVAL otherwise.
+ */
+int cppi41_get_teardown_info(unsigned long addr, u32 *info);