DMA engine acceleration HOWTO
Jon Masters
jonathan at jonmasters.org
Wed Dec 7 07:40:55 EST 2005
Hi folks,
As part of some work I'm doing for Liberte Software (not finished/released yet),
here's a quick hacking guide to enabling an SoC DMA engine in your NAND driver.
In this case, I am using a ppc405 core and am programming the DMA controller
inline since we know the Linux ppc4xx_dma code is unworkable.
We found that adding this support massively reduced kernel overhead during
MTD operations, because the system is now free to do other processing work
while a transfer is in progress - we're down to about 30% overhead now when
running YAFFS2 and doing continuous reads/writes to very large MTD devices.
MTD uses read_buf and write_buf routines for talking to the flash. We
create DMA versions thereof:
/* DMA-backed versions of the MTD write_buf/read_buf flash transfer routines. */
static void mynand_write_buf(struct mtd_info *mtd, const u_char *buf, int len);
static void mynand_read_buf(struct mtd_info *mtd, u_char *buf, int len);
And an IRQ handler:
/* DMA interrupt handler; clears the channel status and signals completion. */
static void mynand_dma_irq(int irq, void *this, struct pt_regs *regs);
Then we wire up the whole process using a completion and some #defines:
/* DMA stuff */
#define MYNAND_USE_DMA 1 /* Use DMA */
#define MYNAND_DMACH 2 /* Use DMA channel 2 */
/*
 * Interrupt number passed to request_irq()/free_irq() for the DMA channel.
 * NOTE(review): the original remark below is unsure whether channel 2 is
 * IRQ 6 or IRQ 7 - confirm against the SoC interrupt map.
 */
#define MYNAND_DMA_INT 7 /* Use DMA channel 2 (IRQ 6???) */
/* Size in bytes of the DMA bounce buffer allocated at init time. */
#define MYNAND_DMA_BUFSIZE 4096 /* 1 page */
/* NOTE(review): dma_ch is not referenced by any of the code shown here. */
ppc_dma_ch_t dma_ch;
/* Address of the bounce buffer as programmed into the DMA address registers. */
static dma_addr_t mynand_dma_addr;
/* CPU pointer to the same bounce buffer, used for memcpy() staging. */
static u_char *mynand_dma_buf;
/* Signalled by mynand_dma_irq(); the read/write buffer routines wait on it. */
DECLARE_COMPLETION(mynand_dma_completion);
Then we need to add some code to our init function to allocate an IRQ and a
suitable DMA bounce buffer:
if (request_irq(MYNAND_DMA_INT, &mynand_dma_irq, SA_INTERRUPT,
"mynand_dma_irq", this)) {
printk("mynand_init: request_irq failed int=%d\n",
MYNAND_DMA_INT);
return -1;
}
mynand_dma_buf = consistent_alloc(GFP_KERNEL, MYNAND_DMA_BUFSIZE,
&mynand_dma_addr);
if (NULL == mynand_dma_buf) {
printk("mynand_init: ouch. memory problem! FIXME!\n");
free_irq(MYNAND_DMA_INT, this);
return -1;
} else {
memset(mynand_dma_buf, 0, MYNAND_DMA_BUFSIZE);
}
Here are the functions (for example, conditionally selected by MYNAND_USE_DMA).
These are for ppc4xx systems and make all kinds of assumptions, but they should
serve as an example:
static void mynand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
{
int i;
struct nand_chip *this = mtd->priv;
unsigned long IO_ADDR_DMA_W = this->IO_ADDR_W-(unsigned long)mynand_base_logical+mynand_base_physical;
//printk("mynand_write_buf: %d bytes.\n",len);
memcpy(mynand_dma_buf, buf, len);
mtdcr(DCRN_DMACR2, 0);
mtdcr(DCRN_DMACR2, SET_DMA_PW(PW_8)); /* 1. transfer is 8 bit */
mtdcr(DCRN_DMASA2, mynand_dma_addr); /* 2. Source Address */
mtdcr(DCRN_DMADA2, IO_ADDR_DMA_W); /* 2. Destination Address */
mtdcr(DCRN_DMACT2, len); /* 3. Set count */
mtdcr(DCRN_DMASR, DMA_CS2|DMA_TS2|DMA_CH2_ERR);
/* 4. Clear status */
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_SAI(1));
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_DAI(0));
/* 5. Destination Increment */
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_TM(TM_S_MM));
/* 5. Software memory-to-memory */ mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_CIE_ENABLE(1));
/* 5. Interrupt enable */
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_PL(EXTERNAL_PERIPHERAL));
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_ETD(1)); /* 1 req. */
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_TCE(1)); /* 1 req. */
//printk("DMA Configuration:\n\n");
//printk("DCRN_DMACR2=0x%08x\n", mfdcr(DCRN_DMACR2));
//printk("DCRN_DMASA2=0x%08x\n", mfdcr(DCRN_DMASA2));
//printk("DCRN_DMADA2=0x%08x\n", mfdcr(DCRN_DMADA2));
//printk("DCRN_DMACT2=0x%08x\n", mfdcr(DCRN_DMACT2));
//printk("DCRN_DMASR=0x%08x\n", mfdcr(DCRN_DMASR));
/* enable DMA */
mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_CE_ENABLE(1));
/* 5. Channel enable */
wait_for_completion(&mynand_dma_completion);
//for (i=0; i<len;i++)
// buf[i] = readb(this->IO_ADDR_R);
}
/*
 * mynand_read_buf - read a buffer from the NAND chip using DMA channel 2.
 * @mtd: MTD device; mtd->priv is the struct nand_chip being driven
 * @buf: destination for the data read from the chip
 * @len: number of bytes to read
 *
 * The DMA engine copies from the chip's read address (source fixed,
 * destination incrementing) into the mynand_dma_buf bounce buffer, and the
 * result is then copied out to @buf.  The caller blocks until
 * mynand_dma_irq() signals completion.
 *
 * The bounce buffer holds MYNAND_DMA_BUFSIZE bytes, so a longer request is
 * split into several back-to-back transfers rather than letting the engine
 * run past the end of it.  Nothing is programmed when @len is zero or
 * negative.
 */
static void mynand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
{
	struct nand_chip *this = mtd->priv;
	/* Rebase the read address from mynand_base_logical onto mynand_base_physical. */
	unsigned long IO_ADDR_DMA_R = this->IO_ADDR_R-(unsigned long)mynand_base_logical+mynand_base_physical;

	while (len > 0) {
		/* Never ask the engine for more than the bounce buffer can hold. */
		int chunk = len;

		if (chunk > MYNAND_DMA_BUFSIZE)
			chunk = MYNAND_DMA_BUFSIZE;

		mtdcr(DCRN_DMACR2, 0);
		mtdcr(DCRN_DMACR2, SET_DMA_PW(PW_8)); /* 1. transfer is 8 bit */
		mtdcr(DCRN_DMASA2, IO_ADDR_DMA_R); /* 2. Source Address */
		mtdcr(DCRN_DMADA2, mynand_dma_addr); /* 2. Destination Address */
		mtdcr(DCRN_DMACT2, chunk); /* 3. Set count */
		/* 4. Clear status */
		mtdcr(DCRN_DMASR, DMA_CS2|DMA_TS2|DMA_CH2_ERR);
		/* 5. Source stays put, destination increments through the buffer */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_SAI(0));
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_DAI(1));
		/* 5. Software memory-to-memory */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_TM(TM_S_MM));
		/* 5. Interrupt enable */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_CIE_ENABLE(1));
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_PL(EXTERNAL_PERIPHERAL));
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_ETD(1)); /* 1 req. */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_TCE(1)); /* 1 req. */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_BEN(1));

		/* 5. Channel enable - this starts the transfer */
		mtdcr(DCRN_DMACR2, mfdcr(DCRN_DMACR2) | SET_DMA_CE_ENABLE(1));

		/* Sleep until mynand_dma_irq() reports the transfer is done. */
		wait_for_completion(&mynand_dma_completion);

		/* Hand the freshly transferred bytes back to the caller. */
		memcpy(buf, mynand_dma_buf, chunk);

		buf += chunk;
		len -= chunk;
	}
}
/*
 * mynand_dma_irq - interrupt handler registered for MYNAND_DMA_INT.
 * @irq:  interrupt number (unused)
 * @this: cookie supplied to request_irq() (unused)
 * @regs: saved register state (unused)
 *
 * Zeroes the channel 2 status bits, then wakes whichever buffer routine is
 * waiting on mynand_dma_completion.
 */
static void mynand_dma_irq(int irq, void *this, struct pt_regs *regs)
{
	/* Acknowledge first: zero the channel 2 status bits. */
	mtdcr(DCRN_DMASR, DMA_CS2 | DMA_TS2 | DMA_CH2_ERR);

	/* Then release the waiter. */
	complete(&mynand_dma_completion);
}
Enjoy!
Jon.
More information about the linux-mtd
mailing list