[PATCH 03/10] mtd: spi-nor: aspeed: add DMA support

Cyrille Pitchen cyrille.pitchen at wedev4u.fr
Thu Apr 6 12:13:39 PDT 2017


Le 06/04/2017 à 21:07, Cyrille Pitchen a écrit :
> Hi Cédric,
> 
> Le 06/04/2017 à 18:56, Cédric Le Goater a écrit :
>> The Aspeed FMC controller can handle transfers to the flash modules
>> using DMAs. A couple of registers first need to be programmed with the
>> DRAM and flash addresses and the length of the transfer. The transfer
>> is then initiated using a DMA control register and an interrupt
>> notifies the completion.
>>
>> Such transfers can replace the current IO mode in the read/write ops
>> when some conditions are met on the size and the alignment. In case of
>> failure, a timeout for instance, the operation is restarted using the
>> IO mode.
>>
>> DMA support does not seem to be that efficient. So we provide some
>> sysfs files for tuning and to switch it on and off (default is off)
>>
>> Signed-off-by: Cédric Le Goater <clg at kaod.org>
>> ---
>>  drivers/mtd/spi-nor/aspeed-smc.c | 231 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 231 insertions(+)
>>
>> diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
>> index a98d454d07ed..7dfa1ea0a787 100644
>> --- a/drivers/mtd/spi-nor/aspeed-smc.c
>> +++ b/drivers/mtd/spi-nor/aspeed-smc.c
>> @@ -11,6 +11,8 @@
>>  
>>  #include <linux/bug.h>
>>  #include <linux/device.h>
>> +#include <linux/dma-direction.h>
>> +#include <linux/dma-mapping.h>
>>  #include <linux/io.h>
>>  #include <linux/module.h>
>>  #include <linux/mutex.h>
>> @@ -40,6 +42,7 @@ struct aspeed_smc_info {
>>  	bool hastype;		/* flash type field exists in config reg */
>>  	u8 we0;			/* shift for write enable bit for CE0 */
>>  	u8 ctl0;		/* offset in regs of ctl for CE0 */
>> +	bool has_dma;
>>  
>>  	void (*set_4b)(struct aspeed_smc_chip *chip);
>>  };
>> @@ -53,6 +56,7 @@ static const struct aspeed_smc_info fmc_2400_info = {
>>  	.hastype = true,
>>  	.we0 = 16,
>>  	.ctl0 = 0x10,
>> +	.has_dma = true,
>>  	.set_4b = aspeed_smc_chip_set_4b,
>>  };
>>  
>> @@ -62,6 +66,7 @@ static const struct aspeed_smc_info spi_2400_info = {
>>  	.hastype = false,
>>  	.we0 = 0,
>>  	.ctl0 = 0x04,
>> +	.has_dma = false,
>>  	.set_4b = aspeed_smc_chip_set_4b_spi_2400,
>>  };
>>  
>> @@ -71,6 +76,7 @@ static const struct aspeed_smc_info fmc_2500_info = {
>>  	.hastype = true,
>>  	.we0 = 16,
>>  	.ctl0 = 0x10,
>> +	.has_dma = true,
>>  	.set_4b = aspeed_smc_chip_set_4b,
>>  };
>>  
>> @@ -80,6 +86,7 @@ static const struct aspeed_smc_info spi_2500_info = {
>>  	.hastype = false,
>>  	.we0 = 16,
>>  	.ctl0 = 0x10,
>> +	.has_dma = false,
>>  	.set_4b = aspeed_smc_chip_set_4b,
>>  };
>>  
>> @@ -97,6 +104,7 @@ struct aspeed_smc_chip {
>>  	struct aspeed_smc_controller *controller;
>>  	void __iomem *ctl;			/* control register */
>>  	void __iomem *ahb_base;			/* base of chip window */
>> +	unsigned long phys_base;		/* physical address of window */
>>  	u32 ctl_val[smc_max];			/* control settings */
>>  	enum aspeed_smc_flash_type type;	/* what type of flash */
>>  	struct spi_nor nor;
>> @@ -110,6 +118,18 @@ struct aspeed_smc_controller {
>>  	void __iomem *regs;			/* controller registers */
>>  	void __iomem *ahb_base;			/* per-chip windows resource */
>>  
>> +	/* interrupt handling */
>> +	int irq;
>> +
>> +	/* dma */
>> +	bool dma_enabled;
>> +	struct completion dma_done;
>> +
>> +	/* dma logging */
>> +	size_t dma_length;
>> +	dma_addr_t dma_addr;			/* bus address of buffer */
>> +	dma_addr_t flash_addr;			/* flash address */
>> +
>>  	struct aspeed_smc_chip *chips[0];	/* pointers to attached chips */
>>  };
>>  
>> @@ -182,6 +202,11 @@ struct aspeed_smc_controller {
>>  	(CONTROL_AAF_MODE | CONTROL_CE_INACTIVE_MASK | CONTROL_CLK_DIV4 | \
>>  	 CONTROL_CLOCK_FREQ_SEL_MASK | CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
>>  
>> +/* Interrupt Control and Status Register */
>> +#define INTERRUPT_STATUS_REG		0x08
>> +#define     INTERRUPT_DMA_ENABLE	BIT(3)
>> +#define     INTERRUPT_DMA_STATUS	BIT(11)
>> +
>>  /*
>>   * The Segment Register uses a 8MB unit to encode the start address
>>   * and the end address of the mapping window of a flash SPI slave :
>> @@ -194,6 +219,108 @@ struct aspeed_smc_controller {
>>  #define SEGMENT_ADDR_START(_r)		((((_r) >> 16) & 0xFF) << 23)
>>  #define SEGMENT_ADDR_END(_r)		((((_r) >> 24) & 0xFF) << 23)
>>  
>> +/* DMA Registers */
>> +#define DMA_CONTROL_REG			0x80
>> +#define     DMA_ENABLE			BIT(0)
>> +#define     DMA_WRITE			BIT(1)
>> +
>> +#define DMA_FLASH_BASE_REG		0x84
>> +#define DMA_DRAM_BASE_REG		0x88
>> +#define DMA_LENGTH_REG			0x8c
>> +
>> +/*
>> + * DMAs do not seem to be that fast, so disable by default
>> + */
>> +static bool use_dma;
>> +module_param(use_dma, bool, 0644);
>> +
>> +static unsigned int min_dma_size = 256;
>> +module_param(min_dma_size, uint, 0644);
>> +
>> +/* with 100ms we had a couple of timeouts */
>> +static unsigned int dma_timeout = 200;
>> +module_param(dma_timeout, uint, 0644);
>> +
>> +static void aspeed_smc_dma_done(struct aspeed_smc_controller *controller)
>> +{
>> +	writel(0, controller->regs + INTERRUPT_STATUS_REG);
>> +	writel(0, controller->regs + DMA_CONTROL_REG);
>> +}
>> +
>> +#define DMA_LENGTH(x) (((x) - 4) & ~0xFE000003)
>> +#define DMA_ADDR(x) ((x) & ~0x00000003)
>> +
>> +static inline void aspeed_smc_chip_configure(struct aspeed_smc_chip *chip,
>> +					     u32 ctl)
>> +{
>> +	ctl |= CONTROL_CE_STOP_ACTIVE_CONTROL;
>> +	writel(ctl, chip->ctl);
>> +
>> +	ctl &= ~CONTROL_CE_STOP_ACTIVE_CONTROL;
>> +	writel(ctl, chip->ctl);
>> +}
>> +
>> +static int aspeed_smc_dma_start(struct aspeed_smc_chip *chip,
>> +				u32 offset, void *buf, size_t length,
>> +				int is_write)
>> +{
>> +	struct aspeed_smc_controller *controller = chip->controller;
>> +	dma_addr_t dma_addr, flash_addr;
>> +	int ret = 0;
>> +
>> +	aspeed_smc_chip_configure(chip, is_write ? chip->ctl_val[smc_write] :
>> +		chip->ctl_val[smc_read]);
>> +
>> +	dev_dbg(chip->nor.dev, "DMA %s to=0x%08x len=0x%08x\n",
>> +		is_write ? "write" : "read", offset, length);
>> +
>> +	dma_addr = dma_map_single(chip->nor.dev, buf, length,
>> +				  (is_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
> 
> big mistake! Don't worry, your not the first one to fall into this trap
> and I did this mistake too in earlier versions of the Atmel QSPI
> controller driver (not mainlined) ;)
> 
> So please, let me explain:
> 
> You can use dma_map_single() if 'buf' is allocated with kmalloc() for
> instance because kmalloc'ed memory pages are guaranteed to be contiguous
> in physical space too (not only in virtual space).
> Also kmalloc'ed memory can be (un)mapped for DMA usages: when cleaning
> and/or invalidating the data cache lines, the cache aliasing issue
> should be avoided, hence it's safe.
> 
> On the other hand, vmalloc'ed memory pages are not guaranteed to be
> contiguous in physical space and often they aren't. So just for this
> only reason, you cannot use dma_map_single().
> 
> 
> More technical explanations but you can skip the following section if
> you want:
> 
> ####################################################################
> Then you could think of using dma_map_sg() instead. This is what the
> __spi_map_sg() / spi_map_buf() functions do in the SPI sub-system: in
> the case of vmalloc'ed memory, spi_map_buf() first build a
> scatter-gather list from the allocated pages before calling dma_map_sg().
> 
> However the vmalloc'ed memory area is not safe about the cache alias
> issue. Depending on model of the data cache it may be safe or not:
> 
> - PIPT (Physically Indexed / Physically Tagged): should be safe in any
> case (data cache of ARM Cortex A5 for instance).
> 
> - VIPT (Virtually Indexed / Physically Tagged): if the size of the data
> cache is "small" enough so the number of (index + offset) bits in the
> virtual address is less are equal to the number of offset bits for the
> smallest page size in the MMU (likely 12 bits for a 4KB MMU page size),
> the cache alias issue should be avoided. Otherwise this issue may occur
> so KO.
> 
> - VIVT (Virtually Indexed / Virtually Tagged): this is the worst case.
> Even the tag part comes from the virtual address so OK (ARM 926 I guess).
                                                 KO --^
> 
> e.g. for a 32bit address:
> 
> Depending on the cache model, the virtual and/or physical is used to
> extract the TAG, INDEX and OFFSET.
> 
> 31                         Y                   0
> +--------------------------+--------+----------+
> |           TAG            |  INDEX |   OFFSET |
> +--------------------------+--------+----------+
> 
> The INDEX part is used the select the right set in the N-way set
> associated cache. Then the TAG part is used to find the right cache line
> within the selected set.
> 
> For a minimum MMU page size of 4096 (== 2^12) bytes, if (Y < 12),
> whatever you use the physical of virtual address, the (INDEX + OFFSET)
> bits are the same. Then if the TAG comes from the physical address, no
> problem: even if a given physical page is mapped twice (or more) in the
> virtual address space through the MMU, all those virtual addresses are
> cached in the very same cache lines.
> So for a VIPT cache model, it depends on the geometry (cache size, cache
> line size, number of ways) of your cache.
> 
> SPI controller drivers should not rely on this "trick": it may not work
> later if the same controller IP is integrated in another SoC using a
> different core with a different cache model or different cache geometry.
> ####################################################################
> 
> 
> All that to say, other parts of the kernel, like the ubifs layer,
> allocates buffer with vmalloc() instead of kmalloc().
> So you may test your code programming a UBI file-system on your SPI
> flash memory and your kernel is likely to crash.
> 
> 
>> +
>> +	if (unlikely(dma_mapping_error(chip->nor.dev, dma_addr))) {
>> +		dev_err(chip->nor.dev, "Failed to dma_map_single()\n");
>> +		ret = -ENOMEM;
>> +		goto out;
>> +	}
>> +	flash_addr = chip->phys_base + offset;
>> +
>> +	controller->dma_length = length;
>> +	controller->dma_addr = dma_addr;
>> +	controller->flash_addr = flash_addr;
>> +
>> +	reinit_completion(&controller->dma_done);
>> +
>> +	writel(0, controller->regs + DMA_CONTROL_REG);
>> +	writel(DMA_ADDR(flash_addr), controller->regs +
>> +	       DMA_FLASH_BASE_REG);
>> +	writel(DMA_ADDR(dma_addr), controller->regs + DMA_DRAM_BASE_REG);
>> +	writel(DMA_LENGTH(length), controller->regs + DMA_LENGTH_REG);
>> +
>> +	writel(INTERRUPT_DMA_ENABLE,
>> +	       controller->regs + INTERRUPT_STATUS_REG);
>> +
>> +	writel(DMA_ENABLE | (is_write << 1),
>> +	       controller->regs + DMA_CONTROL_REG);
>> +
>> +	if (!wait_for_completion_timeout(&controller->dma_done,
>> +					 msecs_to_jiffies(dma_timeout))) {
>> +		dev_err(chip->nor.dev,
>> +			"DMA timeout addr@%.8x faddr@%.8x size=%x\n",
>> +			controller->dma_addr,
>> +			controller->flash_addr,
>> +			controller->dma_length);
>> +		ret = -ETIMEDOUT;
>> +		aspeed_smc_dma_done(controller);
>> +	}
>> +
>> +	dma_unmap_single(chip->nor.dev,
>> +			 controller->dma_addr, controller->dma_length,
>> +			 (is_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
>> +out:
>> +	aspeed_smc_chip_configure(chip, chip->ctl_val[smc_read]);
>> +	return ret;
>> +}
>> +
>>  /*
>>   * In user mode all data bytes read or written to the chip decode address
>>   * range are transferred to or from the SPI bus. The range is treated as a
>> @@ -366,12 +493,32 @@ static void aspeed_smc_send_cmd_addr(struct spi_nor *nor, u8 cmd, u32 addr)
>>  	}
>>  }
>>  
>> +/*
>> + * Try DMA transfer when size and alignment are correct. In case
>> + * of failure, just restart using the IO mode.
>> + */
>> +static int aspeed_smc_dma_check(struct aspeed_smc_chip *chip, loff_t off,
>> +				size_t len)
>> +{
>> +	return (IS_ALIGNED(off, 4) && IS_ALIGNED(len, 4) &&
>> +		len >= min_dma_size && chip->controller->dma_enabled &&
>> +		use_dma);
>> +}
>> +
> 
> vmalloc'ed memory or other high memory regions (not DMA capable) can
> still pass this check.
> 
> Best regards,
> 
> Cyrille
> 
>>  static ssize_t aspeed_smc_read_user(struct spi_nor *nor, loff_t from,
>>  				    size_t len, u_char *read_buf)
>>  {
>>  	struct aspeed_smc_chip *chip = nor->priv;
>>  	int i;
>>  	u8 dummy = 0xFF;
>> +	int ret;
>> +
>> +	if (aspeed_smc_dma_check(chip, from, len)) {
>> +		ret = aspeed_smc_dma_start(chip, from, read_buf, len, 0);
>> +		if (!ret)
>> +			goto out;
>> +		dev_err(chip->nor.dev, "DMA read failed: %d", ret);
>> +	}
>>  
>>  	aspeed_smc_start_user(nor);
>>  	aspeed_smc_send_cmd_addr(nor, nor->read_opcode, from);
>> @@ -380,6 +527,7 @@ static ssize_t aspeed_smc_read_user(struct spi_nor *nor, loff_t from,
>>  
>>  	aspeed_smc_read_from_ahb(read_buf, chip->ahb_base, len);
>>  	aspeed_smc_stop_user(nor);
>> +out:
>>  	return len;
>>  }
>>  
>> @@ -387,11 +535,21 @@ static ssize_t aspeed_smc_write_user(struct spi_nor *nor, loff_t to,
>>  				     size_t len, const u_char *write_buf)
>>  {
>>  	struct aspeed_smc_chip *chip = nor->priv;
>> +	int ret;
>> +
>> +	if (aspeed_smc_dma_check(chip, to, len)) {
>> +		ret = aspeed_smc_dma_start(chip, to, (void *)write_buf,
>> +					   len, 1);
>> +		if (!ret)
>> +			goto out;
>> +		dev_err(chip->nor.dev, "DMA write failed: %d", ret);
>> +	}
>>  
>>  	aspeed_smc_start_user(nor);
>>  	aspeed_smc_send_cmd_addr(nor, nor->program_opcode, to);
>>  	aspeed_smc_write_to_ahb(chip->ahb_base, write_buf, len);
>>  	aspeed_smc_stop_user(nor);
>> +out:
>>  	return len;
>>  }
>>  
>> @@ -527,6 +685,8 @@ static int aspeed_smc_chip_setup_init(struct aspeed_smc_chip *chip,
>>  		return -EINVAL;
>>  	}
>>  
>> +	chip->phys_base = res->start + (chip->ahb_base - controller->ahb_base);
>> +
>>  	/*
>>  	 * Get value of the inherited control register. U-Boot usually
>>  	 * does some timing calibration on the FMC chip, so it's good
>> @@ -597,6 +757,7 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
>>  	}
>>  
>>  	chip->ctl_val[smc_read] |= cmd |
>> +		chip->nor.read_opcode << CONTROL_COMMAND_SHIFT |
>>  		CONTROL_IO_DUMMY_SET(chip->nor.read_dummy / 8);
>>  
>>  	dev_dbg(controller->dev, "base control register: %08x\n",
>> @@ -695,6 +856,74 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
>>  	return ret;
>>  }
>>  
>> +static irqreturn_t aspeed_smc_irq(int irq, void *arg)
>> +{
>> +	struct aspeed_smc_controller *controller = arg;
>> +	struct device *dev = controller->dev;
>> +	irqreturn_t ret = IRQ_NONE;
>> +	u32 dma_ctl = readl(controller->regs + DMA_CONTROL_REG);
>> +	u32 status = readl(controller->regs + INTERRUPT_STATUS_REG);
>> +
>> +	dev_dbg(dev, "received IRQ. status: %x", status);
>> +
>> +	if (!(status & INTERRUPT_DMA_ENABLE) || !(dma_ctl & DMA_ENABLE)) {
>> +		dev_err(dev, "No DMA. bad IRQ status: %x", status);
>> +		goto out;
>> +	}
>> +
>> +	if (!(status & INTERRUPT_DMA_STATUS)) {
>> +		dev_err(dev, "DMA still in progress. length %d\n",
>> +			readl(controller->regs + DMA_LENGTH_REG));
>> +		goto out;
>> +	}
>> +
>> +	ret = IRQ_HANDLED;
>> +	aspeed_smc_dma_done(controller);
>> +	complete(&controller->dma_done);
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>> +static int aspeed_smc_config_irq(struct aspeed_smc_controller *controller,
>> +				 struct platform_device *pdev)
>> +{
>> +	struct device *dev = &pdev->dev;
>> +	int rc;
>> +
>> +	controller->irq = platform_get_irq(pdev, 0);
>> +	if (!controller->irq)
>> +		return -ENODEV;
>> +
>> +	rc = devm_request_irq(dev, controller->irq, aspeed_smc_irq, IRQF_SHARED,
>> +			      DEVICE_NAME, controller);
>> +	if (rc < 0) {
>> +		dev_warn(dev, "Unable to request IRQ %d\n", controller->irq);
>> +		controller->irq = 0;
>> +		return rc;
>> +	}
>> +
>> +	dev_info(dev, "Using IRQ %d\n", controller->irq);
>> +	return 0;
>> +}
>> +
>> +static void aspeed_smc_dma_setup(struct aspeed_smc_controller *controller,
>> +				 struct platform_device *pdev)
>> +{
>> +	const struct aspeed_smc_info *info = controller->info;
>> +
>> +	init_completion(&controller->dma_done);
>> +
>> +	controller->dma_enabled = false;
>> +	if (info->has_dma)
>> +		controller->dma_enabled = !aspeed_smc_config_irq(controller,
>> +								 pdev);
>> +
>> +	if (controller->dma_enabled)
>> +		dev_info(controller->dev, "DMA support %s.\n",
>> +			 use_dma ? "enabled" : "disabled");
>> +}
>> +
>>  static int aspeed_smc_probe(struct platform_device *pdev)
>>  {
>>  	struct device_node *np = pdev->dev.of_node;
>> @@ -730,6 +959,8 @@ static int aspeed_smc_probe(struct platform_device *pdev)
>>  	if (IS_ERR(controller->ahb_base))
>>  		return PTR_ERR(controller->ahb_base);
>>  
>> +	aspeed_smc_dma_setup(controller, pdev);
>> +
>>  	ret = aspeed_smc_setup_flash(controller, np, res);
>>  	if (ret)
>>  		dev_err(dev, "Aspeed SMC probe failed %d\n", ret);
>>
> 




More information about the linux-mtd mailing list