[PATCH 3/4] soc: visconti: Add Toshiba Visconti DNN image processing accelerator
Yuji Ishikawa
yuji2.ishikawa at toshiba.co.jp
Thu Apr 28 06:11:27 PDT 2022
Add support for the DNN image processing accelerator on Toshiba Visconti ARM SoCs.
The accelerator is intended for DNN inference tasks.
Signed-off-by: Yuji Ishikawa <yuji2.ishikawa at toshiba.co.jp>
Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu at toshiba.co.jp>
---
drivers/soc/visconti/Kconfig | 6 +
drivers/soc/visconti/Makefile | 2 +
drivers/soc/visconti/dnn/Makefile | 6 +
drivers/soc/visconti/dnn/dnn.c | 533 +++++++++++++++++++++++++
drivers/soc/visconti/dnn/hwd_dnn.c | 183 +++++++++
drivers/soc/visconti/dnn/hwd_dnn.h | 68 ++++
drivers/soc/visconti/dnn/hwd_dnn_reg.h | 228 +++++++++++
drivers/soc/visconti/uapi/dnn.h | 77 ++++
8 files changed, 1103 insertions(+)
create mode 100644 drivers/soc/visconti/dnn/Makefile
create mode 100644 drivers/soc/visconti/dnn/dnn.c
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.c
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.h
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn_reg.h
create mode 100644 drivers/soc/visconti/uapi/dnn.h
diff --git a/drivers/soc/visconti/Kconfig b/drivers/soc/visconti/Kconfig
index 8b1378917..a25287d0c 100644
--- a/drivers/soc/visconti/Kconfig
+++ b/drivers/soc/visconti/Kconfig
@@ -1 +1,7 @@
+if ARCH_VISCONTI
+
+config VISCONTI_DNN
+	bool "Visconti DNN driver"
+	help
+	  Support for the DNN image processing accelerator found on
+	  Toshiba Visconti ARM SoCs. The accelerator is applicable to
+	  DNN inference tasks.
+
+endif
diff --git a/drivers/soc/visconti/Makefile b/drivers/soc/visconti/Makefile
index 8d710da08..b9bd0f7e2 100644
--- a/drivers/soc/visconti/Makefile
+++ b/drivers/soc/visconti/Makefile
@@ -4,3 +4,5 @@
#
obj-y += ipa_common.o
+
+obj-$(CONFIG_VISCONTI_DNN) += dnn/
diff --git a/drivers/soc/visconti/dnn/Makefile b/drivers/soc/visconti/dnn/Makefile
new file mode 100644
index 000000000..52d57b60d
--- /dev/null
+++ b/drivers/soc/visconti/dnn/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Visconti DNN driver
+#
+
+obj-y += dnn.o hwd_dnn.o
diff --git a/drivers/soc/visconti/dnn/dnn.c b/drivers/soc/visconti/dnn/dnn.c
new file mode 100644
index 000000000..aeff92037
--- /dev/null
+++ b/drivers/soc/visconti/dnn/dnn.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/string.h>
+#include <linux/wait.h>
+
+#include "../ipa_common.h"
+#include "../uapi/dnn.h"
+#include "hwd_dnn.h"
+
+/**
+ * struct dnn_priv - per-device state of the Visconti DNN driver
+ *
+ * @dev: platform device; used for logging and for dma-buf attachment
+ * @miscdev: misc character device registered in dnn_probe()
+ * @lock: serializes the ioctl handlers and the threaded interrupt handler
+ * @regs: mapped register block handed to hwd_dnn_initialize()
+ * @irq: interrupt number; kept disabled except while a job is running
+ * @waitq: wait queue used by dnn_poll() and woken by dnn_irq_thread()
+ * @status: DRV_IPA_STATE_IDLE or DRV_IPA_STATE_BUSY
+ * @poll_event: event reported by dnn_poll() (IPA_POLL_EVENT_*)
+ * @id: hardware module index, read from the "index" device-tree property
+ * @name: misc device name ("dnn<id>")
+ * @dma_coherent: value of dev->dma_coherent saved at probe time; restored
+ *                after every dma-buf attachment
+ * @hwd_status: status captured by dnn_irq_thread() when a job completes
+ * @list_vaddr: CPU addresses of the per-base-address address lists
+ * @list_paddr: DMA addresses of the per-base-address address lists
+ * @err_flags: execution error flags stored by hwd_dnn_irq_handler()
+ * @dba: dma-buf attachments of the current job
+ * @sgt: scatter-gather tables of the current job's attachments
+ * @dma_dir: DMA direction used for each attachment
+ * @dma_count: number of valid entries in @dba, @sgt and @dma_dir
+ * @buffer_iova: device address of each attached buffer
+ * @temp_list: staging area for an address list copied from user space
+ */
+struct dnn_priv {
+ struct device *dev;
+ struct miscdevice miscdev;
+ struct mutex lock;
+ void __iomem *regs;
+ int irq;
+ wait_queue_head_t waitq;
+ enum drv_ipa_state status;
+ unsigned int poll_event;
+ int id;
+ char name[16];
+ bool dma_coherent;
+ struct hwd_dnn_status hwd_status;
+
+ u32 *list_vaddr[DRV_DNN_BASE_ADDR_NUM];
+ dma_addr_t list_paddr[DRV_DNN_BASE_ADDR_NUM];
+ unsigned int err_flags[HWD_DNN_EER_FLAG_NUM];
+
+ struct dma_buf_attachment *dba[DRV_DNN_BUFFER_INDEX_MAX];
+ struct sg_table *sgt[DRV_DNN_BUFFER_INDEX_MAX];
+ enum dma_data_direction dma_dir[DRV_DNN_BUFFER_INDEX_MAX];
+ unsigned int dma_count;
+
+ dma_addr_t buffer_iova[DRV_DNN_BUFFER_INDEX_MAX];
+
+ struct drv_ipa_addr temp_list[HWD_DNN_LIST_NUM_MAX];
+};
+
+/*
+ * Translate a (buffer index, offset) pair supplied by user space into a
+ * device address.
+ *
+ * Returns the address of the referenced byte, or 0 when the buffer index does
+ * not name an attached buffer or the offset is not inside that buffer.
+ */
+static uint32_t dnn_ipa_addr_to_iova(struct dnn_priv *priv, struct drv_ipa_addr addr)
+{
+	if (addr.buffer_index >= priv->dma_count)
+		return 0;
+
+	if (addr.offset >= priv->dba[addr.buffer_index]->dmabuf->size)
+		return 0;
+
+	return priv->buffer_iova[addr.buffer_index] + addr.offset;
+}
+
+/*
+ * Attach the dma-buf described by buffer_info[buffer_index] to the device.
+ *
+ * The attachment is stored in the next free slot (priv->dma_count). On
+ * success the returned device address is recorded in
+ * priv->buffer_iova[buffer_index] and priv->dma_count is incremented.
+ *
+ * For a buffer flagged as non-coherent, dev->dma_coherent is cleared for the
+ * duration of the attachment and restored from priv->dma_coherent afterwards.
+ *
+ * Returns 0 on success, -EINVAL for an invalid index or direction, or the
+ * error reported by ipa_attach_dmabuf().
+ */
+static int dnn_attach_dma_buf(struct dnn_priv *priv, unsigned int buffer_index,
+			      struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX])
+{
+	struct drv_ipa_buffer_info *info;
+	enum dma_data_direction *dir;
+	unsigned int slot;
+	dma_addr_t iova;
+	int err;
+
+	if (buffer_index >= DRV_DNN_BUFFER_INDEX_MAX) {
+		dev_err(priv->dev, "Buffer index invalid: index=%d\n", buffer_index);
+		return -EINVAL;
+	}
+
+	info = &buffer_info[buffer_index];
+	slot = priv->dma_count;
+	dir = &priv->dma_dir[slot];
+
+	switch (info->direction) {
+	case DRV_IPA_DIR_NONE:
+		*dir = DMA_NONE;
+		break;
+	case DRV_IPA_DIR_TO_DEVICE:
+		*dir = DMA_TO_DEVICE;
+		break;
+	case DRV_IPA_DIR_FROM_DEVICE:
+		*dir = DMA_FROM_DEVICE;
+		break;
+	case DRV_IPA_DIR_BIDIRECTION:
+		*dir = DMA_BIDIRECTIONAL;
+		break;
+	default:
+		dev_err(priv->dev, "DMA direction invalid: index=%d dir=%d\n", buffer_index,
+			info->direction);
+		return -EINVAL;
+	}
+
+	if (!info->coherent) {
+		priv->dev->dma_coherent = false;
+		if (priv->dma_coherent)
+			*dir = DMA_NONE;
+	}
+
+	err = ipa_attach_dmabuf(priv->dev, info->fd, &priv->dba[slot], &priv->sgt[slot], &iova,
+				*dir);
+	if (!err) {
+		priv->buffer_iova[buffer_index] = iova;
+		priv->dma_count++;
+	}
+
+	/* undo the temporary override made for non-coherent buffers */
+	priv->dev->dma_coherent = priv->dma_coherent;
+
+	return err;
+}
+
+/*
+ * Unmap, detach and release every dma-buf recorded in the first
+ * priv->dma_count slots. priv->dma_count itself is left unchanged.
+ */
+static void dnn_detach_dma_buf(struct dnn_priv *priv)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < priv->dma_count; idx++) {
+		struct dma_buf_attachment *attach = priv->dba[idx];
+		struct dma_buf *buf = attach->dmabuf;
+
+		dma_buf_unmap_attachment(attach, priv->sgt[idx], priv->dma_dir[idx]);
+		dma_buf_detach(buf, attach);
+		dma_buf_put(buf);
+	}
+}
+
+/*
+ * Hard interrupt handler. Lets the hwd layer acknowledge the interrupt and,
+ * when it reports that execution finished, disables the interrupt line and
+ * defers the remaining work to dnn_irq_thread().
+ */
+static irqreturn_t dnn_irq(int irq, void *dev_id)
+{
+	struct dnn_priv *priv = dev_id;
+
+	if (!(hwd_dnn_irq_handler(priv->id) & HWD_DNN_EVENT_EXEC_DONE))
+		return IRQ_NONE;
+
+	disable_irq_nosync(priv->irq);
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the completion interrupt.
+ *
+ * Runs after dnn_irq() has disabled the interrupt line. Under priv->lock it
+ * releases the job's dma-buf attachments, captures the hardware status, marks
+ * the device idle, publishes the poll event and wakes up pollers.
+ */
+static irqreturn_t dnn_irq_thread(int irq, void *dev_id)
+{
+ struct dnn_priv *priv = dev_id;
+ unsigned long delay = 1;
+
+ mutex_lock(&priv->lock);
+ dnn_detach_dma_buf(priv);
+
+ hwd_dnn_get_status(priv->id, &priv->hwd_status);
+
+ priv->status = DRV_IPA_STATE_IDLE;
+
+ /* status should be updated before poll_event so that
+ * when poll() returns, user context must observe state as idle
+ */
+ smp_wmb();
+
+ /* an execution error is reported to pollers as an error event */
+ if (priv->hwd_status.eer)
+ priv->poll_event = IPA_POLL_EVENT_ERROR;
+ else
+ priv->poll_event = IPA_POLL_EVENT_DONE;
+
+ /* General barrier to avoid re-ordering of priv->poll_event=N and
+ * waitqueue_active()
+ */
+ smp_mb();
+
+ /* Threads going to sleep in poll() can miss wakeup, when wakeup is done
+ * between event check in ipa_poll() and sleeping. Wakeup repeatedly.
+ */
+ while (waitqueue_active(&priv->waitq)) {
+ wake_up_interruptible(&priv->waitq);
+
+ /* back off exponentially; warn once the delay exceeds the limit */
+ WARN_ON(delay > IPA_WAKEUP_RETRY_DELAY);
+ usleep_range(delay, delay + 1);
+ delay += delay;
+ }
+
+ mutex_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+}
+
+/* Start execution of @desc on the hardware module that belongs to @priv. */
+static void dnn_start(struct dnn_priv *priv, struct hwd_dnn_descriptor *desc)
+{
+ hwd_dnn_start(priv->id, desc);
+}
+
+/*
+ * Resolve base-address entry @idx of the user descriptor @desc and store the
+ * resulting device address in hwd_desc->base_addr[idx].
+ *
+ * INPUT/OUTPUT entries carry a user pointer to an array of list_num + 1
+ * struct drv_ipa_addr. Each element is translated and written to the
+ * driver-owned list priv->list_vaddr[idx]; the hardware is given the DMA
+ * address of that list. AWB/TEMPORARY entries carry a single address which is
+ * translated directly. Any other purpose yields a base address of 0.
+ *
+ * Returns 0 on success, -EFAULT if the user list cannot be read, or -EINVAL
+ * if the list is too long or an address does not refer to an attached buffer.
+ */
+static int dnn_parse_entry_addr(struct dnn_priv *priv, int idx, struct drv_dnn_descriptor *desc,
+				struct hwd_dnn_descriptor *hwd_desc)
+{
+	dma_addr_t addr;
+	struct drv_ipa_addr ipa_addr;
+	u32 j, list_len;
+	int ret = 0;
+
+	switch (desc->base_addr[idx].purpose) {
+	case DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT:
+	case DRV_DNN_BASE_ADDR_PURPOSE_INPUT:
+		/*
+		 * list_num comes from user space. list_num + 1 entries are
+		 * copied into priv->temp_list and written to
+		 * priv->list_vaddr[idx], both of which hold
+		 * HWD_DNN_LIST_NUM_MAX entries, so reject anything larger
+		 * before touching either buffer.
+		 */
+		if (desc->list_num >= HWD_DNN_LIST_NUM_MAX) {
+			dev_err(priv->dev, "Address list too long: list_num=%u\n",
+				desc->list_num);
+			ret = -EINVAL;
+			break;
+		}
+		list_len = desc->list_num + 1;
+
+		if (copy_from_user(priv->temp_list,
+				   (void __user *)desc->base_addr[idx].addr.list_addr,
+				   sizeof(struct drv_ipa_addr) * list_len)) {
+			dev_err(priv->dev, "IPA address to iova conversion error: %d\n", __LINE__);
+			ret = -EFAULT;
+			break;
+		}
+
+		hwd_desc->base_addr[idx] = priv->list_paddr[idx];
+		for (j = 0; j < list_len; j++) {
+			ipa_addr = priv->temp_list[j];
+			addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+			if (addr == 0) {
+				dev_err(priv->dev,
+					"@LIST@ IPA address to iova conversion error: %d\n",
+					__LINE__);
+				ret = -EINVAL;
+				break;
+			}
+			dev_dbg(priv->dev, "@LIST@ %d: fd=%d %llx %x\n", idx,
+				desc->buffer_info[ipa_addr.buffer_index].fd,
+				(u64)hwd_desc->base_addr[idx], (u32)addr);
+			priv->list_vaddr[idx][j] = addr;
+		}
+		break;
+	case DRV_DNN_BASE_ADDR_PURPOSE_AWB:
+		ipa_addr = desc->base_addr[idx].addr.ipa_addr;
+		addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+		if (addr == 0) {
+			dev_err(priv->dev, "@AWB@ IPA address to iova conversion error: %d\n",
+				__LINE__);
+			ret = -EINVAL;
+			break;
+		}
+		hwd_desc->base_addr[idx] = addr;
+		dev_dbg(priv->dev, "@AWB@ %d: fd=%d %llx\n", idx,
+			desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]);
+		break;
+	case DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY:
+		ipa_addr = desc->base_addr[idx].addr.ipa_addr;
+		addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+		if (addr == 0) {
+			dev_err(priv->dev, "@TEMP@ IPA address to iova conversion error: %d\n",
+				__LINE__);
+			ret = -EINVAL;
+			break;
+		}
+		hwd_desc->base_addr[idx] = addr;
+		dev_dbg(priv->dev, "@TEMP@ %d: fd=%d %llx\n", idx,
+			desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]);
+		break;
+	default:
+		/* unused entry: program a null base address */
+		hwd_desc->base_addr[idx] = 0;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * IOC_IPA_START handler: validate the user descriptor, attach its dma-bufs,
+ * translate every address into a device address and start the hardware.
+ *
+ * Returns 0 once the job has been started, -EBUSY if a job is already
+ * running, -EFAULT if the descriptor cannot be read, -EINVAL for an
+ * incomplete descriptor or an invalid address, or the error returned by
+ * mutex_lock_interruptible() or by dnn_attach_dma_buf().
+ */
+static int dnn_ioctl_start(struct dnn_priv *priv, unsigned long arg)
+{
+ struct drv_dnn_descriptor desc;
+ struct hwd_dnn_descriptor hwd_desc;
+ int i, ret = 0;
+
+ ret = mutex_lock_interruptible(&priv->lock);
+ if (ret)
+ return ret;
+
+ if (priv->status == DRV_IPA_STATE_BUSY) {
+ dev_dbg(priv->dev, "busy: %d\n", priv->status);
+ mutex_unlock(&priv->lock);
+ return -EBUSY;
+ }
+
+ if (copy_from_user(&desc, (void __user *)arg, sizeof(struct drv_dnn_descriptor))) {
+ dev_err(priv->dev, "Descriptor memory access error\n");
+ ret = -EFAULT;
+ goto err1;
+ }
+
+ /* the descriptor is only accepted once its FINAL configuration bit is set */
+ if (DRV_DNN_BIT_CONFIG_DESC_FINAL != (desc.config_done & DRV_DNN_BIT_CONFIG_DESC_FINAL)) {
+ dev_err(priv->dev, "Descriptor configuration not complete\n");
+ ret = -EINVAL;
+ goto err1;
+ }
+
+ /* start a fresh attachment table for this job */
+ priv->dma_count = 0;
+
+ /* setup buffer */
+ for (i = 0; i < desc.buffer_info_num; i++) {
+ ret = dnn_attach_dma_buf(priv, i, desc.buffer_info);
+ if (ret) {
+ dev_err(priv->dev, "dma buf attach error: index=%d\n", i);
+ goto err2;
+ }
+ dev_dbg(priv->dev, "@buffer[%d]@: fd=%d %s iova=%llx\n", i, desc.buffer_info[i].fd,
+ desc.buffer_info[i].coherent ? "coherent" : "non-coherent",
+ (uint64_t)priv->buffer_iova[i]);
+ }
+
+ /* get iova address of configuration area */
+ hwd_desc.configuration = dnn_ipa_addr_to_iova(priv, desc.configuration);
+ if (hwd_desc.configuration == 0) {
+ dev_err(priv->dev, "Invalid IPA Address: configuration %s: %d\n", __func__,
+ __LINE__);
+ ret = -EINVAL;
+ goto err2;
+ }
+
+ dev_dbg(priv->dev, "@config@: fd=%d %llx\n",
+ desc.buffer_info[desc.configuration.buffer_index].fd,
+ (uint64_t)hwd_desc.configuration);
+
+ /* translate every base-address entry of the descriptor */
+ for (i = 0; i < DRV_DNN_BASE_ADDR_NUM; i++) {
+ ret = dnn_parse_entry_addr(priv, i, &desc, &hwd_desc);
+ if (ret)
+ goto err2;
+ }
+
+ hwd_desc.configuration_size = desc.configuration_size;
+ hwd_desc.list_num = desc.list_num;
+ hwd_desc.eer_flags_addr = priv->err_flags;
+ hwd_desc.base_addr_flag = desc.base_addr_flag;
+ hwd_desc.config_done = desc.config_done;
+
+ /* NOTE(review): configuration_offset is added after the range check above
+ * and is not itself checked against the buffer size here - confirm that
+ * this is intended.
+ */
+ hwd_desc.configuration += desc.configuration_offset;
+
+ dnn_start(priv, &hwd_desc);
+
+ /* the interrupt line stays disabled until the job has been started */
+ priv->poll_event = IPA_POLL_EVENT_NONE;
+ priv->status = DRV_IPA_STATE_BUSY;
+ enable_irq(priv->irq);
+
+ mutex_unlock(&priv->lock);
+
+ return ret;
+
+err2:
+ dnn_detach_dma_buf(priv);
+err1:
+ mutex_unlock(&priv->lock);
+ return ret;
+}
+
+/*
+ * IOC_IPA_GET_STATUS handler: copy the driver state, the execution error
+ * information and the execution error flags to user space.
+ *
+ * While a job is running the error information is read from the hwd layer;
+ * otherwise the snapshot taken by dnn_irq_thread() on completion is used.
+ *
+ * Returns 0 on success, -EFAULT if the result cannot be written to user
+ * space, or the error returned by mutex_lock_interruptible().
+ */
+static int dnn_ioctl_get_status(struct dnn_priv *priv, unsigned long arg)
+{
+	struct drv_dnn_status status;
+	struct hwd_dnn_status hwd_status;
+	int ret, i;
+
+	/*
+	 * The whole structure is copied to user space. Clear it first so that
+	 * the 'reserved' bits and any padding never expose kernel stack
+	 * contents.
+	 */
+	memset(&status, 0, sizeof(status));
+
+	ret = mutex_lock_interruptible(&priv->lock);
+	if (ret)
+		return ret;
+
+	if (priv->status == DRV_IPA_STATE_BUSY)
+		hwd_dnn_get_status(priv->id, &hwd_status);
+	else
+		hwd_status = priv->hwd_status;
+
+	status.state = priv->status;
+
+	/*
+	 * Sample the error flags under the lock so that they belong to the
+	 * same job as the state reported above: a new job cannot be started
+	 * while the lock is held.
+	 */
+	for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++)
+		status.eer_flags[i] = priv->err_flags[i];
+
+	mutex_unlock(&priv->lock);
+
+	status.eer_cmd = hwd_status.eer_cmd;
+	status.eer = hwd_status.eer;
+
+	if (copy_to_user((void __user *)arg, &status, sizeof(struct drv_dnn_status))) {
+		dev_err(priv->dev, "status memory access error\n");
+		ret = -EFAULT;
+	}
+
+	return ret;
+}
+
+/*
+ * ioctl entry point of the misc device. Dispatches IOC_IPA_START and
+ * IOC_IPA_GET_STATUS; every other command yields -ENOIOCTLCMD.
+ */
+static long dnn_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+	struct dnn_priv *priv = container_of(fp->private_data, struct dnn_priv, miscdev);
+
+	if (cmd == IOC_IPA_START)
+		return dnn_ioctl_start(priv, arg);
+
+	if (cmd == IOC_IPA_GET_STATUS)
+		return dnn_ioctl_get_status(priv, arg);
+
+	return -ENOIOCTLCMD;
+}
+
+/*
+ * poll() handler of the misc device.
+ *
+ * Reports EPOLLIN | EPOLLRDNORM once priv->poll_event is no longer
+ * IPA_POLL_EVENT_NONE, and additionally EPOLLERR when the event is
+ * IPA_POLL_EVENT_ERROR. Returns 0 while no event is pending.
+ */
+static __poll_t dnn_poll(struct file *fp, poll_table *wait)
+{
+ struct dnn_priv *priv = container_of(fp->private_data, struct dnn_priv, miscdev);
+ __poll_t mask = 0;
+ unsigned int poll_event;
+
+ poll_wait(fp, &priv->waitq, wait);
+
+ /* Barrier to avoid re-ordering of poll_wait() and event load
+ * Read barrier here and release barrier in poll_wait() together will
+ * prevent re-ordering
+ */
+ smp_rmb();
+ /* read the event once so that both checks below see the same value */
+ poll_event = priv->poll_event;
+ if (poll_event != IPA_POLL_EVENT_NONE) {
+ mask = EPOLLIN | EPOLLRDNORM;
+ if (poll_event == IPA_POLL_EVENT_ERROR)
+ mask |= EPOLLERR;
+ }
+ return mask;
+}
+
+/* File operations of the misc device: only ioctl and poll are implemented. */
+static const struct file_operations dnn_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = dnn_ioctl,
+ .poll = dnn_poll,
+};
+
+/*
+ * Probe one DNN accelerator instance.
+ *
+ * Maps the registers, requests the (initially disabled) interrupt, reads and
+ * validates the module index, allocates the address-list memory, initializes
+ * the hwd layer and finally registers the misc device.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int dnn_probe(struct platform_device *pdev)
+{
+	const size_t list_size = sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM;
+	struct device *dev = &pdev->dev;
+	struct dnn_priv *priv;
+	u32 index;
+	int i, ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	/*
+	 * Everything the file operations and interrupt handlers rely on must
+	 * be valid before the interrupt is requested and before the misc
+	 * device becomes visible to user space.
+	 */
+	priv->dev = dev;
+	priv->status = DRV_IPA_STATE_IDLE;
+	mutex_init(&priv->lock);
+	init_waitqueue_head(&priv->waitq);
+	platform_set_drvdata(pdev, priv);
+
+	/* update DMA mask */
+	priv->dma_coherent = dev->dma_coherent;
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
+	if (ret)
+		return ret;
+
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->regs))
+		return PTR_ERR(priv->regs);
+
+	priv->irq = platform_get_irq(pdev, 0);
+	if (priv->irq < 0)
+		return priv->irq;
+
+	/*
+	 * Request the interrupt in the disabled state: it is enabled by
+	 * dnn_ioctl_start() once a job has been started, and the handler must
+	 * not run before hwd_dnn_initialize() has set up the register pointer.
+	 */
+	ret = devm_request_threaded_irq(dev, priv->irq, dnn_irq, dnn_irq_thread, IRQF_NO_AUTOEN,
+					"dnn", priv);
+	if (ret) {
+		dev_err(dev, "irq request failed\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(dev->of_node, "index", &index);
+	if (ret) {
+		dev_err(dev, "failed to read \"index\" property\n");
+		return ret;
+	}
+
+	/* the hwd layer indexes a table of HWD_DNN_DEVICE_MAX entries with this value */
+	if (index >= HWD_DNN_DEVICE_MAX) {
+		dev_err(dev, "invalid \"index\" property: %u\n", index);
+		return -EINVAL;
+	}
+	priv->id = index;
+
+	/*
+	 * allocate uncached-memory to hold address-list for DNN operation
+	 * to hold ptr to 32bit addr memory block
+	 * for each base address (upto HWD_DNN_BASE_ADDR_NUM)
+	 * for each iteration (up to HWD_DNN_LIST_NUM_MAX)
+	 * uint32_t list_addr [HWD_DNN_BASE_ADDR_NUM] [HWD_DNN_LIST_NUM_MAX];
+	 */
+	priv->list_vaddr[0] = dma_alloc_wc(dev, list_size, &priv->list_paddr[0], GFP_KERNEL);
+	if (!priv->list_vaddr[0]) {
+		dev_err(dev, "dma_alloc_wc failed\n");
+		return -ENOMEM;
+	}
+	for (i = 1; i < HWD_DNN_BASE_ADDR_NUM; i++) {
+		priv->list_vaddr[i] = priv->list_vaddr[0] + HWD_DNN_LIST_NUM_MAX * i;
+		priv->list_paddr[i] = priv->list_paddr[0] + sizeof(u32) * HWD_DNN_LIST_NUM_MAX * i;
+	}
+
+	hwd_dnn_initialize(priv->id, priv->regs);
+
+	/* registering the misc device is the last step: it exposes the driver to user space */
+	snprintf(priv->name, sizeof(priv->name), "dnn%d", priv->id);
+	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
+	priv->miscdev.name = priv->name;
+	priv->miscdev.fops = &dnn_fops;
+	ret = misc_register(&priv->miscdev);
+	if (ret) {
+		dev_err(dev, "misc registration failed\n");
+		hwd_dnn_uninitialize(priv->id);
+		dma_free_wc(dev, list_size, priv->list_vaddr[0], priv->list_paddr[0]);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down one DNN accelerator instance: unregister the misc device,
+ * uninitialize the hwd layer and free the address-list memory allocated in
+ * dnn_probe(). Always returns 0.
+ */
+static int dnn_remove(struct platform_device *pdev)
+{
+	const size_t list_size = sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM;
+	struct dnn_priv *priv = platform_get_drvdata(pdev);
+
+	misc_deregister(&priv->miscdev);
+	hwd_dnn_uninitialize(priv->id);
+	dma_free_wc(&pdev->dev, list_size, priv->list_vaddr[0], priv->list_paddr[0]);
+
+	return 0;
+}
+
+/* Device-tree match table for the DNN accelerator. */
+static const struct of_device_id dnn_of_match[] = {
+	{
+		.compatible = "toshiba,visconti-dnn",
+	},
+	{ /* sentinel */ },
+};
+/* the table holds struct of_device_id entries, so it is exported as an "of" table */
+MODULE_DEVICE_TABLE(of, dnn_of_match);
+
+/* Platform driver bound through the device-tree match table dnn_of_match. */
+static struct platform_driver dnn_driver = {
+ .probe = dnn_probe,
+ .remove = dnn_remove,
+ .driver = {
+ .name = "visconti_dnn",
+ .of_match_table = of_match_ptr(dnn_of_match),
+ },
+};
+module_platform_driver(dnn_driver);
+
+MODULE_AUTHOR("Yuji Ishikawa <yuji2.ishikawa at toshiba.co.jp>");
+MODULE_DESCRIPTION("Toshiba Visconti DNN driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/soc/visconti/dnn/hwd_dnn.c b/drivers/soc/visconti/dnn/hwd_dnn.c
new file mode 100644
index 000000000..832040329
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+#include "hwd_dnn.h"
+#include "hwd_dnn_reg.h"
+
+/* Interrupt bits: END is treated as "execution done", EER as "execution error". */
+#define HWD_DNN_INT_END BIT(0)
+#define HWD_DNN_INT_EER BIT(24)
+#define HWD_DNN_INT_UNUSED (0x003f0100U)
+/* value written to the intmask register when a job is started */
+#define HWD_DNN_INT_MASK_CONFIG (HWD_DNN_INT_UNUSED | HWD_DNN_INT_EER)
+/* value written to the intstat register to clear pending causes before a job is started */
+#define HWD_DNN_INT_CLEAR_ALL (HWD_DNN_INT_END | HWD_DNN_INT_EER)
+/* value written to the exe register to start the device */
+#define HWD_DNN_EXE_CONFIG_MODE (0x3U)
+
+/* value written to the cgate_ord register before the device is started */
+#define HWD_DNN_CGATE_ORD_CONFIG BIT(17)
+
+/**
+ * struct hwd_dnn_resources - HWD driver internal resource structure
+ *
+ * @reg: register block of the module; set by hwd_dnn_initialize() and reset
+ *       to NULL by hwd_dnn_uninitialize()
+ * @status: error status of the current or last job; cleared by
+ *          hwd_dnn_start() and updated by hwd_dnn_irq_handler()
+ * @eer_flags_addr: buffer taken from the descriptor passed to hwd_dnn_start();
+ *                  hwd_dnn_irq_handler() stores the execution error flags
+ *                  there when it is not NULL
+ */
+struct hwd_dnn_resources {
+ struct hwd_dnn_reg *reg;
+ struct hwd_dnn_status status;
+ u32 *eer_flags_addr;
+};
+
+/* HWD driver internal resource, one entry per module id */
+static struct hwd_dnn_resources hwd_dnn_resources[HWD_DNN_DEVICE_MAX] = {};
+
+/**
+ * hwd_dnn_initialize() - Bind a register block to a DNN module
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @vaddr: virtual address of the module's register block
+ *
+ * Only records @vaddr for later register accesses; no register is touched.
+ */
+void hwd_dnn_initialize(u32 module_id, void *vaddr)
+{
+	hwd_dnn_resources[module_id].reg = vaddr;
+}
+
+/**
+ * hwd_dnn_uninitialize() - Release the register block of a DNN module
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ *
+ * Forgets the register address recorded by hwd_dnn_initialize().
+ */
+void hwd_dnn_uninitialize(u32 module_id)
+{
+	hwd_dnn_resources[module_id].reg = NULL;
+}
+
+/**
+ * hwd_dnn_start() - Start DNN device
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @desc: Pointer to descriptor structure
+ *
+ * Programs the configuration and base address registers from @desc, clears
+ * pending interrupt causes, resets the recorded error status and starts the
+ * device.
+ */
+void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc)
+{
+	struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+	struct hwd_dnn_reg *reg;
+	u32 base_addr_flag;
+	int i;
+
+	reg = res->reg;
+
+	/* Configure the registers with user provided values */
+	writel(desc->configuration & GENMASK(31, 0), &reg->cfg_addr);
+	writel(desc->configuration_size, &reg->cfg_size);
+	writel(desc->list_num, &reg->cfg_rpt);
+
+	base_addr_flag = desc->base_addr_flag;
+	for (i = 0; i < HWD_DNN_BASE_ADDR_NUM; i++) {
+		if (base_addr_flag & BIT(i)) {
+			/* 1: address list is specified */
+			writel(0U, &reg->base_addr[i]);
+			writel(desc->base_addr[i] & GENMASK(31, 0), &reg->base_list[i]);
+		} else {
+			/* 0: region address is specified */
+			writel(desc->base_addr[i] & GENMASK(31, 0), &reg->base_addr[i]);
+			writel(0U, &reg->base_list[i]);
+		}
+	}
+
+	writel(base_addr_flag, &reg->base_list_cfg);
+
+	/* clear and unmask interrupts */
+	writel(HWD_DNN_INT_CLEAR_ALL, &reg->intstat);
+	writel(HWD_DNN_INT_MASK_CONFIG, &reg->intmask);
+
+	/* forget the result of the previous job */
+	res->status.eer_cmd = 0;
+	res->status.eer = 0;
+	res->eer_flags_addr = desc->eer_flags_addr;
+
+	/* Invalidate load command in Clock Gating Control Register */
+	writel(HWD_DNN_CGATE_ORD_CONFIG, &reg->cgate_ord);
+
+	/* Sync memory barrier */
+	dsb(st);
+
+	/* Start the device */
+	writel(HWD_DNN_EXE_CONFIG_MODE, &reg->exe);
+}
+
+/**
+ * hwd_dnn_irq_handler() - HWD DNN interrupt handler
+ *
+ * @module_id: id of the h/w module
+ *
+ * Records the error command, acknowledges the pending interrupt causes and,
+ * when execution has ended, stores the execution error flags in the buffer
+ * given to hwd_dnn_start().
+ *
+ * Return: HWD_DNN_EVENT_EXEC_DONE Bit field saying Configuration processing is completed,
+ *         0 otherwise
+ */
+u32 hwd_dnn_irq_handler(u32 module_id)
+{
+	struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+	struct hwd_dnn_reg *reg;
+	u32 event = 0;
+	u32 int_stat;
+	u32 cause;
+	int i;
+
+	reg = res->reg;
+
+	/* Read the interrupt causes */
+	cause = readl(&reg->mask_intstat);
+
+	/* Read the interrupt status register */
+	int_stat = readl(&reg->intstat);
+
+	/*
+	 * Read the error command.
+	 * Clearing DNN_INTSTAT.EER interrupt cause initializes the error command register.
+	 * Read error command before clearing the interrupt cause.
+	 */
+	res->status.eer_cmd = readl(&reg->int_eer_cmd);
+
+	/* Clear the interrupt causes */
+	writel(cause, &reg->intstat);
+
+	if (cause & HWD_DNN_INT_END) {
+		/* Get EER flags, if requested */
+		if (res->eer_flags_addr)
+			for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++)
+				res->eer_flags_addr[i] = readl(&reg->eer_flag[i]);
+
+		event = HWD_DNN_EVENT_EXEC_DONE;
+	}
+
+	/* the error bit is taken from the raw status, not from the masked causes */
+	if (int_stat & HWD_DNN_INT_EER) {
+		/* Operation error occurred */
+		res->status.eer = 1;
+	}
+
+	return event;
+}
+
+/**
+ * hwd_dnn_get_status() - Report the recorded error status of a DNN module
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @status: Pointer to status structure; its error command and execution
+ *          error fields are overwritten
+ */
+void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status)
+{
+	const struct hwd_dnn_status *recorded = &hwd_dnn_resources[module_id].status;
+
+	status->eer = recorded->eer;
+	status->eer_cmd = recorded->eer_cmd;
+}
diff --git a/drivers/soc/visconti/dnn/hwd_dnn.h b/drivers/soc/visconti/dnn/hwd_dnn.h
new file mode 100644
index 000000000..84425990c
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef HWD_DNN_H
+#define HWD_DNN_H
+
+/**
+ * enum hwd_dnn_device_id - device ids for available DNN Accelerators
+ *
+ * @HWD_DNN_DEVICE_0: id of the first accelerator
+ * @HWD_DNN_DEVICE_1: id of the second accelerator
+ * @HWD_DNN_DEVICE_MAX: number of ids; not a valid id itself
+ */
+enum hwd_dnn_device_id {
+	HWD_DNN_DEVICE_0,
+	HWD_DNN_DEVICE_1,
+	HWD_DNN_DEVICE_MAX,
+};
+
+/* DNN events returned by irq handler */
+#define HWD_DNN_EVENT_EXEC_DONE (1U)
+
+/* number of base address available for DNN */
+#define HWD_DNN_BASE_ADDR_NUM (8U)
+
+/* number of error flag registers */
+#define HWD_DNN_EER_FLAG_NUM (32U)
+
+/* DNN hardware can execute 1024 iterations for different dataset at a time */
+#define HWD_DNN_LIST_NUM_MAX (1024U)
+
+/**
+ * struct hwd_dnn_status - DNN HWD Driver Status
+ *
+ * @eer_cmd: Error Command, as read from the error command register by the
+ *           interrupt handler
+ * @eer: Execution Error; set to 1 by the interrupt handler when the
+ *       interrupt status reports an execution error
+ * @reserved: Padding
+ */
+struct hwd_dnn_status {
+ u32 eer_cmd;
+ u32 eer : 1;
+ u32 reserved : 31;
+};
+
+/**
+ * struct hwd_dnn_descriptor - HWD DNN Descriptor
+ *
+ * @configuration: Device address of the configuration data
+ * @configuration_size: Configuration data size
+ * @list_num: Number of input/output list
+ * @base_addr: Base Address; for each entry either a region address or the
+ *             address of an address list, as selected by @base_addr_flag
+ * @eer_flags_addr: Address of storing execution error flags
+ *                  (HWD_DNN_EER_FLAG_NUM entries); may be NULL
+ * @base_addr_flag: Bit-fields of Base Address list config. If 1, address list. If 0, fixed address
+ * @config_done: Flags of called configuration
+ */
+struct hwd_dnn_descriptor {
+ u32 configuration;
+ u32 configuration_size;
+ u32 list_num;
+ u32 base_addr[HWD_DNN_BASE_ADDR_NUM];
+ u32 *eer_flags_addr;
+ u32 base_addr_flag;
+ u16 config_done;
+};
+
+void hwd_dnn_initialize(u32 module_id, void *vaddr);
+void hwd_dnn_uninitialize(u32 module_id);
+void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc);
+u32 hwd_dnn_irq_handler(u32 module_id);
+void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status);
+
+#endif /* HWD_DNN_H */
diff --git a/drivers/soc/visconti/dnn/hwd_dnn_reg.h b/drivers/soc/visconti/dnn/hwd_dnn_reg.h
new file mode 100644
index 000000000..c1271e092
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn_reg.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef HWD_DNN_REG_H
+#define HWD_DNN_REG_H
+
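+/**
+ * struct hwd_dnn_reg - register layout of the DNN accelerator
+ *
+ * Each member is one 32-bit register; reserved_* members are gaps in the
+ * register map.
+ */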
+struct hwd_dnn_reg {
+ u32 version;
+ u32 exe;
+ u32 reserved_a_1;
+ u32 intstat;
+ u32 intmask;
+ u32 mask_intstat;
+ u32 reserved_a_2;
+ u32 int_eer_cmd;
+ u32 reserved_b_1[8];
+ u32 cfg_cid;
+ u32 cfg_cnm;
+ u32 reserved_b_2[10];
+ u32 reserved_a_3;
+ u32 cgate_ord;
+ u32 reserved_b_3[2];
+ u32 eer_flag[32];
+ u32 cfg_addr;
+ u32 cfg_size;
+ u32 cfg_rpt;
+ u32 reserved_b_4[1];
+ u32 base_list_cfg;
+ u32 reserved_b_5[11];
+ u32 base_addr[8];
+ u32 reserved_b_6[24];
+ u32 base_list[8];
+ u32 reserved_b_7[8];
+ u32 cmd_load;
+ u32 ld_format;
+ u32 reserved_b_8[14];
+ u32 ld_g_size0;
+ u32 ld_g_size1;
+ u32 reserved_b_9[2];
+ u32 reserved_a_4;
+ u32 reserved_b_10[1];
+ u32 ld_g_base_id;
+ u32 ld_g_offset;
+ u32 ld_g_lptch;
+ u32 ld_g_cptch;
+ u32 ld_g_vptch;
+ u32 reserved_b_11[5];
+ u32 ld_l_size0;
+ u32 ld_l_size1;
+ u32 reserved_b_12[2];
+ u32 ld_l_addr;
+ u32 reserved_b_13[3];
+ u32 ld_l_lptch;
+ u32 ld_l_cptch;
+ u32 ld_l_vptch;
+ u32 reserved_b_14[17];
+ u32 reserved_a_5;
+ u32 reserved_b_15[3];
+ u32 cmd_store;
+ u32 st_format;
+ u32 reserved_b_16[14];
+ u32 st_g_size0;
+ u32 st_g_size1;
+ u32 reserved_b_17[2];
+ u32 reserved_a_6;
+ u32 reserved_b_18[1];
+ u32 st_g_base_id;
+ u32 st_g_offset;
+ u32 st_g_lptch;
+ u32 st_g_cptch;
+ u32 st_g_vptch;
+ u32 reserved_b_19[5];
+ u32 st_l_size0;
+ u32 st_l_size1;
+ u32 reserved_b_20[2];
+ u32 st_l_addr;
+ u32 reserved_b_21[3];
+ u32 st_l_lptch;
+ u32 st_l_cptch;
+ u32 st_l_vptch;
+ u32 reserved_b_22[17];
+ u32 reserved_a_7;
+ u32 reserved_b_23[3];
+ u32 cmd_copy;
+ u32 reserved_b_24[15];
+ u32 cp_src_size0;
+ u32 cp_src_size1;
+ u32 reserved_b_25[2];
+ u32 cp_src_addr;
+ u32 reserved_b_26[3];
+ u32 cp_src_lptch;
+ u32 cp_src_cptch;
+ u32 reserved_a_8;
+ u32 reserved_b_27[5];
+ u32 cp_dst_size0;
+ u32 cp_dst_size1;
+ u32 reserved_b_28[2];
+ u32 cp_dst_addr;
+ u32 reserved_b_29[3];
+ u32 cp_dst_lptch;
+ u32 cp_dst_cptch;
+ u32 reserved_a_9;
+ u32 reserved_b_30[17];
+ u32 reserved_a_10;
+ u32 reserved_b_31[3];
+ u32 cmd_fill;
+ u32 reserved_b_32[1];
+ u32 fl_cfg;
+ u32 reserved_b_33[13];
+ u32 fl_src_size0;
+ u32 fl_src_size1;
+ u32 reserved_b_34[2];
+ u32 fl_src_addr;
+ u32 reserved_b_35[3];
+ u32 fl_src_lptch;
+ u32 fl_src_cptch;
+ u32 reserved_b_36[6];
+ u32 fl_dst_size0;
+ u32 fl_dst_size1;
+ u32 reserved_b_37[2];
+ u32 fl_dst_addr;
+ u32 reserved_b_38[3];
+ u32 fl_dst_lptch;
+ u32 fl_dst_cptch;
+ u32 reserved_b_39[22];
+ u32 cmd_sync;
+ u32 reserved_b_40[1];
+ u32 cmd_srst;
+ u32 reserved_b_41[1];
+ u32 cmd_halt;
+ u32 reserved_b_42[3];
+ u32 cmd_finish;
+ u32 reserved_b_43[119];
+ u32 cmd_exe;
+ u32 exe_mode;
+ u32 reserved_b_44[14];
+ u32 exe_inf_size0;
+ u32 exe_inf_size1;
+ u32 reserved_b_45[2];
+ u32 exe_inf_addr;
+ u32 reserved_b_46[3];
+ u32 exe_inf_lptch;
+ u32 exe_inf_cptch;
+ u32 reserved_b_47[26];
+ u32 exe_conv_bias_addr;
+ u32 reserved_b_48[27];
+ u32 exe_conv_wt_size0;
+ u32 exe_conv_wt_size1;
+ u32 reserved_b_49[2];
+ u32 exe_conv_wt_addr;
+ u32 reserved_b_50[3];
+ u32 exe_conv_wt_lptch;
+ u32 exe_conv_wt_cptch;
+ u32 exe_conv_wt_vptch;
+ u32 reserved_b_51[9];
+ u32 exe_conv_acv_addr;
+ u32 reserved_b_52[3];
+ u32 exe_conv_acv_lptch;
+ u32 exe_conv_acv_cptch;
+ u32 reserved_b_53[18];
+ u32 exe_conv_out;
+ u32 reserved_b_54[11];
+ u32 actv_tbl_range;
+ u32 reserved_a_11;
+ u32 actv_tbl_prm;
+ u32 reserved_b_55[5];
+ u32 exe_pool_size;
+ u32 reserved_b_56[43];
+ u32 exe_pool_out;
+ u32 exe_pool_pe_crop;
+ u32 reserved_b_57[10];
+ u32 exe_norm_g_prm;
+ u32 reserved_b_58[15];
+ u32 exe_sav_max_prm;
+ u32 reserved_b_59[35];
+ u32 exe_norm_out;
+ u32 reserved_b_60[23];
+ u32 exe_outf_addr;
+ u32 reserved_b_61[3];
+ u32 exe_outf_lptch;
+ u32 exe_outf_cptch;
+ u32 reserved_b_62[102];
+ u32 reserved_a_12;
+ u32 reserved_b_63[8];
+ u32 reserved_a_13;
+ u32 reserved_a_14;
+ u32 reserved_a_15;
+ u32 reserved_b_64[4];
+ u32 reserved_a_16;
+ u32 reserved_a_17;
+ u32 reserved_b_65[46];
+ u32 exe_pool_pa3_0;
+ u32 exe_pool_pa3_2;
+ u32 exe_pool_pa3_4;
+ u32 exe_pool_pa3_6;
+ u32 exe_pool_pa3_8;
+ u32 exe_pool_pa3_10;
+ u32 exe_pool_pa3_12;
+ u32 exe_pool_pa3_14;
+ u32 exe_pool_pa3_16;
+ u32 exe_pool_pa3_18;
+ u32 exe_pool_pa3_20;
+ u32 exe_pool_pa3_22;
+ u32 exe_pool_pa3_24;
+ u32 reserved_b_66[3];
+ u32 exe_pool_pa2_0;
+ u32 exe_pool_pa2_2;
+ u32 exe_pool_pa2_4;
+ u32 exe_pool_pa2_6;
+ u32 exe_pool_pa2_8;
+ u32 exe_pool_pa2_10;
+ u32 exe_pool_pa2_12;
+ u32 exe_pool_pa2_14;
+ u32 exe_pool_pa2_16;
+ u32 exe_pool_pa2_18;
+ u32 exe_pool_pa2_20;
+ u32 exe_pool_pa2_22;
+ u32 exe_pool_pa2_24;
+};
+
+#endif /* HWD_DNN_REG_H */
diff --git a/drivers/soc/visconti/uapi/dnn.h b/drivers/soc/visconti/uapi/dnn.h
new file mode 100644
index 000000000..972c39655
--- /dev/null
+++ b/drivers/soc/visconti/uapi/dnn.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef _UAPI_LINUX_DNN_H
+#define _UAPI_LINUX_DNN_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include "ipa.h"
+
+#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U)
+#define DRV_DNN_BUFFER_INDEX_MAX (15)
+
+#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base address */
+
+#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U)
+
+/**
+ * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS
+ *
+ * @state: State of driver
+ * @eer_cmd: Execution error command
+ * @eer: Execution error
+ * @reserved: Padding
+ * @eer_flags: Execution error flags
+ *
+ * NOTE(review): this structure is exchanged with user space but uses u32
+ * rather than __u32, an enum member and bit-fields - confirm that the
+ * resulting layout is the intended ABI.
+ */
+struct drv_dnn_status {
+ enum drv_ipa_state state;
+ u32 eer_cmd;
+ u32 eer : 1;
+ u32 reserved : 31;
+ u32 eer_flags[32];
+};
+
+/**
+ * struct drv_dnn_base_addr - one base address entry of a DNN descriptor
+ *
+ * @purpose: one of DRV_DNN_BASE_ADDR_PURPOSE_*; selects which member of
+ *           @addr is used
+ * @addr: ipa_addr holds a single buffer address (AWB and TEMPORARY);
+ *        list_addr holds a user-space pointer to an array of
+ *        struct drv_ipa_addr (INPUT and OUTPUT)
+ *
+ * NOTE(review): list_addr is a uintptr_t, so its size depends on the
+ * user-space word size - confirm behaviour for 32-bit callers.
+ */
+struct drv_dnn_base_addr {
+ u32 purpose;
+ union {
+ struct drv_ipa_addr ipa_addr;
+ uintptr_t list_addr;
+ } addr;
+};
+
+/**
+ * struct drv_dnn_descriptor - DNN IPA Descriptor for IOC_IPA_START
+ *
+ * @configuration: Address of configuration data
+ * @configuration_offset: Configuration offset
+ * @configuration_size: Configuration data size
+ * @list_num: Number of input/output list
+ * @base_addr: Base addresses
+ * @base_addr_flag: Bit-fields of base_addr list config
+ * - if 0, fixed address
+ * - if 1, address list
+ * @config_done: Flags of called configuration; DRV_DNN_BIT_CONFIG_DESC_FINAL
+ *               marks the descriptor as complete
+ * @buffer_info: Table of buffer information
+ * @buffer_info_num: Number of valid entries in buffer_info
+ *                   (at most DRV_DNN_BUFFER_INDEX_MAX)
+ */
+struct drv_dnn_descriptor {
+ struct drv_ipa_addr configuration;
+ u32 configuration_offset;
+ u32 configuration_size;
+ u32 list_num;
+ struct drv_dnn_base_addr base_addr[DRV_DNN_BASE_ADDR_NUM];
+ u32 base_addr_flag;
+ u16 config_done;
+ struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX];
+ s32 buffer_info_num;
+};
+
+#endif /* _UAPI_LINUX_DNN_H */
--
2.17.1
More information about the linux-arm-kernel
mailing list