[PATCH 04/18] crypto: caam - handle core endianness != caam endianness

Marcin Niestroj m.niestroj at grinn-global.com
Mon Sep 3 04:23:54 PDT 2018


Pick commit 261ea058f016bc04fa064348ad9bf39d94379381 from Linux
upstream.

    crypto: caam - handle core endianness != caam endianness

    There are SoCs like LS1043A where CAAM endianness (BE) does not match
    the default endianness of the core (LE).
    Moreover, there are requirements for the driver to handle cases like
    CPU_BIG_ENDIAN=y on ARM-based SoCs.
    This requires for a complete rewrite of the I/O accessors.

    PPC-specific accessors - {in,out}_{le,be}XX - are replaced with
    generic ones - io{read,write}[be]XX.

    Endianness is detected dynamically (at runtime) to allow for
    multiplatform kernels, for e.g. running the same kernel image
    on LS1043A (BE CAAM) and LS2080A (LE CAAM) armv8-based SoCs.

    While here: debugfs entries need to take into consideration the
    endianness of the core when displaying data. Add the necessary
    glue code so the entries remain the same, but they are properly
    read, regardless of the core and/or SEC endianness.

    Note: pdb.h fixes only what is currently being used (IPsec).

    Reviewed-by: Tudor Ambarus <tudor-dan.ambarus at nxp.com>
    Signed-off-by: Horia Geantă <horia.geanta at nxp.com>
    Signed-off-by: Alex Porosanu <alexandru.porosanu at nxp.com>
    Signed-off-by: Herbert Xu <herbert at gondor.apana.org.au>

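As part of this patch the rd_reg32() and wr_reg32() helper functions are
introduced, and rd_reg64()/wr_reg64() are moved from jr.c to regs.h and
made endianness-aware. All readl() calls are replaced by rd_reg32(), all
writel() calls by wr_reg32(), and the setbits32()/clrbits32() macros by
clrsetbits_32().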

Signed-off-by: Marcin Niestroj <m.niestroj at grinn-global.com>
---
 drivers/crypto/caam/ctrl.c        |  87 +++++++++++-----------
 drivers/crypto/caam/desc.h        |   3 +-
 drivers/crypto/caam/desc_constr.h |  42 +++++++----
 drivers/crypto/caam/jr.c          |  62 +++++-----------
 drivers/crypto/caam/regs.h        | 117 +++++++++++++++++++++++++++++-
 5 files changed, 206 insertions(+), 105 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 8ce9a859e..0baddf733 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -20,6 +20,9 @@
 #include "error.h"
 #include "ctrl.h"
 
+bool caam_little_end;
+EXPORT_SYMBOL(caam_little_end);
+
 /*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
  * load the JDKEK, TDKEK and TDSK registers
@@ -83,10 +86,10 @@ static inline int run_descriptor_deco0(struct device_d *ctrldev, u32 *desc,
 	deco = ctrlpriv->deco;
 
 	if (ctrlpriv->virt_en == 1) {
-		setbits32(&ctrl->deco_rsr, DECORSR_JR0);
+		clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0);
 
 		start = get_time_ns();
-		while (!(readl(&ctrl->deco_rsr) & DECORSR_VALID)) {
+		while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID)) {
 			if (is_timeout(start, 100 * MSECOND)) {
 				dev_err(ctrldev, "DECO timed out\n");
 				return -ETIMEDOUT;
@@ -94,19 +97,19 @@ static inline int run_descriptor_deco0(struct device_d *ctrldev, u32 *desc,
 		}
 	}
 
-	setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE);
+	clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE);
 
 	start = get_time_ns();
-	while (!(readl(&ctrl->deco_rq) & DECORR_DEN0)) {
+	while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0)) {
 		if (is_timeout(start, 100 * MSECOND)) {
 			dev_err(ctrldev, "failed to acquire DECO 0\n");
-			clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE);
+			clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0);
 			return -ETIMEDOUT;
 		}
 	}
 
 	for (i = 0; i < desc_len(desc); i++)
-		writel(*(desc + i), &deco->descbuf[i]);
+		wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i)));
 
 	flags = DECO_JQCR_WHL;
 	/*
@@ -117,10 +120,10 @@ static inline int run_descriptor_deco0(struct device_d *ctrldev, u32 *desc,
 		flags |= DECO_JQCR_FOUR;
 
 	/* Instruct the DECO to execute it */
-	setbits32(&deco->jr_ctl_hi, flags);
+	clrsetbits_32(&deco->jr_ctl_hi, 0, flags);
 
 	start = get_time_ns();
-	while ((deco_dbg_reg = readl(&deco->desc_dbg)) &
+	while ((deco_dbg_reg = rd_reg32(&deco->desc_dbg)) &
 		 DESC_DBG_DECO_STAT_VALID) {
 		/*
 		 * If an error occured in the descriptor, then
@@ -131,14 +134,14 @@ static inline int run_descriptor_deco0(struct device_d *ctrldev, u32 *desc,
 			break;
 	}
 
-	*status = readl(&deco->op_status_hi) &
+	*status = rd_reg32(&deco->op_status_hi) &
 		  DECO_OP_STATUS_HI_ERR_MASK;
 
 	if (ctrlpriv->virt_en == 1)
-		clrbits32(&ctrl->deco_rsr, DECORSR_JR0);
+		clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0);
 
 	/* Mark the DECO as free */
-	clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE);
+	clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0);
 
 	if (is_timeout(start, 100 * MSECOND))
 		return -EAGAIN;
@@ -199,7 +202,7 @@ static int instantiate_rng(struct device_d *ctrldev, int state_handle_mask,
 		 * without any error (HW optimizations for later
 		 * CAAM eras), then try again.
 		 */
-		rdsta_val = readl(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
+		rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
 		if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
 		    !(rdsta_val & (1 << sh_idx)))
 			ret = -EAGAIN;
@@ -241,7 +244,7 @@ static void kick_trng(struct device_d *ctrldev, int ent_delay)
 	r4tst = &ctrl->r4tst[0];
 
 	/* put RNG4 into program mode */
-	setbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+	clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM);
 
 	/*
 	 * Performance-wise, it does not make sense to
@@ -251,33 +254,33 @@ static void kick_trng(struct device_d *ctrldev, int ent_delay)
 	 * time trying to set the values controlling the sample
 	 * frequency, the function simply returns.
 	 */
-	val = (readl(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK)
+	val = (rd_reg32(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK)
 	      >> RTSDCTL_ENT_DLY_SHIFT;
 	if (ent_delay <= val) {
 		/* put RNG4 into run mode */
-		clrbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+		clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0);
 		return;
 	}
 
-	val = readl(&r4tst->rtsdctl);
+	val = rd_reg32(&r4tst->rtsdctl);
 	val = (val & ~RTSDCTL_ENT_DLY_MASK) |
 	      (ent_delay << RTSDCTL_ENT_DLY_SHIFT);
-	writel(val, &r4tst->rtsdctl);
+	wr_reg32(&r4tst->rtsdctl, val);
 	/* min. freq. count, equal to 1/4 of the entropy sample length */
-	writel(ent_delay >> 2, &r4tst->rtfrqmin);
+	wr_reg32(&r4tst->rtfrqmin, ent_delay >> 2);
 	/* disable maximum frequency count */
-	writel(RTFRQMAX_DISABLE, &r4tst->rtfrqmax);
+	wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
 	/* read the control register */
-	val = readl(&r4tst->rtmctl);
+	val = rd_reg32(&r4tst->rtmctl);
 	/*
 	 * select raw sampling in both entropy shifter
 	 * and statistical checker
 	 */
-	setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC);
+	val |= RTMCTL_SAMP_MODE_RAW_ES_SC; /* val is a CPU copy, not MMIO */
 	/* put RNG4 into run mode */
-	clrbits32(&val, RTMCTL_PRGM);
+	val &= ~RTMCTL_PRGM;
 	/* write back the control register */
-	writel(val, &r4tst->rtmctl);
+	wr_reg32(&r4tst->rtmctl, val);
 }
 
 /**
@@ -379,8 +382,12 @@ static int caam_probe(struct device_d *dev)
 		dev_err(dev, "caam: of_iomap() failed\n");
 		return -ENOMEM;
 	}
+
+	caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) &
+				  (CSTA_PLEND | CSTA_ALT_PLEND));
+
 	/* Finding the page size for using the CTPR_MS register */
-	comp_params = readl(&ctrl->perfmon.comp_parms_ms);
+	comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms);
 	pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT;
 
 	/* Allocating the BLOCK_OFFSET based on the supported page size on
@@ -403,7 +410,7 @@ static int caam_probe(struct device_d *dev)
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
 	 * long pointers in master configuration register
 	 */
-	clrsetbits_be32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_ARCACHE_MASK,
+	clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_ARCACHE_MASK,
 			MCFGR_AWCACHE_CACH | MCFGR_ARCACHE_MASK |
 			MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
 					  MCFGR_LONG_PTR : 0));
@@ -412,7 +419,7 @@ static int caam_probe(struct device_d *dev)
 	 *  Read the Compile Time paramters and SCFGR to determine
 	 * if Virtualization is enabled for this platform
 	 */
-	scfgr = readl(&ctrl->scfgr);
+	scfgr = rd_reg32(&ctrl->scfgr);
 
 	ctrlpriv->virt_en = 0;
 	if (comp_params & CTPR_MS_VIRT_EN_INCL) {
@@ -430,9 +437,9 @@ static int caam_probe(struct device_d *dev)
 	}
 
 	if (ctrlpriv->virt_en == 1)
-		setbits32(&ctrl->jrstart, JRSTART_JR0_START |
-			  JRSTART_JR1_START | JRSTART_JR2_START |
-			  JRSTART_JR3_START);
+		clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START |
+			      JRSTART_JR1_START | JRSTART_JR2_START |
+			      JRSTART_JR3_START);
 
 	/*
 	 * ERRATA:  mx6 devices have an issue wherein AXI bus transactions
@@ -443,8 +450,8 @@ static int caam_probe(struct device_d *dev)
 	 * to a depth of 1 (from it's default of 4) to preclude this situation
 	 * from occurring.
 	 */
-	writel((readl(&ctrl->mcr) & ~(MCFGR_AXIPIPE_MASK)) |
-	       ((1 << MCFGR_AXIPIPE_SHIFT) & MCFGR_AXIPIPE_MASK), &ctrl->mcr);
+	wr_reg32(&ctrl->mcr, (rd_reg32(&ctrl->mcr) & ~(MCFGR_AXIPIPE_MASK)) |
+			((1 << MCFGR_AXIPIPE_SHIFT) & MCFGR_AXIPIPE_MASK));
 
 	/*
 	 * Detect and enable JobRs
@@ -487,15 +494,13 @@ static int caam_probe(struct device_d *dev)
 	}
 
 	/* Check to see if QI present. If so, enable */
-	ctrlpriv->qi_present =
-			!!(readl(&ctrl->perfmon.comp_parms_ms) &
-			   CTPR_MS_QI_MASK);
+	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
 	if (ctrlpriv->qi_present) {
 		ctrlpriv->qi = (struct caam_queue_if __force *)
 			       ((uint8_t *)ctrl +
 				 BLOCK_OFFSET * QI_BLOCK_NUMBER);
 		/* This is all that's required to physically enable QI */
-		writel(QICTL_DQEN, &ctrlpriv->qi->qi_control_lo);
+		wr_reg32(&ctrlpriv->qi->qi_control_lo, QICTL_DQEN);
 	}
 
 	/* If no QI and no rings specified, quit and go home */
@@ -505,7 +510,7 @@ static int caam_probe(struct device_d *dev)
 		return -ENOMEM;
 	}
 
-	cha_vid_ls = readl(&ctrl->perfmon.cha_id_ls);
+	cha_vid_ls = rd_reg32(&ctrl->perfmon.cha_id_ls);
 
 	/*
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
@@ -513,7 +518,7 @@ static int caam_probe(struct device_d *dev)
 	 */
 	if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
 		ctrlpriv->rng4_sh_init =
-			readl(&ctrl->r4tst[0].rdsta);
+			rd_reg32(&ctrl->r4tst[0].rdsta);
 		/*
 		 * If the secure keys (TDKEK, JDKEK, TDSK), were already
 		 * generated, signal this to the function that is instantiating
@@ -524,7 +529,7 @@ static int caam_probe(struct device_d *dev)
 		ctrlpriv->rng4_sh_init &= RDSTA_IFMASK;
 		do {
 			int inst_handles =
-				readl(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
+				rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
 			/*
 			 * If either SH were instantiated by somebody else
 			 * (e.g. u-boot) then it is assumed that the entropy
@@ -560,7 +565,7 @@ static int caam_probe(struct device_d *dev)
 		ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK;
 
 		/* Enable RDB bit so that RNG works faster */
-		setbits32(&ctrl->scfgr, SCFGR_RDBENABLE);
+		clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE);
 	}
 
 	if (IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG)) {
@@ -573,8 +578,8 @@ static int caam_probe(struct device_d *dev)
 	}
 
 	/* NOTE: RTIC detection ought to go here, around Si time */
-	caam_id = (u64)readl(&ctrl->perfmon.caam_id_ms) << 32 |
-		  (u64)readl(&ctrl->perfmon.caam_id_ls);
+	caam_id = (u64)rd_reg32(&ctrl->perfmon.caam_id_ms) << 32 |
+		  (u64)rd_reg32(&ctrl->perfmon.caam_id_ls);
 
 	/* Report "alive" for developer to see */
 	dev_dbg(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index a12eb8603..e964a5a93 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -30,8 +30,7 @@ struct sec4_sg_entry {
 	u32 ptr;
 #endif
 	u32 len;
-	u16 buf_pool_id;
-	u16 offset;
+	u32 bpid_offset;
 };
 
 /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index c06ee734d..cf86cc83b 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -5,6 +5,7 @@
  */
 
 #include "desc.h"
+#include "regs.h"
 
 #define IMMEDIATE (1 << 23)
 #define CAAM_CMD_SZ sizeof(u32)
@@ -32,7 +33,7 @@
 
 static inline int desc_len(u32 *desc)
 {
-	return *desc & HDR_DESCLEN_MASK;
+	return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK;
 }
 
 static inline int desc_bytes(void *desc)
@@ -52,7 +53,7 @@ static inline void *sh_desc_pdb(u32 *desc)
 
 static inline void init_desc(u32 *desc, u32 options)
 {
-	*desc = (options | HDR_ONE) + 1;
+	*desc = cpu_to_caam32((options | HDR_ONE) + 1);
 }
 
 static inline void init_sh_desc(u32 *desc, u32 options)
@@ -78,9 +79,10 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr)
 {
 	dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
 
-	*offset = ptr;
+	*offset = cpu_to_caam_dma(ptr);
 
-	(*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ;
+	(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
+				CAAM_PTR_SZ / CAAM_CMD_SZ);
 }
 
 static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
@@ -99,16 +101,17 @@ static inline void append_data(u32 *desc, void *data, int len)
 	if (len) /* avoid sparse warning: memcpy with byte count of 0 */
 		memcpy(offset, data, len);
 
-	(*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
+	(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
+				(len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ);
 }
 
 static inline void append_cmd(u32 *desc, u32 command)
 {
 	u32 *cmd = desc_end(desc);
 
-	*cmd = command;
+	*cmd = cpu_to_caam32(command);
 
-	(*desc)++;
+	(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1);
 }
 
 #define append_u32 append_cmd
@@ -117,16 +120,22 @@ static inline void append_u64(u32 *desc, u64 data)
 {
 	u32 *offset = desc_end(desc);
 
-	*offset = upper_32_bits(data);
-	*(++offset) = lower_32_bits(data);
+	/* Only 32-bit alignment is guaranteed in descriptor buffer */
+	if (caam_little_end) {
+		*offset = cpu_to_caam32(lower_32_bits(data));
+		*(++offset) = cpu_to_caam32(upper_32_bits(data));
+	} else {
+		*offset = cpu_to_caam32(upper_32_bits(data));
+		*(++offset) = cpu_to_caam32(lower_32_bits(data));
+	}
 
-	(*desc) += 2;
+	(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2);
 }
 
 /* Write command without affecting header, and return pointer to next word */
 static inline u32 *write_cmd(u32 *desc, u32 command)
 {
-	*desc = command;
+	*desc = cpu_to_caam32(command);
 
 	return desc + 1;
 }
@@ -168,14 +177,17 @@ APPEND_CMD_RET(move, MOVE)
 
 static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
 {
-	*jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc));
+	*jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) |
+				(desc_len(desc) - (jump_cmd - desc)));
 }
 
 static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
 {
-	*move_cmd &= ~MOVE_OFFSET_MASK;
-	*move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) &
-				 MOVE_OFFSET_MASK);
+	u32 val = caam32_to_cpu(*move_cmd);
+
+	val &= ~MOVE_OFFSET_MASK;
+	val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK;
+	*move_cmd = cpu_to_caam32(val);
 }
 
 #define APPEND_CMD(cmd, op) \
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 8f169d406..84396e41e 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -21,43 +21,16 @@
 #include "desc.h"
 #include "intern.h"
 
-/*
- * The DMA address registers in the JR are a pair of 32-bit registers.
- * The layout is:
- *
- *    base + 0x0000 : most-significant 32 bits
- *    base + 0x0004 : least-significant 32 bits
- *
- * The 32-bit version of this core therefore has to write to base + 0x0004
- * to set the 32-bit wide DMA address. This seems to be independent of the
- * endianness of the written/read data.
- */
-
-#define REG64_MS32(reg) ((u32 __iomem *)(reg))
-#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1)
-
-static inline void wr_reg64(u64 __iomem *reg, u64 data)
-{
-	writel(data >> 32, REG64_MS32(reg));
-	writel(data, REG64_LS32(reg));
-}
-
-static inline u64 rd_reg64(u64 __iomem *reg)
-{
-	return ((u64)readl(REG64_MS32(reg)) << 32 |
-		(u64)readl(REG64_LS32(reg)));
-}
-
 static int caam_reset_hw_jr(struct device_d *dev)
 {
 	struct caam_drv_private_jr *jrp = dev->priv;
 	uint64_t start;
 
 	/* initiate flush (required prior to reset) */
-	writel(JRCR_RESET, &jrp->rregs->jrcommand);
+	wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET);
 
 	start = get_time_ns();
-	while ((readl(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) ==
+	while ((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) ==
 	        JRINT_ERR_HALT_INPROGRESS) {
 		if (is_timeout(start, 100 * MSECOND)) {
 			dev_err(dev, "job ring %d timed out on flush\n",
@@ -67,10 +40,10 @@ static int caam_reset_hw_jr(struct device_d *dev)
 	}
 
 	/* initiate reset */
-	writel(JRCR_RESET, &jrp->rregs->jrcommand);
+	wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET);
 
 	start = get_time_ns();
-	while (readl(&jrp->rregs->jrcommand) & JRCR_RESET) {
+	while (rd_reg32(&jrp->rregs->jrcommand) & JRCR_RESET) {
 		if (is_timeout(start, 100 * MSECOND)) {
 			dev_err(dev, "job ring %d timed out on reset\n",
 				jrp->ridx);
@@ -90,7 +63,7 @@ static int caam_jr_dequeue(struct caam_drv_private_jr *jrp)
 	void *userarg;
 	int found;
 
-	while (readl(&jrp->rregs->outring_used)) {
+	while (rd_reg32(&jrp->rregs->outring_used)) {
 		head = jrp->head;
 
 		sw_idx = tail = jrp->tail;
@@ -102,7 +75,7 @@ static int caam_jr_dequeue(struct caam_drv_private_jr *jrp)
 			sw_idx = (tail + i) & (JOBR_DEPTH - 1);
 
 			if (jrp->outring[hw_idx].desc ==
-			    jrp->entinfo[sw_idx].desc_addr_dma) {
+			    caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) {
 				found = 1;
 				break; /* found */
 			}
@@ -120,12 +93,12 @@ static int caam_jr_dequeue(struct caam_drv_private_jr *jrp)
 		usercall = jrp->entinfo[sw_idx].callbk;
 		userarg = jrp->entinfo[sw_idx].cbkarg;
 		userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
-		userstatus = jrp->outring[hw_idx].jrstatus;
+		userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus);
 
 		barrier();
 
 		/* set done */
-		writel(1, &jrp->rregs->outring_rmvd);
+		wr_reg32(&jrp->rregs->outring_rmvd, 1);
 
 		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
 					   (JOBR_DEPTH - 1);
@@ -158,7 +131,7 @@ static int caam_jr_interrupt(struct caam_drv_private_jr *jrp)
 	u32 irqstate;
 
 	start = get_time_ns();
-	while (!(irqstate = readl(&jrp->rregs->jrintstatus))) {
+	while (!(irqstate = rd_reg32(&jrp->rregs->jrintstatus))) {
 		if (is_timeout(start, 100 * MSECOND)) {
 			dev_err(jrp->dev, "timeout waiting for interrupt\n");
 			return -ETIMEDOUT;
@@ -176,7 +149,7 @@ static int caam_jr_interrupt(struct caam_drv_private_jr *jrp)
 	}
 
 	/* Have valid interrupt at this point, just ACK and trigger */
-	writel(irqstate, &jrp->rregs->jrintstatus);
+	wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
 	return caam_jr_dequeue(jrp);
 }
@@ -218,7 +191,7 @@ int caam_jr_enqueue(struct device_d *dev, u32 *desc,
 	struct caam_jrentry_info *head_entry;
 	int head, tail, desc_size;
 
-	desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32);
+	desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32);
 
 	if (!dev->priv)
 		return -ENODEV;
@@ -227,7 +200,7 @@ int caam_jr_enqueue(struct device_d *dev, u32 *desc,
 
 	head = jrp->head;
 	tail = jrp->tail;
-	if (!readl(&jrp->rregs->inpring_avail) ||
+	if (!rd_reg32(&jrp->rregs->inpring_avail) ||
 	    CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
 		return -EBUSY;
 	}
@@ -242,7 +215,8 @@ int caam_jr_enqueue(struct device_d *dev, u32 *desc,
 	if (!jrp->inpring)
 		return -EIO;
 
-	jrp->inpring[jrp->inp_ring_write_index] = (dma_addr_t)desc;
+	jrp->inpring[jrp->inp_ring_write_index] =
+		cpu_to_caam_dma((dma_addr_t)desc);
 
 	barrier();
 
@@ -251,9 +225,9 @@ int caam_jr_enqueue(struct device_d *dev, u32 *desc,
 	jrp->head = (head + 1) & (JOBR_DEPTH - 1);
 
 	barrier();
-	writel(1, &jrp->rregs->inpring_jobadd);
+	wr_reg32(&jrp->rregs->inpring_jobadd, 1);
 
-	clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK);
+	clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
 
 	return caam_jr_interrupt(jrp);
 }
@@ -301,8 +275,8 @@ static int caam_jr_init(struct device_d *dev)
 
 	wr_reg64(&jrp->rregs->inpring_base, dma_inpring);
 	wr_reg64(&jrp->rregs->outring_base, dma_outring);
-	writel(JOBR_DEPTH, &jrp->rregs->inpring_size);
-	writel(JOBR_DEPTH, &jrp->rregs->outring_size);
+	wr_reg32(&jrp->rregs->inpring_size, JOBR_DEPTH);
+	wr_reg32(&jrp->rregs->outring_size, JOBR_DEPTH);
 
 	jrp->ringsize = JOBR_DEPTH;
 
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index b8ca5e396..d50e9ad69 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -10,9 +10,118 @@
 #include <linux/types.h>
 #include <io.h>
 
-/* These are common macros for Power, put here for ARMs */
-#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr))
-#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr))
+extern bool caam_little_end;
+
+#define caam_to_cpu(len)				\
+static inline u##len caam##len ## _to_cpu(u##len val)	\
+{							\
+	if (caam_little_end)				\
+		return le##len ## _to_cpu(val);		\
+	else						\
+		return be##len ## _to_cpu(val);		\
+}
+
+#define cpu_to_caam(len)				\
+static inline u##len cpu_to_caam##len(u##len val)	\
+{							\
+	if (caam_little_end)				\
+		return cpu_to_le##len(val);		\
+	else						\
+		return cpu_to_be##len(val);		\
+}
+
+caam_to_cpu(16)
+caam_to_cpu(32)
+caam_to_cpu(64)
+cpu_to_caam(16)
+cpu_to_caam(32)
+cpu_to_caam(64)
+
+static inline void wr_reg32(void __iomem *reg, u32 data)
+{
+	if (caam_little_end)
+		iowrite32(data, reg);
+	else
+		iowrite32be(data, reg);
+}
+
+static inline u32 rd_reg32(void __iomem *reg)
+{
+	if (caam_little_end)
+		return ioread32(reg);
+
+	return ioread32be(reg);
+}
+
+static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set)
+{
+	if (caam_little_end)
+		iowrite32((ioread32(reg) & ~clear) | set, reg);
+	else
+		iowrite32be((ioread32be(reg) & ~clear) | set, reg);
+}
+
+/*
+ * The DMA address registers in the JR are a pair of 32-bit registers.
+ * The layout is:
+ *
+ *    base + 0x0000 : most-significant 32 bits
+ *    base + 0x0004 : least-significant 32 bits
+ *
+ * The 32-bit version of this core therefore has to write to base + 0x0004
+ * to set the 32-bit wide DMA address. This seems to be independent of the
+ * endianness of the written/read data.
+ */
+
+#ifdef CONFIG_64BIT
+static inline void wr_reg64(void __iomem *reg, u64 data)
+{
+	if (caam_little_end)
+		iowrite64(data, reg);
+	else
+		iowrite64be(data, reg);
+}
+
+/* Read a 64-bit CAAM register using the detected CAAM endianness. */
+static inline u64 rd_reg64(void __iomem *reg)
+{
+	if (caam_little_end)
+		return ioread64(reg);
+	return ioread64be(reg);
+}
+#else /* CONFIG_64BIT */
+static inline void wr_reg64(void __iomem *reg, u64 data)
+{
+	wr_reg32((u32 __iomem *)(reg), data >> 32);
+	wr_reg32((u32 __iomem *)(reg) + 1, data);
+}
+
+static inline u64 rd_reg64(void __iomem *reg)
+{
+	return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
+		(u64)rd_reg32((u32 __iomem *)(reg) + 1));
+}
+#endif /* CONFIG_64BIT */
+
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+{
+	return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
+		 (u64)cpu_to_caam32(upper_32_bits(value)));
+}
+
+static inline u64 caam_dma64_to_cpu(u64 value)
+{
+	return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+		 (u64)caam32_to_cpu(upper_32_bits(value)));
+}
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
+#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
+#else
+#define cpu_to_caam_dma(value) cpu_to_caam32(value)
+#define caam_dma_to_cpu(value) caam32_to_cpu(value)
+#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
 
 /*
  * jr_outentry
@@ -190,6 +299,8 @@ struct caam_perfmon {
 	u32 faultliodn;	/* FALR - Fault Address LIODN	*/
 	u32 faultdetail;	/* FADR - Fault Addr Detail	*/
 	u32 rsvd3;
+#define CSTA_PLEND		BIT(10)
+#define CSTA_ALT_PLEND		BIT(18)
 	u32 status;		/* CSTA - CAAM Status */
 	u32 smpart;		/* Secure Memory Partition Parameters */
 	u32 smvid;		/* Secure Memory Version ID */
-- 
2.18.0




More information about the barebox mailing list