[PATCH v3 09/20] drivers: crypto: meson: process more than MAXDESCS descriptors
neil.armstrong at linaro.org
neil.armstrong at linaro.org
Tue Feb 6 05:13:52 PST 2024
Hi,
On 05/02/2024 16:55, Alexey Romanov wrote:
> 1. The old algorithm was not designed to process a large
> amount of memory, and therefore gave incorrect results.
>
> 2. Not all Amlogic SoCs use 3 KEY/IV descriptors.
> Add keyiv descriptors count parameter to platform data.
>
> Signed-off-by: Alexey Romanov <avromanov at salutedevices.com>
> ---
> drivers/crypto/amlogic/amlogic-gxl-cipher.c | 443 ++++++++++++--------
> drivers/crypto/amlogic/amlogic-gxl-core.c | 1 +
> drivers/crypto/amlogic/amlogic-gxl.h | 2 +
> 3 files changed, 281 insertions(+), 165 deletions(-)
>
> diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
> index c662c4b86e97..9c96e7b65e1e 100644
> --- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
> +++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
> @@ -17,35 +17,41 @@
> #include <crypto/internal/skcipher.h>
> #include "amlogic-gxl.h"
>
> -static bool meson_cipher_need_fallback(struct skcipher_request *areq)
> +static bool meson_cipher_need_fallback_sg(struct skcipher_request *areq,
> + struct scatterlist *sg)
> {
> - struct scatterlist *src_sg = areq->src;
> - struct scatterlist *dst_sg = areq->dst;
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
> + unsigned int blocksize = crypto_skcipher_blocksize(tfm);
> + unsigned int cryptlen = areq->cryptlen;
> +
> + while (cryptlen) {
> + unsigned int len = min(cryptlen, sg->length);
> +
> + if (!IS_ALIGNED(sg->offset, sizeof(u32)))
> + return true;
> + if (len % blocksize != 0)
> + return true;
> +
> + cryptlen -= len;
> + sg = sg_next(sg);
> + }
> +
> + return false;
> +}
>
> +static bool meson_cipher_need_fallback(struct skcipher_request *areq)
> +{
> if (areq->cryptlen == 0)
> return true;
>
> - if (sg_nents(src_sg) != sg_nents(dst_sg))
> + if (meson_cipher_need_fallback_sg(areq, areq->src))
> return true;
>
> - /* KEY/IV descriptors use 3 desc */
> - if (sg_nents(src_sg) > MAXDESC - 3 || sg_nents(dst_sg) > MAXDESC - 3)
> - return true;
> + if (areq->dst == areq->src)
> + return false;
>
> - while (src_sg && dst_sg) {
> - if ((src_sg->length % 16) != 0)
> - return true;
> - if ((dst_sg->length % 16) != 0)
> - return true;
> - if (src_sg->length != dst_sg->length)
> - return true;
> - if (!IS_ALIGNED(src_sg->offset, sizeof(u32)))
> - return true;
> - if (!IS_ALIGNED(dst_sg->offset, sizeof(u32)))
> - return true;
> - src_sg = sg_next(src_sg);
> - dst_sg = sg_next(dst_sg);
> - }
> + if (meson_cipher_need_fallback_sg(areq, areq->dst))
> + return true;
>
> return false;
> }
> @@ -76,6 +82,211 @@ static int meson_cipher_do_fallback(struct skcipher_request *areq)
> return err;
> }
>
> +struct cipher_ctx {
> + struct {
> + dma_addr_t addr;
> + unsigned int len;
> + } keyiv;
> +
> + struct skcipher_request *areq;
> + struct scatterlist *src_sg;
> + struct scatterlist *dst_sg;
> +
> + unsigned int src_offset;
> + unsigned int dst_offset;
> + unsigned int cryptlen;
> + unsigned int tloffset;
> +};
> +
> +static int meson_map_scatterlist(struct skcipher_request *areq, struct meson_dev *mc)
> +{
> + int nr_sgs, nr_sgd;
> +
> + if (areq->src == areq->dst) {
> + nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
> + DMA_BIDIRECTIONAL);
> + if (!nr_sgs) {
> + dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
> + return -EINVAL;
> + }
> + } else {
> + nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
> + DMA_TO_DEVICE);
> + if (!nr_sgs) {
> + dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
> + return -EINVAL;
> + }
> +
> + nr_sgd = dma_map_sg(mc->dev, areq->dst, sg_nents(areq->dst),
> + DMA_FROM_DEVICE);
> + if (!nr_sgd) {
> + dev_err(mc->dev, "Invalid SG count %d\n", nr_sgd);
> + return -EINVAL;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void meson_unmap_scatterlist(struct skcipher_request *areq, struct meson_dev *mc)
> +{
> + if (areq->src == areq->dst) {
> + dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_BIDIRECTIONAL);
> + } else {
> + dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
> + dma_unmap_sg(mc->dev, areq->dst, sg_nents(areq->dst), DMA_FROM_DEVICE);
> + }
> +}
> +
> +static void meson_setup_keyiv_descs(struct cipher_ctx *ctx)
> +{
> + struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
> + struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
> + struct meson_alg_template *algt = container_of(alg,
> + struct meson_alg_template, alg.skcipher.base);
> + struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
> + struct meson_dev *mc = op->mc;
> + unsigned int ivsize = crypto_skcipher_ivsize(tfm);
> + unsigned int blockmode = algt->blockmode;
> + int i;
> +
> + if (ctx->tloffset)
> + return;
> +
> + if (blockmode == DESC_OPMODE_CBC) {
> + memcpy(op->key + AES_MAX_KEY_SIZE, ctx->areq->iv, ivsize);
> + ctx->keyiv.len = AES_MAX_KEY_SIZE + ivsize;
> + dma_sync_single_for_device(mc->dev, ctx->keyiv.addr,
> + ctx->keyiv.len, DMA_TO_DEVICE);
> + }
> +
> + for (i = 0; i < mc->pdata->setup_desc_cnt; i++) {
> + struct meson_desc *desc =
> + &mc->chanlist[rctx->flow].tl[ctx->tloffset];
> + int offset = i * 16;
> +
> + desc->t_src = cpu_to_le32(ctx->keyiv.addr + offset);
> + desc->t_dst = cpu_to_le32(offset);
> + desc->t_status = cpu_to_le32(DESC_OWN | DESC_MODE_KEY | ctx->keyiv.len);
> +
> + ctx->tloffset++;
> + }
> +}
> +
> +static bool meson_setup_data_descs(struct cipher_ctx *ctx)
> +{
> + struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
> + struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
> + struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
> + struct meson_alg_template *algt = container_of(alg,
> + struct meson_alg_template,
> + alg.skcipher.base);
> + struct meson_dev *mc = op->mc;
> + struct meson_desc *desc = &mc->chanlist[rctx->flow].tl[ctx->tloffset];
> + unsigned int blocksize = crypto_skcipher_blocksize(tfm);
> + unsigned int blockmode = algt->blockmode;
> + unsigned int maxlen = rounddown(DESC_MAXLEN, blocksize);
> + unsigned int todo;
> + u32 v;
> +
> + ctx->tloffset++;
> +
> + todo = min(ctx->cryptlen, maxlen);
> + todo = min(todo, ctx->cryptlen);
> + todo = min(todo, sg_dma_len(ctx->src_sg) - ctx->src_offset);
> + todo = min(todo, sg_dma_len(ctx->dst_sg) - ctx->dst_offset);
> +
> + desc->t_src = cpu_to_le32(sg_dma_address(ctx->src_sg) + ctx->src_offset);
> + desc->t_dst = cpu_to_le32(sg_dma_address(ctx->dst_sg) + ctx->dst_offset);
> +
> + ctx->cryptlen -= todo;
> + ctx->src_offset += todo;
> + ctx->dst_offset += todo;
> +
> + v = DESC_OWN | blockmode | op->keymode | todo;
> + if (rctx->op_dir == MESON_ENCRYPT)
> + v |= DESC_ENCRYPTION;
> +
> + if (!ctx->cryptlen || ctx->tloffset == MAXDESC)
> + v |= DESC_LAST;
> +
> + desc->t_status = cpu_to_le32(v);
> +
> + return v & DESC_LAST;
> +}
> +
> +static int meson_kick_hardware(struct cipher_ctx *ctx)
> +{
> + struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
> + struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
> + struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
> + struct meson_alg_template *algt = container_of(alg,
> + struct meson_alg_template,
> + alg.skcipher.base);
> + struct meson_dev *mc = op->mc;
> + unsigned int ivsize = crypto_skcipher_ivsize(tfm);
> + unsigned int blockmode = algt->blockmode;
> + enum dma_data_direction new_iv_dir;
> + dma_addr_t new_iv_phys;
> + void *new_iv;
> + int err;
> +
> + if (blockmode == DESC_OPMODE_CBC) {
> + struct scatterlist *sg;
> + unsigned int offset;
> +
> + if (rctx->op_dir == MESON_ENCRYPT) {
> + sg = ctx->dst_sg;
> + offset = ctx->dst_offset;
> + new_iv_dir = DMA_FROM_DEVICE;
> + } else {
> + sg = ctx->src_sg;
> + offset = ctx->src_offset;
> + new_iv_dir = DMA_TO_DEVICE;
> + }
> +
> + if (ctx->areq->src == ctx->areq->dst)
> + new_iv_dir = DMA_BIDIRECTIONAL;
> +
> + offset -= ivsize;
> + new_iv = sg_virt(sg) + offset;
> + new_iv_phys = sg_dma_address(sg) + offset;
> + }
> +
> + if (blockmode == DESC_OPMODE_CBC &&
> + rctx->op_dir == MESON_DECRYPT) {
> + dma_sync_single_for_cpu(mc->dev, new_iv_phys,
> + ivsize, new_iv_dir);
> + memcpy(ctx->areq->iv, new_iv, ivsize);
> + }
> +
> + reinit_completion(&mc->chanlist[rctx->flow].complete);
> + meson_dma_start(mc, rctx->flow);
> + err = wait_for_completion_interruptible_timeout(
> + &mc->chanlist[rctx->flow].complete, msecs_to_jiffies(500));
> + if (err == 0) {
> + dev_err(mc->dev, "DMA timeout for flow %d\n", rctx->flow);
> + return -EINVAL;
> + } else if (err < 0) {
> + dev_err(mc->dev, "Waiting for DMA completion is failed (%d)\n", err);
> + return err;
> + }
> +
> + if (blockmode == DESC_OPMODE_CBC &&
> + rctx->op_dir == MESON_ENCRYPT) {
> + dma_sync_single_for_cpu(mc->dev, new_iv_phys,
> + ivsize, new_iv_dir);
> + memcpy(ctx->areq->iv, new_iv, ivsize);
> + }
> +
> + ctx->tloffset = 0;
> +
> + return 0;
> +}
> +
> static int meson_cipher(struct skcipher_request *areq)
> {
> struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
> @@ -84,176 +295,78 @@ static int meson_cipher(struct skcipher_request *areq)
> struct meson_dev *mc = op->mc;
> struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
> struct meson_alg_template *algt;
> - int flow = rctx->flow;
> - unsigned int todo, eat, len;
> - struct scatterlist *src_sg = areq->src;
> - struct scatterlist *dst_sg = areq->dst;
> - struct meson_desc *desc;
> - int nr_sgs, nr_sgd;
> - int i, err = 0;
> - unsigned int keyivlen, ivsize, offset, tloffset;
> - dma_addr_t phykeyiv;
> - void *backup_iv = NULL, *bkeyiv;
> - u32 v;
> -
> - algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
> + struct cipher_ctx ctx = {
> + .areq = areq,
> + .src_offset = 0,
> + .dst_offset = 0,
> + .src_sg = areq->src,
> + .dst_sg = areq->dst,
> + .cryptlen = areq->cryptlen,
> + };
> + unsigned int ivsize = crypto_skcipher_ivsize(tfm);
I'm getting build errors with a W=1 build:
drivers/crypto/amlogic/amlogic-gxl-cipher.c:306:22: error: unused variable ‘ivsize’ [-Werror=unused-variable]
306 | unsigned int ivsize = crypto_skcipher_ivsize(tfm);
| ^~~~~~
> + int err;
>
> - dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u flow=%d\n", __func__,
> + dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u ctx.flow=%d\n", __func__,
> crypto_tfm_alg_name(areq->base.tfm),
> areq->cryptlen,
> rctx->op_dir, crypto_skcipher_ivsize(tfm),
> - op->keylen, flow);
> + op->keylen, rctx->flow);
> +
> + algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
>
> #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
> algt->stat_req++;
> - mc->chanlist[flow].stat_req++;
> + mc->chanlist[rctx->flow].stat_req++;
> #endif
>
> - /*
> - * The hardware expect a list of meson_desc structures.
> - * The 2 first structures store key
> - * The third stores IV
> - */
> - bkeyiv = kzalloc(48, GFP_KERNEL | GFP_DMA);
> - if (!bkeyiv)
> + op->key = kzalloc(48, GFP_KERNEL | GFP_DMA);
> + if (!op.key)
> return -ENOMEM;
drivers/crypto/amlogic/amlogic-gxl-cipher.c: In function ‘meson_cipher’:
drivers/crypto/amlogic/amlogic-gxl-cipher.c:323:16: error: ‘op’ is a pointer; did you mean to use ‘->’?
323 | if (!op.key)
| ^
>
> - memcpy(bkeyiv, op->key, op->keylen);
> - keyivlen = op->keylen;
> + memcpy(op->key, op->key, op->keylen);
> + ctx.keyiv.len = op->keylen;
> + if (ctx.keyiv.len == AES_KEYSIZE_192)
> + ctx.keyiv.len = AES_MAX_KEY_SIZE;
>
> - ivsize = crypto_skcipher_ivsize(tfm);
> - if (areq->iv && ivsize > 0) {
> - if (ivsize > areq->cryptlen) {
> - dev_err(mc->dev, "invalid ivsize=%d vs len=%d\n", ivsize, areq->cryptlen);
> - err = -EINVAL;
> - goto theend;
> - }
> - memcpy(bkeyiv + 32, areq->iv, ivsize);
> - keyivlen = 48;
> - if (rctx->op_dir == MESON_DECRYPT) {
> - backup_iv = kzalloc(ivsize, GFP_KERNEL);
> - if (!backup_iv) {
> - err = -ENOMEM;
> - goto theend;
> - }
> - offset = areq->cryptlen - ivsize;
> - scatterwalk_map_and_copy(backup_iv, areq->src, offset,
> - ivsize, 0);
> - }
> - }
> - if (keyivlen == AES_KEYSIZE_192)
> - keyivlen = AES_MAX_KEY_SIZE;
> -
> - phykeyiv = dma_map_single(mc->dev, bkeyiv, keyivlen,
> + ctx.keyiv.addr = dma_map_single(mc->dev, op->key, ctx.keyiv.len,
> DMA_TO_DEVICE);
> - err = dma_mapping_error(mc->dev, phykeyiv);
> + err = dma_mapping_error(mc->dev, ctx.keyiv.addr);
> if (err) {
> dev_err(mc->dev, "Cannot DMA MAP KEY IV\n");
> goto theend;
> }
>
> - tloffset = 0;
> - eat = 0;
> - i = 0;
> - while (keyivlen > eat) {
> - desc = &mc->chanlist[flow].tl[tloffset];
> - memset(desc, 0, sizeof(struct meson_desc));
> - todo = min(keyivlen - eat, 16u);
> - desc->t_src = cpu_to_le32(phykeyiv + i * 16);
> - desc->t_dst = cpu_to_le32(i * 16);
> - v = DESC_MODE_KEY | DESC_OWN | 16;
> - desc->t_status = cpu_to_le32(v);
> -
> - eat += todo;
> - i++;
> - tloffset++;
> - }
> -
> - if (areq->src == areq->dst) {
> - nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
> - DMA_BIDIRECTIONAL);
> - if (!nr_sgs) {
> - dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
> - err = -EINVAL;
> - goto theend;
> - }
> - nr_sgd = nr_sgs;
> - } else {
> - nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
> - DMA_TO_DEVICE);
> - if (!nr_sgs || nr_sgs > MAXDESC - 3) {
> - dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
> - err = -EINVAL;
> - goto theend;
> - }
> - nr_sgd = dma_map_sg(mc->dev, areq->dst, sg_nents(areq->dst),
> - DMA_FROM_DEVICE);
> - if (!nr_sgd || nr_sgd > MAXDESC - 3) {
> - dev_err(mc->dev, "Invalid SG count %d\n", nr_sgd);
> - err = -EINVAL;
> - goto theend;
> - }
> - }
> -
> - src_sg = areq->src;
> - dst_sg = areq->dst;
> - len = areq->cryptlen;
> - while (src_sg) {
> - desc = &mc->chanlist[flow].tl[tloffset];
> - memset(desc, 0, sizeof(struct meson_desc));
> -
> - desc->t_src = cpu_to_le32(sg_dma_address(src_sg));
> - desc->t_dst = cpu_to_le32(sg_dma_address(dst_sg));
> - todo = min(len, sg_dma_len(src_sg));
> - v = op->keymode | DESC_OWN | todo | algt->blockmode;
> - if (rctx->op_dir)
> - v |= DESC_ENCRYPTION;
> - len -= todo;
> -
> - if (!sg_next(src_sg))
> - v |= DESC_LAST;
> - desc->t_status = cpu_to_le32(v);
> - tloffset++;
> - src_sg = sg_next(src_sg);
> - dst_sg = sg_next(dst_sg);
> - }
> + err = meson_map_scatterlist(areq, mc);
> + if (err)
> + goto theend;
>
> - reinit_completion(&mc->chanlist[flow].complete);
> - meson_dma_start(mc, flow);
> + ctx.tloffset = 0;
>
> - err = wait_for_completion_interruptible_timeout(&mc->chanlist[flow].complete,
> - msecs_to_jiffies(500));
> - if (err == 0) {
> - dev_err(mc->dev, "DMA timeout for flow %d\n", flow);
> - err = -EINVAL;
> - } else if (err < 0) {
> - dev_err(mc->dev, "Waiting for DMA completion is failed (%d)\n", err);
> - } else {
> - /* No error */
> - err = 0;
> - }
> + while (ctx.cryptlen) {
> + meson_setup_keyiv_descs(&ctx);
>
> - dma_unmap_single(mc->dev, phykeyiv, keyivlen, DMA_TO_DEVICE);
> + if (meson_setup_data_descs(&ctx)) {
> + err = meson_kick_hardware(&ctx);
> + if (err)
> + break;
> + }
>
> - if (areq->src == areq->dst) {
> - dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_BIDIRECTIONAL);
> - } else {
> - dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
> - dma_unmap_sg(mc->dev, areq->dst, sg_nents(areq->dst), DMA_FROM_DEVICE);
> - }
> + if (ctx.src_offset == sg_dma_len(ctx.src_sg)) {
> + ctx.src_offset = 0;
> + ctx.src_sg = sg_next(ctx.src_sg);
> + }
>
> - if (areq->iv && ivsize > 0) {
> - if (rctx->op_dir == MESON_DECRYPT) {
> - memcpy(areq->iv, backup_iv, ivsize);
> - } else {
> - scatterwalk_map_and_copy(areq->iv, areq->dst,
> - areq->cryptlen - ivsize,
> - ivsize, 0);
> + if (ctx.dst_offset == sg_dma_len(ctx.dst_sg)) {
> + ctx.dst_offset = 0;
> + ctx.dst_sg = sg_next(ctx.dst_sg);
> }
> }
> +
> + dma_unmap_single(mc->dev, ctx.keyiv.addr, ctx.keyiv.len, DMA_TO_DEVICE);
> + meson_unmap_scatterlist(areq, mc);
> +
> theend:
> - kfree_sensitive(bkeyiv);
> - kfree_sensitive(backup_iv);
> + kfree_sensitive(op->key);
>
> return err;
> }
> diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
> index 22ff2768b5e5..f93e14f5717d 100644
> --- a/drivers/crypto/amlogic/amlogic-gxl-core.c
> +++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
> @@ -199,6 +199,7 @@ static const struct meson_pdata meson_gxl_pdata = {
> .descs_reg = 0x0,
> .status_reg = 0x4,
> .need_clk = true,
> + .setup_desc_cnt = 3,
> };
>
> static const struct of_device_id meson_crypto_of_match_table[] = {
> diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
> index a0d83c82906d..eb2f8cd72b65 100644
> --- a/drivers/crypto/amlogic/amlogic-gxl.h
> +++ b/drivers/crypto/amlogic/amlogic-gxl.h
> @@ -83,11 +83,13 @@ struct meson_flow {
> * @reg_descs: offset to descriptors register
> * @reg_status: offset to status register
> * @need_clk: clock input is needed
> + * @setup_desc_cnt: number of setup descriptors to configure.
> */
> struct meson_pdata {
> u32 descs_reg;
> u32 status_reg;
> bool need_clk;
> + u32 setup_desc_cnt;
> };
>
> /*
Thanks,
Neil
More information about the linux-amlogic
mailing list