[PATCH 1/2] zstd: Add zstd decompression logic

Pingfan Liu piliu at redhat.com
Wed Dec 11 04:16:15 PST 2024


On Wed, Dec 11, 2024 at 10:58 AM Jeremy Linton <jeremy.linton at arm.com> wrote:
>
> zstd is becoming a popular zlib replacement because it both tends to
> get a better compression ratio and performs considerably
> better. As such, zstd is now one of the options that can be used to
> compress the Linux kernel.
>
> Let's enable it by using a system provided shared library, creating the
> required boilerplate to match the existing zlib/lzma function
> signatures, and creating build options to enable/disable it.
>
> Signed-off-by: Jeremy Linton <jeremy.linton at arm.com>
> ---
>  configure.ac       |  10 +++
>  kexec/Makefile     |   4 +-
>  kexec/kexec-zstd.h |   7 ++
>  kexec/zstd.c       | 191 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 211 insertions(+), 1 deletion(-)
>  create mode 100644 kexec/kexec-zstd.h
>  create mode 100644 kexec/zstd.c
>
> diff --git a/configure.ac b/configure.ac
> index e030302..43cdb64 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -106,6 +106,9 @@ AC_ARG_WITH([zlib], AS_HELP_STRING([--without-zlib],[disable zlib support]),
>  AC_ARG_WITH([lzma], AS_HELP_STRING([--without-lzma],[disable lzma support]),
>         [ with_lzma="$withval"], [ with_lzma=yes ] )
>
> +AC_ARG_WITH([zstd], AS_HELP_STRING([--without-zstd],[disable zstd support]),
> +       [ with_zstd="$withval"], [ with_zstd=yes ] )
> +
>  AC_ARG_WITH([xen], AS_HELP_STRING([--without-xen],
>         [disable extended xen support]), [ with_xen="$withval"], [ with_xen=yes ] )
>
> @@ -180,6 +183,13 @@ if test "$with_lzma" = yes ; then
>                 AC_MSG_NOTICE([lzma support disabled]))])
>  fi
>
> +dnl See if I have a usable copy of zstd available
> +if test "$with_zstd" = yes ; then
> +       AC_CHECK_HEADER(zstd.h,
> +               [AC_CHECK_LIB(zstd, ZSTD_decompress, ,
> +               AC_MSG_NOTICE([zstd support disabled]))])
> +fi
> +
>  dnl find Xen control stack libraries
>  if test "$with_xen" = yes ; then
>         AC_CHECK_HEADER(xenctrl.h,
> diff --git a/kexec/Makefile b/kexec/Makefile
> index d4f26d7..e969d1e 100644
> --- a/kexec/Makefile
> +++ b/kexec/Makefile
> @@ -26,6 +26,7 @@ KEXEC_SRCS_base += kexec/crashdump-xen.c
>  KEXEC_SRCS_base += kexec/phys_arch.c
>  KEXEC_SRCS_base += kexec/lzma.c
>  KEXEC_SRCS_base += kexec/zlib.c
> +KEXEC_SRCS_base += kexec/zstd.c
>  KEXEC_SRCS_base += kexec/kexec-xen.c
>  KEXEC_SRCS_base += kexec/symbols.c
>
> @@ -36,7 +37,8 @@ dist += kexec/Makefile                                                \
>         kexec/crashdump.h kexec/firmware_memmap.h               \
>         kexec/kexec-elf-boot.h                                  \
>         kexec/kexec-elf.h kexec/kexec-sha256.h                  \
> -       kexec/kexec-zlib.h kexec/kexec-lzma.h                   \
> +       kexec/kexec-zlib.h kexec/kexec-lzma.h                   \
> +       kexec/kexec-zstd.h                                      \
>         kexec/kexec-xen.h                                       \
>         kexec/kexec-syscall.h kexec/kexec.h kexec/kexec.8
>
> diff --git a/kexec/kexec-zstd.h b/kexec/kexec-zstd.h
> new file mode 100644
> index 0000000..c5dbb24
> --- /dev/null
> +++ b/kexec/kexec-zstd.h
> @@ -0,0 +1,7 @@
> +#ifndef __KEXEC_ZSTD_H
> +#define __KEXEC_ZSTD_H
> +
> +#include <sys/types.h>
> +
> +char *zstd_decompress_file(const char *filename, off_t *r_size);
> +#endif /* __KEXEC_ZSTD_H */
> diff --git a/kexec/zstd.c b/kexec/zstd.c
> new file mode 100644
> index 0000000..9309251
> --- /dev/null
> +++ b/kexec/zstd.c
> @@ -0,0 +1,191 @@
> +#include "kexec-zstd.h"
> +#include "kexec.h"
> +
> +#include "config.h"
> +
> +#ifdef HAVE_LIBZSTD
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <stdbool.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +#include <zstd.h>
> +
> +
> +/*
> + * Reimplementation of private function available if lib is statically linked.
> + * Remove when said function becomes public.
> + */
> +unsigned ZSTD_isFrame(const void* buffer, size_t size)
> +{
> +       uint8_t *buf = (uint8_t *)buffer;
> +       /* Magic zstd frame header value */
> +       if ((buf[0] == 0x28) &&
> +           (buf[1] == 0xB5) &&
> +           (buf[2] == 0x2F) &&
> +           (buf[3] == 0xFD))
> +               return 1;
> +
> +       return 0;
> +}
> +
> +/*
> + * A guess at the max compression ratio for buffer overallocation.
> + * Real values are frequently around 4:1; if this is wrong
> + * it just results in buffer reallocation and the decompression
> + * operation being restarted from where it left off.
> + */
> +#define EXPECTED_RATIO 8
> +
> +/*
> + * This supports the streaming compression mode the kernel uses
> + * that can result in multiple zstd frames comprising a single
> + * compressed image. In order to be a bit more efficient than
> + * the libz/lzma implementations, we attempt to discover the input
> + * and output image sizes before performing the decompression.
> + * But, in streaming mode the first frame may not have a length
> + * or it seems the length could be incorrect if multiple frames
> + * are appended together. So, it's written with buffer resize logic
> + * but a guess at the compression ratio is made to avoid the resize/copy
> + * operation. Ideally this code efficiently allocates the
> + * correct input buffer, and no more than 2-3x the output buffer
> + * size so that it can perform the decompress operation with a
> + * single decompress call.
> + */
> +char *zstd_decompress_file(const char *filename, off_t *r_size)
> +{
> +       bool again;
> +       void *cBuff = NULL;
> +       FILE *fp = NULL; /* use c streams to match gzip/lzma logic */
> +       struct stat fp_stats;
> +       uint8_t magic[4];
> +       ZSTD_DCtx* dctx = NULL;
> +
> +       ZSTD_outBuffer output = { NULL, 0, 0 };
> +       ZSTD_inBuffer input = { NULL, 0, 0 };
> +
> +
> +       *r_size = 0;
> +       if (!filename) {
> +               return NULL;
> +       }
> +       if (stat(filename, &fp_stats))
> +       {
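            ^^^ Inconsistent coding style: the opening brace is on its own line here, but on the same line as the `if` just above. I prefer the kernel coding style (opening brace on the same line).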

> +               dbgprintf("Cannot stat `%s'\n", filename);
> +               return NULL;
> +       }
> +       if (fp_stats.st_size < sizeof(magic))
> +       {
            ^^^ Ditto here, and for the following opening braces that sit on their own line.


Thanks,

Pingfan

> +               dbgprintf("short file\n");
> +               return NULL;
> +       }
> +       input.size = fp_stats.st_size;
> +
> +       fp = fopen(filename, "rb");
> +       if (fp == 0) {
> +               dbgprintf("Cannot open `%s'\n", filename);
> +               goto fail;
> +       }
> +       /* before we read the whole buffer see if this looks like a zstd frame */
> +       if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) {
> +               dbgprintf("Unable to read zstd header\n");
> +               goto fail;
> +       }
> +
> +       if (!ZSTD_isFrame((void*)&magic, sizeof(magic)))
> +       {
> +               dbgprintf("Not zstd compressed\n");
> +               goto fail;
> +       }
> +
> +       cBuff = xmalloc(input.size);
> +       input.src = cBuff; /* use cBuff ptr to avoid const/mismatches */
> +       rewind(fp);
> +       if (fread(cBuff, 1, input.size, fp) != input.size)
> +       {
> +               dbgprintf("Unable to read compressed data\n");
> +               goto fail;
> +       }
> +       fclose(fp);
> +       fp = NULL;
> +
> +
> +       output.size = ZSTD_getFrameContentSize(input.src, input.size);
> +       if (output.size == ZSTD_CONTENTSIZE_ERROR)
> +       {
> +               dbgprintf("not compressed by zstd\n");
> +               goto fail;
> +       }
> +
> +       if (output.size == ZSTD_CONTENTSIZE_UNKNOWN)
> +       {
> +               /*
> +                * The decompressed size is an optional field in the frame header.
> +                * So we guess at the compression ratio to avoid reallocating
> +                * the buffer, but this can fail so we still have code to
> +                * handle that case.
> +                */
> +               dbgprintf("original zstd size unknown!\n");
> +               output.size = fp_stats.st_size * EXPECTED_RATIO;
> +       }
> +       output.dst = xmalloc(output.size);
> +
> +       dctx = ZSTD_createDCtx();
> +       if (dctx == NULL) {
> +               dbgprintf("zstd context allocation error\n");
> +               goto fail;
> +       }
> +
> +       do {
> +               again = false;
> +               size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
> +               if (ZSTD_isError(ret)) {
> +                       dbgprintf("zstd error %s\n", ZSTD_getErrorName(ret));
> +                       goto fail;
> +               }
> +               dbgprintf("zstd decompressed to input=%ld output=%ld\n", input.pos, output.pos);
> +               if (output.pos == output.size) {
> +                       /* uh the output buffer wasn't large enough, double it */
> +                       output.size <<= 1;
> +                       output.dst = xrealloc(output.dst, output.size);
> +                       /*
> +                        * handle case where all the input was consumed but not all the
> +                        * output was written because of insufficient output buffer
> +                        */
> +                       again = true;
> +               }
> +
> +       } while ((input.pos < input.size) || again);
> +
> +       free(cBuff);
> +       ZSTD_freeDCtx(dctx);
> +
> +       *r_size = output.pos;
> +
> +       return output.dst;
> +
> +fail:
> +       if (fp)
> +               fclose(fp);
> +       if (dctx)
> +               ZSTD_freeDCtx(dctx);
> +       if (output.dst)
> +               free(output.dst);
> +       if (cBuff)
> +               free(cBuff);
> +
> +       return NULL;
> +
> +}
> +#else
> +
> +char *zstd_decompress_file(const char *filename, off_t *r_size)
> +{
> +       return NULL;
> +}
> +#endif
> --
> 2.47.0
>
>




More information about the kexec mailing list