[PATCH 1/2] zstd: Add zstd decompression logic
Pingfan Liu
piliu at redhat.com
Wed Dec 11 04:16:15 PST 2024
On Wed, Dec 11, 2024 at 10:58 AM Jeremy Linton <jeremy.linton at arm.com> wrote:
>
> zstd is becoming a popular zlib replacement because it tends both to
> get a better compression ratio and to perform considerably
> better. As such, zstd is now one of the options that can be used to
> compress the Linux kernel.
>
> Let's enable it by using a system-provided shared library, creating the
> required boilerplate to match the existing zlib/lzma function
> signatures, and creating build options to enable/disable it.
>
> Signed-off-by: Jeremy Linton <jeremy.linton at arm.com>
> ---
> configure.ac | 10 +++
> kexec/Makefile | 4 +-
> kexec/kexec-zstd.h | 7 ++
> kexec/zstd.c | 191 +++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 211 insertions(+), 1 deletion(-)
> create mode 100644 kexec/kexec-zstd.h
> create mode 100644 kexec/zstd.c
>
> diff --git a/configure.ac b/configure.ac
> index e030302..43cdb64 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -106,6 +106,9 @@ AC_ARG_WITH([zlib], AS_HELP_STRING([--without-zlib],[disable zlib support]),
> AC_ARG_WITH([lzma], AS_HELP_STRING([--without-lzma],[disable lzma support]),
> [ with_lzma="$withval"], [ with_lzma=yes ] )
>
> +AC_ARG_WITH([zstd], AS_HELP_STRING([--without-zstd],[disable zstd support]),
> + [ with_zstd="$withval"], [ with_zstd=yes ] )
> +
> AC_ARG_WITH([xen], AS_HELP_STRING([--without-xen],
> [disable extended xen support]), [ with_xen="$withval"], [ with_xen=yes ] )
>
> @@ -180,6 +183,13 @@ if test "$with_lzma" = yes ; then
> AC_MSG_NOTICE([lzma support disabled]))])
> fi
>
> +dnl See if I have a usable copy of zstd available
> +if test "$with_zstd" = yes ; then
> + AC_CHECK_HEADER(zstd.h,
> + [AC_CHECK_LIB(zstd, ZSTD_decompress, ,
> + AC_MSG_NOTICE([zstd support disabled]))])
> +fi
> +
> dnl find Xen control stack libraries
> if test "$with_xen" = yes ; then
> AC_CHECK_HEADER(xenctrl.h,
> diff --git a/kexec/Makefile b/kexec/Makefile
> index d4f26d7..e969d1e 100644
> --- a/kexec/Makefile
> +++ b/kexec/Makefile
> @@ -26,6 +26,7 @@ KEXEC_SRCS_base += kexec/crashdump-xen.c
> KEXEC_SRCS_base += kexec/phys_arch.c
> KEXEC_SRCS_base += kexec/lzma.c
> KEXEC_SRCS_base += kexec/zlib.c
> +KEXEC_SRCS_base += kexec/zstd.c
> KEXEC_SRCS_base += kexec/kexec-xen.c
> KEXEC_SRCS_base += kexec/symbols.c
>
> @@ -36,7 +37,8 @@ dist += kexec/Makefile \
> kexec/crashdump.h kexec/firmware_memmap.h \
> kexec/kexec-elf-boot.h \
> kexec/kexec-elf.h kexec/kexec-sha256.h \
> - kexec/kexec-zlib.h kexec/kexec-lzma.h \
> + kexec/kexec-zlib.h kexec/kexec-lzma.h \
> + kexec/kexec-zstd.h \
> kexec/kexec-xen.h \
> kexec/kexec-syscall.h kexec/kexec.h kexec/kexec.8
>
> diff --git a/kexec/kexec-zstd.h b/kexec/kexec-zstd.h
> new file mode 100644
> index 0000000..c5dbb24
> --- /dev/null
> +++ b/kexec/kexec-zstd.h
> @@ -0,0 +1,7 @@
> +#ifndef __KEXEC_ZSTD_H
> +#define __KEXEC_ZSTD_H
> +
> +#include <sys/types.h>
> +
> +char *zstd_decompress_file(const char *filename, off_t *r_size);
> +#endif /* __KEXEC_ZSTD_H */
> diff --git a/kexec/zstd.c b/kexec/zstd.c
> new file mode 100644
> index 0000000..9309251
> --- /dev/null
> +++ b/kexec/zstd.c
> @@ -0,0 +1,191 @@
> +#include "kexec-zstd.h"
> +#include "kexec.h"
> +
> +#include "config.h"
> +
> +#ifdef HAVE_LIBZSTD
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <stdbool.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +#include <zstd.h>
> +
> +
> +/*
> + * Reimplementation of private function available if lib is statically linked.
> + * Remove when said function becomes public.
> + */
> +unsigned ZSTD_isFrame(const void* buffer, size_t size)
> +{
> + uint8_t *buf = (uint8_t *)buffer;
> + /* Magic zstd frame header value */
> + if ((buf[0] == 0x28) &&
> + (buf[1] == 0xB5) &&
> + (buf[2] == 0x2F) &&
> + (buf[3] == 0xFD))
> + return 1;
> +
> + return 0;
> +}
> +
> +/*
> + * A guess at the max compression ratio for buffer overallocation.
> + * Real values are frequently around 4:1; if this is wrong
> + * it just results in buffer reallocation and the decompression
> + * operation being restarted from where it left off.
> + */
> +#define EXPECTED_RATIO 8
> +
> +/*
> + * This supports the streaming compression mode the kernel uses
> + * that can result in multiple zstd frames comprising a single
> + * compressed image. In order to be a bit more efficient than
> + * the libz/lzma implementations, we attempt to discover the input
> + * and output image sizes before performing the decompression.
> + * But, in streaming mode the first frame may not have a length
> + * or it seems the length could be incorrect if multiple frames
> + * are appended together. So, it's written with buffer resize logic
> + * but a guess at the compression ratio is made to avoid the resize/copy
> + * operation. Ideally this code efficiently allocates the
> + * correct input buffer, and no more than 2-3x the output buffer
> + * size so that it can perform the decompress operation with a
> + * single decompress call.
> + */
> +char *zstd_decompress_file(const char *filename, off_t *r_size)
> +{
> + bool again;
> + void *cBuff = NULL;
> + FILE *fp = NULL; /* use c streams to match gzip/lzma logic */
> + struct stat fp_stats;
> + uint8_t magic[4];
> + ZSTD_DCtx* dctx = NULL;
> +
> + ZSTD_outBuffer output = { NULL, 0, 0 };
> + ZSTD_inBuffer input = { NULL, 0, 0 };
> +
> +
> + *r_size = 0;
> + if (!filename) {
> + return NULL;
> + }
> + if (stat(filename, &fp_stats))
> + {
^^^ Inconsistent coding style: the opening brace is on its own line here, unlike the `if (!filename) {` above. I prefer the kernel coding style.
> + dbgprintf("Cannot stat `%s'\n", filename);
> + return NULL;
> + }
> + if (fp_stats.st_size < sizeof(magic))
> + {
^^^ ditto and the following brace.
Thanks,
Pingfan
> + dbgprintf("short file\n");
> + return NULL;
> + }
> + input.size = fp_stats.st_size;
> +
> + fp = fopen(filename, "rb");
> + if (fp == 0) {
> + dbgprintf("Cannot open `%s'\n", filename);
> + goto fail;
> + }
> + /* before we read the whole buffer see if this looks like a zstd frame */
> + if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) {
> + dbgprintf("Unable to read zstd header\n");
> + goto fail;
> + }
> +
> + if (!ZSTD_isFrame((void*)&magic, sizeof(magic)))
> + {
> + dbgprintf("Not zstd compressed\n");
> + goto fail;
> + }
> +
> + cBuff = xmalloc(input.size);
> + input.src = cBuff; /* use cBuff ptr to avoid const/mismatches */
> + rewind(fp);
> + if (fread(cBuff, 1, input.size, fp) != input.size)
> + {
> + dbgprintf("Unable to read compressed data\n");
> + goto fail;
> + }
> + fclose(fp);
> + fp = NULL;
> +
> +
> + output.size = ZSTD_getFrameContentSize(input.src, input.size);
> + if (output.size == ZSTD_CONTENTSIZE_ERROR)
> + {
> + dbgprintf("not compressed by zstd\n");
> + goto fail;
> + }
> +
> + if (output.size == ZSTD_CONTENTSIZE_UNKNOWN)
> + {
> + /*
> + * The compressed size is an optional field in the header
> + * So we guess at the compression ratio to avoid reallocating
> + * the buffer, but this can fail so we still have code to
> + * handle that case.
> + */
> + dbgprintf("original zstd size unknown!\n");
> + output.size = fp_stats.st_size * EXPECTED_RATIO;
> + }
> + output.dst = xmalloc(output.size);
> +
> + dctx = ZSTD_createDCtx();
> + if (dctx == NULL) {
> + dbgprintf("zstd context allocation error\n");
> + goto fail;
> + }
> +
> + do {
> + again = false;
> + size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
> + if (ZSTD_isError(ret)) {
> + dbgprintf("zstd error %s\n", ZSTD_getErrorName(ret));
> + goto fail;
> + }
> + dbgprintf("zstd decompressed to input=%ld output=%ld\n", input.pos, output.pos);
> + if (output.pos == output.size) {
> + /* uh the output buffer wasn't large enough, double it */
> + output.size <<= 1;
> + output.dst = xrealloc(output.dst, output.size);
> + /*
> + * handle case where all the input was consumed but not all the
> + * output was written because of insufficient output buffer
> + */
> + again = true;
> + }
> +
> + } while ((input.pos < input.size) || again);
> +
> + free(cBuff);
> + ZSTD_freeDCtx(dctx);
> +
> + *r_size = output.pos;
> +
> + return output.dst;
> +
> +fail:
> + if (fp)
> + fclose(fp);
> + if (dctx)
> + ZSTD_freeDCtx(dctx);
> + if (output.dst)
> + free(output.dst);
> + if (cBuff)
> + free(cBuff);
> +
> + return NULL;
> +
> +}
> +#else
> +
> +char *zstd_decompress_file(const char *filename, off_t *r_size)
> +{
> + return NULL;
> +}
> +#endif
> --
> 2.47.0
>
>
More information about the kexec
mailing list