[PATCH 1/2] zstd: Add zstd decompression logic

Jeremy Linton jeremy.linton at arm.com
Wed Dec 11 06:53:04 PST 2024


Hi,

On 12/11/24 6:16 AM, Pingfan Liu wrote:
> On Wed, Dec 11, 2024 at 10:58 AM Jeremy Linton <jeremy.linton at arm.com> wrote:
>>
>> zstd is becoming a popular zlib replacement because it both tends to
>> get a better compression ratio and performs considerably
>> better. As such, zstd is now one of the options that can be used to
>> compress the Linux kernel.
>>
>> Let's enable it by using a system-provided shared library, creating the
>> required boilerplate to match the existing zlib/lzma function
>> signatures, and creating build options to enable/disable it.
>>
>> Signed-off-by: Jeremy Linton <jeremy.linton at arm.com>
>> ---
>>   configure.ac       |  10 +++
>>   kexec/Makefile     |   4 +-
>>   kexec/kexec-zstd.h |   7 ++
>>   kexec/zstd.c       | 191 +++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 211 insertions(+), 1 deletion(-)
>>   create mode 100644 kexec/kexec-zstd.h
>>   create mode 100644 kexec/zstd.c
>>
>> diff --git a/configure.ac b/configure.ac
>> index e030302..43cdb64 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -106,6 +106,9 @@ AC_ARG_WITH([zlib], AS_HELP_STRING([--without-zlib],[disable zlib support]),
>>   AC_ARG_WITH([lzma], AS_HELP_STRING([--without-lzma],[disable lzma support]),
>>          [ with_lzma="$withval"], [ with_lzma=yes ] )
>>
>> +AC_ARG_WITH([zstd], AS_HELP_STRING([--without-zstd],[disable zstd support]),
>> +       [ with_zstd="$withval"], [ with_zstd=yes ] )
>> +
>>   AC_ARG_WITH([xen], AS_HELP_STRING([--without-xen],
>>          [disable extended xen support]), [ with_xen="$withval"], [ with_xen=yes ] )
>>
>> @@ -180,6 +183,13 @@ if test "$with_lzma" = yes ; then
>>                  AC_MSG_NOTICE([lzma support disabled]))])
>>   fi
>>
>> +dnl See if I have a usable copy of zstd available
>> +if test "$with_zstd" = yes ; then
>> +       AC_CHECK_HEADER(zstd.h,
>> +               [AC_CHECK_LIB(zstd, ZSTD_decompress, ,
>> +               AC_MSG_NOTICE([zstd support disabled]))])
>> +fi
>> +
>>   dnl find Xen control stack libraries
>>   if test "$with_xen" = yes ; then
>>          AC_CHECK_HEADER(xenctrl.h,
>> diff --git a/kexec/Makefile b/kexec/Makefile
>> index d4f26d7..e969d1e 100644
>> --- a/kexec/Makefile
>> +++ b/kexec/Makefile
>> @@ -26,6 +26,7 @@ KEXEC_SRCS_base += kexec/crashdump-xen.c
>>   KEXEC_SRCS_base += kexec/phys_arch.c
>>   KEXEC_SRCS_base += kexec/lzma.c
>>   KEXEC_SRCS_base += kexec/zlib.c
>> +KEXEC_SRCS_base += kexec/zstd.c
>>   KEXEC_SRCS_base += kexec/kexec-xen.c
>>   KEXEC_SRCS_base += kexec/symbols.c
>>
>> @@ -36,7 +37,8 @@ dist += kexec/Makefile                                                \
>>          kexec/crashdump.h kexec/firmware_memmap.h               \
>>          kexec/kexec-elf-boot.h                                  \
>>          kexec/kexec-elf.h kexec/kexec-sha256.h                  \
>> -       kexec/kexec-zlib.h kexec/kexec-lzma.h                   \
>> +       kexec/kexec-zlib.h kexec/kexec-lzma.h                   \
>> +       kexec/kexec-zstd.h                                      \
>>          kexec/kexec-xen.h                                       \
>>          kexec/kexec-syscall.h kexec/kexec.h kexec/kexec.8
>>
>> diff --git a/kexec/kexec-zstd.h b/kexec/kexec-zstd.h
>> new file mode 100644
>> index 0000000..c5dbb24
>> --- /dev/null
>> +++ b/kexec/kexec-zstd.h
>> @@ -0,0 +1,7 @@
>> +#ifndef __KEXEC_ZSTD_H
>> +#define __KEXEC_ZSTD_H
>> +
>> +#include <sys/types.h>
>> +
>> +char *zstd_decompress_file(const char *filename, off_t *r_size);
>> +#endif /* __KEXEC_ZSTD_H */
>> diff --git a/kexec/zstd.c b/kexec/zstd.c
>> new file mode 100644
>> index 0000000..9309251
>> --- /dev/null
>> +++ b/kexec/zstd.c
>> @@ -0,0 +1,191 @@
>> +#include "kexec-zstd.h"
>> +#include "kexec.h"
>> +
>> +#include "config.h"
>> +
>> +#ifdef HAVE_LIBZSTD
>> +#include <stdio.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <errno.h>
>> +#include <limits.h>
>> +#include <stdbool.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <unistd.h>
>> +#include <ctype.h>
>> +#include <zstd.h>
>> +
>> +
>> +/*
>> + * Reimplementation of private function available if lib is statically linked.
>> + * Remove when said function becomes public.
>> + */
>> +unsigned ZSTD_isFrame(const void* buffer, size_t size)
>> +{
>> +       uint8_t *buf = (uint8_t *)buffer;
>> +       /* Magic zstd frame header value */
>> +       if ((buf[0] == 0x28) &&
>> +           (buf[1] == 0xB5) &&
>> +           (buf[2] == 0x2F) &&
>> +           (buf[3] == 0xFD))
>> +               return 1;
>> +
>> +       return 0;
>> +}
>> +
>> +/*
>> + * A guess at the max compression ratio for buffer overallocation
>> + * Real values are frequently around 4:1, if this is wrong
>> + * it just results in buffer reallocation and the decompression
>> + * operation being restarted from where it left off.
>> + */
>> +#define EXPECTED_RATIO 8
>> +
>> +/*
>> + * This supports the streaming compression mode the kernel uses
>> + * that can result in multiple zstd frames comprising a single
>> + * compressed image. In order to be a bit more efficient than
>> + * the libz/lzma implementations, we attempt to discover the input
>> + * and output image sizes before performing the decompression.
>> + * But, in streaming mode the first frame may not have a length
>> + * or it seems the length could be incorrect if multiple frames
>> + * are appended together. So, it's written with buffer resize logic
>> + * but a guess at the compression ratio is made to avoid the resize/copy
>> + * operation. Ideally this code efficiently allocates the
>> + * correct input buffer, and no more than 2-3x the output buffer
>> + * size so that it can perform the decompress operation with a
>> + * single decompress call.
>> + */
>> +char *zstd_decompress_file(const char *filename, off_t *r_size)
>> +{
>> +       bool again;
>> +       void *cBuff = NULL;
>> +       FILE *fp = NULL; /* use c streams to match gzip/lzma logic */
>> +       struct stat fp_stats;
>> +       uint8_t magic[4];
>> +       ZSTD_DCtx* dctx = NULL;
>> +
>> +       ZSTD_outBuffer output = { NULL, 0, 0 };
>> +       ZSTD_inBuffer input = { NULL, 0, 0 };
>> +
>> +
>> +       *r_size = 0;
>> +       if (!filename) {
>> +               return NULL;
>> +       }
>> +       if (stat(filename, &fp_stats))
>> +       {
>              ^^^ inconsistent coding style. I prefer the kernel coding style.

I think I noted to someone a while back I've lost my ability to see this.

But you're right, I will repost it with consistent bracing style.

Thanks for taking a look at this.


> 
>> +               dbgprintf("Cannot stat `%s'\n", filename);
>> +               return NULL;
>> +       }
>> +       if (fp_stats.st_size < sizeof(magic))
>> +       {
>              ^^^ ditto and the following brace.
> 
> 
> Thanks,
> 
> Pingfan
> 
>> +               dbgprintf("short file\n");
>> +               return NULL;
>> +       }
>> +       input.size = fp_stats.st_size;
>> +
>> +       fp = fopen(filename, "rb");
>> +       if (fp == 0) {
>> +               dbgprintf("Cannot open `%s'\n", filename);
>> +               goto fail;
>> +       }
>> +       /* before we read the whole buffer see if this looks like a zstd frame */
>> +       if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) {
>> +               dbgprintf("Unable to read zstd header\n");
>> +               goto fail;
>> +       }
>> +
>> +       if (!ZSTD_isFrame((void*)&magic, sizeof(magic)))
>> +       {
>> +               dbgprintf("Not zstd compressed\n");
>> +               goto fail;
>> +       }
>> +
>> +       cBuff = xmalloc(input.size);
>> +       input.src = cBuff; /* use cBuff ptr to avoid const/mismatches */
>> +       rewind(fp);
>> +       if (fread(cBuff, 1, input.size, fp) != input.size)
>> +       {
>> +               dbgprintf("Unable to read compressed data\n");
>> +               goto fail;
>> +       }
>> +       fclose(fp);
>> +       fp = NULL;
>> +
>> +
>> +       output.size = ZSTD_getFrameContentSize(input.src, input.size);
>> +       if (output.size == ZSTD_CONTENTSIZE_ERROR)
>> +       {
>> +               dbgprintf("not compressed by zstd\n");
>> +               goto fail;
>> +       }
>> +
>> +       if (output.size == ZSTD_CONTENTSIZE_UNKNOWN)
>> +       {
>> +               /*
>> +                * The decompressed size is an optional field in the header
>> +                * So we guess at the compression ratio to avoid reallocating
>> +                * the buffer, but this can fail so we still have code to
>> +                * handle that case.
>> +                */
>> +               dbgprintf("original zstd size unknown!\n");
>> +               output.size = fp_stats.st_size * EXPECTED_RATIO;
>> +       }
>> +       output.dst = xmalloc(output.size);
>> +
>> +       dctx = ZSTD_createDCtx();
>> +       if (dctx == NULL) {
>> +               dbgprintf("zstd context allocation error\n");
>> +               goto fail;
>> +       }
>> +
>> +       do {
>> +               again = false;
>> +               size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
>> +               if (ZSTD_isError(ret)) {
>> +                       dbgprintf("zstd error %s\n", ZSTD_getErrorName(ret));
>> +                       goto fail;
>> +               }
>> +               dbgprintf("zstd decompressed to input=%ld output=%ld\n", input.pos, output.pos);
>> +               if (output.pos == output.size) {
>> +                       /* uh the output buffer wasn't large enough, double it */
>> +                       output.size <<= 1;
>> +                       output.dst = xrealloc(output.dst, output.size);
>> +                       /*
>> +                        * handle case where all the input was consumed but not all the
>> +                        * output was written because of insufficient output buffer
>> +                        */
>> +                       again = true;
>> +               }
>> +
>> +       } while ((input.pos < input.size) || again);
>> +
>> +       free(cBuff);
>> +       ZSTD_freeDCtx(dctx);
>> +
>> +       *r_size = output.pos;
>> +
>> +       return output.dst;
>> +
>> +fail:
>> +       if (fp)
>> +               fclose(fp);
>> +       if (dctx)
>> +               ZSTD_freeDCtx(dctx);
>> +       if (output.dst)
>> +               free(output.dst);
>> +       if (cBuff)
>> +               free(cBuff);
>> +
>> +       return NULL;
>> +
>> +}
>> +#else
>> +
>> +char *zstd_decompress_file(const char *filename, off_t *r_size)
>> +{
>> +       return NULL;
>> +}
>> +#endif
>> --
>> 2.47.0
>>
>>
> 




More information about the kexec mailing list