[PATCH 1/2] zstd: Add zstd decompression logic
Jeremy Linton
jeremy.linton at arm.com
Wed Dec 11 06:53:04 PST 2024
Hi,
On 12/11/24 6:16 AM, Pingfan Liu wrote:
> On Wed, Dec 11, 2024 at 10:58 AM Jeremy Linton <jeremy.linton at arm.com> wrote:
>>
>> zstd is becoming a popular zlib replacement because it both tends to
>> get a better compression ratio as well as performs considerably
>> better. As such, zstd is now one of the options that can be used to
>> compress the Linux kernel.
>>
>> Let's enable it by using a system-provided shared library, creating the
>> required boilerplate to match the existing zlib/lzma function
>> signatures, and creating build options to enable/disable it.
>>
>> Signed-off-by: Jeremy Linton <jeremy.linton at arm.com>
>> ---
>> configure.ac | 10 +++
>> kexec/Makefile | 4 +-
>> kexec/kexec-zstd.h | 7 ++
>> kexec/zstd.c | 191 +++++++++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 211 insertions(+), 1 deletion(-)
>> create mode 100644 kexec/kexec-zstd.h
>> create mode 100644 kexec/zstd.c
>>
>> diff --git a/configure.ac b/configure.ac
>> index e030302..43cdb64 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -106,6 +106,9 @@ AC_ARG_WITH([zlib], AS_HELP_STRING([--without-zlib],[disable zlib support]),
>> AC_ARG_WITH([lzma], AS_HELP_STRING([--without-lzma],[disable lzma support]),
>> [ with_lzma="$withval"], [ with_lzma=yes ] )
>>
>> +AC_ARG_WITH([zstd], AS_HELP_STRING([--without-zstd],[disable zstd support]),
>> + [ with_zstd="$withval"], [ with_zstd=yes ] )
>> +
>> AC_ARG_WITH([xen], AS_HELP_STRING([--without-xen],
>> [disable extended xen support]), [ with_xen="$withval"], [ with_xen=yes ] )
>>
>> @@ -180,6 +183,13 @@ if test "$with_lzma" = yes ; then
>> AC_MSG_NOTICE([lzma support disabled]))])
>> fi
>>
>> +dnl See if I have a usable copy of zstd available
>> +if test "$with_zstd" = yes ; then
>> + AC_CHECK_HEADER(zstd.h,
>> + [AC_CHECK_LIB(zstd, ZSTD_decompress, ,
>> + AC_MSG_NOTICE([zstd support disabled]))])
>> +fi
>> +
>> dnl find Xen control stack libraries
>> if test "$with_xen" = yes ; then
>> AC_CHECK_HEADER(xenctrl.h,
>> diff --git a/kexec/Makefile b/kexec/Makefile
>> index d4f26d7..e969d1e 100644
>> --- a/kexec/Makefile
>> +++ b/kexec/Makefile
>> @@ -26,6 +26,7 @@ KEXEC_SRCS_base += kexec/crashdump-xen.c
>> KEXEC_SRCS_base += kexec/phys_arch.c
>> KEXEC_SRCS_base += kexec/lzma.c
>> KEXEC_SRCS_base += kexec/zlib.c
>> +KEXEC_SRCS_base += kexec/zstd.c
>> KEXEC_SRCS_base += kexec/kexec-xen.c
>> KEXEC_SRCS_base += kexec/symbols.c
>>
>> @@ -36,7 +37,8 @@ dist += kexec/Makefile \
>> kexec/crashdump.h kexec/firmware_memmap.h \
>> kexec/kexec-elf-boot.h \
>> kexec/kexec-elf.h kexec/kexec-sha256.h \
>> - kexec/kexec-zlib.h kexec/kexec-lzma.h \
>> + kexec/kexec-zlib.h kexec/kexec-lzma.h \
>> + kexec/kexec-zstd.h \
>> kexec/kexec-xen.h \
>> kexec/kexec-syscall.h kexec/kexec.h kexec/kexec.8
>>
>> diff --git a/kexec/kexec-zstd.h b/kexec/kexec-zstd.h
>> new file mode 100644
>> index 0000000..c5dbb24
>> --- /dev/null
>> +++ b/kexec/kexec-zstd.h
>> @@ -0,0 +1,7 @@
>> +#ifndef __KEXEC_ZSTD_H
>> +#define __KEXEC_ZSTD_H
>> +
>> +#include <sys/types.h>
>> +
>> +char *zstd_decompress_file(const char *filename, off_t *r_size);
>> +#endif /* __KEXEC_ZSTD_H */
>> diff --git a/kexec/zstd.c b/kexec/zstd.c
>> new file mode 100644
>> index 0000000..9309251
>> --- /dev/null
>> +++ b/kexec/zstd.c
>> @@ -0,0 +1,191 @@
>> +#include "kexec-zstd.h"
>> +#include "kexec.h"
>> +
>> +#include "config.h"
>> +
>> +#ifdef HAVE_LIBZSTD
>> +#include <stdio.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <errno.h>
>> +#include <limits.h>
>> +#include <stdbool.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <unistd.h>
>> +#include <ctype.h>
>> +#include <zstd.h>
>> +
>> +
>> +/*
>> + * Reimplementation of private function available if lib is statically linked.
>> + * Remove when said function becomes public.
>> + */
>> +unsigned ZSTD_isFrame(const void* buffer, size_t size)
>> +{
>> + uint8_t *buf = (uint8_t *)buffer;
>> + /* Magic zstd frame header value */
>> + if ((buf[0] == 0x28) &&
>> + (buf[1] == 0xB5) &&
>> + (buf[2] == 0x2F) &&
>> + (buf[3] == 0xFD))
>> + return 1;
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * A guess at the max compression ratio for buffer overallocation
>> + * Real values are frequently around 4:1, if this is wrong
>> + * it just results in buffer reallocation and the decompression
>> + * operation being restarted from where it left off.
>> + */
>> +#define EXPECTED_RATIO 8
>> +
>> +/*
>> + * This supports the streaming compression mode the kernel uses
>> + * that can result in multiple zstd frames comprising a single
>> + * compressed image. In order to be a bit more efficient than
>> + * the libz/lzma implementations, we attempt to discover the input
>> + * and output image sizes before performing the decompression.
>> + * But, in streaming mode the first frame may not have a length
>> + * or it seems the length could be incorrect if multiple frames
>> + * are appended together. So, it's written with buffer resize logic
>> + * but a guess at the compression ratio is made to avoid the resize/copy
>> + * operation. Ideally this code efficiently allocates the
>> + * correct input buffer, and no more than 2-3x the output buffer
>> + * size so that it can perform the decompress operation with a
>> + * single decompress call.
>> + */
>> +char *zstd_decompress_file(const char *filename, off_t *r_size)
>> +{
>> + bool again;
>> + void *cBuff = NULL;
>> + FILE *fp = NULL; /* use c streams to match gzip/lzma logic */
>> + struct stat fp_stats;
>> + uint8_t magic[4];
>> + ZSTD_DCtx* dctx = NULL;
>> +
>> + ZSTD_outBuffer output = { NULL, 0, 0 };
>> + ZSTD_inBuffer input = { NULL, 0, 0 };
>> +
>> +
>> + *r_size = 0;
>> + if (!filename) {
>> + return NULL;
>> + }
>> + if (stat(filename, &fp_stats))
>> + {
> ^^^ inconsistent coding style. I prefer the kernel coding style.
I think I noted to someone a while back I've lost my ability to see this.
But you're right, I will repost it with consistent bracing style.
Thanks for taking a look at this.
>
>> + dbgprintf("Cannot stat `%s'\n", filename);
>> + return NULL;
>> + }
>> + if (fp_stats.st_size < sizeof(magic))
>> + {
> ^^^ ditto and the following brace.
>
>
> Thanks,
>
> Pingfan
>
>> + dbgprintf("short file\n");
>> + return NULL;
>> + }
>> + input.size = fp_stats.st_size;
>> +
>> + fp = fopen(filename, "rb");
>> + if (fp == 0) {
>> + dbgprintf("Cannot open `%s'\n", filename);
>> + goto fail;
>> + }
>> + /* before we read the whole buffer see if this looks like a zstd frame */
>> + if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) {
>> + dbgprintf("Unable to read zstd header\n");
>> + goto fail;
>> + }
>> +
>> + if (!ZSTD_isFrame((void*)&magic, sizeof(magic)))
>> + {
>> + dbgprintf("Not zstd compressed\n");
>> + goto fail;
>> + }
>> +
>> + cBuff = xmalloc(input.size);
>> + input.src = cBuff; /* use cBuff ptr to avoid const/mismatches */
>> + rewind(fp);
>> + if (fread(cBuff, 1, input.size, fp) != input.size)
>> + {
>> + dbgprintf("Unable to read compressed data\n");
>> + goto fail;
>> + }
>> + fclose(fp);
>> + fp = NULL;
>> +
>> +
>> + output.size = ZSTD_getFrameContentSize(input.src, input.size);
>> + if (output.size == ZSTD_CONTENTSIZE_ERROR)
>> + {
>> + dbgprintf("not compressed by zstd\n");
>> + goto fail;
>> + }
>> +
>> + if (output.size == ZSTD_CONTENTSIZE_UNKNOWN)
>> + {
>> + /*
>> + * The decompressed content size is an optional field in the header
>> + * So we guess at the compression ratio to avoid reallocating
>> + * the buffer, but this can fail so we still have code to
>> + * handle that case.
>> + */
>> + dbgprintf("original zstd size unknown!\n");
>> + output.size = fp_stats.st_size * EXPECTED_RATIO;
>> + }
>> + output.dst = xmalloc(output.size);
>> +
>> + dctx = ZSTD_createDCtx();
>> + if (dctx == NULL) {
>> + dbgprintf("zstd context allocation error\n");
>> + goto fail;
>> + }
>> +
>> + do {
>> + again = false;
>> + size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
>> + if (ZSTD_isError(ret)) {
>> + dbgprintf("zstd error %s\n", ZSTD_getErrorName(ret));
>> + goto fail;
>> + }
>> + dbgprintf("zstd decompressed to input=%ld output=%ld\n", input.pos, output.pos);
>> + if (output.pos == output.size) {
>> + /* uh the output buffer wasn't large enough, double it */
>> + output.size <<= 1;
>> + output.dst = xrealloc(output.dst, output.size);
>> + /*
>> + * handle case where all the input was consumed but not all the
>> + * output was written because of insufficient output buffer
>> + */
>> + again = true;
>> + }
>> +
>> + } while ((input.pos < input.size) || again);
>> +
>> + free(cBuff);
>> + ZSTD_freeDCtx(dctx);
>> +
>> + *r_size = output.pos;
>> +
>> + return output.dst;
>> +
>> +fail:
>> + if (fp)
>> + fclose(fp);
>> + if (dctx)
>> + ZSTD_freeDCtx(dctx);
>> + if (output.dst)
>> + free(output.dst);
>> + if (cBuff)
>> + free(cBuff);
>> +
>> + return NULL;
>> +
>> +}
>> +#else
>> +
>> +char *zstd_decompress_file(const char *filename, off_t *r_size)
>> +{
>> + return NULL;
>> +}
>> +#endif
>> --
>> 2.47.0
>>
>>
>
More information about the kexec
mailing list