Re: Add LZ4 compression in pg_dump - Mailing list pgsql-hackers

From Justin Pryzby
Subject Re: Add LZ4 compression in pg_dump
Date
Msg-id ZBOfIs5AIiM7EDV6@telsasoft.com
Whole thread Raw
In response to Re: Add LZ4 compression in pg_dump  (Tomas Vondra <tomas.vondra@enterprisedb.com>)
Responses Re: Add LZ4 compression in pg_dump  (Tomas Vondra <tomas.vondra@enterprisedb.com>)
List pgsql-hackers
On Thu, Mar 16, 2023 at 11:30:50PM +0100, Tomas Vondra wrote:
> On 3/16/23 01:20, Justin Pryzby wrote:
> > But try reading the diff while looking for the cause of a bug.  It's the
> > difference between reading 50, two-line changes, and reading a hunk that
> > replaces 100 lines with a different 100 lines, with empty/unrelated
> > lines randomly thrown in as context.
>
> I don't know, maybe I'm doing something wrong or maybe I just am bad at
> looking at diffs, but if I apply the patch you submitted on 8/3 and do
> the git-diff above (output attached), it seems pretty incomprehensible
> to me :-( I don't see 50 two-line changes (I certainly wouldn't be able
> to identify the root cause of the bug based on that).

It's true that most of the diff is still incomprehensible...

But look at the part relevant to the "empty-data" bug:

[... incomprehensible changes elided ...]
>  static void
> -InitCompressorZlib(CompressorState *cs, int level)
> +DeflateCompressorInit(CompressorState *cs)
>  {
> +    GzipCompressorState *gzipcs;
>      z_streamp    zp;
>  
> -    zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
> +    gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
> +    zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
>      zp->zalloc = Z_NULL;
>      zp->zfree = Z_NULL;
>      zp->opaque = Z_NULL;
>  
>      /*
> -     * zlibOutSize is the buffer size we tell zlib it can output to.  We
> -     * actually allocate one extra byte because some routines want to append a
> -     * trailing zero byte to the zlib output.
> +     * outsize is the buffer size we tell zlib it can output to.  We actually
> +     * allocate one extra byte because some routines want to append a trailing
> +     * zero byte to the zlib output.
>       */
> -    cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
> -    cs->zlibOutSize = ZLIB_OUT_SIZE;
> +    gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1);
> +    gzipcs->outsize = ZLIB_OUT_SIZE;
>  
> -    if (deflateInit(zp, level) != Z_OK)
> -        pg_fatal("could not initialize compression library: %s",
> -                 zp->msg);
> +    /* -Z 0 uses the "None" compressor -- not zlib with no compression */
> +    Assert(cs->compression_spec.level != 0);
> +
> +    if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
> +        pg_fatal("could not initialize compression library: %s", zp->msg);
>  
>      /* Just be paranoid - maybe End is called after Start, with no Write */
> -    zp->next_out = (void *) cs->zlibOut;
> -    zp->avail_out = cs->zlibOutSize;
> +    zp->next_out = gzipcs->outbuf;
> +    zp->avail_out = gzipcs->outsize;
> +
> +    /* Keep track of gzipcs */
> +    cs->private_data = gzipcs;
>  }
>  
>  static void
> -EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
> +DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs)
>  {
> -    z_streamp    zp = cs->zp;
> +    GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> +    z_streamp    zp;
>  
> +    zp = gzipcs->zp;
>      zp->next_in = NULL;
>      zp->avail_in = 0;
>  
>      /* Flush any remaining data from zlib buffer */
> -    DeflateCompressorZlib(AH, cs, true);
> +    DeflateCompressorCommon(AH, cs, true);
>  
>      if (deflateEnd(zp) != Z_OK)
>          pg_fatal("could not close compression stream: %s", zp->msg);
>  
> -    free(cs->zlibOut);
> -    free(cs->zp);
> +    pg_free(gzipcs->outbuf);
> +    pg_free(gzipcs->zp);
> +    pg_free(gzipcs);
> +    cs->private_data = NULL;
>  }
>  
>  static void
> -DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
> +DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
>  {
> -    z_streamp    zp = cs->zp;
> -    char       *out = cs->zlibOut;
> +    GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> +    z_streamp    zp = gzipcs->zp;
> +    void       *out = gzipcs->outbuf;
>      int            res = Z_OK;
>  
> -    while (cs->zp->avail_in != 0 || flush)
> +    while (gzipcs->zp->avail_in != 0 || flush)
>      {
>          res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
>          if (res == Z_STREAM_ERROR)
>              pg_fatal("could not compress data: %s", zp->msg);
> -        if ((flush && (zp->avail_out < cs->zlibOutSize))
> +        if ((flush && (zp->avail_out < gzipcs->outsize))
>              || (zp->avail_out == 0)
>              || (zp->avail_in != 0)
>              )
> @@ -289,18 +122,18 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
>               * chunk is the EOF marker in the custom format. This should never
>               * happen but ...
>               */
> -            if (zp->avail_out < cs->zlibOutSize)
> +            if (zp->avail_out < gzipcs->outsize)
>              {
>                  /*
>                   * Any write function should do its own error checking but to
>                   * make sure we do a check here as well ...
>                   */
> -                size_t        len = cs->zlibOutSize - zp->avail_out;
> +                size_t        len = gzipcs->outsize - zp->avail_out;
>  
> -                cs->writeF(AH, out, len);
> +                cs->writeF(AH, (char *) out, len);
>              }
> -            zp->next_out = (void *) out;
> -            zp->avail_out = cs->zlibOutSize;
> +            zp->next_out = out;
> +            zp->avail_out = gzipcs->outsize;
>          }
>  
>          if (res == Z_STREAM_END)
> @@ -309,16 +142,26 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
>  }
>  
>  static void
> -WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
> -                       const char *data, size_t dLen)
> +EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
>  {
> -    cs->zp->next_in = (void *) unconstify(char *, data);
> -    cs->zp->avail_in = dLen;
> -    DeflateCompressorZlib(AH, cs, false);
> +    /* If deflation was initialized, finalize it */
> +    if (cs->private_data)
> +        DeflateCompressorEnd(AH, cs);
>  }
>  
>  static void
> -ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
> +WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
> +                       const void *data, size_t dLen)
> +{
> +    GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> +
> +    gzipcs->zp->next_in = (void *) unconstify(void *, data);
> +    gzipcs->zp->avail_in = dLen;
> +    DeflateCompressorCommon(AH, cs, false);
> +}
> +
> +static void
> +ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
>  {
>      z_streamp    zp;
>      char       *out;
> @@ -342,7 +185,7 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
>                   zp->msg);
>  
>      /* no minimal chunk size for zlib */
> -    while ((cnt = readF(AH, &buf, &buflen)))
> +    while ((cnt = cs->readF(AH, &buf, &buflen)))
>      {
>          zp->next_in = (void *) buf;
>          zp->avail_in = cnt;
> @@ -382,389 +225,196 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
>      free(out);
>      free(zp);
>  }
[... more incomprehensible changes elided ...]



pgsql-hackers by date:

Previous
From: Tomas Vondra
Date:
Subject: Re: Add LZ4 compression in pg_dump
Next
From: Melanie Plageman
Date:
Subject: Re: Refactor calculations to use instr_time