Memory leaks in record_out and record_send - Mailing list pgsql-hackers

From Tom Lane
Subject Memory leaks in record_out and record_send
Date
Msg-id 8810.1352783901@sss.pgh.pa.us
Whole thread Raw
Responses Re: Memory leaks in record_out and record_send  (Dimitri Fontaine <dimitri@2ndQuadrant.fr>)
Re: Memory leaks in record_out and record_send  (Robert Haas <robertmhaas@gmail.com>)
List pgsql-hackers
I looked into the problem complained of here:
http://archives.postgresql.org/pgsql-general/2012-11/msg00279.php
which turns out to have nothing to do with joins and everything to do
with the fact that record_out() leaks memory like mad.  It leaks both
the strings returned by the per-column output functions and any column
values that have to be detoasted.  You can easily reproduce this with
an example like

create table leak (f1 int, f2 text);

insert into leak select x, 'foo' from generate_series(1,1000000) x;

select leak from leak;

The attached patch against HEAD fixes this, as well as a similar
leakage in record_send().  The added code is lifted directly from
printtup() so it's not adding any new assumptions to the system.

I wonder though if we ought to think about running output functions in
a short-lived memory context instead of the executor's main context.
We've considered that before, I think, and it's always been the path
of least resistance to fix the output functions instead --- but there
will always be another leak I'm afraid.

OTOH I can't see trying to back-patch a solution like that.   If we want
to fix this in the back branches (and note the complaint linked above is
against 8.3), I think we have to do it as attached.

Thoughts?

            regards, tom lane

diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index 13e574d4e82b4412f7ab32e02398d1671d381097..d4ed7d0ca06ac1a0dee6b4963989a5b41713fb3b 100644
*** a/src/backend/utils/adt/rowtypes.c
--- b/src/backend/utils/adt/rowtypes.c
*************** typedef struct ColumnIOData
*** 32,37 ****
--- 32,38 ----
      Oid            column_type;
      Oid            typiofunc;
      Oid            typioparam;
+     bool        typisvarlena;
      FmgrInfo    proc;
  } ColumnIOData;

*************** record_out(PG_FUNCTION_ARGS)
*** 364,369 ****
--- 365,371 ----
      {
          ColumnIOData *column_info = &my_extra->columns[i];
          Oid            column_type = tupdesc->attrs[i]->atttypid;
+         Datum        attr;
          char       *value;
          char       *tmp;
          bool        nq;
*************** record_out(PG_FUNCTION_ARGS)
*** 387,403 ****
           */
          if (column_info->column_type != column_type)
          {
-             bool        typIsVarlena;
-
              getTypeOutputInfo(column_type,
                                &column_info->typiofunc,
!                               &typIsVarlena);
              fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
                            fcinfo->flinfo->fn_mcxt);
              column_info->column_type = column_type;
          }

!         value = OutputFunctionCall(&column_info->proc, values[i]);

          /* Detect whether we need double quotes for this value */
          nq = (value[0] == '\0');    /* force quotes for empty string */
--- 389,412 ----
           */
          if (column_info->column_type != column_type)
          {
              getTypeOutputInfo(column_type,
                                &column_info->typiofunc,
!                               &column_info->typisvarlena);
              fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
                            fcinfo->flinfo->fn_mcxt);
              column_info->column_type = column_type;
          }

!         /*
!          * If we have a toasted datum, forcibly detoast it here to avoid
!          * memory leakage inside the type's output routine.
!          */
!         if (column_info->typisvarlena)
!             attr = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
!         else
!             attr = values[i];
!
!         value = OutputFunctionCall(&column_info->proc, attr);

          /* Detect whether we need double quotes for this value */
          nq = (value[0] == '\0');    /* force quotes for empty string */
*************** record_out(PG_FUNCTION_ARGS)
*** 416,432 ****

          /* And emit the string */
          if (nq)
!             appendStringInfoChar(&buf, '"');
          for (tmp = value; *tmp; tmp++)
          {
              char        ch = *tmp;

              if (ch == '"' || ch == '\\')
!                 appendStringInfoChar(&buf, ch);
!             appendStringInfoChar(&buf, ch);
          }
          if (nq)
!             appendStringInfoChar(&buf, '"');
      }

      appendStringInfoChar(&buf, ')');
--- 425,447 ----

          /* And emit the string */
          if (nq)
!             appendStringInfoCharMacro(&buf, '"');
          for (tmp = value; *tmp; tmp++)
          {
              char        ch = *tmp;

              if (ch == '"' || ch == '\\')
!                 appendStringInfoCharMacro(&buf, ch);
!             appendStringInfoCharMacro(&buf, ch);
          }
          if (nq)
!             appendStringInfoCharMacro(&buf, '"');
!
!         pfree(value);
!
!         /* Clean up detoasted copy, if any */
!         if (DatumGetPointer(attr) != DatumGetPointer(values[i]))
!             pfree(DatumGetPointer(attr));
      }

      appendStringInfoChar(&buf, ')');
*************** record_send(PG_FUNCTION_ARGS)
*** 714,719 ****
--- 729,735 ----
      {
          ColumnIOData *column_info = &my_extra->columns[i];
          Oid            column_type = tupdesc->attrs[i]->atttypid;
+         Datum        attr;
          bytea       *outputbytes;

          /* Ignore dropped columns in datatype */
*************** record_send(PG_FUNCTION_ARGS)
*** 734,756 ****
           */
          if (column_info->column_type != column_type)
          {
-             bool        typIsVarlena;
-
              getTypeBinaryOutputInfo(column_type,
                                      &column_info->typiofunc,
!                                     &typIsVarlena);
              fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
                            fcinfo->flinfo->fn_mcxt);
              column_info->column_type = column_type;
          }

!         outputbytes = SendFunctionCall(&column_info->proc, values[i]);

          /* We assume the result will not have been toasted */
          pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
          pq_sendbytes(&buf, VARDATA(outputbytes),
                       VARSIZE(outputbytes) - VARHDRSZ);
          pfree(outputbytes);
      }

      pfree(values);
--- 750,784 ----
           */
          if (column_info->column_type != column_type)
          {
              getTypeBinaryOutputInfo(column_type,
                                      &column_info->typiofunc,
!                                     &column_info->typisvarlena);
              fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
                            fcinfo->flinfo->fn_mcxt);
              column_info->column_type = column_type;
          }

!         /*
!          * If we have a toasted datum, forcibly detoast it here to avoid
!          * memory leakage inside the type's output routine.
!          */
!         if (column_info->typisvarlena)
!             attr = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
!         else
!             attr = values[i];
!
!         outputbytes = SendFunctionCall(&column_info->proc, attr);

          /* We assume the result will not have been toasted */
          pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
          pq_sendbytes(&buf, VARDATA(outputbytes),
                       VARSIZE(outputbytes) - VARHDRSZ);
+
          pfree(outputbytes);
+
+         /* Clean up detoasted copy, if any */
+         if (DatumGetPointer(attr) != DatumGetPointer(values[i]))
+             pfree(DatumGetPointer(attr));
      }

      pfree(values);

pgsql-hackers by date:

Previous
From: Amit kapila
Date:
Subject: Re: [PATCH] Patch to compute Max LSN of Data Pages
Next
From: Amit kapila
Date:
Subject: Re: Proof of concept: standalone backend with full FE/BE protocol