Re: Reference Leak with type - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Reference Leak with type
Date
Msg-id 2512554.1618091863@sss.pgh.pa.us
Whole thread Raw
In response to Re: Reference Leak with type  (Zhihong Yu <zyu@yugabyte.com>)
List pgsql-hackers
Here's a proposed patch for this problem.

The core problem in this test case is that the refcount is logged in the
Portal resowner, which is a child of the initial transaction's resowner,
so it goes away in the COMMIT (after warning of a resource leak); but
the expression tree is still there and still thinks it has a refcount.
By chance a new ResourceOwner is created in the same place where the old
one was, so that when the expression tree is finally destroyed at the
end of the DO block, we see an error about "this refcount isn't logged
here" rather than a crash.  Unrelated-looking code changes could turn
that into a real crash, of course.

I spent quite a bit of time fruitlessly trying to fix it by manipulating
which resowner the tupledesc refcount is logged in, specifically by
running plpgsql "simple expressions" with the simple_eval_resowner as
CurrentResourceOwner.  But this just causes other problems to appear,
because then that resowner becomes responsible for more stuff than
just the plancache refcounts that plpgsql is expecting it to hold.
Some of that stuff needs to be released at subtransaction abort,
which is problematic because most of what plpgsql wants it to deal
in needs to survive until end of main transaction --- in particular,
the plancache refcounts need to live that long, and so do the tupdesc
refcounts we're concerned with here, because those are associated with
"simple expression" trees that are supposed to have that lifespan.
It's possible that we could make this approach work, but at minimum
it'd require creating and destroying an additional resowner per
subtransaction; and maybe we'd have to give up on sharing "simple
expression" trees across subtransactions.  So the potential performance
hit is pretty bad, and I'm not even 100% sure it'd work at all.

So the alternative proposed in the attached is to give up on associating
a long-lived tupdesc refcount with these expression nodes at all.
Intead, we can use a method that plpgsql has been using for a few
years, which is to rely on the fact that typcache entries never go
away once made, and just save a pointer into the typcache.  We can
detect possible changes in the cache entry by watching for changes
in its tupDesc_identifier counter.

This infrastructure exists as far back as v11, so using it doesn't
present any problems for back-patchability.  It is slightly
nervous-making that we have to change some fields in struct ExprEvalStep
--- but the overall struct size isn't changing, and I can't really
see a reason why extensions would be interested in the contents of
these particular subfield types.

            regards, tom lane

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 74fcfbea56..77c9d785d9 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -1375,7 +1375,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
                 scratch.opcode = EEOP_FIELDSELECT;
                 scratch.d.fieldselect.fieldnum = fselect->fieldnum;
                 scratch.d.fieldselect.resulttype = fselect->resulttype;
-                scratch.d.fieldselect.argdesc = NULL;
+                scratch.d.fieldselect.rowcache.cacheptr = NULL;

                 ExprEvalPushStep(state, &scratch);
                 break;
@@ -1385,7 +1385,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
             {
                 FieldStore *fstore = (FieldStore *) node;
                 TupleDesc    tupDesc;
-                TupleDesc  *descp;
+                ExprEvalRowtypeCache *rowcachep;
                 Datum       *values;
                 bool       *nulls;
                 int            ncolumns;
@@ -1401,9 +1401,9 @@ ExecInitExprRec(Expr *node, ExprState *state,
                 values = (Datum *) palloc(sizeof(Datum) * ncolumns);
                 nulls = (bool *) palloc(sizeof(bool) * ncolumns);

-                /* create workspace for runtime tupdesc cache */
-                descp = (TupleDesc *) palloc(sizeof(TupleDesc));
-                *descp = NULL;
+                /* create shared composite-type-lookup cache struct */
+                rowcachep = palloc(sizeof(ExprEvalRowtypeCache));
+                rowcachep->cacheptr = NULL;

                 /* emit code to evaluate the composite input value */
                 ExecInitExprRec(fstore->arg, state, resv, resnull);
@@ -1411,7 +1411,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
                 /* next, deform the input tuple into our workspace */
                 scratch.opcode = EEOP_FIELDSTORE_DEFORM;
                 scratch.d.fieldstore.fstore = fstore;
-                scratch.d.fieldstore.argdesc = descp;
+                scratch.d.fieldstore.rowcache = rowcachep;
                 scratch.d.fieldstore.values = values;
                 scratch.d.fieldstore.nulls = nulls;
                 scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1469,7 +1469,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
                 /* finally, form result tuple */
                 scratch.opcode = EEOP_FIELDSTORE_FORM;
                 scratch.d.fieldstore.fstore = fstore;
-                scratch.d.fieldstore.argdesc = descp;
+                scratch.d.fieldstore.rowcache = rowcachep;
                 scratch.d.fieldstore.values = values;
                 scratch.d.fieldstore.nulls = nulls;
                 scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1615,17 +1615,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
         case T_ConvertRowtypeExpr:
             {
                 ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node;
+                ExprEvalRowtypeCache *rowcachep;
+
+                /* cache structs must be out-of-line for space reasons */
+                rowcachep = palloc(2 * sizeof(ExprEvalRowtypeCache));
+                rowcachep[0].cacheptr = NULL;
+                rowcachep[1].cacheptr = NULL;

                 /* evaluate argument into step's result area */
                 ExecInitExprRec(convert->arg, state, resv, resnull);

                 /* and push conversion step */
                 scratch.opcode = EEOP_CONVERT_ROWTYPE;
-                scratch.d.convert_rowtype.convert = convert;
-                scratch.d.convert_rowtype.indesc = NULL;
-                scratch.d.convert_rowtype.outdesc = NULL;
+                scratch.d.convert_rowtype.inputtype =
+                    exprType((Node *) convert->arg);
+                scratch.d.convert_rowtype.outputtype = convert->resulttype;
+                scratch.d.convert_rowtype.incache = &rowcachep[0];
+                scratch.d.convert_rowtype.outcache = &rowcachep[1];
                 scratch.d.convert_rowtype.map = NULL;
-                scratch.d.convert_rowtype.initialized = false;

                 ExprEvalPushStep(state, &scratch);
                 break;
@@ -2250,7 +2257,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
                          (int) ntest->nulltesttype);
                 }
                 /* initialize cache in case it's a row test */
-                scratch.d.nulltest_row.argdesc = NULL;
+                scratch.d.nulltest_row.rowcache.cacheptr = NULL;

                 /* first evaluate argument into result variable */
                 ExecInitExprRec(ntest->arg, state,
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 07948c1b13..094e22d392 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -145,8 +145,8 @@ static void ExecInitInterpreter(void);
 static void CheckVarSlotCompatibility(TupleTableSlot *slot, int attnum, Oid vartype);
 static void CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot);
 static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
-                                    TupleDesc *cache_field, ExprContext *econtext);
-static void ShutdownTupleDescRef(Datum arg);
+                                    ExprEvalRowtypeCache *rowcache,
+                                    bool *changed);
 static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
                                ExprContext *econtext, bool checkisnull);

@@ -1959,56 +1959,78 @@ CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot)
  * get_cached_rowtype: utility function to lookup a rowtype tupdesc
  *
  * type_id, typmod: identity of the rowtype
- * cache_field: where to cache the TupleDesc pointer in expression state node
- *        (field must be initialized to NULL)
- * econtext: expression context we are executing in
+ * rowcache: space for caching identity info
+ *        (rowcache->cacheptr must be initialized to NULL)
+ * changed: if not NULL, *changed is set to true on any update
  *
- * NOTE: because the shutdown callback will be called during plan rescan,
- * must be prepared to re-do this during any node execution; cannot call
- * just once during expression initialization.
+ * The returned TupleDesc is not guaranteed pinned; caller must pin it
+ * to use it across any operation that might incur cache invalidation.
+ * (The TupleDesc is always refcounted, so just use IncrTupleDescRefCount.)
+ *
+ * NOTE: because composite types can change contents, we must be prepared
+ * to re-do this during any node execution; cannot call just once during
+ * expression initialization.
  */
 static TupleDesc
 get_cached_rowtype(Oid type_id, int32 typmod,
-                   TupleDesc *cache_field, ExprContext *econtext)
+                   ExprEvalRowtypeCache *rowcache,
+                   bool *changed)
 {
-    TupleDesc    tupDesc = *cache_field;
-
-    /* Do lookup if no cached value or if requested type changed */
-    if (tupDesc == NULL ||
-        type_id != tupDesc->tdtypeid ||
-        typmod != tupDesc->tdtypmod)
+    if (type_id != RECORDOID)
     {
-        tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
+        /*
+         * It's a named composite type, so use the regular typcache.  Do a
+         * lookup first time through, or if the composite type changed.  Note:
+         * "tupdesc_id == 0" may look redundant, but it protects against the
+         * admittedly-theoretical possibility that type_id was RECORDOID the
+         * last time through, so that the cacheptr isn't TypeCacheEntry *.
+         */
+        TypeCacheEntry *typentry = (TypeCacheEntry *) rowcache->cacheptr;

-        if (*cache_field)
+        if (unlikely(typentry == NULL ||
+                     rowcache->tupdesc_id == 0 ||
+                     typentry->tupDesc_identifier != rowcache->tupdesc_id))
         {
-            /* Release old tupdesc; but callback is already registered */
-            ReleaseTupleDesc(*cache_field);
-        }
-        else
-        {
-            /* Need to register shutdown callback to release tupdesc */
-            RegisterExprContextCallback(econtext,
-                                        ShutdownTupleDescRef,
-                                        PointerGetDatum(cache_field));
-        }
-        *cache_field = tupDesc;
+            typentry = lookup_type_cache(type_id, TYPECACHE_TUPDESC);
+            if (typentry->tupDesc == NULL)
+                ereport(ERROR,
+                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                         errmsg("type %s is not composite",
+                                format_type_be(type_id))));
+            rowcache->cacheptr = (void *) typentry;
+            rowcache->tupdesc_id = typentry->tupDesc_identifier;
+            if (changed)
+                *changed = true;
+        }
+        return typentry->tupDesc;
+    }
+    else
+    {
+        /*
+         * A RECORD type, once registered, doesn't change for the life of the
+         * backend.  So we don't need a typcache entry as such, which is good
+         * because there isn't one.  It's possible that the caller is asking
+         * about a different type than before, though.
+         */
+        TupleDesc    tupDesc = (TupleDesc) rowcache->cacheptr;
+
+        if (unlikely(tupDesc == NULL ||
+                     rowcache->tupdesc_id != 0 ||
+                     type_id != tupDesc->tdtypeid ||
+                     typmod != tupDesc->tdtypmod))
+        {
+            tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
+            /* Drop pin acquired by lookup_rowtype_tupdesc */
+            ReleaseTupleDesc(tupDesc);
+            rowcache->cacheptr = (void *) tupDesc;
+            rowcache->tupdesc_id = 0;    /* not a valid value for non-RECORD */
+            if (changed)
+                *changed = true;
+        }
+        return tupDesc;
     }
-    return tupDesc;
 }

-/*
- * Callback function to release a tupdesc refcount at econtext shutdown
- */
-static void
-ShutdownTupleDescRef(Datum arg)
-{
-    TupleDesc  *cache_field = (TupleDesc *) DatumGetPointer(arg);
-
-    if (*cache_field)
-        ReleaseTupleDesc(*cache_field);
-    *cache_field = NULL;
-}

 /*
  * Fast-path functions, for very simple expressions
@@ -2600,8 +2622,7 @@ ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,

     /* Lookup tupdesc if first time through or if type changes */
     tupDesc = get_cached_rowtype(tupType, tupTypmod,
-                                 &op->d.nulltest_row.argdesc,
-                                 econtext);
+                                 &op->d.nulltest_row.rowcache, NULL);

     /*
      * heap_attisnull needs a HeapTuple not a bare HeapTupleHeader.
@@ -3034,8 +3055,7 @@ ExecEvalFieldSelect(ExprState *state, ExprEvalStep *op, ExprContext *econtext)

         /* Lookup tupdesc if first time through or if type changes */
         tupDesc = get_cached_rowtype(tupType, tupTypmod,
-                                     &op->d.fieldselect.argdesc,
-                                     econtext);
+                                     &op->d.fieldselect.rowcache, NULL);

         /*
          * Find field's attr record.  Note we don't support system columns
@@ -3093,9 +3113,9 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
 {
     TupleDesc    tupDesc;

-    /* Lookup tupdesc if first time through or after rescan */
+    /* Lookup tupdesc if first time through or if type changes */
     tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
-                                 op->d.fieldstore.argdesc, econtext);
+                                 op->d.fieldstore.rowcache, NULL);

     /* Check that current tupdesc doesn't have more fields than we allocated */
     if (unlikely(tupDesc->natts > op->d.fieldstore.ncolumns))
@@ -3137,10 +3157,14 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
 void
 ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
+    TupleDesc    tupDesc;
     HeapTuple    tuple;

-    /* argdesc should already be valid from the DeForm step */
-    tuple = heap_form_tuple(*op->d.fieldstore.argdesc,
+    /* Lookup tupdesc (should be valid already) */
+    tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
+                                 op->d.fieldstore.rowcache, NULL);
+
+    tuple = heap_form_tuple(tupDesc,
                             op->d.fieldstore.values,
                             op->d.fieldstore.nulls);

@@ -3157,13 +3181,13 @@ ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext
 void
 ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
-    ConvertRowtypeExpr *convert = op->d.convert_rowtype.convert;
     HeapTuple    result;
     Datum        tupDatum;
     HeapTupleHeader tuple;
     HeapTupleData tmptup;
     TupleDesc    indesc,
                 outdesc;
+    bool        changed = false;

     /* NULL in -> NULL out */
     if (*op->resnull)
@@ -3172,24 +3196,19 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
     tupDatum = *op->resvalue;
     tuple = DatumGetHeapTupleHeader(tupDatum);

-    /* Lookup tupdescs if first time through or after rescan */
-    if (op->d.convert_rowtype.indesc == NULL)
-    {
-        get_cached_rowtype(exprType((Node *) convert->arg), -1,
-                           &op->d.convert_rowtype.indesc,
-                           econtext);
-        op->d.convert_rowtype.initialized = false;
-    }
-    if (op->d.convert_rowtype.outdesc == NULL)
-    {
-        get_cached_rowtype(convert->resulttype, -1,
-                           &op->d.convert_rowtype.outdesc,
-                           econtext);
-        op->d.convert_rowtype.initialized = false;
-    }
-
-    indesc = op->d.convert_rowtype.indesc;
-    outdesc = op->d.convert_rowtype.outdesc;
+    /*
+     * Lookup tupdescs if first time through or if type changes.  We'd better
+     * pin them since type conversion functions could do catalog lookups and
+     * hence cause cache invalidation.
+     */
+    indesc = get_cached_rowtype(op->d.convert_rowtype.inputtype, -1,
+                                op->d.convert_rowtype.incache,
+                                &changed);
+    IncrTupleDescRefCount(indesc);
+    outdesc = get_cached_rowtype(op->d.convert_rowtype.outputtype, -1,
+                                 op->d.convert_rowtype.outcache,
+                                 &changed);
+    IncrTupleDescRefCount(outdesc);

     /*
      * We used to be able to assert that incoming tuples are marked with
@@ -3200,8 +3219,8 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
     Assert(HeapTupleHeaderGetTypeId(tuple) == indesc->tdtypeid ||
            HeapTupleHeaderGetTypeId(tuple) == RECORDOID);

-    /* if first time through, initialize conversion map */
-    if (!op->d.convert_rowtype.initialized)
+    /* if first time through, or after change, initialize conversion map */
+    if (changed)
     {
         MemoryContext old_cxt;

@@ -3210,7 +3229,6 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext

         /* prepare map from old to new attribute numbers */
         op->d.convert_rowtype.map = convert_tuples_by_name(indesc, outdesc);
-        op->d.convert_rowtype.initialized = true;

         MemoryContextSwitchTo(old_cxt);
     }
@@ -3240,6 +3258,9 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
          */
         *op->resvalue = heap_copy_tuple_as_datum(&tmptup, outdesc);
     }
+
+    DecrTupleDescRefCount(indesc);
+    DecrTupleDescRefCount(outdesc);
 }

 /*
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 2449cde7ad..9697f98d92 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -38,6 +38,20 @@ typedef bool (*ExecEvalBoolSubroutine) (ExprState *state,
                                         struct ExprEvalStep *op,
                                         ExprContext *econtext);

+/* ExprEvalSteps that cache a composite type's tupdesc need one of these */
+/* (it fits in-line in some step types, otherwise allocate out-of-line) */
+typedef struct ExprEvalRowtypeCache
+{
+    /*
+     * cacheptr points to composite type's TypeCacheEntry if tupdesc_id is not
+     * 0, or for an anonymous RECORD type, it points directly at the cached
+     * tupdesc for the type, and tupdesc_id is 0.  (We'd use separate fields
+     * if space were not at a premium.)  Initial state is cacheptr == NULL.
+     */
+    void       *cacheptr;
+    uint64        tupdesc_id;        /* last-seen tupdesc identifier, or 0 */
+} ExprEvalRowtypeCache;
+
 /*
  * Discriminator for ExprEvalSteps.
  *
@@ -355,8 +369,8 @@ typedef struct ExprEvalStep
         /* for EEOP_NULLTEST_ROWIS[NOT]NULL */
         struct
         {
-            /* cached tupdesc pointer - filled at runtime */
-            TupleDesc    argdesc;
+            /* cached descriptor for composite type - filled at runtime */
+            ExprEvalRowtypeCache rowcache;
         }            nulltest_row;

         /* for EEOP_PARAM_EXEC/EXTERN */
@@ -481,8 +495,8 @@ typedef struct ExprEvalStep
         {
             AttrNumber    fieldnum;    /* field number to extract */
             Oid            resulttype; /* field's type */
-            /* cached tupdesc pointer - filled at runtime */
-            TupleDesc    argdesc;
+            /* cached descriptor for composite type - filled at runtime */
+            ExprEvalRowtypeCache rowcache;
         }            fieldselect;

         /* for EEOP_FIELDSTORE_DEFORM / FIELDSTORE_FORM */
@@ -491,9 +505,9 @@ typedef struct ExprEvalStep
             /* original expression node */
             FieldStore *fstore;

-            /* cached tupdesc pointer - filled at runtime */
-            /* note that a DEFORM and FORM pair share the same tupdesc */
-            TupleDesc  *argdesc;
+            /* cached descriptor for composite type - filled at runtime */
+            /* note that a DEFORM and FORM pair share the same cache */
+            ExprEvalRowtypeCache *rowcache;

             /* workspace for column values */
             Datum       *values;
@@ -533,12 +547,12 @@ typedef struct ExprEvalStep
         /* for EEOP_CONVERT_ROWTYPE */
         struct
         {
-            ConvertRowtypeExpr *convert;    /* original expression */
+            Oid            inputtype;    /* input composite type */
+            Oid            outputtype; /* output composite type */
             /* these three fields are filled at runtime: */
-            TupleDesc    indesc; /* tupdesc for input type */
-            TupleDesc    outdesc;    /* tupdesc for output type */
+            ExprEvalRowtypeCache *incache;    /* cache for input type */
+            ExprEvalRowtypeCache *outcache; /* cache for output type */
             TupleConversionMap *map;    /* column mapping */
-            bool        initialized;    /* initialized for current types? */
         }            convert_rowtype;

         /* for EEOP_SCALARARRAYOP */
diff --git a/src/pl/plpgsql/src/expected/plpgsql_transaction.out b/src/pl/plpgsql/src/expected/plpgsql_transaction.out
index 5f576231c3..e205a1e002 100644
--- a/src/pl/plpgsql/src/expected/plpgsql_transaction.out
+++ b/src/pl/plpgsql/src/expected/plpgsql_transaction.out
@@ -379,6 +379,30 @@ SELECT * FROM test1;
 ---+---
 (0 rows)

+-- operations on composite types vs. internal transactions
+DO LANGUAGE plpgsql $$
+declare
+  c test1 := row(42, 'hello');
+  r bool;
+begin
+  for i in 1..3 loop
+    r := c is not null;
+    raise notice 'r = %', r;
+    commit;
+  end loop;
+  for i in 1..3 loop
+    r := c is null;
+    raise notice 'r = %', r;
+    rollback;
+  end loop;
+end
+$$;
+NOTICE:  r = t
+NOTICE:  r = t
+NOTICE:  r = t
+NOTICE:  r = f
+NOTICE:  r = f
+NOTICE:  r = f
 -- COMMIT failures
 DO LANGUAGE plpgsql $$
 BEGIN
diff --git a/src/pl/plpgsql/src/sql/plpgsql_transaction.sql b/src/pl/plpgsql/src/sql/plpgsql_transaction.sql
index 7575655c1a..94fd406b7a 100644
--- a/src/pl/plpgsql/src/sql/plpgsql_transaction.sql
+++ b/src/pl/plpgsql/src/sql/plpgsql_transaction.sql
@@ -313,6 +313,26 @@ $$;
 SELECT * FROM test1;


+-- operations on composite types vs. internal transactions
+DO LANGUAGE plpgsql $$
+declare
+  c test1 := row(42, 'hello');
+  r bool;
+begin
+  for i in 1..3 loop
+    r := c is not null;
+    raise notice 'r = %', r;
+    commit;
+  end loop;
+  for i in 1..3 loop
+    r := c is null;
+    raise notice 'r = %', r;
+    rollback;
+  end loop;
+end
+$$;
+
+
 -- COMMIT failures
 DO LANGUAGE plpgsql $$
 BEGIN

pgsql-hackers by date:

Previous
From: Tomas Vondra
Date:
Subject: Re: pgsql: autovacuum: handle analyze for partitioned tables
Next
From: David Rowley
Date:
Subject: Re: Binary search in ScalarArrayOpExpr for OR'd constant arrays