Re: ALTER TABLE ADD COLUMN fast default - Mailing list pgsql-hackers

From Andres Freund
Subject Re: ALTER TABLE ADD COLUMN fast default
Date
Msg-id 20180328070041.ol7u4qpawy267mrg@alap3.anarazel.de
Whole thread Raw
In response to Re: ALTER TABLE ADD COLUMN fast default  (Andrew Dunstan <andrew.dunstan@2ndquadrant.com>)
Responses Re: ALTER TABLE ADD COLUMN fast default  (Andrew Dunstan <andrew.dunstan@2ndquadrant.com>)
List pgsql-hackers
Hi,

On 2018-03-26 09:02:10 +1030, Andrew Dunstan wrote:
> Thanks for this, all looks good. Here is the consolidate patch
> rebased. If there are no further comments I propose to commit this in
> a few days time.

Here's a bit of post commit review:

@@ -1310,13 +1679,11 @@ slot_getsomeattrs(TupleTableSlot *slot, int attnum)
 
     /*
      * If tuple doesn't have all the atts indicated by tupleDesc, read the
-     * rest as null
+     * rest as NULLs or missing values
      */
-    for (; attno < attnum; attno++)
-    {
-        slot->tts_values[attno] = (Datum) 0;
-        slot->tts_isnull[attno] = true;
-    }
+    if (attno < attnum)
+        slot_getmissingattrs(slot, attno, attnum);
+
     slot->tts_nvalid = attnum;
 }

It's worthwhile to note that this'll re-process *all* missing values,
even if they've already been deformed. I.e. if
slot_getmissingattrs(.., first-missing)
slot_getmissingattrs(.., first-missing + 1)
slot_getmissingattrs(.., first-missing + 2)
is called, all three missing values will be copied every time. That's
because tts_nvalid isn't taken into account.  I wonder if slot_getmissingattrs
could take tts_nvalid into account?

I also wonder if this doesn't deserve an unlikely(), to avoid the cost
of spilling registers in the hot branch of not missing any values.


+    else
+    {
+        /* if there is a missing values array we must process them one by one */
+        for (missattnum = lastAttNum - 1;
+             missattnum >= startAttNum;
+             missattnum--)
+        {
+            slot->tts_values[missattnum] = attrmiss[missattnum].ammissing;
+            slot->tts_isnull[missattnum] =
+                !attrmiss[missattnum].ammissingPresent;
+        }
+    }
+}

Why is this done backwards? It's noticeably slower to walk arrays
backwards on some CPU microarchitectures.



@@ -176,6 +179,23 @@ CreateTupleDescCopyConstr(TupleDesc tupdesc)
             }
         }
 


@@ -469,6 +503,29 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
             if (strcmp(defval1->adbin, defval2->adbin) != 0)
                 return false;
         }
+        if (constr1->missing)
+        {
+            if (!constr2->missing)
+                return false;
+            for (i = 0; i < tupdesc1->natts; i++)
+            {
+                AttrMissing *missval1 = constr1->missing + i;
+                AttrMissing *missval2 = constr2->missing + i;

It's a bit odd to not use array indexing here?


@@ -625,7 +625,15 @@ ExecCopySlotMinimalTuple(TupleTableSlot *slot)
     if (slot->tts_mintuple)
         return heap_copy_minimal_tuple(slot->tts_mintuple);
     if (slot->tts_tuple)
-        return minimal_tuple_from_heap_tuple(slot->tts_tuple);
+    {
+        if (TTS_HAS_PHYSICAL_TUPLE(slot) &&
+            HeapTupleHeaderGetNatts(slot->tts_tuple->t_data)
+            < slot->tts_tupleDescriptor->natts)
+            return minimal_expand_tuple(slot->tts_tuple,
+                                        slot->tts_tupleDescriptor);
+        else
+            return minimal_tuple_from_heap_tuple(slot->tts_tuple);
+    }
 

What's the TTS_HAS_PHYSICAL_TUPLE doing here? How can this be relevant
given the previous tts_mintuple check? Am I missing something?



@@ -563,10 +569,63 @@ RelationBuildTupleDesc(Relation relation)
                     MemoryContextAllocZero(CacheMemoryContext,
                                            relation->rd_rel->relnatts *
                                            sizeof(AttrDefault));
-            attrdef[ndef].adnum = attp->attnum;
+            attrdef[ndef].adnum = attnum;
             attrdef[ndef].adbin = NULL;
+
             ndef++;
         }
+
+        /* Likewise for a missing value */
+        if (attp->atthasmissing)
+        {
+            Datum        missingval;
+            bool        missingNull;
+
+            /* Do we have a missing value? */
+            missingval = heap_getattr(pg_attribute_tuple,
+                                      Anum_pg_attribute_attmissingval,
+                                      pg_attribute_desc->rd_att,
+                                      &missingNull);
+            if (!missingNull)
+            {
+                /* Yes, fetch from the array */
+                MemoryContext oldcxt;
+                bool        is_null;
+                int            one = 1;
+                Datum        missval;
+
+                if (attrmiss == NULL)
+                    attrmiss = (AttrMissing *)
+                        MemoryContextAllocZero(CacheMemoryContext,
+                                               relation->rd_rel->relnatts *
+                                               sizeof(AttrMissing));
+
+                missval = array_get_element(missingval,
+                                            1,
+                                            &one,
+                                            -1,
+                                            attp->attlen,
+                                            attp->attbyval,
+                                            attp->attalign,
+                                            &is_null);
+                Assert(!is_null);
+                if (attp->attbyval)
+                {
+                    /* for copy by val just copy the datum direct */
+                    attrmiss[attnum - 1].ammissing = missval;
+                }
+                else
+                {
+                    /* otherwise copy in the correct context */
+                    oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+                    attrmiss[attnum - 1].ammissing = datumCopy(missval,
+                                                               attp->attbyval,
+                                                               attp->attlen);
+                    MemoryContextSwitchTo(oldcxt);
+                }
+                attrmiss[attnum - 1].ammissingPresent = true;
+            }
+        }
         need--;
         if (need == 0)
             break;

I'm still strongly object to do doing this unconditionally for queries
that never need any of this.  We're can end up with a significant number
of large tuples in memory here, and then copy that through dozens of
tupledesc copies in queries.


@@ -167,6 +170,12 @@ CATALOG(pg_attribute,1249) BKI_BOOTSTRAP BKI_WITHOUT_OIDS BKI_ROWTYPE_OID(75) BK
 
     /* Column-level FDW options */
     text        attfdwoptions[1] BKI_DEFAULT(_null_);
+
+    /*
+     * Missing value for added columns. This is a one element array which lets
+     * us store a value of the attribute type here.
+     */
+    anyarray    attmissingval BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_attribute;
 

Still think this is a bad location, and it'll reduce cache hit ratio for
catalog lookups.

Greetings,

Andres Freund


pgsql-hackers by date:

Previous
From: Michael Paquier
Date:
Subject: Re: Problem while setting the fpw with SIGHUP
Next
From: Amit Kapila
Date:
Subject: Re: [HACKERS] why not parallel seq scan for slow functions