Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker - Mailing list pgsql-bugs

From Tom Lane
Subject Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker
Date
Msg-id 13241.1574379258@sss.pgh.pa.us
Whole thread Raw
In response to Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-bugs
I wrote:
> Tomas Vondra <tomas.vondra@2ndquadrant.com> writes:
>> FWIW my hunch is the bug is somewhere in this chunk of code from
>> apply_heap_update:
>> oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
>> ExecCopySlot(remoteslot, localslot);
>> slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
>> MemoryContextSwitchTo(oldctx);

> I imagine the only reason this code has gotten past the valgrind
> animals is that we're not testing cases where non-replaced columns
> in the subscriber table are of pass-by-ref types.

Actually, it doesn't appear to me that we're testing this with
any non-replaced columns at all.  The test modifications in the
attached proposed patch add that.  For me, the unpatched code
doesn't crash with this test, but the non-replaced column reads
back as empty which is certainly wrong.  Valgrind would likely
complain too, but I didn't try it.

            regards, tom lane

diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index ff62303..9c06b67 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -363,13 +363,19 @@ slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
 }

 /*
- * Modify slot with user data provided as C strings.
+ * Replace selected columns with user data provided as C strings.
  * This is somewhat similar to heap_modify_tuple but also calls the type
- * input function on the user data as the input is the text representation
- * of the types.
+ * input functions on the user data.
+ * "slot" is filled with a copy of the tuple in "srcslot", with
+ * columns selected by the "replaces" array replaced with data values
+ * from "values".
+ * Caution: unreplaced pass-by-ref columns in "slot" will point into the
+ * storage for "srcslot".  This is OK for current usage, but someday we may
+ * need to materialize "slot" at the end to make it independent of "srcslot".
  */
 static void
-slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+slot_modify_cstrings(TupleTableSlot *slot, TupleTableSlot *srcslot,
+                     LogicalRepRelMapEntry *rel,
                      char **values, bool *replaces)
 {
     int            natts = slot->tts_tupleDescriptor->natts;
@@ -377,9 +383,18 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
     SlotErrCallbackArg errarg;
     ErrorContextCallback errcallback;

-    slot_getallattrs(slot);
+    /* We'll fill "slot" with a virtual tuple, so we must start with ... */
     ExecClearTuple(slot);

+    /*
+     * Transfer all the column data from srcslot, so that we have valid values
+     * for unreplaced columns.
+     */
+    Assert(natts == srcslot->tts_tupleDescriptor->natts);
+    slot_getallattrs(srcslot);
+    memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
+    memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
+
     /* Push callback + info on the error context stack */
     errarg.rel = rel;
     errarg.local_attnum = -1;
@@ -428,6 +443,7 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
     /* Pop the error context stack */
     error_context_stack = errcallback.previous;

+    /* And finally, declare that "slot" contains a valid virtual tuple */
     ExecStoreVirtualTuple(slot);
 }

@@ -740,8 +756,8 @@ apply_handle_update(StringInfo s)
     {
         /* Process and store remote tuple in the slot */
         oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-        ExecCopySlot(remoteslot, localslot);
-        slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
+        slot_modify_cstrings(remoteslot, localslot, rel,
+                             newtup.values, newtup.changed);
         MemoryContextSwitchTo(oldctx);

         EvalPlanQualSetSlot(&epqstate, remoteslot);
diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
index 40e306a..116e487 100644
--- a/src/test/subscription/t/001_rep_changes.pl
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 17;
+use Test::More tests => 18;

 # Initialize publisher node
 my $node_publisher = get_new_node('publisher');
@@ -45,7 +45,7 @@ $node_subscriber->safe_psql('postgres',

 # different column count and order than on publisher
 $node_subscriber->safe_psql('postgres',
-    "CREATE TABLE tab_mixed (c text, b text, a int primary key)");
+    "CREATE TABLE tab_mixed (c text default 'local', b text, a int primary key)");

 # replication of the table with included index
 $node_subscriber->safe_psql('postgres',
@@ -114,8 +114,8 @@ is($result, qq(20|-20|-1), 'check replicated changes on subscriber');

 $result =
   $node_subscriber->safe_psql('postgres', "SELECT c, b, a FROM tab_mixed");
-is( $result, qq(|foo|1
-|bar|2), 'check replicated changes with different column order');
+is( $result, qq(local|foo|1
+local|bar|2), 'check replicated changes with different column order');

 $result = $node_subscriber->safe_psql('postgres',
     "SELECT count(*), min(a), max(a) FROM tab_include");
@@ -139,11 +139,14 @@ $node_publisher->safe_psql('postgres',
     "ALTER TABLE tab_ins REPLICA IDENTITY FULL");
 $node_subscriber->safe_psql('postgres',
     "ALTER TABLE tab_ins REPLICA IDENTITY FULL");
+# tab_mixed can use DEFAULT, since it has a primary key

 # and do the updates
 $node_publisher->safe_psql('postgres', "UPDATE tab_full SET a = a * a");
 $node_publisher->safe_psql('postgres',
     "UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'");
+$node_publisher->safe_psql('postgres',
+    "UPDATE tab_mixed SET b = 'baz' WHERE a = 1");

 $node_publisher->wait_for_catchup('tap_sub');

@@ -159,6 +162,12 @@ bb
 bb),
     'update works with REPLICA IDENTITY FULL and text datums');

+$result = $node_subscriber->safe_psql('postgres',
+    "SELECT * FROM tab_mixed ORDER BY a");
+is( $result, qq(local|baz|1
+local|bar|2),
+    'update works with different column order and subscriber local values');
+
 # check that change of connection string and/or publication list causes
 # restart of subscription workers. Not all of these are registered as tests
 # as we need to poll for a change but the test suite will fail none the less

pgsql-bugs by date:

Previous
From: Tom Lane
Date:
Subject: Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker
Next
From: mayur
Date:
Subject: Re: BUG #16130: planner does not pick unique btree index and goes for seq scan but unsafe hash index works.