Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker - Mailing list pgsql-bugs
| From | Tom Lane |
|---|---|
| Subject | Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker |
| Date | |
| Msg-id | 13241.1574379258@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker (Tom Lane <tgl@sss.pgh.pa.us>) |
| List | pgsql-bugs |
I wrote:
> Tomas Vondra <tomas.vondra@2ndquadrant.com> writes:
>> FWIW my hunch is the bug is somewhere in this chunk of code from
>> apply_handle_update:
>> oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
>> ExecCopySlot(remoteslot, localslot);
>> slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
>> MemoryContextSwitchTo(oldctx);
> I imagine the only reason this code has gotten past the valgrind
> animals is that we're not testing cases where non-replaced columns
> in the subscriber table are of pass-by-ref types.
Actually, it doesn't appear to me that we're testing this with
any non-replaced columns at all. The test modifications in the
attached proposed patch add that. For me, the unpatched code
doesn't crash with this test, but the non-replaced column reads
back as empty, which is certainly wrong. Valgrind would likely
complain too, but I didn't try it.
regards, tom lane
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index ff62303..9c06b67 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -363,13 +363,19 @@ slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
}
/*
- * Modify slot with user data provided as C strings.
+ * Replace selected columns with user data provided as C strings.
* This is somewhat similar to heap_modify_tuple but also calls the type
- * input function on the user data as the input is the text representation
- * of the types.
+ * input functions on the user data.
+ * "slot" is filled with a copy of the tuple in "srcslot", with
+ * columns selected by the "replaces" array replaced with data values
+ * from "values".
+ * Caution: unreplaced pass-by-ref columns in "slot" will point into the
+ * storage for "srcslot". This is OK for current usage, but someday we may
+ * need to materialize "slot" at the end to make it independent of "srcslot".
*/
static void
-slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+slot_modify_cstrings(TupleTableSlot *slot, TupleTableSlot *srcslot,
+ LogicalRepRelMapEntry *rel,
char **values, bool *replaces)
{
int natts = slot->tts_tupleDescriptor->natts;
@@ -377,9 +383,18 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
SlotErrCallbackArg errarg;
ErrorContextCallback errcallback;
- slot_getallattrs(slot);
+ /* We'll fill "slot" with a virtual tuple, so we must start with ... */
ExecClearTuple(slot);
+ /*
+ * Transfer all the column data from srcslot, so that we have valid values
+ * for unreplaced columns.
+ */
+ Assert(natts == srcslot->tts_tupleDescriptor->natts);
+ slot_getallattrs(srcslot);
+ memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
+ memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
+
/* Push callback + info on the error context stack */
errarg.rel = rel;
errarg.local_attnum = -1;
@@ -428,6 +443,7 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
/* Pop the error context stack */
error_context_stack = errcallback.previous;
+ /* And finally, declare that "slot" contains a valid virtual tuple */
ExecStoreVirtualTuple(slot);
}
@@ -740,8 +756,8 @@ apply_handle_update(StringInfo s)
{
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- ExecCopySlot(remoteslot, localslot);
- slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
+ slot_modify_cstrings(remoteslot, localslot, rel,
+ newtup.values, newtup.changed);
MemoryContextSwitchTo(oldctx);
EvalPlanQualSetSlot(&epqstate, remoteslot);
diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
index 40e306a..116e487 100644
--- a/src/test/subscription/t/001_rep_changes.pl
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 17;
+use Test::More tests => 18;
# Initialize publisher node
my $node_publisher = get_new_node('publisher');
@@ -45,7 +45,7 @@ $node_subscriber->safe_psql('postgres',
# different column count and order than on publisher
$node_subscriber->safe_psql('postgres',
- "CREATE TABLE tab_mixed (c text, b text, a int primary key)");
+ "CREATE TABLE tab_mixed (c text default 'local', b text, a int primary key)");
# replication of the table with included index
$node_subscriber->safe_psql('postgres',
@@ -114,8 +114,8 @@ is($result, qq(20|-20|-1), 'check replicated changes on subscriber');
$result =
$node_subscriber->safe_psql('postgres', "SELECT c, b, a FROM tab_mixed");
-is( $result, qq(|foo|1
-|bar|2), 'check replicated changes with different column order');
+is( $result, qq(local|foo|1
+local|bar|2), 'check replicated changes with different column order');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab_include");
@@ -139,11 +139,14 @@ $node_publisher->safe_psql('postgres',
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
$node_subscriber->safe_psql('postgres',
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
+# tab_mixed can use DEFAULT, since it has a primary key
# and do the updates
$node_publisher->safe_psql('postgres', "UPDATE tab_full SET a = a * a");
$node_publisher->safe_psql('postgres',
"UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'");
+$node_publisher->safe_psql('postgres',
+ "UPDATE tab_mixed SET b = 'baz' WHERE a = 1");
$node_publisher->wait_for_catchup('tap_sub');
@@ -159,6 +162,12 @@ bb
bb),
'update works with REPLICA IDENTITY FULL and text datums');
+$result = $node_subscriber->safe_psql('postgres',
+ "SELECT * FROM tab_mixed ORDER BY a");
+is( $result, qq(local|baz|1
+local|bar|2),
+ 'update works with different column order and subscriber local values');
+
# check that change of connection string and/or publication list causes
# restart of subscription workers. Not all of these are registered as tests
# as we need to poll for a change but the test suite will fail none the less
pgsql-bugs by date: