Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker - Mailing list pgsql-bugs
From | Tom Lane |
---|---|
Subject | Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker |
Date | |
Msg-id | 13241.1574379258@sss.pgh.pa.us Whole thread Raw |
In response to | Re: BUG #16129: Segfault in tts_virtual_materialize in logical replication worker (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-bugs |
I wrote: > Tomas Vondra <tomas.vondra@2ndquadrant.com> writes: >> FWIW my hunch is the bug is somewhere in this chunk of code from >> apply_heap_update: >> oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); >> ExecCopySlot(remoteslot, localslot); >> slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed); >> MemoryContextSwitchTo(oldctx); > I imagine the only reason this code has gotten past the valgrind > animals is that we're not testing cases where non-replaced columns > in the subscriber table are of pass-by-ref types. Actually, it doesn't appear to me that we're testing this with any non-replaced columns at all. The test modifications in the attached proposed patch add that. For me, the unpatched code doesn't crash with this test, but the non-replaced column reads back as empty which is certainly wrong. Valgrind would likely complain too, but I didn't try it. regards, tom lane diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index ff62303..9c06b67 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -363,13 +363,19 @@ slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, } /* - * Modify slot with user data provided as C strings. + * Replace selected columns with user data provided as C strings. * This is somewhat similar to heap_modify_tuple but also calls the type - * input function on the user data as the input is the text representation - * of the types. + * input functions on the user data. + * "slot" is filled with a copy of the tuple in "srcslot", with + * columns selected by the "replaces" array replaced with data values + * from "values". + * Caution: unreplaced pass-by-ref columns in "slot" will point into the + * storage for "srcslot". This is OK for current usage, but someday we may + * need to materialize "slot" at the end to make it independent of "srcslot". */ static void -slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, +slot_modify_cstrings(TupleTableSlot *slot, TupleTableSlot *srcslot, + LogicalRepRelMapEntry *rel, char **values, bool *replaces) { int natts = slot->tts_tupleDescriptor->natts; @@ -377,9 +383,18 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, SlotErrCallbackArg errarg; ErrorContextCallback errcallback; - slot_getallattrs(slot); + /* We'll fill "slot" with a virtual tuple, so we must start with ... */ ExecClearTuple(slot); + /* + * Transfer all the column data from srcslot, so that we have valid values + * for unreplaced columns. + */ + Assert(natts == srcslot->tts_tupleDescriptor->natts); + slot_getallattrs(srcslot); + memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum)); + memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool)); + /* Push callback + info on the error context stack */ errarg.rel = rel; errarg.local_attnum = -1; @@ -428,6 +443,7 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, /* Pop the error context stack */ error_context_stack = errcallback.previous; + /* And finally, declare that "slot" contains a valid virtual tuple */ ExecStoreVirtualTuple(slot); } @@ -740,8 +756,8 @@ apply_handle_update(StringInfo s) { /* Process and store remote tuple in the slot */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - ExecCopySlot(remoteslot, localslot); - slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed); + slot_modify_cstrings(remoteslot, localslot, rel, + newtup.values, newtup.changed); MemoryContextSwitchTo(oldctx); EvalPlanQualSetSlot(&epqstate, remoteslot); diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index 40e306a..116e487 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 17; +use Test::More tests => 18; # Initialize publisher node my $node_publisher = get_new_node('publisher'); @@ -45,7 +45,7 @@ $node_subscriber->safe_psql('postgres', # different column count and order than on publisher $node_subscriber->safe_psql('postgres', - "CREATE TABLE tab_mixed (c text, b text, a int primary key)"); + "CREATE TABLE tab_mixed (c text default 'local', b text, a int primary key)"); # replication of the table with included index $node_subscriber->safe_psql('postgres', @@ -114,8 +114,8 @@ is($result, qq(20|-20|-1), 'check replicated changes on subscriber'); $result = $node_subscriber->safe_psql('postgres', "SELECT c, b, a FROM tab_mixed"); -is( $result, qq(|foo|1 -|bar|2), 'check replicated changes with different column order'); +is( $result, qq(local|foo|1 +local|bar|2), 'check replicated changes with different column order'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_include"); @@ -139,11 +139,14 @@ $node_publisher->safe_psql('postgres', "ALTER TABLE tab_ins REPLICA IDENTITY FULL"); $node_subscriber->safe_psql('postgres', "ALTER TABLE tab_ins REPLICA IDENTITY FULL"); +# tab_mixed can use DEFAULT, since it has a primary key # and do the updates $node_publisher->safe_psql('postgres', "UPDATE tab_full SET a = a * a"); $node_publisher->safe_psql('postgres', "UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'"); +$node_publisher->safe_psql('postgres', + "UPDATE tab_mixed SET b = 'baz' WHERE a = 1"); $node_publisher->wait_for_catchup('tap_sub'); @@ -159,6 +162,12 @@ bb bb), 'update works with REPLICA IDENTITY FULL and text datums'); +$result = $node_subscriber->safe_psql('postgres', + "SELECT * FROM tab_mixed ORDER BY a"); +is( $result, qq(local|baz|1 +local|bar|2), + 'update works with different column order and subscriber local values'); + # check that change of connection string and/or publication list causes # restart of subscription workers. Not all of these are registered as tests # as we need to poll for a change but the test suite will fail none the less
pgsql-bugs by date: