From 396ffef4af4dea6268fc74b4b5a5f48b4b04c892 Mon Sep 17 00:00:00 2001 From: Evan Montgomery-Recht Date: Sun, 5 Apr 2026 22:23:26 -0400 Subject: [PATCH v1] Fix RI fast-path crash when FK triggers fire under nested SPI The fast-path FK check code (ri_FastPathGetEntry) opens PK relations, creates TupleTableSlots, and caches them in RI_FastPathEntry for the duration of a trigger batch. The batch callback (ri_FastPathTeardown) that closes them fires only at query_depth == 0 via FireAfterTriggerBatchCallbacks(). When FK triggers fire inside a nested SPI context -- for example, a C-language function such as PostGIS's topogeo_addLineString() that uses SPI_connect/SPI_execute to INSERT into a table with multiple FK constraints -- the relations and TupleDesc pins are registered with the SPI portal's resource owner. When that portal finishes and its resource owner is released, the references are decremented. Later, when the batch callback fires at query_depth == 0, ri_FastPathTeardown attempts to close relations whose reference counts are already zero, triggering: TRAP: failed Assert("rel->rd_refcnt > 0") in RelationDecrementReferenceCount, called from index_close and TupleDesc pins that are no longer tracked by any resource owner cause "tupdesc reference not owned by resource owner" errors. Fix by transferring both relation references and TupleDesc pins from the current (inner) resource owner to CurTransactionResourceOwner immediately after creating them. ri_FastPathTeardown is updated to clear any buffered tuples (whose buffer pins belong to the current resource owner) before switching to CurTransactionResourceOwner for the close/drop operations. The transfer uses RelationIncrementReferenceCount / PinTupleDesc under the target owner followed by RelationDecrementReferenceCount / ReleaseTupleDesc under the original, rather than switching CurrentResourceOwner around the table_open/index_open calls, because the latter would also affect transient buffer pins acquired during catalog lookups inside those functions. Add a test module (test_spi_func) with a C function that executes SQL via SPI, reproducing the crash scenario that simple PL/pgSQL cannot trigger. Bug found via PostGIS topology CI: toTopoGeom() -> SPI INSERT into edge_data (4 immediate FK constraints) crashes on PG 19devel but passes on PG 18. Discussion: https://postgr.es/m/CA+HiwqF4C0ws3cO+z5cLkPuvwnAwkSp7sfvgGj3yQ=Li6KNMqA@mail.gmail.com --- src/backend/utils/adt/ri_triggers.c | 59 ++++++++++++++ src/test/modules/meson.build | 1 + .../test_spi_func/expected/ri_fastpath.out | 79 +++++++++++++++++++ src/test/modules/test_spi_func/meson.build | 31 ++++++++ .../modules/test_spi_func/sql/ri_fastpath.sql | 65 +++++++++++++++ .../test_spi_func/test_spi_func--1.0.sql | 9 +++ .../modules/test_spi_func/test_spi_func.c | 51 ++++++++++++ .../test_spi_func/test_spi_func.control | 4 + 8 files changed, 299 insertions(+) create mode 100644 src/test/modules/test_spi_func/expected/ri_fastpath.out create mode 100644 src/test/modules/test_spi_func/meson.build create mode 100644 src/test/modules/test_spi_func/sql/ri_fastpath.sql create mode 100644 src/test/modules/test_spi_func/test_spi_func--1.0.sql create mode 100644 src/test/modules/test_spi_func/test_spi_func.c create mode 100644 src/test/modules/test_spi_func/test_spi_func.control diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 84f9fecdb4c..94346892151 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -52,6 +52,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/resowner.h" #include "utils/rls.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" @@ -4148,6 +4149,25 @@ ri_FastPathTeardown(void) hash_seq_init(&status, ri_fastpath_cache); while ((entry = hash_seq_search(&status)) != NULL) { + ResourceOwner oldowner; + + /* + * First, clear any buffered tuple from the slots. This must happen + * under the current resource owner because buffer pins from the last + * index scan belong to it. + */ + if (entry->pk_slot) + ExecClearTuple(entry->pk_slot); + if (entry->fk_slot) + ExecClearTuple(entry->fk_slot); + + /* + * Now switch to CurTransactionResourceOwner for closing relations and + * dropping slots, since that's where their refs were transferred in + * ri_FastPathGetEntry(). + */ + oldowner = CurrentResourceOwner; + CurrentResourceOwner = CurTransactionResourceOwner; if (entry->idx_rel) index_close(entry->idx_rel, NoLock); if (entry->pk_rel) @@ -4156,6 +4176,7 @@ ri_FastPathTeardown(void) ExecDropSingleTupleTableSlot(entry->pk_slot); if (entry->fk_slot) ExecDropSingleTupleTableSlot(entry->fk_slot); + CurrentResourceOwner = oldowner; if (entry->flush_cxt) MemoryContextDelete(entry->flush_cxt); } @@ -4271,6 +4292,44 @@ ri_FastPathGetEntry(const RI_ConstraintInfo *riinfo, Relation fk_rel) entry->fk_slot = MakeSingleTupleTableSlot(RelationGetDescr(fk_rel), &TTSOpsHeapTuple); + /* + * Transfer relation and TupleDesc references from the current + * resource owner to CurTransactionResourceOwner so they survive + * cleanup of inner resource owners (e.g., SPI portals from C-language + * functions). The batch callback that closes them + * (ri_FastPathTeardown) fires at query_depth == 0, which may be long + * after the resource owner that was current when the trigger fired + * has been released. + * + * We open relations and create slots under the current resource owner + * (to avoid affecting transient buffer pins from catalog lookups), + * then transfer the relation refs and TupleDesc pins by incrementing + * under the target owner and decrementing under the original. + * + * Relation TupleDescs (rd_att) are reference-counted (tdrefcount >= + * 1), so PinTupleDesc inside table_slot_create / + * MakeSingleTupleTableSlot registers them with the resource owner. + * These must also be transferred. + */ + if (CurrentResourceOwner != CurTransactionResourceOwner) + { + ResourceOwner saved = CurrentResourceOwner; + + /* Add refs under CurTransactionResourceOwner */ + CurrentResourceOwner = CurTransactionResourceOwner; + RelationIncrementReferenceCount(entry->pk_rel); + RelationIncrementReferenceCount(entry->idx_rel); + PinTupleDesc(entry->pk_slot->tts_tupleDescriptor); + PinTupleDesc(entry->fk_slot->tts_tupleDescriptor); + + /* Remove refs from the original resource owner */ + CurrentResourceOwner = saved; + RelationDecrementReferenceCount(entry->pk_rel); + RelationDecrementReferenceCount(entry->idx_rel); + ReleaseTupleDesc(entry->pk_slot->tts_tupleDescriptor); + ReleaseTupleDesc(entry->fk_slot->tts_tupleDescriptor); + } + entry->flush_cxt = AllocSetContextCreate(TopTransactionContext, "RI fast path flush temporary context", ALLOCSET_SMALL_SIZES); diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 4bca42bb370..3d5d016c46f 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -53,6 +53,7 @@ subdir('test_saslprep') subdir('test_shmem') subdir('test_shm_mq') subdir('test_slru') +subdir('test_spi_func') subdir('test_tidstore') subdir('typcache') subdir('unsafe_tests') diff --git a/src/test/modules/test_spi_func/expected/ri_fastpath.out b/src/test/modules/test_spi_func/expected/ri_fastpath.out new file mode 100644 index 00000000000..5b2bf9e9310 --- /dev/null +++ b/src/test/modules/test_spi_func/expected/ri_fastpath.out @@ -0,0 +1,79 @@ +-- +-- Test RI fast-path FK check under C-level SPI. +-- +-- The RI fast-path caches relation references in ri_FastPathGetEntry() +-- under the current resource owner. When FK triggers fire inside a +-- C-level SPI context (SPI_connect/SPI_execute/SPI_finish), the inner +-- resource owner is released before the batch callback that closes +-- those relations fires at query_depth == 0. Without the fix, this +-- crashes with Assert(rel->rd_refcnt > 0) in index_close. +-- +-- Simple PL/pgSQL does NOT trigger this because its SPI connection +-- outlives the batch callback. A C function using SPI is required. +-- +CREATE EXTENSION test_spi_func; +CREATE TABLE ri_fp_pk1 (id serial PRIMARY KEY); +CREATE TABLE ri_fp_pk2 (id serial PRIMARY KEY); +CREATE TABLE ri_fp_pk3 (id serial PRIMARY KEY); +INSERT INTO ri_fp_pk1 VALUES (1); +INSERT INTO ri_fp_pk2 VALUES (1); +INSERT INTO ri_fp_pk3 VALUES (1); +CREATE TABLE ri_fp_fk ( + id serial PRIMARY KEY, + a int REFERENCES ri_fp_pk1(id), + b int REFERENCES ri_fp_pk2(id), + c int REFERENCES ri_fp_pk3(id), + d int REFERENCES ri_fp_pk1(id), + e int REFERENCES ri_fp_pk2(id), + f int REFERENCES ri_fp_pk3(id) +); +-- C-level SPI INSERT: the critical test case. +-- Without the fix this crashes the server. +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (1, 1, 1, 1, 1, 1)'); + spi_exec +---------- + +(1 row) + +-- Additional C-level SPI INSERTs to exercise batch reuse across calls. +-- Use different column orderings to ensure each is a distinct statement. +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (f, e, d, c, b, a) VALUES (1, 1, 1, 1, 1, 1)'); + spi_exec +---------- + +(1 row) + +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, c, e, b, d, f) VALUES (1, 1, 1, 1, 1, 1)'); + spi_exec +---------- + +(1 row) + +-- C-level SPI with FK violation: should error, not crash +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (999, 1, 1, 1, 1, 1)'); +ERROR: insert or update on table "ri_fp_fk" violates foreign key constraint "ri_fp_fk_a_fkey" +DETAIL: Key (a)=(999) is not present in table "ri_fp_pk1". +CONTEXT: SQL statement "INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (999, 1, 1, 1, 1, 1)" +-- Nested: PL/pgSQL calling C SPI (mimics PostGIS toTopoGeom pattern) +CREATE FUNCTION plpgsql_calls_c_spi() RETURNS void AS $$ +DECLARE + ins_stmt text := 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (1, 1, 1, 1, 1, 1)'; +BEGIN + PERFORM spi_exec(ins_stmt); +END; +$$ LANGUAGE plpgsql; +SELECT plpgsql_calls_c_spi(); + plpgsql_calls_c_spi +--------------------- + +(1 row) + +-- Cleanup +DROP FUNCTION plpgsql_calls_c_spi(); +DROP TABLE ri_fp_fk; +DROP TABLE ri_fp_pk3, ri_fp_pk2, ri_fp_pk1; +DROP EXTENSION test_spi_func; diff --git a/src/test/modules/test_spi_func/meson.build b/src/test/modules/test_spi_func/meson.build new file mode 100644 index 00000000000..939edc898a4 --- /dev/null +++ b/src/test/modules/test_spi_func/meson.build @@ -0,0 +1,31 @@ +test_spi_func_sources = files( + 'test_spi_func.c', +) + +if host_system == 'windows' + test_spi_func_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_spi_func', + '--FILEDESC', 'test_spi_func - SQL-callable C SPI function',]) +endif + +test_spi_func = shared_module('test_spi_func', + test_spi_func_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_spi_func + +test_install_data += files( + 'test_spi_func.control', + 'test_spi_func--1.0.sql', +) + +tests += { + 'name': 'test_spi_func', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'ri_fastpath', + ], + }, +} diff --git a/src/test/modules/test_spi_func/sql/ri_fastpath.sql b/src/test/modules/test_spi_func/sql/ri_fastpath.sql new file mode 100644 index 00000000000..002f4ad5e52 --- /dev/null +++ b/src/test/modules/test_spi_func/sql/ri_fastpath.sql @@ -0,0 +1,65 @@ +-- +-- Test RI fast-path FK check under C-level SPI. +-- +-- The RI fast-path caches relation references in ri_FastPathGetEntry() +-- under the current resource owner. When FK triggers fire inside a +-- C-level SPI context (SPI_connect/SPI_execute/SPI_finish), the inner +-- resource owner is released before the batch callback that closes +-- those relations fires at query_depth == 0. Without the fix, this +-- crashes with Assert(rel->rd_refcnt > 0) in index_close. +-- +-- Simple PL/pgSQL does NOT trigger this because its SPI connection +-- outlives the batch callback. A C function using SPI is required. +-- + +CREATE EXTENSION test_spi_func; + +CREATE TABLE ri_fp_pk1 (id serial PRIMARY KEY); +CREATE TABLE ri_fp_pk2 (id serial PRIMARY KEY); +CREATE TABLE ri_fp_pk3 (id serial PRIMARY KEY); +INSERT INTO ri_fp_pk1 VALUES (1); +INSERT INTO ri_fp_pk2 VALUES (1); +INSERT INTO ri_fp_pk3 VALUES (1); + +CREATE TABLE ri_fp_fk ( + id serial PRIMARY KEY, + a int REFERENCES ri_fp_pk1(id), + b int REFERENCES ri_fp_pk2(id), + c int REFERENCES ri_fp_pk3(id), + d int REFERENCES ri_fp_pk1(id), + e int REFERENCES ri_fp_pk2(id), + f int REFERENCES ri_fp_pk3(id) +); + +-- C-level SPI INSERT: the critical test case. +-- Without the fix this crashes the server. +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (1, 1, 1, 1, 1, 1)'); + +-- Additional C-level SPI INSERTs to exercise batch reuse across calls. +-- Use different column orderings to ensure each is a distinct statement. +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (f, e, d, c, b, a) VALUES (1, 1, 1, 1, 1, 1)'); +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, c, e, b, d, f) VALUES (1, 1, 1, 1, 1, 1)'); + +-- C-level SPI with FK violation: should error, not crash +SELECT spi_exec( + 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (999, 1, 1, 1, 1, 1)'); + +-- Nested: PL/pgSQL calling C SPI (mimics PostGIS toTopoGeom pattern) +CREATE FUNCTION plpgsql_calls_c_spi() RETURNS void AS $$ +DECLARE + ins_stmt text := 'INSERT INTO ri_fp_fk (a, b, c, d, e, f) VALUES (1, 1, 1, 1, 1, 1)'; +BEGIN + PERFORM spi_exec(ins_stmt); +END; +$$ LANGUAGE plpgsql; + +SELECT plpgsql_calls_c_spi(); + +-- Cleanup +DROP FUNCTION plpgsql_calls_c_spi(); +DROP TABLE ri_fp_fk; +DROP TABLE ri_fp_pk3, ri_fp_pk2, ri_fp_pk1; +DROP EXTENSION test_spi_func; diff --git a/src/test/modules/test_spi_func/test_spi_func--1.0.sql b/src/test/modules/test_spi_func/test_spi_func--1.0.sql new file mode 100644 index 00000000000..d5d67974d5b --- /dev/null +++ b/src/test/modules/test_spi_func/test_spi_func--1.0.sql @@ -0,0 +1,9 @@ +/* src/test/modules/test_spi_func/test_spi_func--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_spi_func" to load this file. \quit + +CREATE FUNCTION spi_exec(query text) +RETURNS void +AS 'MODULE_PATHNAME', 'spi_exec' +LANGUAGE C STRICT; diff --git a/src/test/modules/test_spi_func/test_spi_func.c b/src/test/modules/test_spi_func/test_spi_func.c new file mode 100644 index 00000000000..51f4b9c4f73 --- /dev/null +++ b/src/test/modules/test_spi_func/test_spi_func.c @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * test_spi_func.c + * SQL-callable C function that uses SPI to execute a query. + * + * Useful for testing code paths that only trigger under C-level + * SPI (not PL/pgSQL), such as resource owner interactions with + * RI fast-path FK checks. + * + * Copyright (c) 2026, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_spi_func/test_spi_func.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "executor/spi.h" +#include "utils/builtins.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(spi_exec); + +/* + * spi_exec(query text) - execute a SQL query via SPI. + * + * Opens a fresh SPI connection, executes the query, and closes the + * connection. This mimics the SPI usage pattern of C-language + * extensions (e.g., PostGIS topology functions) where each call + * to SPI_connect / SPI_execute / SPI_finish creates and destroys + * a short-lived SPI context. + */ +Datum +spi_exec(PG_FUNCTION_ARGS) +{ + const char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + int ret; + + SPI_connect(); + + ret = SPI_execute(query, false, 0); + + if (ret < 0) + elog(ERROR, "SPI_execute failed: error code %d", ret); + + SPI_finish(); + + PG_RETURN_VOID(); +} diff --git a/src/test/modules/test_spi_func/test_spi_func.control b/src/test/modules/test_spi_func/test_spi_func.control new file mode 100644 index 00000000000..87bd9dc9782 --- /dev/null +++ b/src/test/modules/test_spi_func/test_spi_func.control @@ -0,0 +1,4 @@ +comment = 'Test SQL-callable C function that uses SPI' +default_version = '1.0' +module_pathname = '$libdir/test_spi_func' +relocatable = true -- 2.50.1 (Apple Git-155)