From f05cfecc37b3a8ac687fe862b3f096c088e4e31a Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Sat, 17 Jan 2026 19:33:53 -0500 Subject: [PATCH v14 3/3] Add remote_analyze to postgres_fdw remote statistics fetching. This is accomplished through a new option, remote_analyze, which is available at the server level and table level. The default value is false. If remote_analyze is enabled, and if the first attempt to fetch remote statistics did not fetch attribute statistics for every local table column, then an attempt will be made to ANALYZE the remote table. If that remote ANALYZE succeeds, then a second and final attempt will be made to fetch remote statistics. If the statistics found are still insufficient, then the local ANALYZE command will fall back to regular row sampling and computing the statistics locally. --- doc/src/sgml/postgres-fdw.sgml | 16 ++ .../postgres_fdw/expected/postgres_fdw.out | 38 +++++ contrib/postgres_fdw/option.c | 5 + contrib/postgres_fdw/postgres_fdw.c | 150 ++++++++++++++---- contrib/postgres_fdw/sql/postgres_fdw.sql | 34 ++++ 5 files changed, 214 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index d7c0dc8ed14..4c7559dad6f 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -387,6 +387,22 @@ OPTIONS (ADD password_required 'false'); + + remote_analyze (boolean) + + + This option, which can be specified for a foreign table or a foreign + server, determines whether an ANALYZE on a foreign + table will attempt to ANALYZE the remote table if + the first attempt to fetch remote statistics fails, and will then + make a second and final attempt to fetch remote statistics. This option + has no meaning if the foreign table has fetch_stats + disabled. + The default is false. 
+ + + + diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index a76f1278538..f7a578d267d 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -12699,6 +12699,44 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; -- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', + fetch_stats 'true', + remote_analyze 'true'); +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------+----------- +(0 rows) + +ANALYZE remote_analyze_ftable; +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------------------+----------- + remote_analyze_ftable | 3 + remote_analyze_table | 3 +(2 rows) + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; +-- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== -- Disable debug_discard_caches in order to manage remote connections diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index 
5b7726800d0..2941ecbfb87 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -121,6 +121,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) strcmp(def->defname, "parallel_commit") == 0 || strcmp(def->defname, "parallel_abort") == 0 || strcmp(def->defname, "fetch_stats") == 0 || + strcmp(def->defname, "remote_analyze") == 0 || strcmp(def->defname, "keep_connections") == 0) { /* these accept only boolean values */ @@ -283,6 +284,10 @@ InitPgFdwOptions(void) {"fetch_stats", ForeignServerRelationId, false}, {"fetch_stats", ForeignTableRelationId, false}, + /* remote_analyze is available on both server and table */ + {"remote_analyze", ForeignServerRelationId, false}, + {"remote_analyze", ForeignTableRelationId, false}, + /* * sslcert and sslkey are in fact libpq options, but we repeat them * here to allow them to appear in both foreign server context (when diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index c76ca2fc69b..5f79ea57e73 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5319,6 +5319,36 @@ import_cleanup: return ok; } +/* + * Analyze a remote table. + */ +static bool +analyze_remote_table(PGconn *conn, const char *remote_schemaname, + const char *remote_relname) +{ + StringInfoData buf; + PGresult *res; + bool ok = true; + + initStringInfo(&buf); + + appendStringInfo(&buf, "ANALYZE %s", + quote_qualified_identifier(remote_schemaname, remote_relname)); + + res = pgfdw_exec_query(conn, buf.data, NULL); + + if (res == NULL || + PQresultStatus(res) != PGRES_COMMAND_OK) + { + pgfdw_report(WARNING, res, conn, buf.data); + ok = false; + } + + PQclear(res); + pfree(buf.data); + return ok; +} + /* * Attempt to fetch remote relations stats. */ @@ -5482,6 +5512,33 @@ match_attrmap(PGresult *res, const char *remote_schemaname, return true; } +/* + * Convenience routine to test if the relstats result set shows that the + * relation has tuples. 
+ */ +static +bool +has_tuples(PGresult *relstats) +{ + char *reltuples; + + reltuples = PQgetvalue(relstats, 0, RELSTATS_RELTUPLES); + + /* + * If the reltuples value > 0, then we can expect to find attribute + * stats for the table. + * + * A reltuples value of -1 means the table has never been analyzed (v14+). + * + * In versions prior to v14, a value of 0 was ambiguous, it could mean + * that the table had never been analyzed, or that it was empty at the + * time that it was analyzed. Either way, we wouldn't expect to find + * attstats for the relation. + */ + return ((strcmp(reltuples, "0") != 0) && + (strcmp(reltuples, "-1") != 0)); +} + /* * Attempt to fetch statistics from a remote server. */ @@ -5492,13 +5549,13 @@ fetch_remote_statistics(PGconn *conn, int server_version_num, int natts, RemoteAttributeMapping * remattrmap, const char *column_list, - RemoteStatsResults * remstats) + bool remote_analyze, RemoteStatsResults * remstats) { PGresult *attstats = NULL; PGresult *relstats = NULL; char relkind; - char *reltuples; + bool has_stats = false; relstats = fetch_relstats(conn, remote_schemaname, remote_relname); @@ -5527,34 +5584,51 @@ fetch_remote_statistics(PGconn *conn, goto cleanup; } + if (has_tuples(relstats)) + { + /* See if the table actually has attribute stats. 
*/ + attstats = fetch_attstats(conn, server_version_num, remote_schemaname, + remote_relname, column_list); + + if ((PQntuples(attstats) > 0) && + match_attrmap(attstats, remote_schemaname, remote_relname, + natts, remattrmap)) + has_stats = true; + } + + if (!has_stats) + { + /* If no second chance, skip to reporting no stats found */ + if (!remote_analyze) + goto notfound; + + ereport(LOG, + errmsg("attempting remote analyze of table %s.%s", + remote_schemaname, remote_relname)); + PQclear(attstats); + PQclear(relstats); + + if (!analyze_remote_table(conn, remote_schemaname, remote_relname)) + goto cleanup; + + relstats = fetch_relstats(conn, remote_schemaname, remote_relname); + if (!has_tuples(relstats)) + goto notfound; + + attstats = fetch_attstats(conn, server_version_num, remote_schemaname, + remote_relname, column_list); + + if (PQntuples(attstats) == 0) + goto notfound; + + if (!match_attrmap(attstats, remote_schemaname, remote_relname, + natts, remattrmap)) + goto cleanup; + } + /* - * If the reltuples value > 0, then then we can expect to find attribute - * stats for the table. - * - * A reltuples value of -1 means the table has never been analyzed (v14+). - * - * In versions prior to v14, a value of 0 was ambiguous, it could mean - * that the table had never been analyzed, or that it was empty at the - * time that it was analyzed. Either way, we wouldn't expect to find - * attstats for the relation. + * We found stats, on either the first try or the second. */ - reltuples = PQgetvalue(relstats, 0, RELSTATS_RELTUPLES); - if ((strcmp(reltuples, "0") == 0) || - (strcmp(reltuples, "-1") == 0)) - goto notfound; - - /* See if it actually has any attribute stats. */ - attstats = fetch_attstats(conn, server_version_num, remote_schemaname, - remote_relname, column_list); - - if (PQntuples(attstats) == 0) - goto notfound; - - /* Reject the stats if any are missing or in excess. 
*/ - if (!match_attrmap(attstats, remote_schemaname, remote_relname, natts, - remattrmap)) - goto cleanup; - remstats->rel = relstats; remstats->att = attstats; return true; @@ -5712,9 +5786,11 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) { ForeignTable *table; + ForeignServer *server; UserMapping *user; PGconn *conn; ListCell *lc; + bool remote_analyze = false; int server_version_num = 0; const char *schemaname = NULL; const char *relname = NULL; @@ -5730,12 +5806,25 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) RemoteStatsResults remstats = {.rel = NULL,.att = NULL}; table = GetForeignTable(RelationGetRelid(relation)); + server = GetForeignServer(table->serverid); user = GetUserMapping(GetUserId(), table->serverid); conn = GetConnection(user, false, NULL); server_version_num = PQserverVersion(conn); schemaname = get_namespace_name(RelationGetNamespace(relation)); relname = RelationGetRelationName(relation); + /* + * Server-level options can be overridden by table-level options, so check + * server-level first. 
+ */ + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); + } + foreach(lc, table->options) { DefElem *def = (DefElem *) lfirst(lc); @@ -5744,6 +5833,8 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) remote_schemaname = defGetString(def); else if (strcmp(def->defname, "table_name") == 0) remote_relname = defGetString(def); + else if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); } /* @@ -5822,7 +5913,8 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) ok = fetch_remote_statistics(conn, remote_schemaname, remote_relname, server_version_num, natts, remattrmap, - column_list.data, &remstats); + column_list.data, remote_analyze, + &remstats); ReleaseConnection(conn); pfree(column_list.data); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index a0f52e57dbc..3390f6e5ec1 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -4412,6 +4412,40 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; +-- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); + +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', + fetch_stats 'true', + remote_analyze 'true'); + +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +ANALYZE remote_analyze_ftable; 
+ +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; + -- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== -- 2.53.0