diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 5aa3455e30..bebac2f2e0 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -56,6 +56,7 @@ #include "utils/rel.h" #include "utils/syscache.h" #include "utils/typcache.h" +#include "commands/tablecmds.h" /* * Global context for foreign_expr_walker's search of an expression tree. @@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) deparseRelation(buf, rel); } +/* + * Construct a simple "TRUNCATE rel" statement + */ +void +deparseTruncateSql(StringInfo buf, + List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs) +{ + ListCell *lc1, *lc2; + + appendStringInfoString(buf, "TRUNCATE "); + forboth (lc1, frels_list, + lc2, frels_extra) + { + Relation frel = lfirst(lc1); + int extra = lfirst_int(lc2); + + if (lc1 != list_head(frels_list)) + appendStringInfoString(buf, ", "); + if (extra & TRUNCATE_REL_CONTEXT_ONLY) + appendStringInfoString(buf, "ONLY "); + deparseRelation(buf, frel); + } + appendStringInfo(buf, " %s IDENTITY", + restart_seqs ? "RESTART" : "CONTINUE"); + switch (behavior) + { + case DROP_RESTRICT: + appendStringInfoString(buf, " RESTRICT"); + break; + case DROP_CASCADE: + appendStringInfoString(buf, " CASCADE"); + break; + } +} + /* * Construct name to use for given column, and emit it into buf. * If it has a column_name FDW option, use that instead of attribute name. diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 59e4e27ffb..45bb03f56d 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -8215,6 +8215,243 @@ select * from rem3; drop foreign table rem3; drop table loc3; -- =================================================================== +-- test for TRUNCATE +-- =================================================================== +CREATE TABLE tru_rtable0 (id int primary key, x text); +CREATE TABLE tru_rtable1 (id int primary key, y text); +CREATE FOREIGN TABLE tru_ftable (id int, x text) + SERVER loopback OPTIONS (table_name 'tru_rtable0'); +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x); +CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id); +CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable + FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable + FOR VALUES WITH (MODULUS 2, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'tru_rtable1'); +INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x); +CREATE TABLE tru_pk_table(id int primary key, z text); +CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id)); +INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x); +INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x); +CREATE FOREIGN TABLE tru_pk_ftable (id int, z text) + SERVER loopback OPTIONS (table_name 'tru_pk_table'); +CREATE TABLE tru_rtable_parent (id int, a text); +CREATE TABLE tru_rtable_child (id int, a text); +CREATE FOREIGN TABLE tru_ftable_parent (id int, a text) + SERVER loopback OPTIONS (table_name 'tru_rtable_parent'); +CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent) + SERVER loopback OPTIONS (table_name 'tru_rtable_child'); +INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x); +INSERT INTO tru_rtable_child (SELECT x, md5(x::text) FROM generate_series(10, 18) x); +-- normal truncate +SELECT * FROM tru_ftable ORDER BY id; + id | x +----+---------------------------------- + 1 | c4ca4238a0b923820dcc509a6f75849b + 2 | c81e728d9d4c2f636f067f89cc14862c + 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 + 4 | a87ff679a2f3e71d9181a67b7542122c + 5 | e4da3b7fbbce2345d7772b0674a318d5 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 7 | 8f14e45fceea167a5a36dedd4bea2543 + 8 | c9f0f895fb98ab9159f51fd0297e236d + 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 10 | d3d9446802a44259755d38e6d163e820 +(10 rows) + +TRUNCATE tru_ftable; +SELECT * FROM tru_rtable0; -- empty + id | x +----+--- +(0 rows) + +SELECT * FROM tru_ftable; -- empty + id | x +----+--- +(0 rows) + +-- 'truncatable' option +ALTER SERVER loopback OPTIONS (ADD truncatable 'false'); +TRUNCATE tru_ftable; -- error +ERROR: truncate on "tru_ftable" is prohibited +ALTER SERVER loopback OPTIONS (DROP truncatable); +ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false'); +TRUNCATE tru_ftable; -- error +ERROR: truncate on "tru_ftable" is prohibited +ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true'); +TRUNCATE tru_ftable; -- accepted +-- partition table mixtured by table and foreign table +SELECT * FROM tru_ptable ORDER BY id; + id | y +----+---------------------------------- + 11 | 6512bd43d9caa6e02c990b0a82652dca + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 14 | aab3238922bcc25a6f606eb525ffdc56 + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 16 | c74d97b01eae257e44aa9d5bade97baf + 17 | 70efdf2ec9b086079795c442636b55fb + 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 19 | 1f0e3dad99908345f7439f8ffabdffc4 + 20 | 98f13708210194c475687be6106a3b84 +(10 rows) + +TRUNCATE tru_ptable; +SELECT * FROM tru_ptable; -- empty + id | y +----+--- +(0 rows) + +SELECT * FROM tru_ptable__p0; -- empty + id | y +----+--- +(0 rows) + +SELECT * FROM tru_ftable__p1; -- empty + id | y +----+--- +(0 rows) + +SELECT * FROM tru_rtable1; -- empty + id | y +----+--- +(0 rows) + +-- 'CASCADE' option +SELECT * FROM tru_pk_ftable ORDER BY id; + id | z +----+---------------------------------- + 1 | c81e728d9d4c2f636f067f89cc14862c + 2 | eccbc87e4b5ce2fe28308fd9f2a7baf3 + 3 | a87ff679a2f3e71d9181a67b7542122c + 4 | e4da3b7fbbce2345d7772b0674a318d5 + 5 | 1679091c5a880faf6fb5e6087eb1b2dc + 6 | 8f14e45fceea167a5a36dedd4bea2543 + 7 | c9f0f895fb98ab9159f51fd0297e236d + 8 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 9 | d3d9446802a44259755d38e6d163e820 + 10 | 6512bd43d9caa6e02c990b0a82652dca +(10 rows) + +TRUNCATE tru_pk_ftable; -- failed by FK reference +ERROR: cannot truncate a table referenced in a foreign key constraint +DETAIL: Table "tru_fk_table" references "tru_pk_table". +HINT: Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE. +CONTEXT: remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT +TRUNCATE tru_pk_ftable CASCADE; +SELECT * FROM tru_pk_ftable; -- empty + id | z +----+--- +(0 rows) + +SELECT * FROM tru_fk_table; -- also truncated + fkey +------ +(0 rows) + +-- truncate two tables at a command +INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x); +INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x); +SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id; + id | x | id | z +----+----------------------------------+----+---------------------------------- + 1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 | | + 2 | a87ff679a2f3e71d9181a67b7542122c | | + 3 | e4da3b7fbbce2345d7772b0674a318d5 | 3 | 1679091c5a880faf6fb5e6087eb1b2dc + 4 | 1679091c5a880faf6fb5e6087eb1b2dc | 4 | 8f14e45fceea167a5a36dedd4bea2543 + 5 | 8f14e45fceea167a5a36dedd4bea2543 | 5 | c9f0f895fb98ab9159f51fd0297e236d + 6 | c9f0f895fb98ab9159f51fd0297e236d | 6 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 | 7 | d3d9446802a44259755d38e6d163e820 + 8 | d3d9446802a44259755d38e6d163e820 | 8 | 6512bd43d9caa6e02c990b0a82652dca + | | 9 | c20ad4d76fe97759aa27a0c99bff6710 + | | 10 | c51ce410c124a10e0db5e4b97fc2af39 +(10 rows) + +TRUNCATE tru_ftable, tru_pk_ftable CASCADE; +SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id; -- empty + id | x | id | z +----+---+----+--- +(0 rows) + +-- truncate with ONLY clause +TRUNCATE ONLY tru_ftable_parent; +SELECT * FROM tru_ftable_parent ORDER BY id; + id | a +----+---------------------------------- + 10 | d3d9446802a44259755d38e6d163e820 + 11 | 6512bd43d9caa6e02c990b0a82652dca + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 14 | aab3238922bcc25a6f606eb525ffdc56 + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 16 | c74d97b01eae257e44aa9d5bade97baf + 17 | 70efdf2ec9b086079795c442636b55fb + 18 | 6f4922f45568161a8cdf4ad2299f6d23 +(9 rows) + +TRUNCATE tru_ftable_parent; +SELECT * FROM tru_ftable_parent; -- empty + id | a +----+--- +(0 rows) + +-- in case when remote table has inherited children +CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0); +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x); +INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x); +SELECT * FROM tru_ftable ORDER BY id; + id | x +----+---------------------------------- + 5 | e4da3b7fbbce2345d7772b0674a318d5 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 7 | 8f14e45fceea167a5a36dedd4bea2543 + 8 | c9f0f895fb98ab9159f51fd0297e236d + 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 10 | d3d9446802a44259755d38e6d163e820 + 11 | 6512bd43d9caa6e02c990b0a82652dca + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 14 | aab3238922bcc25a6f606eb525ffdc56 +(10 rows) + +TRUNCATE ONLY tru_ftable; -- truncate only parent portion +SELECT * FROM tru_ftable ORDER BY id; + id | x +----+---------------------------------- + 10 | d3d9446802a44259755d38e6d163e820 + 11 | 6512bd43d9caa6e02c990b0a82652dca + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 14 | aab3238922bcc25a6f606eb525ffdc56 +(5 rows) + +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x); +SELECT * FROM tru_ftable; + id | x +----+---------------------------------- + 21 | 3c59dc048e8850243be8079a5c74d079 + 22 | b6d767d2f8ed5d21a44b0e5886680cb9 + 23 | 37693cfc748049e45d87b8c7d8b9aacd + 24 | 1ff1de774005f8da13f42943881c655f + 25 | 8e296a067a37563370ded05f5a3bf3ec + 10 | d3d9446802a44259755d38e6d163e820 + 11 | 6512bd43d9caa6e02c990b0a82652dca + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 14 | aab3238922bcc25a6f606eb525ffdc56 +(10 rows) + +TRUNCATE tru_ftable; -- truncate both of parent and child +SELECT * FROM tru_ftable; -- empty + id | x +----+--- +(0 rows) + +-- cleanup +DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable; +DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, +tru_rtable_parent,tru_rtable_child, tru_rtable0_child; +-- =================================================================== -- test IMPORT FOREIGN SCHEMA -- =================================================================== CREATE SCHEMA import_source; @@ -8913,7 +9150,7 @@ DO $d$ END; $d$; ERROR: invalid option "password" -HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections +HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections CONTEXT: SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')" PL/pgSQL function inline_code_block line 3 at EXECUTE -- If we add a password for our user mapping instead, we should get a different diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index f1d0c8bd41..fc196fabc0 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) if (strcmp(def->defname, "use_remote_estimate") == 0 || strcmp(def->defname, "updatable") == 0 || strcmp(def->defname, "async_capable") == 0 || + strcmp(def->defname, "truncatable") == 0 || strcmp(def->defname, "keep_connections") == 0) { /* these accept only boolean values */ @@ -213,6 +214,9 @@ InitPgFdwOptions(void) /* updatable is available on both server and table */ {"updatable", ForeignServerRelationId, false}, {"updatable", ForeignTableRelationId, false}, + /* truncatable is available on both server and table */ + {"truncatable", ForeignServerRelationId, false}, + {"truncatable", ForeignTableRelationId, false}, /* fetch_size is available on both server and table */ {"fetch_size", ForeignServerRelationId, false}, {"fetch_size", ForeignTableRelationId, false}, diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 16c2979f2d..6fa20834a4 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate, ExplainState *es); static void postgresExplainDirectModify(ForeignScanState *node, ExplainState *es); +static void postgresExecForeignTruncate(List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs); static bool postgresAnalyzeForeignTable(Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); @@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) routine->ExplainForeignModify = postgresExplainForeignModify; routine->ExplainDirectModify = postgresExplainDirectModify; + /* Support function for TRUNCATE */ + routine->ExecForeignTruncate = postgresExecForeignTruncate; + /* Support functions for ANALYZE */ routine->AnalyzeForeignTable = postgresAnalyzeForeignTable; @@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es) } } +/* + * postgresExecForeignTruncate + * It propagates TRUNCATE command to the remote host inside of the + * transaction block. + */ +static void +postgresExecForeignTruncate(List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs) +{ + Oid server_id = InvalidOid; + ForeignServer *serv = NULL; + UserMapping *user = NULL; + PGconn *conn = NULL; + PGresult *res; + StringInfoData sql; + ListCell *lc; + bool server_truncatable = true; + + /* pick up remote connection, and sanity checks */ + foreach (lc, frels_list) + { + Relation frel = lfirst(lc); + Oid frel_oid = RelationGetRelid(frel); + ForeignTable *ft = GetForeignTable(frel_oid); + ListCell *cell; + bool truncatable; + + if (!OidIsValid(server_id)) + { + server_id = GetForeignServerIdByRelId(frel_oid); + serv = GetForeignServer(server_id); + user = GetUserMapping(GetUserId(), server_id); + conn = GetConnection(user, false,NULL); + + foreach (cell, serv->options) + { + DefElem *defel = (DefElem *) lfirst(cell); + + if (strcmp(defel->defname, "truncatable") == 0) + { + server_truncatable = defGetBoolean(defel); + break; + } + } + } + else + { + /* postgresExecForeignTruncate() is invoked for each server */ + Assert(server_id == GetForeignServerIdByRelId(frel_oid)); + } + + /* ensure the target foreign table is truncatable */ + truncatable = server_truncatable; + foreach (cell, ft->options) + { + DefElem *defel = (DefElem *) lfirst(cell); + + if (strcmp(defel->defname, "truncatable") == 0) + { + truncatable = defGetBoolean(defel); + break; + } + } + if (!truncatable) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("truncate on \"%s\" is prohibited", + RelationGetRelationName(frel)))); + } + /* set up remote query */ + initStringInfo(&sql); + deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs); + /* run remote query */ + if (!PQsendQuery(conn, sql.data)) + pgfdw_report_error(ERROR, NULL, conn, false, sql.data); + res = pgfdw_get_result(conn, sql.data); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pgfdw_report_error(ERROR, res, conn, true, sql.data); + /* clean-up */ + PQclear(res); + pfree(sql.data); +} /* * estimate_path_cost_size diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 88d94da6f6..68599892f8 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root, extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel); extern void deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs); +extern void deparseTruncateSql(StringInfo buf, + List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs); extern void deparseStringLiteral(StringInfo buf, const char *val); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); extern Expr *find_em_expr_for_input_target(PlannerInfo *root, diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 107d1c0e03..4e620f67ff 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2351,6 +2351,101 @@ select * from rem3; drop foreign table rem3; drop table loc3; +-- =================================================================== +-- test for TRUNCATE +-- =================================================================== +CREATE TABLE tru_rtable0 (id int primary key, x text); +CREATE TABLE tru_rtable1 (id int primary key, y text); +CREATE FOREIGN TABLE tru_ftable (id int, x text) + SERVER loopback OPTIONS (table_name 'tru_rtable0'); +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x); + +CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id); +CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable + FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable + FOR VALUES WITH (MODULUS 2, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'tru_rtable1'); +INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x); + +CREATE TABLE tru_pk_table(id int primary key, z text); +CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id)); +INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x); +INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x); +CREATE FOREIGN TABLE tru_pk_ftable (id int, z text) + SERVER loopback OPTIONS (table_name 'tru_pk_table'); + +CREATE TABLE tru_rtable_parent (id int, a text); +CREATE TABLE tru_rtable_child (id int, a text); +CREATE FOREIGN TABLE tru_ftable_parent (id int, a text) + SERVER loopback OPTIONS (table_name 'tru_rtable_parent'); +CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent) + SERVER loopback OPTIONS (table_name 'tru_rtable_child'); +INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x); +INSERT INTO tru_rtable_child (SELECT x, md5(x::text) FROM generate_series(10, 18) x); + +-- normal truncate +SELECT * FROM tru_ftable ORDER BY id; +TRUNCATE tru_ftable; +SELECT * FROM tru_rtable0; -- empty +SELECT * FROM tru_ftable; -- empty + +-- 'truncatable' option +ALTER SERVER loopback OPTIONS (ADD truncatable 'false'); +TRUNCATE tru_ftable; -- error +ALTER SERVER loopback OPTIONS (DROP truncatable); +ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false'); +TRUNCATE tru_ftable; -- error +ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true'); +TRUNCATE tru_ftable; -- accepted + +-- partition table mixtured by table and foreign table +SELECT * FROM tru_ptable ORDER BY id; +TRUNCATE tru_ptable; +SELECT * FROM tru_ptable; -- empty +SELECT * FROM tru_ptable__p0; -- empty +SELECT * FROM tru_ftable__p1; -- empty +SELECT * FROM tru_rtable1; -- empty + +-- 'CASCADE' option +SELECT * FROM tru_pk_ftable ORDER BY id; +TRUNCATE tru_pk_ftable; -- failed by FK reference +TRUNCATE tru_pk_ftable CASCADE; +SELECT * FROM tru_pk_ftable; -- empty +SELECT * FROM tru_fk_table; -- also truncated + +-- truncate two tables at a command +INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x); +INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x); +SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id; +TRUNCATE tru_ftable, tru_pk_ftable CASCADE; +SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id; -- empty + +-- truncate with ONLY clause +TRUNCATE ONLY tru_ftable_parent; +SELECT * FROM tru_ftable_parent ORDER BY id; +TRUNCATE tru_ftable_parent; +SELECT * FROM tru_ftable_parent; -- empty + +-- in case when remote table has inherited children +CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0); +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x); +INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x); +SELECT * FROM tru_ftable ORDER BY id; + +TRUNCATE ONLY tru_ftable; -- truncate only parent portion +SELECT * FROM tru_ftable ORDER BY id; + +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x); +SELECT * FROM tru_ftable; +TRUNCATE tru_ftable; -- truncate both of parent and child +SELECT * FROM tru_ftable; -- empty + +-- cleanup +DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable; +DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, +tru_rtable_parent,tru_rtable_child, tru_rtable0_child; + -- =================================================================== -- test IMPORT FOREIGN SCHEMA -- =================================================================== diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 0f2397df49..97eea600cc 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node); + + FDW Routines for Truncate + +void +ExecForeignTruncate(List *frels_list, List *frels_extra, + DropBehavior behavior, bool restart_seqs); + + + Truncate a set of foreign tables defined by + frels_list belonging to the same foreign server. + This optional function is called during execution of + TRUNCATE for each foreign server involved + in one TRUNCATE command (note that invocations + are not per foreign table). + + frels_extra delivers extra information about + the context where the foreign tables are truncated. It is a list of integers and has same length with + frels_list. TRUNCATE_REL_CONTEXT_NORMAL means that + the foreign table is specified WITHOUT "ONLY" clause, and TRUNCATE_REL_CONTEXT_ONLY means + specified WITH "ONLY" clause. TRUNCATE_REL_CONTEXT_CASCADING + values means that foreign tables are not specified in the TRUNCATE, + but truncated due to dependency(like partition's child leaf). + + + + If the ExecForeignTruncate pointer is set to + NULL, attempts to truncate the foreign table will + fail with an error message. + + + + behavior defines how foreign tables should + be truncated, using as possible values DROP_RESTRICT + and DROP_CASCADE (to map with the equivalents of + TRUNCATE). + + + + restart_seqs is set to true + if RESTART IDENTITY was supplied in the + TRUNCATE. + + + FDW Routines for Row Locking diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index a7c695b000..1092160603 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -63,9 +63,10 @@ Now you need only SELECT from a foreign table to access the data stored in its underlying remote table. You can also modify - the remote table using INSERT, UPDATE, or - DELETE. (Of course, the remote user you have specified - in your user mapping must have privileges to do these things.) + the remote table using INSERT, UPDATE, + DELETE, or TRUNCATE. + (Of course, the remote user you have specified in your user mapping must + have privileges to do these things.) @@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false'); By default all foreign tables using postgres_fdw are assumed - to be updatable. This may be overridden using the following option: + to be updatable and truncatable. This may be overridden using the following options: @@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false'); + + truncatable + + + This option controls whether postgres_fdw allows + foreign tables to be truncated using TRUNCATE + command. It can be specified for a foreign table or a foreign server. + A table-level option overrides a server-level option. + The default is true. + + + Pay attention for the case when a foreign table maps remote table + that has inherited children or partition leafs. + TRUNCATE specifies the foreign tables with + ONLY clause, remove queries over the + postgres_fdw also specify remote tables with + ONLY clause, that will truncate only parent + portion of the remote table. In the results, it looks like + TRUNCATE command partially eliminated contents + of the foreign tables. + + + diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 0ba1e2d44d..01ec9b8b0a 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1970,9 +1970,7 @@ testdb=> - If you edit a file or the previous query, and you quit the editor without - modifying the file, the query buffer is cleared. - Otherwise, the new contents of the query buffer are re-parsed according to + The new contents of the query buffer are then re-parsed according to the normal rules of psql, treating the whole buffer as a single line. Any complete queries are immediately executed; that is, if the query buffer contains or ends with a @@ -2041,8 +2039,7 @@ Tue Oct 26 21:40:57 CEST 1999 in the form of a CREATE OR REPLACE FUNCTION or CREATE OR REPLACE PROCEDURE command. Editing is done in the same way as for \edit. - If you quit the editor without saving, the statement is discarded. - If you save and exit the editor, the updated command is executed immediately + After the editor exits, the updated command is executed immediately if you added a semicolon to it. Otherwise it is redisplayed; type semicolon or \g to send it, or \r to cancel. @@ -2118,8 +2115,7 @@ Tue Oct 26 21:40:57 CEST 1999 This command fetches and edits the definition of the named view, in the form of a CREATE OR REPLACE VIEW command. Editing is done in the same way as for \edit. - If you quit the editor without saving, the statement is discarded. - If you save and exit the editor, the updated command is executed immediately + After the editor exits, the updated command is executed immediately if you added a semicolon to it. Otherwise it is redisplayed; type semicolon or \g to send it, or \r to cancel. diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml index 91cdac5562..d9f97561a5 100644 --- a/doc/src/sgml/ref/truncate.sgml +++ b/doc/src/sgml/ref/truncate.sgml @@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] name [ - TRUNCATE is not currently supported for foreign tables. - This implies that if a specified table has any descendant tables that are - foreign, the command will fail. + TRUNCATE can be used for foreign tables if + the foreign data wrapper supports, for instance, see . diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 174727b501..dce7088e6b 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -407,11 +407,8 @@ AuxiliaryProcessMain(int argc, char *argv[]) */ CreateAuxProcessResourceOwner(); - /* Initialize statistics reporting */ - pgstat_initialize(); - /* Initialize backend status information */ - pgstat_beinit(); + pgstat_initialize(); pgstat_bestart(); /* register a before-shutdown callback for LWLock cleanup */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 88a68a4697..05bcb12d1d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -59,6 +59,7 @@ #include "commands/typecmds.h" #include "commands/user.h" #include "executor/executor.h" +#include "foreign/fdwapi.h" #include "foreign/foreign.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -310,6 +311,21 @@ struct DropRelationCallbackState #define ATT_FOREIGN_TABLE 0x0020 #define ATT_PARTITIONED_INDEX 0x0040 +/* + * ForeignTruncateInfo + * + * Information related to truncation of foreign tables. This is used for + * the elements in a hash table. It uses the server OID as lookup key, + * and includes a per-server list of all foreign tables involved in the + * truncation. + */ +typedef struct ForeignTruncateInfo +{ + Oid server_oid; + List *frels_list; + List *frels_extra; +} ForeignTruncateInfo; + /* * Partition tables are expected to be dropped when the parent partitioned * table gets dropped. Hence for partitioning we use AUTO dependency. @@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, * Executes a TRUNCATE command. * * This is a multi-relation truncate. We first open and grab exclusive - * lock on all relations involved, checking permissions and otherwise - * verifying that the relation is OK for truncation. In CASCADE mode, + * lock on all relations involved, checking permissions (local database + * ACLs even if relations are foreign tables) and otherwise verifying + * that the relation is OK for truncation. In CASCADE mode, * relations having FK references to the targeted relations are automatically * added to the group; in RESTRICT mode, we check that all FK references are * internal to the group that's being truncated. Finally all the relations - * are truncated and reindexed. + * are truncated and reindexed. If any foreign tables are involved, + * its callback shall be invoked prior to the truncation of regular tables. */ void ExecuteTruncate(TruncateStmt *stmt) { List *rels = NIL; List *relids = NIL; + List *relids_extra = NIL; List *relids_logged = NIL; ListCell *cell; @@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt) rels = lappend(rels, rel); relids = lappend_oid(relids, myrelid); + relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY)); /* Log this relation only if needed for logical decoding */ - if (RelationIsLogicallyLogged(rel)) + if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) relids_logged = lappend_oid(relids_logged, myrelid); if (recurse) @@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt) rels = lappend(rels, rel); relids = lappend_oid(relids, childrelid); + relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING); /* Log this relation only if needed for logical decoding */ - if (RelationIsLogicallyLogged(rel)) + if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) relids_logged = lappend_oid(relids_logged, childrelid); } } @@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt) errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly."))); } - ExecuteTruncateGuts(rels, relids, relids_logged, + ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged, stmt->behavior, stmt->restart_seqs); /* And close the rels */ @@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt) * this information handy in this form. */ void -ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, +ExecuteTruncateGuts(List *explicit_rels, + List *relids, + List *relids_extra, + List *relids_logged, DropBehavior behavior, bool restart_seqs) { List *rels; List *seq_relids = NIL; + HTAB *ft_htab = NULL; EState *estate; ResultRelInfo *resultRelInfos; ResultRelInfo *resultRelInfo; SubTransactionId mySubid; ListCell *cell; + ListCell *lc1, *lc2; Oid *logrelids; /* @@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, truncate_check_activity(rel); rels = lappend(rels, rel); relids = lappend_oid(relids, relid); + relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING); /* Log this relation only if needed for logical decoding */ - if (RelationIsLogicallyLogged(rel)) + if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) relids_logged = lappend_oid(relids_logged, relid); } } @@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, */ mySubid = GetCurrentSubTransactionId(); - foreach(cell, rels) + Assert(list_length(rels) == list_length(relids_extra)); + forboth (lc1, rels, + lc2, relids_extra) { - Relation rel = (Relation) lfirst(cell); + Relation rel = (Relation) lfirst(lc1); + int extra = lfirst_int(lc2); /* Skip partitioned tables as there is nothing to do */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) continue; + /* + * If truncating a foreign table, the foreign data wrapper callback + * for TRUNCATE is called once for each server with a list of all the + * relations to process linked to this server. The list of relations + * for each server is saved as a single entry in a hash table that + * uses the server OID as lookup key. Once the full set of lists is + * built, all the entries of the hash table are scanned, and the list + * of relations associated with the server is passed down to the + * TRUNCATE callback of its foreign data wrapper. + */ + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + Oid frel_oid = RelationGetRelid(rel); + Oid server_oid = GetForeignServerIdByRelId(frel_oid); + bool found; + ForeignTruncateInfo *ft_info; + + /* if the hash table does not exist yet, initialize it */ + if (!ft_htab) + { + HASHCTL hctl; + + memset(&hctl, 0, sizeof(HASHCTL)); + hctl.keysize = sizeof(Oid); + hctl.entrysize = sizeof(ForeignTruncateInfo); + hctl.hcxt = CurrentMemoryContext; + + ft_htab = hash_create("TRUNCATE for Foreign Tables", + 32, /* start small and extend */ + &hctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + } + + /* + * Look after the entry of the server in the hash table, and + * initialize it if the entry does not exist yet. + */ + ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found); + if (!found) + { + ft_info->server_oid = server_oid; + ft_info->frels_list = NIL; + ft_info->frels_extra = NIL; + } + + /* save the relation in the list */ + ft_info->frels_list = lappend(ft_info->frels_list, rel); + ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra); + continue; + } + /* * Normally, we need a transaction-safe truncation here. However, if * the table was either created in the current (sub)transaction or has @@ -1938,6 +2019,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, pgstat_count_truncate(rel); } + /* + * Now go through the hash table, and process each entry associated to the + * servers involved in the TRUNCATE. + */ + if (ft_htab) + { + ForeignTruncateInfo *ft_info; + HASH_SEQ_STATUS seq; + + hash_seq_init(&seq, ft_htab); + + while ((ft_info = hash_seq_search(&seq)) != NULL) + { + FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid); + + /* truncate_check_rel() has checked that already */ + Assert(routine->ExecForeignTruncate != NULL); + + routine->ExecForeignTruncate(ft_info->frels_list, + ft_info->frels_extra, + behavior, + restart_seqs); + } + } + /* * Restart owned sequences if we were asked to. */ @@ -2023,12 +2129,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple) char *relname = NameStr(reltuple->relname); /* - * Only allow truncate on regular tables and partitioned tables (although, - * the latter are only being included here for the following checks; no - * physical truncation will occur in their case.) + * Only allow truncate on regular tables, foreign tables using foreign + * data wrappers supporting TRUNCATE and partitioned tables (although, the + * latter are only being included here for the following checks; no + * physical truncation will occur in their case.). */ - if (reltuple->relkind != RELKIND_RELATION && - reltuple->relkind != RELKIND_PARTITIONED_TABLE) + if (reltuple->relkind == RELKIND_FOREIGN_TABLE) + { + Oid server_id = GetForeignServerIdByRelId(relid); + FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id); + + if (!fdwroutine->ExecForeignTruncate) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot truncate foreign table \"%s\"", + relname))); + } + else if (reltuple->relkind != RELKIND_RELATION && + reltuple->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", relname))); diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 4c4b072068..498d6ee123 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -46,6 +46,7 @@ #include "libpq/pqsignal.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pg_trace.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "postmaster/fork_process.h" @@ -62,6 +63,8 @@ #include "storage/pg_shmem.h" #include "storage/proc.h" #include "storage/procsignal.h" +#include "storage/sinvaladt.h" +#include "utils/ascii.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -105,12 +108,26 @@ #define PGSTAT_FUNCTION_HASH_SIZE 512 +/* ---------- + * Total number of backends including auxiliary + * + * We reserve a slot for each possible BackendId, plus one for each + * possible auxiliary process type. (This scheme assumes there is not + * more than one of any auxiliary process type at a time.) MaxBackends + * includes autovacuum workers and background workers as well. + * ---------- + */ +#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES) + + /* ---------- * GUC parameters * ---------- */ +bool pgstat_track_activities = false; bool pgstat_track_counts = false; int pgstat_track_functions = TRACK_FUNC_OFF; +int pgstat_track_activity_query_size = 1024; /* ---------- * Built from GUC parameter @@ -242,8 +259,8 @@ static int pgStatXactCommit = 0; static int pgStatXactRollback = 0; PgStat_Counter pgStatBlockReadTime = 0; PgStat_Counter pgStatBlockWriteTime = 0; -PgStat_Counter pgStatActiveTime = 0; -PgStat_Counter pgStatTransactionIdleTime = 0; +static PgStat_Counter pgStatActiveTime = 0; +static PgStat_Counter pgStatTransactionIdleTime = 0; SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL; /* Record that's written to 2PC state file when pgstat state is persisted */ @@ -266,6 +283,12 @@ typedef struct TwoPhasePgStatRecord static MemoryContext pgStatLocalContext = NULL; static HTAB *pgStatDBHash = NULL; +/* Status for backends including auxiliary */ +static LocalPgBackendStatus *localBackendStatusTable = NULL; + +/* Total number of backends including auxiliary */ +static int localNumBackends = 0; + /* * Cluster wide statistics, kept in the stats collector. * Contains statistics that are not collected per database @@ -302,6 +325,7 @@ static pid_t pgstat_forkexec(void); #endif NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn(); +static void pgstat_beshutdown_hook(int code, Datum arg); static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create); static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, @@ -311,6 +335,7 @@ static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanen static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep); static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent); static void backend_read_statsfile(void); +static void pgstat_read_current_status(void); static bool pgstat_write_statsfile_needed(void); static bool pgstat_db_requested(Oid databaseid); @@ -2732,6 +2757,65 @@ pgstat_fetch_stat_funcentry(Oid func_id) } +/* ---------- + * pgstat_fetch_stat_beentry() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * our local copy of the current-activity entry for one backend. + * + * NB: caller is responsible for a check if the user is permitted to see + * this info (especially the querystring). + * ---------- + */ +PgBackendStatus * +pgstat_fetch_stat_beentry(int beid) +{ + pgstat_read_current_status(); + + if (beid < 1 || beid > localNumBackends) + return NULL; + + return &localBackendStatusTable[beid - 1].backendStatus; +} + + +/* ---------- + * pgstat_fetch_stat_local_beentry() - + * + * Like pgstat_fetch_stat_beentry() but with locally computed additions (like + * xid and xmin values of the backend) + * + * NB: caller is responsible for a check if the user is permitted to see + * this info (especially the querystring). + * ---------- + */ +LocalPgBackendStatus * +pgstat_fetch_stat_local_beentry(int beid) +{ + pgstat_read_current_status(); + + if (beid < 1 || beid > localNumBackends) + return NULL; + + return &localBackendStatusTable[beid - 1]; +} + + +/* ---------- + * pgstat_fetch_stat_numbackends() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * the maximum current backend id. + * ---------- + */ +int +pgstat_fetch_stat_numbackends(void) +{ + pgstat_read_current_status(); + + return localNumBackends; +} + /* * --------- * pgstat_fetch_stat_archiver() - @@ -2815,16 +2899,421 @@ pgstat_fetch_replslot(int *nslots_p) return replSlotStats; } +/* ------------------------------------------------------------ + * Functions for management of the shared-memory PgBackendStatus array + * ------------------------------------------------------------ + */ + +static PgBackendStatus *BackendStatusArray = NULL; +static PgBackendStatus *MyBEEntry = NULL; +static char *BackendAppnameBuffer = NULL; +static char *BackendClientHostnameBuffer = NULL; +static char *BackendActivityBuffer = NULL; +static Size BackendActivityBufferSize = 0; +#ifdef USE_SSL +static PgBackendSSLStatus *BackendSslStatusBuffer = NULL; +#endif +#ifdef ENABLE_GSS +static PgBackendGSSStatus *BackendGssStatusBuffer = NULL; +#endif + + +/* + * Report shared-memory space needed by CreateSharedBackendStatus. + */ +Size +BackendStatusShmemSize(void) +{ + Size size; + + /* BackendStatusArray: */ + size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); + /* BackendAppnameBuffer: */ + size = add_size(size, + mul_size(NAMEDATALEN, NumBackendStatSlots)); + /* BackendClientHostnameBuffer: */ + size = add_size(size, + mul_size(NAMEDATALEN, NumBackendStatSlots)); + /* BackendActivityBuffer: */ + size = add_size(size, + mul_size(pgstat_track_activity_query_size, NumBackendStatSlots)); +#ifdef USE_SSL + /* BackendSslStatusBuffer: */ + size = add_size(size, + mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots)); +#endif +#ifdef ENABLE_GSS + /* BackendGssStatusBuffer: */ + size = add_size(size, + mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots)); +#endif + return size; +} + +/* + * Initialize the shared status array and several string buffers + * during postmaster startup. + */ +void +CreateSharedBackendStatus(void) +{ + Size size; + bool found; + int i; + char *buffer; + + /* Create or attach to the shared array */ + size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); + BackendStatusArray = (PgBackendStatus *) + ShmemInitStruct("Backend Status Array", size, &found); + + if (!found) + { + /* + * We're the first - initialize. + */ + MemSet(BackendStatusArray, 0, size); + } + + /* Create or attach to the shared appname buffer */ + size = mul_size(NAMEDATALEN, NumBackendStatSlots); + BackendAppnameBuffer = (char *) + ShmemInitStruct("Backend Application Name Buffer", size, &found); + + if (!found) + { + MemSet(BackendAppnameBuffer, 0, size); + + /* Initialize st_appname pointers. */ + buffer = BackendAppnameBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_appname = buffer; + buffer += NAMEDATALEN; + } + } + + /* Create or attach to the shared client hostname buffer */ + size = mul_size(NAMEDATALEN, NumBackendStatSlots); + BackendClientHostnameBuffer = (char *) + ShmemInitStruct("Backend Client Host Name Buffer", size, &found); + + if (!found) + { + MemSet(BackendClientHostnameBuffer, 0, size); + + /* Initialize st_clienthostname pointers. */ + buffer = BackendClientHostnameBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_clienthostname = buffer; + buffer += NAMEDATALEN; + } + } + + /* Create or attach to the shared activity buffer */ + BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size, + NumBackendStatSlots); + BackendActivityBuffer = (char *) + ShmemInitStruct("Backend Activity Buffer", + BackendActivityBufferSize, + &found); + + if (!found) + { + MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize); + + /* Initialize st_activity pointers. */ + buffer = BackendActivityBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_activity_raw = buffer; + buffer += pgstat_track_activity_query_size; + } + } + +#ifdef USE_SSL + /* Create or attach to the shared SSL status buffer */ + size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots); + BackendSslStatusBuffer = (PgBackendSSLStatus *) + ShmemInitStruct("Backend SSL Status Buffer", size, &found); + + if (!found) + { + PgBackendSSLStatus *ptr; + + MemSet(BackendSslStatusBuffer, 0, size); + + /* Initialize st_sslstatus pointers. */ + ptr = BackendSslStatusBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_sslstatus = ptr; + ptr++; + } + } +#endif + +#ifdef ENABLE_GSS + /* Create or attach to the shared GSSAPI status buffer */ + size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots); + BackendGssStatusBuffer = (PgBackendGSSStatus *) + ShmemInitStruct("Backend GSS Status Buffer", size, &found); + + if (!found) + { + PgBackendGSSStatus *ptr; + + MemSet(BackendGssStatusBuffer, 0, size); + + /* Initialize st_gssstatus pointers. */ + ptr = BackendGssStatusBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_gssstatus = ptr; + ptr++; + } + } +#endif +} + + +/* ---------- + * pgstat_initialize() - + * + * Initialize pgstats state, and set up our on-proc-exit hook. + * Called from InitPostgres and AuxiliaryProcessMain. For auxiliary process, + * MyBackendId is invalid. Otherwise, MyBackendId must be set, + * but we must not have started any transaction yet (since the + * exit hook must run after the last transaction exit). + * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. + * ---------- + */ +void +pgstat_initialize(void) +{ + /* Initialize MyBEEntry */ + if (MyBackendId != InvalidBackendId) + { + Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends); + MyBEEntry = &BackendStatusArray[MyBackendId - 1]; + } + else + { + /* Must be an auxiliary process */ + Assert(MyAuxProcType != NotAnAuxProcess); + + /* + * Assign the MyBEEntry for an auxiliary process. Since it doesn't + * have a BackendId, the slot is statically allocated based on the + * auxiliary process type (MyAuxProcType). Backends use slots indexed + * in the range from 1 to MaxBackends (inclusive), so we use + * MaxBackends + AuxBackendType + 1 as the index of the slot for an + * auxiliary process. + */ + MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType]; + } + + /* + * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can + * calculate how much pgWalUsage counters are increased by substracting + * prevWalUsage from pgWalUsage. + */ + prevWalUsage = pgWalUsage; + + /* Set up a process-exit hook to clean up */ + on_shmem_exit(pgstat_beshutdown_hook, 0); +} + +/* ---------- + * pgstat_bestart() - + * + * Initialize this backend's entry in the PgBackendStatus array. + * Called from InitPostgres. + * + * Apart from auxiliary processes, MyBackendId, MyDatabaseId, + * session userid, and application_name must be set for a + * backend (hence, this cannot be combined with pgstat_initialize). + * Note also that we must be inside a transaction if this isn't an aux + * process, as we may need to do encoding conversion on some strings. + * ---------- + */ +void +pgstat_bestart(void) +{ + volatile PgBackendStatus *vbeentry = MyBEEntry; + PgBackendStatus lbeentry; +#ifdef USE_SSL + PgBackendSSLStatus lsslstatus; +#endif +#ifdef ENABLE_GSS + PgBackendGSSStatus lgssstatus; +#endif + + /* pgstats state must be initialized from pgstat_initialize() */ + Assert(vbeentry != NULL); + + /* + * To minimize the time spent modifying the PgBackendStatus entry, and + * avoid risk of errors inside the critical section, we first copy the + * shared-memory struct to a local variable, then modify the data in the + * local variable, then copy the local variable back to shared memory. + * Only the last step has to be inside the critical section. + * + * Most of the data we copy from shared memory is just going to be + * overwritten, but the struct's not so large that it's worth the + * maintenance hassle to copy only the needful fields. + */ + memcpy(&lbeentry, + unvolatize(PgBackendStatus *, vbeentry), + sizeof(PgBackendStatus)); + + /* These structs can just start from zeroes each time, though */ +#ifdef USE_SSL + memset(&lsslstatus, 0, sizeof(lsslstatus)); +#endif +#ifdef ENABLE_GSS + memset(&lgssstatus, 0, sizeof(lgssstatus)); +#endif + + /* + * Now fill in all the fields of lbeentry, except for strings that are + * out-of-line data. Those have to be handled separately, below. + */ + lbeentry.st_procpid = MyProcPid; + lbeentry.st_backendType = MyBackendType; + lbeentry.st_proc_start_timestamp = MyStartTimestamp; + lbeentry.st_activity_start_timestamp = 0; + lbeentry.st_state_start_timestamp = 0; + lbeentry.st_xact_start_timestamp = 0; + lbeentry.st_databaseid = MyDatabaseId; + + /* We have userid for client-backends, wal-sender and bgworker processes */ + if (lbeentry.st_backendType == B_BACKEND + || lbeentry.st_backendType == B_WAL_SENDER + || lbeentry.st_backendType == B_BG_WORKER) + lbeentry.st_userid = GetSessionUserId(); + else + lbeentry.st_userid = InvalidOid; + + /* + * We may not have a MyProcPort (eg, if this is the autovacuum process). + * If so, use all-zeroes client address, which is dealt with specially in + * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port. + */ + if (MyProcPort) + memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr, + sizeof(lbeentry.st_clientaddr)); + else + MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr)); + +#ifdef USE_SSL + if (MyProcPort && MyProcPort->ssl_in_use) + { + lbeentry.st_ssl = true; + lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort); + strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN); + strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN); + be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN); + be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN); + be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN); + } + else + { + lbeentry.st_ssl = false; + } +#else + lbeentry.st_ssl = false; +#endif + +#ifdef ENABLE_GSS + if (MyProcPort && MyProcPort->gss != NULL) + { + const char *princ = be_gssapi_get_princ(MyProcPort); + + lbeentry.st_gss = true; + lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort); + lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort); + if (princ) + strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN); + } + else + { + lbeentry.st_gss = false; + } +#else + lbeentry.st_gss = false; +#endif + + lbeentry.st_state = STATE_UNDEFINED; + lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; + lbeentry.st_progress_command_target = InvalidOid; + + /* + * we don't zero st_progress_param here to save cycles; nobody should + * examine it until st_progress_command has been set to something other + * than PROGRESS_COMMAND_INVALID + */ + + /* + * We're ready to enter the critical section that fills the shared-memory + * status entry. We follow the protocol of bumping st_changecount before + * and after; and make sure it's even afterwards. We use a volatile + * pointer here to ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry); + + /* make sure we'll memcpy the same st_changecount back */ + lbeentry.st_changecount = vbeentry->st_changecount; + + memcpy(unvolatize(PgBackendStatus *, vbeentry), + &lbeentry, + sizeof(PgBackendStatus)); + + /* + * We can write the out-of-line strings and structs using the pointers + * that are in lbeentry; this saves some de-volatilizing messiness. + */ + lbeentry.st_appname[0] = '\0'; + if (MyProcPort && MyProcPort->remote_hostname) + strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname, + NAMEDATALEN); + else + lbeentry.st_clienthostname[0] = '\0'; + lbeentry.st_activity_raw[0] = '\0'; + /* Also make sure the last byte in each string area is always 0 */ + lbeentry.st_appname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0'; + +#ifdef USE_SSL + memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus)); +#endif +#ifdef ENABLE_GSS + memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus)); +#endif + + PGSTAT_END_WRITE_ACTIVITY(vbeentry); + + /* Update app name to current GUC setting */ + if (application_name) + pgstat_report_appname(application_name); +} + /* * Shut down a single backend's statistics reporting at process exit. * * Flush any remaining statistics counts out to the collector. * Without this, operations triggered during backend exit (such as * temp table deletions) won't be counted. + * + * Lastly, clear out our entry in the PgBackendStatus array. */ static void -pgstat_shutdown_hook(int code, Datum arg) +pgstat_beshutdown_hook(int code, Datum arg) { + volatile PgBackendStatus *beentry = MyBEEntry; + /* * If we got as far as discovering our own database ID, we can report what * we did to the collector. Otherwise, we'd be sending an invalid @@ -2833,29 +3322,584 @@ pgstat_shutdown_hook(int code, Datum arg) */ if (OidIsValid(MyDatabaseId)) pgstat_report_stat(true); + + /* + * Clear my status entry, following the protocol of bumping st_changecount + * before and after. We use a volatile pointer here to ensure the + * compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_procpid = 0; /* mark invalid */ + + PGSTAT_END_WRITE_ACTIVITY(beentry); } + /* ---------- - * pgstat_initialize() - + * pgstat_report_activity() - * - * Initialize pgstats state, and set up our on-proc-exit hook. - * Called from InitPostgres and AuxiliaryProcessMain. + * Called from tcop/postgres.c to report what the backend is actually doing + * (but note cmd_str can be NULL for certain cases). * - * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. + * All updates of the status entry follow the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. * ---------- */ void -pgstat_initialize(void) +pgstat_report_activity(BackendState state, const char *cmd_str) { + volatile PgBackendStatus *beentry = MyBEEntry; + TimestampTz start_timestamp; + TimestampTz current_timestamp; + int len = 0; + + TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str); + + if (!beentry) + return; + + if (!pgstat_track_activities) + { + if (beentry->st_state != STATE_DISABLED) + { + volatile PGPROC *proc = MyProc; + + /* + * track_activities is disabled, but we last reported a + * non-disabled state. As our final update, change the state and + * clear fields we will not be updating anymore. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_state = STATE_DISABLED; + beentry->st_state_start_timestamp = 0; + beentry->st_activity_raw[0] = '\0'; + beentry->st_activity_start_timestamp = 0; + /* st_xact_start_timestamp and wait_event_info are also disabled */ + beentry->st_xact_start_timestamp = 0; + proc->wait_event_info = 0; + PGSTAT_END_WRITE_ACTIVITY(beentry); + } + return; + } + /* - * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can - * calculate how much pgWalUsage counters are increased by substracting - * prevWalUsage from pgWalUsage. + * To minimize the time spent modifying the entry, and avoid risk of + * errors inside the critical section, fetch all the needed data first. */ - prevWalUsage = pgWalUsage; + start_timestamp = GetCurrentStatementStartTimestamp(); + if (cmd_str != NULL) + { + /* + * Compute length of to-be-stored string unaware of multi-byte + * characters. For speed reasons that'll get corrected on read, rather + * than computed every write. + */ + len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1); + } + current_timestamp = GetCurrentTimestamp(); - /* Set up a process-exit hook to clean up */ - on_shmem_exit(pgstat_shutdown_hook, 0); + /* + * If the state has changed from "active" or "idle in transaction", + * calculate the duration. + */ + if ((beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH || + beentry->st_state == STATE_IDLEINTRANSACTION || + beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) && + state != beentry->st_state) + { + long secs; + int usecs; + + TimestampDifference(beentry->st_state_start_timestamp, + current_timestamp, + &secs, &usecs); + + if (beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH) + pgStatActiveTime += secs * 1000000 + usecs; + else + pgStatTransactionIdleTime += secs * 1000000 + usecs; + } + + /* + * Now update the status entry + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_state = state; + beentry->st_state_start_timestamp = current_timestamp; + + if (cmd_str != NULL) + { + memcpy((char *) beentry->st_activity_raw, cmd_str, len); + beentry->st_activity_raw[len] = '\0'; + beentry->st_activity_start_timestamp = start_timestamp; + } + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_start_command() - + * + * Set st_progress_command (and st_progress_command_target) in own backend + * entry. Also, zero-initialize st_progress_param array. + *----------- + */ +void +pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!beentry || !pgstat_track_activities) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_command = cmdtype; + beentry->st_progress_command_target = relid; + MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param)); + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_update_param() - + * + * Update index'th member in st_progress_param[] of own backend entry. + *----------- + */ +void +pgstat_progress_update_param(int index, int64 val) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM); + + if (!beentry || !pgstat_track_activities) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_param[index] = val; + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_update_multi_param() - + * + * Update multiple members in st_progress_param[] of own backend entry. + * This is atomic; readers won't see intermediate states. + *----------- + */ +void +pgstat_progress_update_multi_param(int nparam, const int *index, + const int64 *val) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + int i; + + if (!beentry || !pgstat_track_activities || nparam == 0) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + for (i = 0; i < nparam; ++i) + { + Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM); + + beentry->st_progress_param[index[i]] = val[i]; + } + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_end_command() - + * + * Reset st_progress_command (and st_progress_command_target) in own backend + * entry. This signals the end of the command. + *----------- + */ +void +pgstat_progress_end_command(void) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!beentry || !pgstat_track_activities) + return; + + if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_command = PROGRESS_COMMAND_INVALID; + beentry->st_progress_command_target = InvalidOid; + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* ---------- + * pgstat_report_appname() - + * + * Called to update our application name. + * ---------- + */ +void +pgstat_report_appname(const char *appname) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + int len; + + if (!beentry) + return; + + /* This should be unnecessary if GUC did its job, but be safe */ + len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1); + + /* + * Update my status entry, following the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + memcpy((char *) beentry->st_appname, appname, len); + beentry->st_appname[len] = '\0'; + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* + * Report current transaction start timestamp as the specified value. + * Zero means there is no active transaction. + */ +void +pgstat_report_xact_timestamp(TimestampTz tstamp) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!pgstat_track_activities || !beentry) + return; + + /* + * Update my status entry, following the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_xact_start_timestamp = tstamp; + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* ---------- + * pgstat_read_current_status() - + * + * Copy the current contents of the PgBackendStatus array to local memory, + * if not already done in this transaction. + * ---------- + */ +static void +pgstat_read_current_status(void) +{ + volatile PgBackendStatus *beentry; + LocalPgBackendStatus *localtable; + LocalPgBackendStatus *localentry; + char *localappname, + *localclienthostname, + *localactivity; +#ifdef USE_SSL + PgBackendSSLStatus *localsslstatus; +#endif +#ifdef ENABLE_GSS + PgBackendGSSStatus *localgssstatus; +#endif + int i; + + Assert(!pgStatRunningInCollector); + if (localBackendStatusTable) + return; /* already done */ + + pgstat_setup_memcxt(); + + /* + * Allocate storage for local copy of state data. We can presume that + * none of these requests overflow size_t, because we already calculated + * the same values using mul_size during shmem setup. However, with + * probably-silly values of pgstat_track_activity_query_size and + * max_connections, the localactivity buffer could exceed 1GB, so use + * "huge" allocation for that one. + */ + localtable = (LocalPgBackendStatus *) + MemoryContextAlloc(pgStatLocalContext, + sizeof(LocalPgBackendStatus) * NumBackendStatSlots); + localappname = (char *) + MemoryContextAlloc(pgStatLocalContext, + NAMEDATALEN * NumBackendStatSlots); + localclienthostname = (char *) + MemoryContextAlloc(pgStatLocalContext, + NAMEDATALEN * NumBackendStatSlots); + localactivity = (char *) + MemoryContextAllocHuge(pgStatLocalContext, + pgstat_track_activity_query_size * NumBackendStatSlots); +#ifdef USE_SSL + localsslstatus = (PgBackendSSLStatus *) + MemoryContextAlloc(pgStatLocalContext, + sizeof(PgBackendSSLStatus) * NumBackendStatSlots); +#endif +#ifdef ENABLE_GSS + localgssstatus = (PgBackendGSSStatus *) + MemoryContextAlloc(pgStatLocalContext, + sizeof(PgBackendGSSStatus) * NumBackendStatSlots); +#endif + + localNumBackends = 0; + + beentry = BackendStatusArray; + localentry = localtable; + for (i = 1; i <= NumBackendStatSlots; i++) + { + /* + * Follow the protocol of retrying if st_changecount changes while we + * copy the entry, or if it's odd. (The check for odd is needed to + * cover the case where we are able to completely copy the entry while + * the source backend is between increment steps.) We use a volatile + * pointer here to ensure the compiler doesn't try to get cute. + */ + for (;;) + { + int before_changecount; + int after_changecount; + + pgstat_begin_read_activity(beentry, before_changecount); + + localentry->backendStatus.st_procpid = beentry->st_procpid; + /* Skip all the data-copying work if entry is not in use */ + if (localentry->backendStatus.st_procpid > 0) + { + memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus)); + + /* + * For each PgBackendStatus field that is a pointer, copy the + * pointed-to data, then adjust the local copy of the pointer + * field to point at the local copy of the data. + * + * strcpy is safe even if the string is modified concurrently, + * because there's always a \0 at the end of the buffer. + */ + strcpy(localappname, (char *) beentry->st_appname); + localentry->backendStatus.st_appname = localappname; + strcpy(localclienthostname, (char *) beentry->st_clienthostname); + localentry->backendStatus.st_clienthostname = localclienthostname; + strcpy(localactivity, (char *) beentry->st_activity_raw); + localentry->backendStatus.st_activity_raw = localactivity; +#ifdef USE_SSL + if (beentry->st_ssl) + { + memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus)); + localentry->backendStatus.st_sslstatus = localsslstatus; + } +#endif +#ifdef ENABLE_GSS + if (beentry->st_gss) + { + memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus)); + localentry->backendStatus.st_gssstatus = localgssstatus; + } +#endif + } + + pgstat_end_read_activity(beentry, after_changecount); + + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) + break; + + /* Make sure we can break out of loop if stuck... */ + CHECK_FOR_INTERRUPTS(); + } + + beentry++; + /* Only valid entries get included into the local array */ + if (localentry->backendStatus.st_procpid > 0) + { + BackendIdGetTransactionIds(i, + &localentry->backend_xid, + &localentry->backend_xmin); + + localentry++; + localappname += NAMEDATALEN; + localclienthostname += NAMEDATALEN; + localactivity += pgstat_track_activity_query_size; +#ifdef USE_SSL + localsslstatus++; +#endif +#ifdef ENABLE_GSS + localgssstatus++; +#endif + localNumBackends++; + } + } + + /* Set the pointer only after completion of a valid table */ + localBackendStatusTable = localtable; +} + +/* ---------- + * pgstat_get_backend_current_activity() - + * + * Return a string representing the current activity of the backend with + * the specified PID. This looks directly at the BackendStatusArray, + * and so will provide current information regardless of the age of our + * transaction's snapshot of the status array. + * + * It is the caller's responsibility to invoke this only for backends whose + * state is expected to remain stable while the result is in use. The + * only current use is in deadlock reporting, where we can expect that + * the target backend is blocked on a lock. (There are corner cases + * where the target's wait could get aborted while we are looking at it, + * but the very worst consequence is to return a pointer to a string + * that's been changed, so we won't worry too much.) + * + * Note: return strings for special cases match pg_stat_get_backend_activity. + * ---------- + */ +const char * +pgstat_get_backend_current_activity(int pid, bool checkUser) +{ + PgBackendStatus *beentry; + int i; + + beentry = BackendStatusArray; + for (i = 1; i <= MaxBackends; i++) + { + /* + * Although we expect the target backend's entry to be stable, that + * doesn't imply that anyone else's is. To avoid identifying the + * wrong backend, while we check for a match to the desired PID we + * must follow the protocol of retrying if st_changecount changes + * while we examine the entry, or if it's odd. (This might be + * unnecessary, since fetching or storing an int is almost certainly + * atomic, but let's play it safe.) We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + volatile PgBackendStatus *vbeentry = beentry; + bool found; + + for (;;) + { + int before_changecount; + int after_changecount; + + pgstat_begin_read_activity(vbeentry, before_changecount); + + found = (vbeentry->st_procpid == pid); + + pgstat_end_read_activity(vbeentry, after_changecount); + + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) + break; + + /* Make sure we can break out of loop if stuck... */ + CHECK_FOR_INTERRUPTS(); + } + + if (found) + { + /* Now it is safe to use the non-volatile pointer */ + if (checkUser && !superuser() && beentry->st_userid != GetUserId()) + return ""; + else if (*(beentry->st_activity_raw) == '\0') + return ""; + else + { + /* this'll leak a bit of memory, but that seems acceptable */ + return pgstat_clip_activity(beentry->st_activity_raw); + } + } + + beentry++; + } + + /* If we get here, caller is in error ... */ + return ""; +} + +/* ---------- + * pgstat_get_crashed_backend_activity() - + * + * Return a string representing the current activity of the backend with + * the specified PID. Like the function above, but reads shared memory with + * the expectation that it may be corrupt. On success, copy the string + * into the "buffer" argument and return that pointer. On failure, + * return NULL. + * + * This function is only intended to be used by the postmaster to report the + * query that crashed a backend. In particular, no attempt is made to + * follow the correct concurrency protocol when accessing the + * BackendStatusArray. But that's OK, in the worst case we'll return a + * corrupted message. We also must take care not to trip on ereport(ERROR). + * ---------- + */ +const char * +pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen) +{ + volatile PgBackendStatus *beentry; + int i; + + beentry = BackendStatusArray; + + /* + * We probably shouldn't get here before shared memory has been set up, + * but be safe. + */ + if (beentry == NULL || BackendActivityBuffer == NULL) + return NULL; + + for (i = 1; i <= MaxBackends; i++) + { + if (beentry->st_procpid == pid) + { + /* Read pointer just once, so it can't change after validation */ + const char *activity = beentry->st_activity_raw; + const char *activity_last; + + /* + * We mustn't access activity string before we verify that it + * falls within the BackendActivityBuffer. To make sure that the + * entire string including its ending is contained within the + * buffer, subtract one activity length from the buffer size. + */ + activity_last = BackendActivityBuffer + BackendActivityBufferSize + - pgstat_track_activity_query_size; + + if (activity < BackendActivityBuffer || + activity > activity_last) + return NULL; + + /* If no string available, no point in a report */ + if (activity[0] == '\0') + return NULL; + + /* + * Copy only ASCII-safe characters so we don't run into encoding + * problems when reporting the message; and be sure not to run off + * the end of memory. As only ASCII characters are reported, it + * doesn't seem necessary to perform multibyte aware clipping. + */ + ascii_safe_strlcpy(buffer, activity, + Min(buflen, pgstat_track_activity_query_size)); + + return buffer; + } + + beentry++; + } + + /* PID not found */ + return NULL; } /* ------------------------------------------------------------ @@ -4612,15 +5656,10 @@ pgstat_clear_snapshot(void) /* Reset variables */ pgStatLocalContext = NULL; pgStatDBHash = NULL; + localBackendStatusTable = NULL; + localNumBackends = 0; replSlotStats = NULL; nReplSlotStats = 0; - - /* - * Historically the backend_status.c facilities lived in this file, and - * were reset with the same function. For now keep it that way, and - * forward the reset request. - */ - pgstat_clear_backend_activity_snapshot(); } @@ -5555,6 +6594,50 @@ pgstat_db_requested(Oid databaseid) return false; } +/* + * Convert a potentially unsafely truncated activity string (see + * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated + * one. + * + * The returned string is allocated in the caller's memory context and may be + * freed. + */ +char * +pgstat_clip_activity(const char *raw_activity) +{ + char *activity; + int rawlen; + int cliplen; + + /* + * Some callers, like pgstat_get_backend_current_activity(), do not + * guarantee that the buffer isn't concurrently modified. We try to take + * care that the buffer is always terminated by a NUL byte regardless, but + * let's still be paranoid about the string's length. In those cases the + * underlying buffer is guaranteed to be pgstat_track_activity_query_size + * large. + */ + activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1); + + /* now double-guaranteed to be NUL terminated */ + rawlen = strlen(activity); + + /* + * All supported server-encodings make it possible to determine the length + * of a multi-byte character from its first byte (this is not the case for + * client encodings, see GB18030). As st_activity is always stored using + * server encoding, this allows us to perform multi-byte aware truncation, + * even if the string earlier was truncated in the middle of a multi-byte + * character. + */ + cliplen = pg_mbcliplen(activity, rawlen, + pgstat_track_activity_query_size - 1); + + activity[cliplen] = '\0'; + + return activity; +} + /* ---------- * pgstat_replslot_index * diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 354fbe4b4b..7dffd187fa 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s) List *rels = NIL; List *part_rels = NIL; List *relids = NIL; + List *relids_extra = NIL; List *relids_logged = NIL; ListCell *lc; @@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s) remote_rels = lappend(remote_rels, rel); rels = lappend(rels, rel->localrel); relids = lappend_oid(relids, rel->localreloid); - if (RelationIsLogicallyLogged(rel->localrel)) + relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL); + if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) relids_logged = lappend_oid(relids_logged, rel->localreloid); /* @@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s) rels = lappend(rels, childrel); part_rels = lappend(part_rels, childrel); relids = lappend_oid(relids, childrelid); + relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING); /* Log this relation only if needed for logical decoding */ - if (RelationIsLogicallyLogged(childrel)) + if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) relids_logged = lappend_oid(relids_logged, childrelid); } } @@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s) * to replaying changes without further cascading. This might be later * changeable with a user specified option. */ - ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs); - + ExecuteTruncateGuts(rels, + relids, + relids_extra, + relids_logged, + DROP_RESTRICT, + restart_seqs); foreach(lc, remote_rels) { LogicalRepRelMapEntry *rel = lfirst(lc); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 692f21ef6a..897045ee27 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -448,9 +448,6 @@ InitProcess(void) OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); - /* now that we have a proc, report wait events to shared memory */ - pgstat_set_wait_event_storage(&MyProc->wait_event_info); - /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly @@ -604,9 +601,6 @@ InitAuxiliaryProcess(void) OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); - /* now that we have a proc, report wait events to shared memory */ - pgstat_set_wait_event_storage(&MyProc->wait_event_info); - /* Check that group locking fields are in a proper initial state. */ Assert(MyProc->lockGroupLeader == NULL); Assert(dlist_is_empty(&MyProc->lockGroupMembers)); @@ -890,15 +884,10 @@ ProcKill(int code, Datum arg) /* * Reset MyLatch to the process local one. This is so that signal * handlers et al can continue using the latch after the shared latch - * isn't ours anymore. - * - * Similarly, stop reporting wait events to MyProc->wait_event_info. - * - * After that clear MyProc and disown the shared latch. + * isn't ours anymore. After that clear MyProc and disown the shared + * latch. */ SwitchBackToLocalLatch(); - pgstat_reset_wait_event_storage(); - proc = MyProc; MyProc = NULL; DisownLatch(&proc->procLatch); @@ -963,10 +952,13 @@ AuxiliaryProcKill(int code, Datum arg) /* Cancel any pending condition variable sleep, too */ ConditionVariableCancelSleep(); - /* look at the equivalent ProcKill() code for comments */ + /* + * Reset MyLatch to the process local one. This is so that signal + * handlers et al can continue using the latch after the shared latch + * isn't ours anymore. After that clear MyProc and disown the shared + * latch. + */ SwitchBackToLocalLatch(); - pgstat_reset_wait_event_storage(); - proc = MyProc; MyProc = NULL; DisownLatch(&proc->procLatch); diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index 59196f278d..7cbe0cdbe8 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -14,8 +14,6 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = \ - backend_progress.o \ - backend_status.o \ wait_event.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c deleted file mode 100644 index 293254993c..0000000000 --- a/src/backend/utils/activity/backend_progress.c +++ /dev/null @@ -1,112 +0,0 @@ -/* ---------- - * progress.c - * - * Command progress reporting infrastructure. - * - * Copyright (c) 2001-2021, PostgreSQL Global Development Group - * - * src/backend/postmaster/progress.c - * ---------- - */ -#include "postgres.h" - -#include "port/atomics.h" /* for memory barriers */ -#include "utils/backend_progress.h" -#include "utils/backend_status.h" - - -/*----------- - * pgstat_progress_start_command() - - * - * Set st_progress_command (and st_progress_command_target) in own backend - * entry. Also, zero-initialize st_progress_param array. - *----------- - */ -void -pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - - if (!beentry || !pgstat_track_activities) - return; - - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - beentry->st_progress_command = cmdtype; - beentry->st_progress_command_target = relid; - MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param)); - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/*----------- - * pgstat_progress_update_param() - - * - * Update index'th member in st_progress_param[] of own backend entry. - *----------- - */ -void -pgstat_progress_update_param(int index, int64 val) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - - Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM); - - if (!beentry || !pgstat_track_activities) - return; - - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - beentry->st_progress_param[index] = val; - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/*----------- - * pgstat_progress_update_multi_param() - - * - * Update multiple members in st_progress_param[] of own backend entry. - * This is atomic; readers won't see intermediate states. - *----------- - */ -void -pgstat_progress_update_multi_param(int nparam, const int *index, - const int64 *val) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - int i; - - if (!beentry || !pgstat_track_activities || nparam == 0) - return; - - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - - for (i = 0; i < nparam; ++i) - { - Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM); - - beentry->st_progress_param[index[i]] = val[i]; - } - - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/*----------- - * pgstat_progress_end_command() - - * - * Reset st_progress_command (and st_progress_command_target) in own backend - * entry. This signals the end of the command. - *----------- - */ -void -pgstat_progress_end_command(void) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - - if (!beentry || !pgstat_track_activities) - return; - - if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID) - return; - - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - beentry->st_progress_command = PROGRESS_COMMAND_INVALID; - beentry->st_progress_command_target = InvalidOid; - PGSTAT_END_WRITE_ACTIVITY(beentry); -} diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c deleted file mode 100644 index a25ec0ee3c..0000000000 --- a/src/backend/utils/activity/backend_status.c +++ /dev/null @@ -1,1077 +0,0 @@ -/* ---------- - * backend_status.c - * Backend status reporting infrastructure. - * - * Copyright (c) 2001-2021, PostgreSQL Global Development Group - * - * - * IDENTIFICATION - * src/backend/postmaster/backend_status.c - * ---------- - */ -#include "postgres.h" - -#include "access/xact.h" -#include "libpq/libpq.h" -#include "miscadmin.h" -#include "pg_trace.h" -#include "pgstat.h" -#include "port/atomics.h" /* for memory barriers */ -#include "storage/ipc.h" -#include "storage/proc.h" /* for MyProc */ -#include "storage/sinvaladt.h" -#include "utils/ascii.h" -#include "utils/backend_status.h" -#include "utils/guc.h" /* for application_name */ -#include "utils/memutils.h" - - -/* ---------- - * Total number of backends including auxiliary - * - * We reserve a slot for each possible BackendId, plus one for each - * possible auxiliary process type. (This scheme assumes there is not - * more than one of any auxiliary process type at a time.) MaxBackends - * includes autovacuum workers and background workers as well. - * ---------- - */ -#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES) - - -/* ---------- - * GUC parameters - * ---------- - */ -bool pgstat_track_activities = false; -int pgstat_track_activity_query_size = 1024; - - -/* exposed so that progress.c can access it */ -PgBackendStatus *MyBEEntry = NULL; - - -static PgBackendStatus *BackendStatusArray = NULL; -static char *BackendAppnameBuffer = NULL; -static char *BackendClientHostnameBuffer = NULL; -static char *BackendActivityBuffer = NULL; -static Size BackendActivityBufferSize = 0; -#ifdef USE_SSL -static PgBackendSSLStatus *BackendSslStatusBuffer = NULL; -#endif -#ifdef ENABLE_GSS -static PgBackendGSSStatus *BackendGssStatusBuffer = NULL; -#endif - - -/* Status for backends including auxiliary */ -static LocalPgBackendStatus *localBackendStatusTable = NULL; - -/* Total number of backends including auxiliary */ -static int localNumBackends = 0; - -static MemoryContext backendStatusSnapContext; - - -static void pgstat_beshutdown_hook(int code, Datum arg); -static void pgstat_read_current_status(void); -static void pgstat_setup_backend_status_context(void); - - -/* - * Report shared-memory space needed by CreateSharedBackendStatus. - */ -Size -BackendStatusShmemSize(void) -{ - Size size; - - /* BackendStatusArray: */ - size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); - /* BackendAppnameBuffer: */ - size = add_size(size, - mul_size(NAMEDATALEN, NumBackendStatSlots)); - /* BackendClientHostnameBuffer: */ - size = add_size(size, - mul_size(NAMEDATALEN, NumBackendStatSlots)); - /* BackendActivityBuffer: */ - size = add_size(size, - mul_size(pgstat_track_activity_query_size, NumBackendStatSlots)); -#ifdef USE_SSL - /* BackendSslStatusBuffer: */ - size = add_size(size, - mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots)); -#endif -#ifdef ENABLE_GSS - /* BackendGssStatusBuffer: */ - size = add_size(size, - mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots)); -#endif - return size; -} - -/* - * Initialize the shared status array and several string buffers - * during postmaster startup. - */ -void -CreateSharedBackendStatus(void) -{ - Size size; - bool found; - int i; - char *buffer; - - /* Create or attach to the shared array */ - size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); - BackendStatusArray = (PgBackendStatus *) - ShmemInitStruct("Backend Status Array", size, &found); - - if (!found) - { - /* - * We're the first - initialize. - */ - MemSet(BackendStatusArray, 0, size); - } - - /* Create or attach to the shared appname buffer */ - size = mul_size(NAMEDATALEN, NumBackendStatSlots); - BackendAppnameBuffer = (char *) - ShmemInitStruct("Backend Application Name Buffer", size, &found); - - if (!found) - { - MemSet(BackendAppnameBuffer, 0, size); - - /* Initialize st_appname pointers. */ - buffer = BackendAppnameBuffer; - for (i = 0; i < NumBackendStatSlots; i++) - { - BackendStatusArray[i].st_appname = buffer; - buffer += NAMEDATALEN; - } - } - - /* Create or attach to the shared client hostname buffer */ - size = mul_size(NAMEDATALEN, NumBackendStatSlots); - BackendClientHostnameBuffer = (char *) - ShmemInitStruct("Backend Client Host Name Buffer", size, &found); - - if (!found) - { - MemSet(BackendClientHostnameBuffer, 0, size); - - /* Initialize st_clienthostname pointers. */ - buffer = BackendClientHostnameBuffer; - for (i = 0; i < NumBackendStatSlots; i++) - { - BackendStatusArray[i].st_clienthostname = buffer; - buffer += NAMEDATALEN; - } - } - - /* Create or attach to the shared activity buffer */ - BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size, - NumBackendStatSlots); - BackendActivityBuffer = (char *) - ShmemInitStruct("Backend Activity Buffer", - BackendActivityBufferSize, - &found); - - if (!found) - { - MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize); - - /* Initialize st_activity pointers. */ - buffer = BackendActivityBuffer; - for (i = 0; i < NumBackendStatSlots; i++) - { - BackendStatusArray[i].st_activity_raw = buffer; - buffer += pgstat_track_activity_query_size; - } - } - -#ifdef USE_SSL - /* Create or attach to the shared SSL status buffer */ - size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots); - BackendSslStatusBuffer = (PgBackendSSLStatus *) - ShmemInitStruct("Backend SSL Status Buffer", size, &found); - - if (!found) - { - PgBackendSSLStatus *ptr; - - MemSet(BackendSslStatusBuffer, 0, size); - - /* Initialize st_sslstatus pointers. */ - ptr = BackendSslStatusBuffer; - for (i = 0; i < NumBackendStatSlots; i++) - { - BackendStatusArray[i].st_sslstatus = ptr; - ptr++; - } - } -#endif - -#ifdef ENABLE_GSS - /* Create or attach to the shared GSSAPI status buffer */ - size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots); - BackendGssStatusBuffer = (PgBackendGSSStatus *) - ShmemInitStruct("Backend GSS Status Buffer", size, &found); - - if (!found) - { - PgBackendGSSStatus *ptr; - - MemSet(BackendGssStatusBuffer, 0, size); - - /* Initialize st_gssstatus pointers. */ - ptr = BackendGssStatusBuffer; - for (i = 0; i < NumBackendStatSlots; i++) - { - BackendStatusArray[i].st_gssstatus = ptr; - ptr++; - } - } -#endif -} - -/* - * Initialize pgstats backend activity state, and set up our on-proc-exit - * hook. Called from InitPostgres and AuxiliaryProcessMain. For auxiliary - * process, MyBackendId is invalid. Otherwise, MyBackendId must be set, but we - * must not have started any transaction yet (since the exit hook must run - * after the last transaction exit). - * - * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. - */ -void -pgstat_beinit(void) -{ - /* Initialize MyBEEntry */ - if (MyBackendId != InvalidBackendId) - { - Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends); - MyBEEntry = &BackendStatusArray[MyBackendId - 1]; - } - else - { - /* Must be an auxiliary process */ - Assert(MyAuxProcType != NotAnAuxProcess); - - /* - * Assign the MyBEEntry for an auxiliary process. Since it doesn't - * have a BackendId, the slot is statically allocated based on the - * auxiliary process type (MyAuxProcType). Backends use slots indexed - * in the range from 1 to MaxBackends (inclusive), so we use - * MaxBackends + AuxBackendType + 1 as the index of the slot for an - * auxiliary process. - */ - MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType]; - } - - /* Set up a process-exit hook to clean up */ - on_shmem_exit(pgstat_beshutdown_hook, 0); -} - - -/* ---------- - * pgstat_bestart() - - * - * Initialize this backend's entry in the PgBackendStatus array. - * Called from InitPostgres. - * - * Apart from auxiliary processes, MyBackendId, MyDatabaseId, - * session userid, and application_name must be set for a - * backend (hence, this cannot be combined with pgbestat_beinit). - * Note also that we must be inside a transaction if this isn't an aux - * process, as we may need to do encoding conversion on some strings. - * ---------- - */ -void -pgstat_bestart(void) -{ - volatile PgBackendStatus *vbeentry = MyBEEntry; - PgBackendStatus lbeentry; -#ifdef USE_SSL - PgBackendSSLStatus lsslstatus; -#endif -#ifdef ENABLE_GSS - PgBackendGSSStatus lgssstatus; -#endif - - /* pgstats state must be initialized from pgstat_beinit() */ - Assert(vbeentry != NULL); - - /* - * To minimize the time spent modifying the PgBackendStatus entry, and - * avoid risk of errors inside the critical section, we first copy the - * shared-memory struct to a local variable, then modify the data in the - * local variable, then copy the local variable back to shared memory. - * Only the last step has to be inside the critical section. - * - * Most of the data we copy from shared memory is just going to be - * overwritten, but the struct's not so large that it's worth the - * maintenance hassle to copy only the needful fields. - */ - memcpy(&lbeentry, - unvolatize(PgBackendStatus *, vbeentry), - sizeof(PgBackendStatus)); - - /* These structs can just start from zeroes each time, though */ -#ifdef USE_SSL - memset(&lsslstatus, 0, sizeof(lsslstatus)); -#endif -#ifdef ENABLE_GSS - memset(&lgssstatus, 0, sizeof(lgssstatus)); -#endif - - /* - * Now fill in all the fields of lbeentry, except for strings that are - * out-of-line data. Those have to be handled separately, below. - */ - lbeentry.st_procpid = MyProcPid; - lbeentry.st_backendType = MyBackendType; - lbeentry.st_proc_start_timestamp = MyStartTimestamp; - lbeentry.st_activity_start_timestamp = 0; - lbeentry.st_state_start_timestamp = 0; - lbeentry.st_xact_start_timestamp = 0; - lbeentry.st_databaseid = MyDatabaseId; - - /* We have userid for client-backends, wal-sender and bgworker processes */ - if (lbeentry.st_backendType == B_BACKEND - || lbeentry.st_backendType == B_WAL_SENDER - || lbeentry.st_backendType == B_BG_WORKER) - lbeentry.st_userid = GetSessionUserId(); - else - lbeentry.st_userid = InvalidOid; - - /* - * We may not have a MyProcPort (eg, if this is the autovacuum process). - * If so, use all-zeroes client address, which is dealt with specially in - * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port. - */ - if (MyProcPort) - memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr, - sizeof(lbeentry.st_clientaddr)); - else - MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr)); - -#ifdef USE_SSL - if (MyProcPort && MyProcPort->ssl_in_use) - { - lbeentry.st_ssl = true; - lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort); - strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN); - strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN); - be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN); - be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN); - be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN); - } - else - { - lbeentry.st_ssl = false; - } -#else - lbeentry.st_ssl = false; -#endif - -#ifdef ENABLE_GSS - if (MyProcPort && MyProcPort->gss != NULL) - { - const char *princ = be_gssapi_get_princ(MyProcPort); - - lbeentry.st_gss = true; - lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort); - lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort); - if (princ) - strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN); - } - else - { - lbeentry.st_gss = false; - } -#else - lbeentry.st_gss = false; -#endif - - lbeentry.st_state = STATE_UNDEFINED; - lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; - lbeentry.st_progress_command_target = InvalidOid; - - /* - * we don't zero st_progress_param here to save cycles; nobody should - * examine it until st_progress_command has been set to something other - * than PROGRESS_COMMAND_INVALID - */ - - /* - * We're ready to enter the critical section that fills the shared-memory - * status entry. We follow the protocol of bumping st_changecount before - * and after; and make sure it's even afterwards. We use a volatile - * pointer here to ensure the compiler doesn't try to get cute. - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry); - - /* make sure we'll memcpy the same st_changecount back */ - lbeentry.st_changecount = vbeentry->st_changecount; - - memcpy(unvolatize(PgBackendStatus *, vbeentry), - &lbeentry, - sizeof(PgBackendStatus)); - - /* - * We can write the out-of-line strings and structs using the pointers - * that are in lbeentry; this saves some de-volatilizing messiness. - */ - lbeentry.st_appname[0] = '\0'; - if (MyProcPort && MyProcPort->remote_hostname) - strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname, - NAMEDATALEN); - else - lbeentry.st_clienthostname[0] = '\0'; - lbeentry.st_activity_raw[0] = '\0'; - /* Also make sure the last byte in each string area is always 0 */ - lbeentry.st_appname[NAMEDATALEN - 1] = '\0'; - lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0'; - lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0'; - -#ifdef USE_SSL - memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus)); -#endif -#ifdef ENABLE_GSS - memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus)); -#endif - - PGSTAT_END_WRITE_ACTIVITY(vbeentry); - - /* Update app name to current GUC setting */ - if (application_name) - pgstat_report_appname(application_name); -} - -/* - * Clear out our entry in the PgBackendStatus array. - */ -static void -pgstat_beshutdown_hook(int code, Datum arg) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - - /* - * Clear my status entry, following the protocol of bumping st_changecount - * before and after. We use a volatile pointer here to ensure the - * compiler doesn't try to get cute. - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - - beentry->st_procpid = 0; /* mark invalid */ - - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/* - * Discard any data collected in the current transaction. Any subsequent - * request will cause new snapshots to be read. - * - * This is also invoked during transaction commit or abort to discard the - * no-longer-wanted snapshot. - */ -void -pgstat_clear_backend_activity_snapshot(void) -{ - /* Release memory, if any was allocated */ - if (backendStatusSnapContext) - { - MemoryContextDelete(backendStatusSnapContext); - backendStatusSnapContext = NULL; - } - - /* Reset variables */ - localBackendStatusTable = NULL; - localNumBackends = 0; -} - -static void -pgstat_setup_backend_status_context(void) -{ - if (!backendStatusSnapContext) - backendStatusSnapContext = AllocSetContextCreate(TopMemoryContext, - "Backend Status Snapshot", - ALLOCSET_SMALL_SIZES); -} - - -/* ---------- - * pgstat_report_activity() - - * - * Called from tcop/postgres.c to report what the backend is actually doing - * (but note cmd_str can be NULL for certain cases). - * - * All updates of the status entry follow the protocol of bumping - * st_changecount before and after. We use a volatile pointer here to - * ensure the compiler doesn't try to get cute. - * ---------- - */ -void -pgstat_report_activity(BackendState state, const char *cmd_str) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - TimestampTz start_timestamp; - TimestampTz current_timestamp; - int len = 0; - - TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str); - - if (!beentry) - return; - - if (!pgstat_track_activities) - { - if (beentry->st_state != STATE_DISABLED) - { - volatile PGPROC *proc = MyProc; - - /* - * track_activities is disabled, but we last reported a - * non-disabled state. As our final update, change the state and - * clear fields we will not be updating anymore. - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - beentry->st_state = STATE_DISABLED; - beentry->st_state_start_timestamp = 0; - beentry->st_activity_raw[0] = '\0'; - beentry->st_activity_start_timestamp = 0; - /* st_xact_start_timestamp and wait_event_info are also disabled */ - beentry->st_xact_start_timestamp = 0; - proc->wait_event_info = 0; - PGSTAT_END_WRITE_ACTIVITY(beentry); - } - return; - } - - /* - * To minimize the time spent modifying the entry, and avoid risk of - * errors inside the critical section, fetch all the needed data first. - */ - start_timestamp = GetCurrentStatementStartTimestamp(); - if (cmd_str != NULL) - { - /* - * Compute length of to-be-stored string unaware of multi-byte - * characters. For speed reasons that'll get corrected on read, rather - * than computed every write. - */ - len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1); - } - current_timestamp = GetCurrentTimestamp(); - - /* - * If the state has changed from "active" or "idle in transaction", - * calculate the duration. - */ - if ((beentry->st_state == STATE_RUNNING || - beentry->st_state == STATE_FASTPATH || - beentry->st_state == STATE_IDLEINTRANSACTION || - beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) && - state != beentry->st_state) - { - long secs; - int usecs; - - TimestampDifference(beentry->st_state_start_timestamp, - current_timestamp, - &secs, &usecs); - - if (beentry->st_state == STATE_RUNNING || - beentry->st_state == STATE_FASTPATH) - pgstat_count_conn_active_time(secs * 1000000 + usecs); - else - pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs); - } - - /* - * Now update the status entry - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - - beentry->st_state = state; - beentry->st_state_start_timestamp = current_timestamp; - - if (cmd_str != NULL) - { - memcpy((char *) beentry->st_activity_raw, cmd_str, len); - beentry->st_activity_raw[len] = '\0'; - beentry->st_activity_start_timestamp = start_timestamp; - } - - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/* ---------- - * pgstat_report_appname() - - * - * Called to update our application name. - * ---------- - */ -void -pgstat_report_appname(const char *appname) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - int len; - - if (!beentry) - return; - - /* This should be unnecessary if GUC did its job, but be safe */ - len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1); - - /* - * Update my status entry, following the protocol of bumping - * st_changecount before and after. We use a volatile pointer here to - * ensure the compiler doesn't try to get cute. - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - - memcpy((char *) beentry->st_appname, appname, len); - beentry->st_appname[len] = '\0'; - - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/* - * Report current transaction start timestamp as the specified value. - * Zero means there is no active transaction. - */ -void -pgstat_report_xact_timestamp(TimestampTz tstamp) -{ - volatile PgBackendStatus *beentry = MyBEEntry; - - if (!pgstat_track_activities || !beentry) - return; - - /* - * Update my status entry, following the protocol of bumping - * st_changecount before and after. We use a volatile pointer here to - * ensure the compiler doesn't try to get cute. - */ - PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); - - beentry->st_xact_start_timestamp = tstamp; - - PGSTAT_END_WRITE_ACTIVITY(beentry); -} - -/* ---------- - * pgstat_read_current_status() - - * - * Copy the current contents of the PgBackendStatus array to local memory, - * if not already done in this transaction. - * ---------- - */ -static void -pgstat_read_current_status(void) -{ - volatile PgBackendStatus *beentry; - LocalPgBackendStatus *localtable; - LocalPgBackendStatus *localentry; - char *localappname, - *localclienthostname, - *localactivity; -#ifdef USE_SSL - PgBackendSSLStatus *localsslstatus; -#endif -#ifdef ENABLE_GSS - PgBackendGSSStatus *localgssstatus; -#endif - int i; - - if (localBackendStatusTable) - return; /* already done */ - - pgstat_setup_backend_status_context(); - - /* - * Allocate storage for local copy of state data. We can presume that - * none of these requests overflow size_t, because we already calculated - * the same values using mul_size during shmem setup. However, with - * probably-silly values of pgstat_track_activity_query_size and - * max_connections, the localactivity buffer could exceed 1GB, so use - * "huge" allocation for that one. - */ - localtable = (LocalPgBackendStatus *) - MemoryContextAlloc(backendStatusSnapContext, - sizeof(LocalPgBackendStatus) * NumBackendStatSlots); - localappname = (char *) - MemoryContextAlloc(backendStatusSnapContext, - NAMEDATALEN * NumBackendStatSlots); - localclienthostname = (char *) - MemoryContextAlloc(backendStatusSnapContext, - NAMEDATALEN * NumBackendStatSlots); - localactivity = (char *) - MemoryContextAllocHuge(backendStatusSnapContext, - pgstat_track_activity_query_size * NumBackendStatSlots); -#ifdef USE_SSL - localsslstatus = (PgBackendSSLStatus *) - MemoryContextAlloc(backendStatusSnapContext, - sizeof(PgBackendSSLStatus) * NumBackendStatSlots); -#endif -#ifdef ENABLE_GSS - localgssstatus = (PgBackendGSSStatus *) - MemoryContextAlloc(backendStatusSnapContext, - sizeof(PgBackendGSSStatus) * NumBackendStatSlots); -#endif - - localNumBackends = 0; - - beentry = BackendStatusArray; - localentry = localtable; - for (i = 1; i <= NumBackendStatSlots; i++) - { - /* - * Follow the protocol of retrying if st_changecount changes while we - * copy the entry, or if it's odd. (The check for odd is needed to - * cover the case where we are able to completely copy the entry while - * the source backend is between increment steps.) We use a volatile - * pointer here to ensure the compiler doesn't try to get cute. - */ - for (;;) - { - int before_changecount; - int after_changecount; - - pgstat_begin_read_activity(beentry, before_changecount); - - localentry->backendStatus.st_procpid = beentry->st_procpid; - /* Skip all the data-copying work if entry is not in use */ - if (localentry->backendStatus.st_procpid > 0) - { - memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus)); - - /* - * For each PgBackendStatus field that is a pointer, copy the - * pointed-to data, then adjust the local copy of the pointer - * field to point at the local copy of the data. - * - * strcpy is safe even if the string is modified concurrently, - * because there's always a \0 at the end of the buffer. - */ - strcpy(localappname, (char *) beentry->st_appname); - localentry->backendStatus.st_appname = localappname; - strcpy(localclienthostname, (char *) beentry->st_clienthostname); - localentry->backendStatus.st_clienthostname = localclienthostname; - strcpy(localactivity, (char *) beentry->st_activity_raw); - localentry->backendStatus.st_activity_raw = localactivity; -#ifdef USE_SSL - if (beentry->st_ssl) - { - memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus)); - localentry->backendStatus.st_sslstatus = localsslstatus; - } -#endif -#ifdef ENABLE_GSS - if (beentry->st_gss) - { - memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus)); - localentry->backendStatus.st_gssstatus = localgssstatus; - } -#endif - } - - pgstat_end_read_activity(beentry, after_changecount); - - if (pgstat_read_activity_complete(before_changecount, - after_changecount)) - break; - - /* Make sure we can break out of loop if stuck... */ - CHECK_FOR_INTERRUPTS(); - } - - beentry++; - /* Only valid entries get included into the local array */ - if (localentry->backendStatus.st_procpid > 0) - { - BackendIdGetTransactionIds(i, - &localentry->backend_xid, - &localentry->backend_xmin); - - localentry++; - localappname += NAMEDATALEN; - localclienthostname += NAMEDATALEN; - localactivity += pgstat_track_activity_query_size; -#ifdef USE_SSL - localsslstatus++; -#endif -#ifdef ENABLE_GSS - localgssstatus++; -#endif - localNumBackends++; - } - } - - /* Set the pointer only after completion of a valid table */ - localBackendStatusTable = localtable; -} - - -/* ---------- - * pgstat_get_backend_current_activity() - - * - * Return a string representing the current activity of the backend with - * the specified PID. This looks directly at the BackendStatusArray, - * and so will provide current information regardless of the age of our - * transaction's snapshot of the status array. - * - * It is the caller's responsibility to invoke this only for backends whose - * state is expected to remain stable while the result is in use. The - * only current use is in deadlock reporting, where we can expect that - * the target backend is blocked on a lock. (There are corner cases - * where the target's wait could get aborted while we are looking at it, - * but the very worst consequence is to return a pointer to a string - * that's been changed, so we won't worry too much.) - * - * Note: return strings for special cases match pg_stat_get_backend_activity. - * ---------- - */ -const char * -pgstat_get_backend_current_activity(int pid, bool checkUser) -{ - PgBackendStatus *beentry; - int i; - - beentry = BackendStatusArray; - for (i = 1; i <= MaxBackends; i++) - { - /* - * Although we expect the target backend's entry to be stable, that - * doesn't imply that anyone else's is. To avoid identifying the - * wrong backend, while we check for a match to the desired PID we - * must follow the protocol of retrying if st_changecount changes - * while we examine the entry, or if it's odd. (This might be - * unnecessary, since fetching or storing an int is almost certainly - * atomic, but let's play it safe.) We use a volatile pointer here to - * ensure the compiler doesn't try to get cute. - */ - volatile PgBackendStatus *vbeentry = beentry; - bool found; - - for (;;) - { - int before_changecount; - int after_changecount; - - pgstat_begin_read_activity(vbeentry, before_changecount); - - found = (vbeentry->st_procpid == pid); - - pgstat_end_read_activity(vbeentry, after_changecount); - - if (pgstat_read_activity_complete(before_changecount, - after_changecount)) - break; - - /* Make sure we can break out of loop if stuck... */ - CHECK_FOR_INTERRUPTS(); - } - - if (found) - { - /* Now it is safe to use the non-volatile pointer */ - if (checkUser && !superuser() && beentry->st_userid != GetUserId()) - return ""; - else if (*(beentry->st_activity_raw) == '\0') - return ""; - else - { - /* this'll leak a bit of memory, but that seems acceptable */ - return pgstat_clip_activity(beentry->st_activity_raw); - } - } - - beentry++; - } - - /* If we get here, caller is in error ... */ - return ""; -} - -/* ---------- - * pgstat_get_crashed_backend_activity() - - * - * Return a string representing the current activity of the backend with - * the specified PID. Like the function above, but reads shared memory with - * the expectation that it may be corrupt. On success, copy the string - * into the "buffer" argument and return that pointer. On failure, - * return NULL. - * - * This function is only intended to be used by the postmaster to report the - * query that crashed a backend. In particular, no attempt is made to - * follow the correct concurrency protocol when accessing the - * BackendStatusArray. But that's OK, in the worst case we'll return a - * corrupted message. We also must take care not to trip on ereport(ERROR). - * ---------- - */ -const char * -pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen) -{ - volatile PgBackendStatus *beentry; - int i; - - beentry = BackendStatusArray; - - /* - * We probably shouldn't get here before shared memory has been set up, - * but be safe. - */ - if (beentry == NULL || BackendActivityBuffer == NULL) - return NULL; - - for (i = 1; i <= MaxBackends; i++) - { - if (beentry->st_procpid == pid) - { - /* Read pointer just once, so it can't change after validation */ - const char *activity = beentry->st_activity_raw; - const char *activity_last; - - /* - * We mustn't access activity string before we verify that it - * falls within the BackendActivityBuffer. To make sure that the - * entire string including its ending is contained within the - * buffer, subtract one activity length from the buffer size. - */ - activity_last = BackendActivityBuffer + BackendActivityBufferSize - - pgstat_track_activity_query_size; - - if (activity < BackendActivityBuffer || - activity > activity_last) - return NULL; - - /* If no string available, no point in a report */ - if (activity[0] == '\0') - return NULL; - - /* - * Copy only ASCII-safe characters so we don't run into encoding - * problems when reporting the message; and be sure not to run off - * the end of memory. As only ASCII characters are reported, it - * doesn't seem necessary to perform multibyte aware clipping. - */ - ascii_safe_strlcpy(buffer, activity, - Min(buflen, pgstat_track_activity_query_size)); - - return buffer; - } - - beentry++; - } - - /* PID not found */ - return NULL; -} - - -/* ---------- - * pgstat_fetch_stat_beentry() - - * - * Support function for the SQL-callable pgstat* functions. Returns - * our local copy of the current-activity entry for one backend. - * - * NB: caller is responsible for a check if the user is permitted to see - * this info (especially the querystring). - * ---------- - */ -PgBackendStatus * -pgstat_fetch_stat_beentry(int beid) -{ - pgstat_read_current_status(); - - if (beid < 1 || beid > localNumBackends) - return NULL; - - return &localBackendStatusTable[beid - 1].backendStatus; -} - - -/* ---------- - * pgstat_fetch_stat_local_beentry() - - * - * Like pgstat_fetch_stat_beentry() but with locally computed additions (like - * xid and xmin values of the backend) - * - * NB: caller is responsible for a check if the user is permitted to see - * this info (especially the querystring). - * ---------- - */ -LocalPgBackendStatus * -pgstat_fetch_stat_local_beentry(int beid) -{ - pgstat_read_current_status(); - - if (beid < 1 || beid > localNumBackends) - return NULL; - - return &localBackendStatusTable[beid - 1]; -} - - -/* ---------- - * pgstat_fetch_stat_numbackends() - - * - * Support function for the SQL-callable pgstat* functions. Returns - * the maximum current backend id. - * ---------- - */ -int -pgstat_fetch_stat_numbackends(void) -{ - pgstat_read_current_status(); - - return localNumBackends; -} - -/* - * Convert a potentially unsafely truncated activity string (see - * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated - * one. - * - * The returned string is allocated in the caller's memory context and may be - * freed. - */ -char * -pgstat_clip_activity(const char *raw_activity) -{ - char *activity; - int rawlen; - int cliplen; - - /* - * Some callers, like pgstat_get_backend_current_activity(), do not - * guarantee that the buffer isn't concurrently modified. We try to take - * care that the buffer is always terminated by a NUL byte regardless, but - * let's still be paranoid about the string's length. In those cases the - * underlying buffer is guaranteed to be pgstat_track_activity_query_size - * large. - */ - activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1); - - /* now double-guaranteed to be NUL terminated */ - rawlen = strlen(activity); - - /* - * All supported server-encodings make it possible to determine the length - * of a multi-byte character from its first byte (this is not the case for - * client encodings, see GB18030). As st_activity is always stored using - * server encoding, this allows us to perform multi-byte aware truncation, - * even if the string earlier was truncated in the middle of a multi-byte - * character. - */ - cliplen = pg_mbcliplen(activity, rawlen, - pgstat_track_activity_query_size - 1); - - activity[cliplen] = '\0'; - - return activity; -} diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index accc1eb577..840ebef92a 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -7,17 +7,6 @@ * * IDENTIFICATION * src/backend/postmaster/wait_event.c - * - * NOTES - * - * To make pgstat_report_wait_start() and pgstat_report_wait_end() as - * lightweight as possible, they do not check if shared memory (MyProc - * specifically, where the wait event is stored) is already available. Instead - * we initially set my_wait_event_info to a process local variable, which then - * is redirected to shared memory using pgstat_set_wait_event_storage(). For - * the same reason pgstat_track_activities is not checked - the check adds - * more work than it saves. - * * ---------- */ #include "postgres.h" @@ -34,36 +23,6 @@ static const char *pgstat_get_wait_timeout(WaitEventTimeout w); static const char *pgstat_get_wait_io(WaitEventIO w); -static uint32 local_my_wait_event_info; -uint32 *my_wait_event_info = &local_my_wait_event_info; - - -/* - * Configure wait event reporting to report wait events to *wait_event_info. - * *wait_event_info needs to be valid until pgstat_reset_wait_event_storage() - * is called. - * - * Expected to be called during backend startup, to point my_wait_event_info - * into shared memory. - */ -void -pgstat_set_wait_event_storage(uint32 *wait_event_info) -{ - my_wait_event_info = wait_event_info; -} - -/* - * Reset wait event storage location. - * - * Expected to be called during backend shutdown, before the location set up - * pgstat_set_wait_event_storage() becomes invalid. - */ -void -pgstat_reset_wait_event_storage(void) -{ - my_wait_event_info = &local_my_wait_event_info; -} - /* ---------- * pgstat_get_wait_event_type() - * diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 51d1bbef30..a3ec358538 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -681,10 +681,6 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, if (!bootstrap) pgstat_initialize(); - /* Initialize status reporting */ - if (!bootstrap) - pgstat_beinit(); - /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index c9c9da85f3..60a9c7a2a0 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -90,7 +90,6 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/acl.h" -#include "utils/backend_status.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/float.h" diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index e04ccc5b62..e42be914d2 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -148,11 +148,11 @@ static void save_query_text_state(PsqlScanState scan_state, ConditionalStack cst PQExpBuffer query_buf); static void discard_query_text(PsqlScanState scan_state, ConditionalStack cstack, PQExpBuffer query_buf); -static bool copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf); +static void copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf); static bool do_connect(enum trivalue reuse_previous_specification, char *dbname, char *user, char *host, char *port); static bool do_edit(const char *filename_arg, PQExpBuffer query_buf, - int lineno, bool discard_on_quit, bool *edited); + int lineno, bool *edited); static bool do_shell(const char *command); static bool do_watch(PQExpBuffer query_buf, double sleep); static bool lookup_object_oid(EditableObjectType obj_type, const char *desc, @@ -418,7 +418,7 @@ exec_command(const char *cmd, * the individual command subroutines. */ if (status == PSQL_CMD_SEND) - (void) copy_previous_query(query_buf, previous_buf); + copy_previous_query(query_buf, previous_buf); return status; } @@ -1004,27 +1004,14 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch, } if (status != PSQL_CMD_ERROR) { - bool discard_on_quit; - expand_tilde(&fname); if (fname) - { canonicalize_path(fname); - /* Always clear buffer if the file isn't modified */ - discard_on_quit = true; - } - else - { - /* - * If query_buf is empty, recall previous query for - * editing. But in that case, the query buffer should be - * emptied if editing doesn't modify the file. - */ - discard_on_quit = copy_previous_query(query_buf, - previous_buf); - } - if (do_edit(fname, query_buf, lineno, discard_on_quit, NULL)) + /* If query_buf is empty, recall previous query for editing */ + copy_previous_query(query_buf, previous_buf); + + if (do_edit(fname, query_buf, lineno, NULL)) status = PSQL_CMD_NEWEDIT; else status = PSQL_CMD_ERROR; @@ -1147,7 +1134,7 @@ exec_command_ef_ev(PsqlScanState scan_state, bool active_branch, { bool edited = false; - if (!do_edit(NULL, query_buf, lineno, true, &edited)) + if (!do_edit(NULL, query_buf, lineno, &edited)) status = PSQL_CMD_ERROR; else if (!edited) puts(_("No changes")); @@ -2650,7 +2637,7 @@ exec_command_watch(PsqlScanState scan_state, bool active_branch, } /* If query_buf is empty, recall and execute previous query */ - (void) copy_previous_query(query_buf, previous_buf); + copy_previous_query(query_buf, previous_buf); success = do_watch(query_buf, sleep); @@ -2974,19 +2961,12 @@ discard_query_text(PsqlScanState scan_state, ConditionalStack cstack, * This is used by various slash commands for which re-execution of a * previous query is a common usage. For convenience, we allow the * case of query_buf == NULL (and do nothing). - * - * Returns "true" if the previous query was copied into the query - * buffer, else "false". */ -static bool +static void copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf) { if (query_buf && query_buf->len == 0) - { appendPQExpBufferStr(query_buf, previous_buf->data); - return true; - } - return false; } /* @@ -3667,11 +3647,10 @@ UnsyncVariables(void) /* - * helper for do_edit(): actually invoke the editor + * do_edit -- handler for \e * - * Returns true on success, false if we failed to invoke the editor or - * it returned nonzero status. (An error message is printed for failed- - * to-invoke cases, but not if the editor returns nonzero status.) + * If you do not specify a filename, the current query buffer will be copied + * into a temporary one. */ static bool editFile(const char *fname, int lineno) @@ -3740,23 +3719,10 @@ editFile(const char *fname, int lineno) } -/* - * do_edit -- handler for \e - * - * If you do not specify a filename, the current query buffer will be copied - * into a temporary file. - * - * After this function is done, the resulting file will be copied back into the - * query buffer. As an exception to this, the query buffer will be emptied - * if the file was not modified (or the editor failed) and the caller passes - * "discard_on_quit" = true. - * - * If "edited" isn't NULL, *edited will be set to true if the query buffer - * is successfully replaced. - */ +/* call this one */ static bool do_edit(const char *filename_arg, PQExpBuffer query_buf, - int lineno, bool discard_on_quit, bool *edited) + int lineno, bool *edited) { char fnametmp[MAXPGPATH]; FILE *stream = NULL; @@ -3904,7 +3870,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf, { pg_log_error("%s: %m", fname); error = true; - resetPQExpBuffer(query_buf); } else if (edited) { @@ -3914,15 +3879,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf, fclose(stream); } } - else - { - /* - * If the file was not modified, and the caller requested it, discard - * the query buffer. - */ - if (discard_on_quit) - resetPQExpBuffer(query_buf); - } /* remove temp file */ if (!filename_arg) diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index d7bf16368b..c6b139d57d 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -2,7 +2,7 @@ * * progress.h * Constants used with the progress reporting facilities defined in - * backend_status.h. These are possibly interesting to extensions, so we + * pgstat.h. These are possibly interesting to extensions, so we * expose them via this header file. Note that if you update these * constants, you probably also need to update the views based on them * in system_views.sql. diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index b3d30acc35..d66760e4bf 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid, extern void CheckTableNotInUse(Relation rel, const char *stmt); +#define TRUNCATE_REL_CONTEXT_NORMAL 0x01 +#define TRUNCATE_REL_CONTEXT_ONLY 0x02 +#define TRUNCATE_REL_CONTEXT_CASCADING 0x04 extern void ExecuteTruncate(TruncateStmt *stmt); -extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, - DropBehavior behavior, bool restart_seqs); +extern void ExecuteTruncateGuts(List *explicit_rels, + List *relids, + List *relids_extra, + List *relids_logged, + DropBehavior behavior, + bool restart_seqs); extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass); diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 10d29ff292..205afe5ec3 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation, typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt, Oid serverOid); +typedef void (*ExecForeignTruncate_function) (List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs); + typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node, ParallelContext *pcxt); typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node, @@ -255,6 +260,9 @@ typedef struct FdwRoutine /* Support functions for IMPORT FOREIGN SCHEMA */ ImportForeignSchema_function ImportForeignSchema; + /* Support functions for TRUNCATE */ + ExecForeignTruncate_function ExecForeignTruncate; + /* Support functions for parallelism under Gather node */ IsForeignScanParallelSafe_function IsForeignScanParallelSafe; EstimateDSMForeignScan_function EstimateDSMForeignScan; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 7cd137506e..3247c7b8ad 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -12,10 +12,11 @@ #define PGSTAT_H #include "datatype/timestamp.h" +#include "libpq/pqcomm.h" +#include "miscadmin.h" +#include "port/atomics.h" #include "portability/instr_time.h" -#include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ -#include "utils/backend_progress.h" /* for backward compatibility */ -#include "utils/backend_status.h" /* for backward compatibility */ +#include "postmaster/pgarch.h" #include "utils/hsearch.h" #include "utils/relcache.h" #include "utils/wait_event.h" /* for backward compatibility */ @@ -881,6 +882,262 @@ typedef struct PgStat_ReplSlotStats TimestampTz stat_reset_timestamp; } PgStat_ReplSlotStats; +/* ---------- + * Backend states + * ---------- + */ +typedef enum BackendState +{ + STATE_UNDEFINED, + STATE_IDLE, + STATE_RUNNING, + STATE_IDLEINTRANSACTION, + STATE_FASTPATH, + STATE_IDLEINTRANSACTION_ABORTED, + STATE_DISABLED +} BackendState; + +/* ---------- + * Command type for progress reporting purposes + * ---------- + */ +typedef enum ProgressCommandType +{ + PROGRESS_COMMAND_INVALID, + PROGRESS_COMMAND_VACUUM, + PROGRESS_COMMAND_ANALYZE, + PROGRESS_COMMAND_CLUSTER, + PROGRESS_COMMAND_CREATE_INDEX, + PROGRESS_COMMAND_BASEBACKUP, + PROGRESS_COMMAND_COPY +} ProgressCommandType; + +#define PGSTAT_NUM_PROGRESS_PARAM 20 + +/* ---------- + * Shared-memory data structures + * ---------- + */ + + +/* + * PgBackendSSLStatus + * + * For each backend, we keep the SSL status in a separate struct, that + * is only filled in if SSL is enabled. + * + * All char arrays must be null-terminated. + */ +typedef struct PgBackendSSLStatus +{ + /* Information about SSL connection */ + int ssl_bits; + char ssl_version[NAMEDATALEN]; + char ssl_cipher[NAMEDATALEN]; + char ssl_client_dn[NAMEDATALEN]; + + /* + * serial number is max "20 octets" per RFC 5280, so this size should be + * fine + */ + char ssl_client_serial[NAMEDATALEN]; + + char ssl_issuer_dn[NAMEDATALEN]; +} PgBackendSSLStatus; + +/* + * PgBackendGSSStatus + * + * For each backend, we keep the GSS status in a separate struct, that + * is only filled in if GSS is enabled. + * + * All char arrays must be null-terminated. + */ +typedef struct PgBackendGSSStatus +{ + /* Information about GSSAPI connection */ + char gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */ + bool gss_auth; /* If GSSAPI authentication was used */ + bool gss_enc; /* If encryption is being used */ + +} PgBackendGSSStatus; + + +/* ---------- + * PgBackendStatus + * + * Each live backend maintains a PgBackendStatus struct in shared memory + * showing its current activity. (The structs are allocated according to + * BackendId, but that is not critical.) Note that the collector process + * has no involvement in, or even access to, these structs. + * + * Each auxiliary process also maintains a PgBackendStatus struct in shared + * memory. + * ---------- + */ +typedef struct PgBackendStatus +{ + /* + * To avoid locking overhead, we use the following protocol: a backend + * increments st_changecount before modifying its entry, and again after + * finishing a modification. A would-be reader should note the value of + * st_changecount, copy the entry into private memory, then check + * st_changecount again. If the value hasn't changed, and if it's even, + * the copy is valid; otherwise start over. This makes updates cheap + * while reads are potentially expensive, but that's the tradeoff we want. + * + * The above protocol needs memory barriers to ensure that the apparent + * order of execution is as it desires. Otherwise, for example, the CPU + * might rearrange the code so that st_changecount is incremented twice + * before the modification on a machine with weak memory ordering. Hence, + * use the macros defined below for manipulating st_changecount, rather + * than touching it directly. + */ + int st_changecount; + + /* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */ + int st_procpid; + + /* Type of backends */ + BackendType st_backendType; + + /* Times when current backend, transaction, and activity started */ + TimestampTz st_proc_start_timestamp; + TimestampTz st_xact_start_timestamp; + TimestampTz st_activity_start_timestamp; + TimestampTz st_state_start_timestamp; + + /* Database OID, owning user's OID, connection client address */ + Oid st_databaseid; + Oid st_userid; + SockAddr st_clientaddr; + char *st_clienthostname; /* MUST be null-terminated */ + + /* Information about SSL connection */ + bool st_ssl; + PgBackendSSLStatus *st_sslstatus; + + /* Information about GSSAPI connection */ + bool st_gss; + PgBackendGSSStatus *st_gssstatus; + + /* current state */ + BackendState st_state; + + /* application name; MUST be null-terminated */ + char *st_appname; + + /* + * Current command string; MUST be null-terminated. Note that this string + * possibly is truncated in the middle of a multi-byte character. As + * activity strings are stored more frequently than read, that allows to + * move the cost of correct truncation to the display side. Use + * pgstat_clip_activity() to truncate correctly. + */ + char *st_activity_raw; + + /* + * Command progress reporting. Any command which wishes can advertise + * that it is running by setting st_progress_command, + * st_progress_command_target, and st_progress_param[]. + * st_progress_command_target should be the OID of the relation which the + * command targets (we assume there's just one, as this is meant for + * utility commands), but the meaning of each element in the + * st_progress_param array is command-specific. + */ + ProgressCommandType st_progress_command; + Oid st_progress_command_target; + int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]; +} PgBackendStatus; + +/* + * Macros to load and store st_changecount with appropriate memory barriers. + * + * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY() + * after, modifying the current process's PgBackendStatus data. Note that, + * since there is no mechanism for cleaning up st_changecount after an error, + * THESE MACROS FORM A CRITICAL SECTION. Any error between them will be + * promoted to PANIC, causing a database restart to clean up shared memory! + * Hence, keep the critical section as short and straight-line as possible. + * Aside from being safer, that minimizes the window in which readers will + * have to loop. + * + * Reader logic should follow this sketch: + * + * for (;;) + * { + * int before_ct, after_ct; + * + * pgstat_begin_read_activity(beentry, before_ct); + * ... copy beentry data to local memory ... + * pgstat_end_read_activity(beentry, after_ct); + * if (pgstat_read_activity_complete(before_ct, after_ct)) + * break; + * CHECK_FOR_INTERRUPTS(); + * } + * + * For extra safety, we generally use volatile beentry pointers, although + * the memory barriers should theoretically be sufficient. + */ +#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \ + do { \ + START_CRIT_SECTION(); \ + (beentry)->st_changecount++; \ + pg_write_barrier(); \ + } while (0) + +#define PGSTAT_END_WRITE_ACTIVITY(beentry) \ + do { \ + pg_write_barrier(); \ + (beentry)->st_changecount++; \ + Assert(((beentry)->st_changecount & 1) == 0); \ + END_CRIT_SECTION(); \ + } while (0) + +#define pgstat_begin_read_activity(beentry, before_changecount) \ + do { \ + (before_changecount) = (beentry)->st_changecount; \ + pg_read_barrier(); \ + } while (0) + +#define pgstat_end_read_activity(beentry, after_changecount) \ + do { \ + pg_read_barrier(); \ + (after_changecount) = (beentry)->st_changecount; \ + } while (0) + +#define pgstat_read_activity_complete(before_changecount, after_changecount) \ + ((before_changecount) == (after_changecount) && \ + ((before_changecount) & 1) == 0) + + +/* ---------- + * LocalPgBackendStatus + * + * When we build the backend status array, we use LocalPgBackendStatus to be + * able to add new values to the struct when needed without adding new fields + * to the shared memory. It contains the backend status as a first member. + * ---------- + */ +typedef struct LocalPgBackendStatus +{ + /* + * Local version of the backend status entry. + */ + PgBackendStatus backendStatus; + + /* + * The xid of the current transaction if available, InvalidTransactionId + * if not. + */ + TransactionId backend_xid; + + /* + * The xmin of the current session if available, InvalidTransactionId if + * not. + */ + TransactionId backend_xmin; +} LocalPgBackendStatus; /* * Working state needed to accumulate per-function-call timing statistics. @@ -903,8 +1160,10 @@ typedef struct PgStat_FunctionCallUsage * GUC parameters * ---------- */ +extern PGDLLIMPORT bool pgstat_track_activities; extern PGDLLIMPORT bool pgstat_track_counts; extern PGDLLIMPORT int pgstat_track_functions; +extern PGDLLIMPORT int pgstat_track_activity_query_size; extern char *pgstat_stat_directory; extern char *pgstat_stat_tmpname; extern char *pgstat_stat_filename; @@ -925,14 +1184,6 @@ extern PgStat_MsgWal WalStats; extern PgStat_Counter pgStatBlockReadTime; extern PgStat_Counter pgStatBlockWriteTime; -/* - * Updated by pgstat_count_conn_*_time macros, called by - * pgstat_report_activity(). - */ -extern PgStat_Counter pgStatActiveTime; -extern PgStat_Counter pgStatTransactionIdleTime; - - /* * Updated by the traffic cop and in errfinish() */ @@ -942,6 +1193,9 @@ extern SessionEndType pgStatSessionEndCause; * Functions called from postmaster * ---------- */ +extern Size BackendStatusShmemSize(void); +extern void CreateSharedBackendStatus(void); + extern void pgstat_init(void); extern int pgstat_start(void); extern void pgstat_reset_all(void); @@ -987,13 +1241,30 @@ extern void pgstat_report_replslot(const char *slotname, PgStat_Counter spilltxn extern void pgstat_report_replslot_drop(const char *slotname); extern void pgstat_initialize(void); - +extern void pgstat_bestart(void); + +extern void pgstat_report_activity(BackendState state, const char *cmd_str); +extern void pgstat_report_tempfile(size_t filesize); +extern void pgstat_report_appname(const char *appname); +extern void pgstat_report_xact_timestamp(TimestampTz tstamp); +extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser); +extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer, + int buflen); + +extern void pgstat_progress_start_command(ProgressCommandType cmdtype, + Oid relid); +extern void pgstat_progress_update_param(int index, int64 val); +extern void pgstat_progress_update_multi_param(int nparam, const int *index, + const int64 *val); +extern void pgstat_progress_end_command(void); extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id); extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id); extern void pgstat_initstats(Relation rel); +extern char *pgstat_clip_activity(const char *raw_activity); + /* nontransactional event counts are simple enough to inline */ #define pgstat_count_heap_scan(rel) \ @@ -1035,10 +1306,6 @@ extern void pgstat_initstats(Relation rel); (pgStatBlockReadTime += (n)) #define pgstat_count_buffer_write_time(n) \ (pgStatBlockWriteTime += (n)) -#define pgstat_count_conn_active_time(n) \ - (pgStatActiveTime += (n)) -#define pgstat_count_conn_txn_idle_time(n) \ - (pgStatTransactionIdleTime += (n)) extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n); extern void pgstat_count_heap_update(Relation rel, bool hot); @@ -1075,7 +1342,10 @@ extern bool pgstat_send_wal(bool force); */ extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid); extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); +extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); +extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid); extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid); +extern int pgstat_fetch_stat_numbackends(void); extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); extern PgStat_GlobalStats *pgstat_fetch_global(void); extern PgStat_WalStats *pgstat_fetch_stat_wal(void); diff --git a/src/include/utils/backend_progress.h b/src/include/utils/backend_progress.h deleted file mode 100644 index 1714fa09c1..0000000000 --- a/src/include/utils/backend_progress.h +++ /dev/null @@ -1,44 +0,0 @@ -/* ---------- - * backend_progress.h - * Command progress reporting definition. - * - * Note that this file provides the infrastructure for storing a single - * backend's command progress counters, without ascribing meaning to the - * individual fields. See commands/progress.h and system_views.sql for that. - * - * Copyright (c) 2001-2021, PostgreSQL Global Development Group - * - * src/include/utils/backend_progress.h - * ---------- - */ -#ifndef BACKEND_PROGRESS_H -#define BACKEND_PROGRESS_H - - -/* ---------- - * Command type for progress reporting purposes - * ---------- - */ -typedef enum ProgressCommandType -{ - PROGRESS_COMMAND_INVALID, - PROGRESS_COMMAND_VACUUM, - PROGRESS_COMMAND_ANALYZE, - PROGRESS_COMMAND_CLUSTER, - PROGRESS_COMMAND_CREATE_INDEX, - PROGRESS_COMMAND_BASEBACKUP, - PROGRESS_COMMAND_COPY -} ProgressCommandType; - -#define PGSTAT_NUM_PROGRESS_PARAM 20 - - -extern void pgstat_progress_start_command(ProgressCommandType cmdtype, - Oid relid); -extern void pgstat_progress_update_param(int index, int64 val); -extern void pgstat_progress_update_multi_param(int nparam, const int *index, - const int64 *val); -extern void pgstat_progress_end_command(void); - - -#endif /* BACKEND_PROGRESS_H */ diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h deleted file mode 100644 index 3fd7370d41..0000000000 --- a/src/include/utils/backend_status.h +++ /dev/null @@ -1,316 +0,0 @@ -/* ---------- - * backend_status.h - * Definitions related to backend status reporting - * - * Copyright (c) 2001-2021, PostgreSQL Global Development Group - * - * src/include/utils/backend_status.h - * ---------- - */ -#ifndef BACKEND_STATUS_H -#define BACKEND_STATUS_H - -#include "datatype/timestamp.h" -#include "libpq/pqcomm.h" -#include "miscadmin.h" /* for BackendType */ -#include "utils/backend_progress.h" - - -/* ---------- - * Backend states - * ---------- - */ -typedef enum BackendState -{ - STATE_UNDEFINED, - STATE_IDLE, - STATE_RUNNING, - STATE_IDLEINTRANSACTION, - STATE_FASTPATH, - STATE_IDLEINTRANSACTION_ABORTED, - STATE_DISABLED -} BackendState; - - -/* ---------- - * Shared-memory data structures - * ---------- - */ - -/* - * PgBackendSSLStatus - * - * For each backend, we keep the SSL status in a separate struct, that - * is only filled in if SSL is enabled. - * - * All char arrays must be null-terminated. - */ -typedef struct PgBackendSSLStatus -{ - /* Information about SSL connection */ - int ssl_bits; - char ssl_version[NAMEDATALEN]; - char ssl_cipher[NAMEDATALEN]; - char ssl_client_dn[NAMEDATALEN]; - - /* - * serial number is max "20 octets" per RFC 5280, so this size should be - * fine - */ - char ssl_client_serial[NAMEDATALEN]; - - char ssl_issuer_dn[NAMEDATALEN]; -} PgBackendSSLStatus; - -/* - * PgBackendGSSStatus - * - * For each backend, we keep the GSS status in a separate struct, that - * is only filled in if GSS is enabled. - * - * All char arrays must be null-terminated. - */ -typedef struct PgBackendGSSStatus -{ - /* Information about GSSAPI connection */ - char gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */ - bool gss_auth; /* If GSSAPI authentication was used */ - bool gss_enc; /* If encryption is being used */ - -} PgBackendGSSStatus; - - -/* ---------- - * PgBackendStatus - * - * Each live backend maintains a PgBackendStatus struct in shared memory - * showing its current activity. (The structs are allocated according to - * BackendId, but that is not critical.) Note that the collector process - * has no involvement in, or even access to, these structs. - * - * Each auxiliary process also maintains a PgBackendStatus struct in shared - * memory. - * ---------- - */ -typedef struct PgBackendStatus -{ - /* - * To avoid locking overhead, we use the following protocol: a backend - * increments st_changecount before modifying its entry, and again after - * finishing a modification. A would-be reader should note the value of - * st_changecount, copy the entry into private memory, then check - * st_changecount again. If the value hasn't changed, and if it's even, - * the copy is valid; otherwise start over. This makes updates cheap - * while reads are potentially expensive, but that's the tradeoff we want. - * - * The above protocol needs memory barriers to ensure that the apparent - * order of execution is as it desires. Otherwise, for example, the CPU - * might rearrange the code so that st_changecount is incremented twice - * before the modification on a machine with weak memory ordering. Hence, - * use the macros defined below for manipulating st_changecount, rather - * than touching it directly. - */ - int st_changecount; - - /* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */ - int st_procpid; - - /* Type of backends */ - BackendType st_backendType; - - /* Times when current backend, transaction, and activity started */ - TimestampTz st_proc_start_timestamp; - TimestampTz st_xact_start_timestamp; - TimestampTz st_activity_start_timestamp; - TimestampTz st_state_start_timestamp; - - /* Database OID, owning user's OID, connection client address */ - Oid st_databaseid; - Oid st_userid; - SockAddr st_clientaddr; - char *st_clienthostname; /* MUST be null-terminated */ - - /* Information about SSL connection */ - bool st_ssl; - PgBackendSSLStatus *st_sslstatus; - - /* Information about GSSAPI connection */ - bool st_gss; - PgBackendGSSStatus *st_gssstatus; - - /* current state */ - BackendState st_state; - - /* application name; MUST be null-terminated */ - char *st_appname; - - /* - * Current command string; MUST be null-terminated. Note that this string - * possibly is truncated in the middle of a multi-byte character. As - * activity strings are stored more frequently than read, that allows to - * move the cost of correct truncation to the display side. Use - * pgstat_clip_activity() to truncate correctly. - */ - char *st_activity_raw; - - /* - * Command progress reporting. Any command which wishes can advertise - * that it is running by setting st_progress_command, - * st_progress_command_target, and st_progress_param[]. - * st_progress_command_target should be the OID of the relation which the - * command targets (we assume there's just one, as this is meant for - * utility commands), but the meaning of each element in the - * st_progress_param array is command-specific. - */ - ProgressCommandType st_progress_command; - Oid st_progress_command_target; - int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]; -} PgBackendStatus; - - -/* - * Macros to load and store st_changecount with appropriate memory barriers. - * - * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY() - * after, modifying the current process's PgBackendStatus data. Note that, - * since there is no mechanism for cleaning up st_changecount after an error, - * THESE MACROS FORM A CRITICAL SECTION. Any error between them will be - * promoted to PANIC, causing a database restart to clean up shared memory! - * Hence, keep the critical section as short and straight-line as possible. - * Aside from being safer, that minimizes the window in which readers will - * have to loop. - * - * Reader logic should follow this sketch: - * - * for (;;) - * { - * int before_ct, after_ct; - * - * pgstat_begin_read_activity(beentry, before_ct); - * ... copy beentry data to local memory ... - * pgstat_end_read_activity(beentry, after_ct); - * if (pgstat_read_activity_complete(before_ct, after_ct)) - * break; - * CHECK_FOR_INTERRUPTS(); - * } - * - * For extra safety, we generally use volatile beentry pointers, although - * the memory barriers should theoretically be sufficient. - */ -#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \ - do { \ - START_CRIT_SECTION(); \ - (beentry)->st_changecount++; \ - pg_write_barrier(); \ - } while (0) - -#define PGSTAT_END_WRITE_ACTIVITY(beentry) \ - do { \ - pg_write_barrier(); \ - (beentry)->st_changecount++; \ - Assert(((beentry)->st_changecount & 1) == 0); \ - END_CRIT_SECTION(); \ - } while (0) - -#define pgstat_begin_read_activity(beentry, before_changecount) \ - do { \ - (before_changecount) = (beentry)->st_changecount; \ - pg_read_barrier(); \ - } while (0) - -#define pgstat_end_read_activity(beentry, after_changecount) \ - do { \ - pg_read_barrier(); \ - (after_changecount) = (beentry)->st_changecount; \ - } while (0) - -#define pgstat_read_activity_complete(before_changecount, after_changecount) \ - ((before_changecount) == (after_changecount) && \ - ((before_changecount) & 1) == 0) - - -/* ---------- - * LocalPgBackendStatus - * - * When we build the backend status array, we use LocalPgBackendStatus to be - * able to add new values to the struct when needed without adding new fields - * to the shared memory. It contains the backend status as a first member. - * ---------- - */ -typedef struct LocalPgBackendStatus -{ - /* - * Local version of the backend status entry. - */ - PgBackendStatus backendStatus; - - /* - * The xid of the current transaction if available, InvalidTransactionId - * if not. - */ - TransactionId backend_xid; - - /* - * The xmin of the current session if available, InvalidTransactionId if - * not. - */ - TransactionId backend_xmin; -} LocalPgBackendStatus; - - -/* ---------- - * GUC parameters - * ---------- - */ -extern PGDLLIMPORT bool pgstat_track_activities; -extern PGDLLIMPORT int pgstat_track_activity_query_size; - - -/* ---------- - * Other global variables - * ---------- - */ -extern PGDLLIMPORT PgBackendStatus *MyBEEntry; - - -/* ---------- - * Functions called from postmaster - * ---------- - */ -extern Size BackendStatusShmemSize(void); -extern void CreateSharedBackendStatus(void); - - -/* ---------- - * Functions called from backends - * ---------- - */ - -/* Initialization functions */ -extern void pgstat_beinit(void); -extern void pgstat_bestart(void); - -extern void pgstat_clear_backend_activity_snapshot(void); - -/* Activity reporting functions */ -extern void pgstat_report_activity(BackendState state, const char *cmd_str); -extern void pgstat_report_tempfile(size_t filesize); -extern void pgstat_report_appname(const char *appname); -extern void pgstat_report_xact_timestamp(TimestampTz tstamp); -extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser); -extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer, - int buflen); - - -/* ---------- - * Support functions for the SQL-callable functions to - * generate the pgstat* views. - * ---------- - */ -extern int pgstat_fetch_stat_numbackends(void); -extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); -extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid); -extern char *pgstat_clip_activity(const char *raw_activity); - - -#endif /* BACKEND_STATUS_H */ diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 44448b48ec..2c883467f3 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -11,6 +11,9 @@ #define WAIT_EVENT_H +#include "storage/proc.h" /* for MyProc */ + + /* ---------- * Wait Classes * ---------- @@ -231,10 +234,13 @@ extern const char *pgstat_get_wait_event(uint32 wait_event_info); extern const char *pgstat_get_wait_event_type(uint32 wait_event_info); static inline void pgstat_report_wait_start(uint32 wait_event_info); static inline void pgstat_report_wait_end(void); -extern void pgstat_set_wait_event_storage(uint32 *wait_event_info); -extern void pgstat_reset_wait_event_storage(void); -extern PGDLLIMPORT uint32 *my_wait_event_info; + +/* + * Repeated here for the inline functions because it is declared in pgstat.h, + * which includes this header. + */ +extern PGDLLIMPORT bool pgstat_track_activities; /* ---------- @@ -248,35 +254,47 @@ extern PGDLLIMPORT uint32 *my_wait_event_info; * for wait event which is sufficient for current usage, 1-byte is * reserved for future usage. * - * Historically we used to make this reporting conditional on - * pgstat_track_activities, but the check for that seems to add more cost - * than it saves. - * - * my_wait_event_info initially points to local memory, making it safe to - * call this before MyProc has been initialized. + * NB: this *must* be able to survive being called before MyProc has been + * initialized. * ---------- */ static inline void pgstat_report_wait_start(uint32 wait_event_info) { + volatile PGPROC *proc = MyProc; + + if (!pgstat_track_activities || !proc) + return; + /* * Since this is a four-byte field which is always read and written as * four-bytes, updates are atomic. */ - *(volatile uint32 *) my_wait_event_info = wait_event_info; + proc->wait_event_info = wait_event_info; } /* ---------- * pgstat_report_wait_end() - * * Called to report end of a wait. + * + * NB: this *must* be able to survive being called before MyProc has been + * initialized. * ---------- */ static inline void pgstat_report_wait_end(void) { - /* see pgstat_report_wait_start() */ - *(volatile uint32 *) my_wait_event_info = 0; + volatile PGPROC *proc = MyProc; + + if (!pgstat_track_activities || !proc) + return; + + /* + * Since this is a four-byte field which is always read and written as + * four-bytes, updates are atomic. + */ + proc->wait_event_info = 0; } diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out index e4cdb780d0..5385f98a0f 100644 --- a/src/test/regress/expected/foreign_data.out +++ b/src/test/regress/expected/foreign_data.out @@ -1807,9 +1807,9 @@ Inherits: fd_pt1 -- TRUNCATE doesn't work on foreign tables, either directly or recursively TRUNCATE ft2; -- ERROR -ERROR: "ft2" is not a table +ERROR: foreign-data wrapper "dummy" has no handler TRUNCATE fd_pt1; -- ERROR -ERROR: "ft2" is not a table +ERROR: foreign-data wrapper "dummy" has no handler DROP TABLE fd_pt1 CASCADE; NOTICE: drop cascades to foreign table ft2 -- IMPORT FOREIGN SCHEMA @@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0); ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1); -- TRUNCATE doesn't work on foreign tables, either directly or recursively TRUNCATE fd_pt2_1; -- ERROR -ERROR: "fd_pt2_1" is not a table +ERROR: foreign-data wrapper "dummy" has no handler TRUNCATE fd_pt2; -- ERROR -ERROR: "fd_pt2_1" is not a table +ERROR: foreign-data wrapper "dummy" has no handler DROP FOREIGN TABLE fd_pt2_1; DROP TABLE fd_pt2; -- foreign table cannot be part of partition tree made of temporary diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 6a98064b2b..a173d604d6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -704,6 +704,7 @@ ForeignScanState ForeignServer ForeignServerInfo ForeignTable +ForeignTruncateInfo ForkNumber FormData_pg_aggregate FormData_pg_am