From 0156ccb547e6deffc0b2d68ca0377ea71b2e98e7 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 8 Feb 2023 11:00:01 +0900 Subject: [PATCH 3/3] Apply normalization to A_Const and utilities in pg_stat_statements The value of an A_Const is now ignored and its location is stored, so that query normalization can be applied across more query types: - SET - CALL - COPY TO with queries - View, matviews and CTAS - EXPLAIN - Triggers - Rules - Statistics --- src/include/nodes/parsenodes.h | 8 +- src/include/nodes/primnodes.h | 9 +- src/backend/nodes/queryjumblefuncs.c | 23 +---- doc/src/sgml/pgstatstatements.sgml | 7 +- .../pg_stat_statements/expected/cursors.out | 32 +++---- .../expected/pg_stat_statements.out | 2 +- .../pg_stat_statements/expected/planning.out | 4 +- .../pg_stat_statements/expected/utility.out | 90 +++++++++---------- .../pg_stat_statements/pg_stat_statements.c | 4 +- 9 files changed, 79 insertions(+), 100 deletions(-) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 855da99ec0..d87340d4ac 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3216,14 +3216,18 @@ typedef struct InlineCodeBlock * list contains copies of the expressions for all output arguments, in the * order of the procedure's declared arguments. (outargs is never evaluated, * but is useful to the caller as a reference for what to assign to.) + * The transformed call state is not relevant in the query jumbling, only the + * function call is.
* ---------------------- */ typedef struct CallStmt { NodeTag type; FuncCall *funccall; /* from the parser */ - FuncExpr *funcexpr; /* transformed call, with only input args */ - List *outargs; /* transformed output-argument expressions */ + /* transformed call, with only input args */ + FuncExpr *funcexpr pg_node_attr(query_jumble_ignore); + /* transformed output-argument expressions */ + List *outargs pg_node_attr(query_jumble_ignore); } CallStmt; typedef struct CallContext diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 6d740be5c0..0da482d542 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -128,8 +128,10 @@ typedef struct TableFunc * CREATE MATERIALIZED VIEW * * For CREATE MATERIALIZED VIEW, viewQuery is the parsed-but-not-rewritten - * SELECT Query for the view; otherwise it's NULL. (Although it's actually - * Query*, we declare it as Node* to avoid a forward reference.) + * SELECT Query for the view; otherwise it's NULL. This is irrelevant in + * the query jumbling as CreateTableAsStmt already includes a reference to + * its own Query, so ignore it. (Although it's actually Query*, we declare + * it as Node* to avoid a forward reference.) */ typedef struct IntoClause { @@ -141,7 +143,8 @@ typedef struct IntoClause List *options; /* options from WITH clause */ OnCommitAction onCommit; /* what do we do at COMMIT? 
*/ char *tableSpaceName; /* table space to use, or NULL */ - Node *viewQuery; /* materialized view's SELECT query */ + /* materialized view's SELECT query */ + Node *viewQuery pg_node_attr(query_jumble_ignore); bool skipData; /* true for WITH NO DATA */ } IntoClause; diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d7fd72d70f..0f08f4c75e 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -323,29 +323,8 @@ _jumbleA_Const(JumbleState *jstate, Node *node) if (!expr->isnull) { JUMBLE_FIELD(val.node.type); - switch (nodeTag(&expr->val)) - { - case T_Integer: - JUMBLE_FIELD(val.ival.ival); - break; - case T_Float: - JUMBLE_STRING(val.fval.fval); - break; - case T_Boolean: - JUMBLE_FIELD(val.boolval.boolval); - break; - case T_String: - JUMBLE_STRING(val.sval.sval); - break; - case T_BitString: - JUMBLE_STRING(val.bsval.bsval); - break; - default: - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(&expr->val)); - break; - } } + JUMBLE_LOCATION(location); } static void diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index efc36da602..cee9376916 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -489,11 +489,12 @@ Plannable queries (that is, SELECT, INSERT, UPDATE, DELETE, and MERGE) are combined into a single pg_stat_statements entry whenever they have identical query - structures according to an internal hash calculation. Typically, two + structures according to an internal hash calculation. The same rule + applies to utility commands (that is, all other commands), which are normalized + when they have an identical hash calculation. Typically, two queries will be considered the same for this purpose if they are semantically equivalent except for the values of literal constants - appearing in the query.
Utility commands (that is, all other commands) - are compared strictly on the basis of their textual query strings, however. + appearing in the query. diff --git a/contrib/pg_stat_statements/expected/cursors.out b/contrib/pg_stat_statements/expected/cursors.out index b31a4c77bb..fd2bef87bd 100644 --- a/contrib/pg_stat_statements/expected/cursors.out +++ b/contrib/pg_stat_statements/expected/cursors.out @@ -16,11 +16,11 @@ CLOSE cursor_stats_1; DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 2; CLOSE cursor_stats_1; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls | rows -------------------------------------------------------+-------+------ - CLOSE cursor_stats_1 | 2 | 0 - DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1 | 2 | 0 - SELECT pg_stat_statements_reset() | 1 | 1 + query | calls | rows +-------------------------------------------------------+-------+------ + CLOSE cursor_stats_1 | 2 | 0 + DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT $1 | 2 | 0 + SELECT pg_stat_statements_reset() | 1 | 1 (3 rows) SELECT pg_stat_statements_reset(); @@ -49,17 +49,17 @@ CLOSE cursor_stats_1; CLOSE cursor_stats_2; COMMIT; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls | rows -------------------------------------------------------+-------+------ - BEGIN | 1 | 0 - CLOSE cursor_stats_1 | 1 | 0 - CLOSE cursor_stats_2 | 1 | 0 - COMMIT | 1 | 0 - DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 2 | 1 | 0 - DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 3 | 1 | 0 - FETCH 1 IN cursor_stats_1 | 1 | 1 - FETCH 1 IN cursor_stats_2 | 1 | 1 - SELECT pg_stat_statements_reset() | 1 | 1 + query | calls | rows +-------------------------------------------------------+-------+------ + BEGIN | 1 | 0 + CLOSE cursor_stats_1 | 1 | 0 + CLOSE cursor_stats_2 | 1 | 0 + COMMIT | 1 | 0 + DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT $1 | 1 | 0 + DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 
$1 | 1 | 0 + FETCH 1 IN cursor_stats_1 | 1 | 1 + FETCH 1 IN cursor_stats_2 | 1 | 1 + SELECT pg_stat_statements_reset() | 1 | 1 (9 rows) SELECT pg_stat_statements_reset(); diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index c7b6035268..f134805709 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -311,7 +311,7 @@ FROM pg_stat_statements ORDER BY query COLLATE "C"; wal_records > $2 as wal_records_generated, +| | | | | wal_records >= rows as wal_records_ge_rows +| | | | | FROM pg_stat_statements ORDER BY query COLLATE "C" | | | | | - SET pg_stat_statements.track_utility = FALSE | 1 | 0 | f | f | t + SET pg_stat_statements.track_utility = $1 | 1 | 0 | f | f | t UPDATE pgss_test SET b = $1 WHERE a > $2 | 1 | 3 | t | t | t (7 rows) diff --git a/contrib/pg_stat_statements/expected/planning.out b/contrib/pg_stat_statements/expected/planning.out index 216e46ea2f..33b0550ba6 100644 --- a/contrib/pg_stat_statements/expected/planning.out +++ b/contrib/pg_stat_statements/expected/planning.out @@ -57,13 +57,13 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE COMMIT | 0 | 1 | 0 COPY pgss_ctas (a, b) FROM STDIN | 0 | 1 | 3 CREATE MATERIALIZED VIEW pgss_matv AS SELECT * FROM pgss_ctas | 0 | 1 | 13 - CREATE TABLE pgss_ctas AS SELECT a, 'ctas' b FROM generate_series(1, 10) a | 0 | 1 | 10 + CREATE TABLE pgss_ctas AS SELECT a, $1 b FROM generate_series($2, $3) a | 0 | 1 | 10 DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv | 0 | 1 | 0 FETCH FORWARD 5 pgss_cursor | 0 | 1 | 5 FETCH FORWARD ALL pgss_cursor | 0 | 1 | 7 FETCH NEXT pgss_cursor | 0 | 1 | 1 REFRESH MATERIALIZED VIEW pgss_matv | 0 | 1 | 13 - SELECT generate_series(1, 10) c INTO pgss_select_into | 0 | 1 | 10 + SELECT generate_series($1, $2) c INTO pgss_select_into | 0 | 1 | 10 SELECT pg_stat_statements_reset() | 0 | 
1 | 1 SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 | 0 | 0 (13 rows) diff --git a/contrib/pg_stat_statements/expected/utility.out b/contrib/pg_stat_statements/expected/utility.out index 700a37a42e..7d65753e66 100644 --- a/contrib/pg_stat_statements/expected/utility.out +++ b/contrib/pg_stat_statements/expected/utility.out @@ -32,11 +32,11 @@ NOTICE: table "tab_stats" does not exist, skipping NOTICE: table "tab_stats" does not exist, skipping NOTICE: table "tab_stats" does not exist, skipping query|calls|rows -ALTER TABLE tab_stats ADD CONSTRAINT a_nonzero CHECK (a <> 0)|1|0 -ALTER TABLE tab_stats ALTER COLUMN b TYPE text USING 'data' || b|1|0 -ALTER TABLE tab_stats ALTER COLUMN b set default 'a'|1|0 -CREATE INDEX index_stats ON tab_stats(b, (b || 'data1'), (b || 'data2')) WHERE a > 0|1|0 -CREATE TEMP TABLE tab_stats (a int, b char(20))|1|0 +ALTER TABLE tab_stats ADD CONSTRAINT a_nonzero CHECK (a <> $1)|1|0 +ALTER TABLE tab_stats ALTER COLUMN b TYPE text USING $1 || b|1|0 +ALTER TABLE tab_stats ALTER COLUMN b set default $1|1|0 +CREATE INDEX index_stats ON tab_stats(b, (b || $1), (b || $2)) WHERE a > $3|1|0 +CREATE TEMP TABLE tab_stats (a int, b char($1))|1|0 DROP TABLE IF EXISTS tab_stats|3|0 DROP TABLE tab_stats|1|0 SELECT $1|1|1 @@ -101,27 +101,27 @@ CREATE STATISTICS tab_expr_stats_1 (mcv) ON a, (2*a), (3*b) FROM tab_expr_stats; DROP TABLE tab_expr_stats; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows -ALTER FOREIGN TABLE foreign_stats ADD COLUMN b integer DEFAULT 1|1|0 -ALTER FOREIGN TABLE foreign_stats ADD CONSTRAINT b_nonzero CHECK (b <> 0)|1|0 +ALTER FOREIGN TABLE foreign_stats ADD COLUMN b integer DEFAULT $1|1|0 +ALTER FOREIGN TABLE foreign_stats ADD CONSTRAINT b_nonzero CHECK (b <> $1)|1|0 ALTER INDEX pt_stats_index ATTACH PARTITION pt_stats2_index|1|0 -ALTER TABLE pt_stats ATTACH PARTITION pt_stats1 FOR VALUES FROM (0) TO (100)|1|0 -ALTER VIEW view_stats ALTER 
COLUMN a SET DEFAULT 2|1|0 +ALTER TABLE pt_stats ATTACH PARTITION pt_stats1 FOR VALUES FROM ($1) TO ($2)|1|0 +ALTER VIEW view_stats ALTER COLUMN a SET DEFAULT $1|1|0 CREATE FOREIGN DATA WRAPPER wrapper_stats|1|0 CREATE FOREIGN TABLE foreign_stats (a int) SERVER server_stats|1|0 -CREATE FUNCTION func_stats(a text DEFAULT 'a_data', b text DEFAULT lower('b_data')) +CREATE FUNCTION func_stats(a text DEFAULT $1, b text DEFAULT lower($2)) RETURNS text AS $$ SELECT $1::text || '_' || $2::text; $$ LANGUAGE SQL|1|0 CREATE FUNCTION trigger_func_stats () RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN return OLD; end; $$|1|0 CREATE INDEX pt_stats2_index ON ONLY pt_stats2 (a)|1|0 CREATE INDEX pt_stats_index ON ONLY pt_stats (a)|1|0 -CREATE POLICY policy_stats ON tab_policy_stats USING (a = 5) WITH CHECK (b < 5)|1|0 +CREATE POLICY policy_stats ON tab_policy_stats USING (a = $1) WITH CHECK (b < $2)|1|0 CREATE RULE rules_stats AS ON INSERT TO tab_rule_stats DO INSTEAD - INSERT INTO tab_rule_stats_2 VALUES(new.*, 1, 2)|1|0 + INSERT INTO tab_rule_stats_2 VALUES(new.*, $1, $2)|1|0 CREATE SERVER server_stats FOREIGN DATA WRAPPER wrapper_stats|1|0 -CREATE STATISTICS tab_expr_stats_1 (mcv) ON a, (2*a), (3*b) FROM tab_expr_stats|1|0 +CREATE STATISTICS tab_expr_stats_1 (mcv) ON a, ($1*a), ($2*b) FROM tab_expr_stats|1|0 CREATE TABLE pt_stats (a int, b int) PARTITION BY range (a)|1|0 CREATE TABLE pt_stats1 (a int, b int)|1|0 -CREATE TABLE pt_stats2 PARTITION OF pt_stats FOR VALUES FROM (100) TO (200)|1|0 +CREATE TABLE pt_stats2 PARTITION OF pt_stats FOR VALUES FROM ($1) TO ($2)|1|0 CREATE TABLE tab_expr_stats (a int, b int)|1|0 CREATE TABLE tab_policy_stats (a int, b int)|1|0 CREATE TABLE tab_rule_stats (a int, b int)|1|0 @@ -129,10 +129,10 @@ CREATE TABLE tab_rule_stats_2 (a int, b int, c int, d int)|1|0 CREATE TABLE trigger_tab_stats (a int, b int)|1|0 CREATE TRIGGER trigger_tab_stats AFTER UPDATE ON trigger_tab_stats - FOR EACH ROW WHEN (OLD.a < 0 AND OLD.b < 1 AND true) + FOR EACH ROW 
WHEN (OLD.a < $1 AND OLD.b < $2 AND $3) EXECUTE FUNCTION trigger_func_stats()|1|0 -CREATE TYPE stats_type as (f1 numeric(35, 6), f2 numeric(35, 2))|1|0 -CREATE VIEW view_stats AS SELECT 1::int AS a, 2::int AS b|1|0 +CREATE TYPE stats_type as (f1 numeric($1, $2), f2 numeric($3, $4))|1|0 +CREATE VIEW view_stats AS SELECT $1::int AS a, $2::int AS b|1|0 DROP FOREIGN DATA WRAPPER wrapper_stats|1|0 DROP FOREIGN TABLE foreign_stats|1|0 DROP FUNCTION func_stats|1|0 @@ -177,8 +177,8 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows ABORT|4|0 BEGIN|6|0 -BEGIN ISOLATION LEVEL SERIALIZABLE|2|0 -BEGIN TRANSACTION DEFERRABLE|1|0 +BEGIN ISOLATION LEVEL $1|2|0 +BEGIN TRANSACTION $1|1|0 COMMIT WORK|5|0 SELECT pg_stat_statements_reset()|1|1 (6 rows) @@ -208,8 +208,8 @@ Function Scan on generate_series tab (2 rows) SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows -EXPLAIN (costs off) SELECT 1|2|0 -EXPLAIN (costs off) SELECT a FROM generate_series(1,10) AS tab(a) WHERE a = 3|2|0 +EXPLAIN (costs off) SELECT $1|2|0 +EXPLAIN (costs off) SELECT a FROM generate_series($1,$2) AS tab(a) WHERE a = $3|2|0 SELECT pg_stat_statements_reset()|1|1 (3 rows) -- CALL @@ -235,12 +235,10 @@ CALL sum_two(1,1); CALL sum_two(1,2); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows -CALL sum_one(199)|1|0 -CALL sum_one(3)|1|0 -CALL sum_two(1,1)|1|0 -CALL sum_two(1,2)|1|0 +CALL sum_one($1)|2|0 +CALL sum_two($1,$2)|2|0 SELECT pg_stat_statements_reset()|1|1 -(5 rows) +(3 rows) -- COPY CREATE TABLE copy_stats (a int, b int); SELECT pg_stat_statements_reset(); @@ -266,15 +264,12 @@ COPY (DELETE FROM copy_stats WHERE a = 1 RETURNING *) TO STDOUT; 1 4 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows -COPY (DELETE FROM copy_stats WHERE a = 1 RETURNING *) TO STDOUT|1|1 -COPY (INSERT INTO copy_stats VALUES (1, 1) RETURNING *) TO 
STDOUT|1|1 -COPY (INSERT INTO copy_stats VALUES (2, 2) RETURNING *) TO STDOUT|1|1 -COPY (SELECT 1) TO STDOUT|1|1 -COPY (SELECT 2) TO STDOUT|1|1 -COPY (UPDATE copy_stats SET b = b + 1 RETURNING *) TO STDOUT|1|2 -COPY (UPDATE copy_stats SET b = b + 2 RETURNING *) TO STDOUT|1|2 +COPY (DELETE FROM copy_stats WHERE a = $1 RETURNING *) TO STDOUT|1|1 +COPY (INSERT INTO copy_stats VALUES ($1, $2) RETURNING *) TO STDOUT|2|2 +COPY (SELECT $1) TO STDOUT|2|2 +COPY (UPDATE copy_stats SET b = b + $1 RETURNING *) TO STDOUT|2|4 SELECT pg_stat_statements_reset()|1|1 -(8 rows) +(5 rows) DROP TABLE copy_stats; SELECT pg_stat_statements_reset(); pg_stat_statements_reset @@ -296,10 +291,10 @@ CREATE TABLE ctas_stats_2 AS DROP TABLE ctas_stats_2; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows -CREATE TABLE ctas_stats_1 AS SELECT 1 AS a|2|2 +CREATE TABLE ctas_stats_1 AS SELECT $1 AS a|2|2 CREATE TABLE ctas_stats_2 AS - SELECT a AS col1, 2::int AS col2 - FROM generate_series(1, 10) AS tab(a) WHERE a < 5 AND a > 2|2|4 + SELECT a AS col1, $1::int AS col2 + FROM generate_series($2, $3) AS tab(a) WHERE a < $4 AND a > $5|2|4 DROP TABLE ctas_stats_1|2|0 DROP TABLE ctas_stats_2|2|0 SELECT pg_stat_statements_reset()|1|1 @@ -321,8 +316,8 @@ DROP MATERIALIZED VIEW matview_stats_1; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows CREATE MATERIALIZED VIEW matview_stats_1 AS - SELECT a AS col1, 2::int AS col2 - FROM generate_series(1, 10) AS tab(a) WHERE a < 5 AND a > 2|2|2 + SELECT a AS col1, $1::int AS col2 + FROM generate_series($2, $3) AS tab(a) WHERE a < $4 AND a > $5|2|2 DROP MATERIALIZED VIEW matview_stats_1|2|0 SELECT pg_stat_statements_reset()|1|1 (3 rows) @@ -342,14 +337,11 @@ DROP VIEW view_stats_1; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query|calls|rows CREATE VIEW view_stats_1 AS - SELECT a AS col1, 2::int AS col2 - FROM generate_series(1, 10) AS tab(a) 
WHERE a < 5 AND a > 2|1|0 -CREATE VIEW view_stats_1 AS - SELECT a AS col1, 4::int AS col2 - FROM generate_series(1, 5) AS tab(a) WHERE a < 4 AND a > 3|1|0 + SELECT a AS col1, $1::int AS col2 + FROM generate_series($2, $3) AS tab(a) WHERE a < $4 AND a > $5|2|0 DROP VIEW view_stats_1|2|0 SELECT pg_stat_statements_reset()|1|1 -(4 rows) +(3 rows) SELECT pg_stat_statements_reset(); pg_stat_statements_reset @@ -377,8 +369,6 @@ query|calls|rows RESET enable_seqscan|1|0 RESET work_mem|1|0 SELECT pg_stat_statements_reset()|1|1 -SET enable_seqscan = off|1|0 -SET enable_seqscan = on|1|0 -SET work_mem = '1MB'|2|0 -SET work_mem = '2MB'|1|0 -(7 rows) +SET enable_seqscan = $1|2|0 +SET work_mem = $1|3|0 +(5 rows) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ad1fe44496..5285c3f7fa 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -836,8 +836,10 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) if (query->utilityStmt) { if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt)) + { query->queryId = UINT64CONST(0); - return; + return; + } } /* -- 2.39.1