Re: compute_query_id and pg_stat_statements - Mailing list pgsql-hackers

From Kyotaro Horiguchi
Subject Re: compute_query_id and pg_stat_statements
Date
Msg-id 20210513.132637.954803924958976958.horikyota.ntt@gmail.com
Whole thread Raw
In response to Re: compute_query_id and pg_stat_statements  (Kyotaro Horiguchi <horikyota.ntt@gmail.com>)
Responses Re: compute_query_id and pg_stat_statements
List pgsql-hackers
At Thu, 13 May 2021 12:11:12 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in 
> As the result, even if we take the DLL approach, still not need to
> split out the query-id provider part. By the following config:
> 
> > query_id_provider = 'pg_stat_statements'
> 
> the core can obtain the entrypoint of, say, "_PG_calculate_query_id"
> to call it. And it can be of another module.
> 
> It seems like the only problem doing that is we don't have a means to
> call per-process intializer for a preload libralies.

So this is a crude PoC of that.

pg_stat_statemnts defines its own query-id provider function in
pg_stat_statements which calls in-core DefaultJumbeQuery (end emits a
log line).

If server started with query_id_provider='auto' and pg_stat_statements
is not loaded, pg_stat_activity.query_id is null.

If query_id_provider='auto' and pg_stat_statements is loaded,
pg_stat_activity.query_id is filled in with a query id.

If query_id_provider='default' or 'pg_stat_statements' and
pg_stat_statements is not loaded, pg_stat_activity.query_id is filled
in with a query id.

If query_id_provider='none' and pg_stat_statements is loaded, it
complains as "query id provider is not available" and refuss to start.

If showing the variable, it shows the real provider name instead of
the setting in postgresql.conf.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a85f962801..207c4362af 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,6 +295,7 @@ static bool pgss_save;            /* whether to save stats across shutdown */
 
 void        _PG_init(void);
 void        _PG_fini(void);
+JumbleState *_PG_calculate_query_id(Query *query, const char *querytext);
 
 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
 PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
@@ -478,6 +479,13 @@ _PG_fini(void)
     ProcessUtility_hook = prev_ProcessUtility;
 }
 
+/* Test queryid provider function */
+JumbleState *_PG_calculate_query_id(Query *query, const char *querytext)
+{
+    elog(LOG, "Called query id generatr of pg_stat_statements");
+    return DefaultJumbleQuery(query, querytext);
+}
+
 /*
  * shmem_startup hook: allocate or attach to shared memory,
  * then load any pre-existing statistics from file.
@@ -544,6 +552,11 @@ pgss_shmem_startup(void)
     if (!IsUnderPostmaster)
         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
 
+    /* request my defalt provider, but allow exisint one */
+    if (!queryIdWanted("pg_stat_statements", true))
+        ereport(ERROR,
+                (errmsg ("query_id provider is not available")));
+
     /*
      * Done if some other process already completed our initialization.
      */
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 1202bf85a3..be00564221 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -245,8 +245,7 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
     es->summary = (summary_set) ? es->summary : es->analyze;
 
     query = castNode(Query, stmt->query);
-    if (compute_query_id)
-        jstate = JumbleQuery(query, pstate->p_sourcetext);
+    jstate = JumbleQuery(query, pstate->p_sourcetext);
 
     if (post_parse_analyze_hook)
         (*post_parse_analyze_hook) (pstate, query, jstate);
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 168198acd1..bdf3a5a6d1 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -124,8 +124,7 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
 
     query = transformTopLevelStmt(pstate, parseTree);
 
-    if (compute_query_id)
-        jstate = JumbleQuery(query, sourceText);
+    jstate = JumbleQuery(query, sourceText);
 
     if (post_parse_analyze_hook)
         (*post_parse_analyze_hook) (pstate, query, jstate);
@@ -163,8 +162,7 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
     /* make sure all is well with parameter types */
     check_variable_parameters(pstate, query);
 
-    if (compute_query_id)
-        jstate = JumbleQuery(query, sourceText);
+    jstate = JumbleQuery(query, sourceText);
 
     if (post_parse_analyze_hook)
         (*post_parse_analyze_hook) (pstate, query, jstate);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 6200699ddd..1034dfea28 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -704,8 +704,7 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
 
     query = transformTopLevelStmt(pstate, parsetree);
 
-    if (compute_query_id)
-        jstate = JumbleQuery(query, query_string);
+    jstate = JumbleQuery(query, query_string);
 
     if (post_parse_analyze_hook)
         (*post_parse_analyze_hook) (pstate, query, jstate);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index eb7f7181e4..70d06b825e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -101,6 +101,7 @@
 #include "utils/plancache.h"
 #include "utils/portal.h"
 #include "utils/ps_status.h"
+#include "utils/queryjumble.h"
 #include "utils/rls.h"
 #include "utils/snapmgr.h"
 #include "utils/tzparser.h"
@@ -534,7 +535,6 @@ extern const struct config_enum_entry dynamic_shared_memory_options[];
 /*
  * GUC option variables that are exported from this module
  */
-bool        compute_query_id = false;
 bool        log_duration = false;
 bool        Debug_print_plan = false;
 bool        Debug_print_parse = false;
@@ -1441,15 +1441,6 @@ static struct config_bool ConfigureNamesBool[] =
         true,
         NULL, NULL, NULL
     },
-    {
-        {"compute_query_id", PGC_SUSET, STATS_MONITORING,
-            gettext_noop("Compute query identifiers."),
-            NULL
-        },
-        &compute_query_id,
-        false,
-        NULL, NULL, NULL
-    },
     {
         {"log_parser_stats", PGC_SUSET, STATS_MONITORING,
             gettext_noop("Writes parser performance statistics to the server log."),
@@ -4579,6 +4570,16 @@ static struct config_string ConfigureNamesString[] =
         check_backtrace_functions, assign_backtrace_functions, NULL
     },
 
+
+    {
+        {"query_id_provider", PGC_SUSET, CLIENT_CONN_PRELOAD,
+            gettext_noop("Sets the query-id provider."),
+        },
+        &query_id_provider,
+        "auto",
+        check_query_id_provider, assign_query_id_provider, NULL
+    },
+
     /* End-of-list marker */
     {
         {NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index efde01ee56..fc31ce15c4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -604,7 +604,7 @@
 
 # - Monitoring -
 
-#compute_query_id = off
+#query_id_provider = 'auto'
 #log_statement_stats = off
 #log_parser_stats = off
 #log_planner_stats = off
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index f004a9ce8c..709d654ea0 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -47,6 +47,96 @@ static void JumbleRangeTable(JumbleState *jstate, List *rtable);
 static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
 static void JumbleExpr(JumbleState *jstate, Node *node);
 static void RecordConstLocation(JumbleState *jstate, int location);
+static JumbleState *DummyJumbleQuery(Query *query, const char *querytext);
+
+char *query_id_provider = NULL;
+static bool lock_provider = false;
+
+typedef JumbleState *(*JumbleQueryType) (Query *query, const char *querytext);
+JumbleQueryType JumbleQuery = NULL;
+
+typedef struct QueryIdProviderExtra
+{
+    JumbleQueryType pfunc;
+} QueryIdProviderExtra;
+
+bool
+check_query_id_provider(char **newval, void **extra, GucSource source)
+{
+    QueryIdProviderExtra *param = NULL;
+
+    if (lock_provider)
+        return false;
+
+    param = (QueryIdProviderExtra *)malloc(sizeof(QueryIdProviderExtra));
+    if (param == NULL)
+        return false;
+
+    if (strcmp(*newval, "none") == 0 ||
+        strcmp(*newval, "auto") == 0)
+        param->pfunc = DummyJumbleQuery;
+    else if (strcmp(*newval, "default") == 0)
+        param->pfunc = DefaultJumbleQuery;
+    else
+        param->pfunc =
+            load_external_function(*newval, "_PG_calculate_query_id",
+                                   false, NULL);
+
+    if (param->pfunc == NULL)
+    {
+        free(param);
+        param = NULL;
+        GUC_check_errdetail("failed to load query id provider");
+        return false;
+    }
+
+    *extra = (void *) param;
+    return true;
+}
+
+void
+assign_query_id_provider(const char *newval, void *extra)
+{
+    QueryIdProviderExtra *param = (QueryIdProviderExtra *)extra;
+
+    JumbleQuery = param->pfunc;
+}
+
+bool
+queryIdWanted(char *provider_name, bool use_existing)
+{
+    JumbleQueryType func;
+
+    Assert(query_id_provider != NULL);
+    Assert(JumbleQuery != NULL);
+
+    if (lock_provider || strcmp(query_id_provider, "none") == 0)
+        return false;
+
+    /* use existing provider when use_existing */
+    if (strcmp(query_id_provider, "auto") != 0 && use_existing)
+        return true;
+
+    if (strcmp(provider_name, "default") == 0)
+        func = DefaultJumbleQuery;
+    else
+        func = load_external_function(provider_name, "_PG_calculate_query_id",
+                                      false, NULL);
+
+    if (func == NULL)
+        return false;
+
+    elog(LOG, "query-id provider \"%s\" loaded", provider_name);
+    JumbleQuery = func;
+
+    /* expose real provider name */
+    SetConfigOption("query_id_provider", provider_name,
+                    PGC_POSTMASTER, PGC_S_OVERRIDE);
+
+    lock_provider = true;
+
+    return true;
+}
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -92,7 +182,7 @@ CleanQuerytext(const char *query, int *location, int *len)
 }
 
 JumbleState *
-JumbleQuery(Query *query, const char *querytext)
+DefaultJumbleQuery(Query *query, const char *querytext)
 {
     JumbleState *jstate = NULL;
 
@@ -132,6 +222,12 @@ JumbleQuery(Query *query, const char *querytext)
     return jstate;
 }
 
+static JumbleState *
+DummyJumbleQuery(Query *query, const char *querytext)
+{
+    return NULL;
+}
+
 /*
  * Compute a query identifier for the given utility query string.
  */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 24a5d9d3fb..a7c3a4958e 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -247,7 +247,6 @@ extern bool log_btree_build_stats;
 extern PGDLLIMPORT bool check_function_bodies;
 extern bool session_auth_is_superuser;
 
-extern bool compute_query_id;
 extern bool log_duration;
 extern int    log_parameter_max_length;
 extern int    log_parameter_max_length_on_error;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 83ba7339fa..682d687b79 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
 #define QUERYJUBLE_H
 
 #include "nodes/parsenodes.h"
+#include "utils/guc.h"
 
 #define JUMBLE_SIZE                1024    /* query serialization buffer size */
 
@@ -52,7 +53,12 @@ typedef struct JumbleState
     int            highest_extern_param_id;
 } JumbleState;
 
+extern char *query_id_provider;
+extern JumbleState *(*JumbleQuery)(Query *query, const char *querytext);
+
+JumbleState *DefaultJumbleQuery(Query *query, const char *querytext);
+bool queryIdWanted(char *provider_name, bool use_existing);
+bool check_query_id_provider(char **newval, void **extra, GucSource source);
+void assign_query_id_provider(const char *newval, void *extra);
 const char *CleanQuerytext(const char *query, int *location, int *len);
-JumbleState *JumbleQuery(Query *query, const char *querytext);
-
 #endif                            /* QUERYJUMBLE_H */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index cda28098ba..16375d5596 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -477,7 +477,7 @@ select jsonb_pretty(
 (1 row)
 
 rollback;
-set compute_query_id = on;
+set compute_query_id = 'default';
 select explain_filter('explain (verbose) select * from int8_tbl i8');
                          explain_filter                         
 ----------------------------------------------------------------

pgsql-hackers by date:

Previous
From: Julien Rouhaud
Date:
Subject: Re: compute_query_id and pg_stat_statements
Next
From: Kyotaro Horiguchi
Date:
Subject: Re: compute_query_id and pg_stat_statements