Thread: how to use query_tree_walker to get all relations used in a query
Hello,
I am trying to get the list of all relations used in a SELECT query using the post parse analyze hook.
I can get all relations from top level FROM clause but I cannot get them for a simple subquery like:
select * from t1 where x1=(select max(x2) from t2);
My current code is:
static bool pgds_tree_walker(Query *node, void *context)
{
/*
* from setrefs.c
* extract_query_dependencies_walker
*/
if (node == NULL)
return false;
if (IsA(node, Query))
{
Query *query = (Query *) node;
ListCell *lc;
/* Collect relation OIDs in this Query's rtable */
foreach(lc, query->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
if (rte->rtekind == RTE_RELATION ||
(rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)) ||
(rte->rtekind == RTE_NAMEDTUPLESTORE && OidIsValid(rte->relid)))
{
/*
* from setrefs.c
* extract_query_dependencies_walker
*/
if (node == NULL)
return false;
if (IsA(node, Query))
{
Query *query = (Query *) node;
ListCell *lc;
/* Collect relation OIDs in this Query's rtable */
foreach(lc, query->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
if (rte->rtekind == RTE_RELATION ||
(rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)) ||
(rte->rtekind == RTE_NAMEDTUPLESTORE && OidIsValid(rte->relid)))
elog(INFO, "pgds_tree_walker: relid=%d", rte->relid);
}
/*
* from rewriteHandler.c
* AcquireRewriteLocks
*/
if (rte->rtekind == RTE_SUBQUERY)
return query_tree_walker(rte->subquery, pgds_tree_walker, (void *) context, QTW_EXAMINE_RTES_BEFORE);
}
/* And recurse ...*/
query_tree_walker(query, pgds_tree_walker, (void *) context, QTW_EXAMINE_RTES_BEFORE);
}
}
/*
* build_rel_array
*/
static void pgds_build_rel_array(Query *query)
{
ListCell *cell;
Oid rel_id;
void * context = NULL;
bool result;
foreach(cell, query->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(cell);
rel_id = rte->relid;
if (pgds_rel_index < MAX_REL )
{
pgds_rel_array[pgds_rel_index] = rel_id;
pgds_rel_index++;
}
else elog(ERROR, "pgds_build_rel_array: too many relations (%d)", MAX_REL);
}
result = query_tree_walker(query, pgds_tree_walker, context, QTW_EXAMINE_RTES_BEFORE);
}
/*
* from rewriteHandler.c
* AcquireRewriteLocks
*/
if (rte->rtekind == RTE_SUBQUERY)
return query_tree_walker(rte->subquery, pgds_tree_walker, (void *) context, QTW_EXAMINE_RTES_BEFORE);
}
/* And recurse ...*/
query_tree_walker(query, pgds_tree_walker, (void *) context, QTW_EXAMINE_RTES_BEFORE);
}
}
/*
* build_rel_array
*/
static void pgds_build_rel_array(Query *query)
{
ListCell *cell;
Oid rel_id;
void * context = NULL;
bool result;
foreach(cell, query->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(cell);
rel_id = rte->relid;
if (pgds_rel_index < MAX_REL )
{
pgds_rel_array[pgds_rel_index] = rel_id;
pgds_rel_index++;
}
else elog(ERROR, "pgds_build_rel_array: too many relations (%d)", MAX_REL);
}
result = query_tree_walker(query, pgds_tree_walker, context, QTW_EXAMINE_RTES_BEFORE);
}
Could someone help me ?
Thanks
Hi, On Sat, Aug 19, 2023 at 03:26:06PM +0200, Pierre Forstmann wrote: > > I am trying to get the list of all relations used in a SELECT query using > the post parse analyze hook. > > I can get all relations from top level FROM clause but I cannot get them > for a simple subquery like: > > select * from t1 where x1=(select max(x2) from t2); You're missing at least the sublinks. Since you're looking at AcquireRewriteLocks() as an example, look at how acquireLocksOnSubLinks() is being called. In general, if you're not sure of how some constructs are represented in a Query the easiest way to get a good overview is to simply do something like static void pgds_build_rel_array(Query *query) { [...] elog(WARNING, "found %s", nodeToString(query)); You will get a textual representation of the query (or any specific node if needed) in the client and server log, so you can easily look for a given relid, Var or anything and identify what you were missing and adapt your walker function.