Awhile back I complained that I didn't like the way that the index advisor
patch plugged into the system:
http://archives.postgresql.org/pgsql-hackers/2007-04/msg00346.php
Attached is a proposed replacement patch that keeps essentially all the
advisor logic outside the core backend, and uses the method I suggested of
modifying the result of get_relation_info() rather than installing phony
system-catalog entries. Most of the patch bulk is actually just a small
refactoring of ExplainOnePlan's API to make it more convenient to call
from an advisor plugin. I also added hooks to let an advisor work through
EXPLAIN, as I still maintain is a more useful behavior than doubling the
work involved in every planner call. However, the planner() hook is still
there for those who are insistent.
To test the code, I made up a silly little proof-of-concept "advisor" that
just checks to see if 2-column indexes would be more helpful if the column
order were switched. It's incomplete because I didn't do anything about
printing out a nice explanation of what the hypothetical index is.
Comments, objections?
regards, tom lane
regression=# create table fooey as select unique1,unique2 from tenk1;
SELECT
regression=# create index fooeyi on fooey(unique1,unique2);
CREATE INDEX
regression=# analyze fooey;
ANALYZE
regression=# explain select * from fooey order by unique2,unique1;
QUERY PLAN
-----------------------------------------------------------------
Sort (cost=809.39..834.39 rows=10000 width=8)
Sort Key: unique2, unique1
-> Seq Scan on fooey (cost=0.00..145.00 rows=10000 width=8)
(3 rows)
regression=# explain select * from fooey where unique2 in (1,2,3);
QUERY PLAN
-------------------------------------------------------
Seq Scan on fooey (cost=0.00..182.50 rows=3 width=8)
Filter: (unique2 = ANY ('{1,2,3}'::integer[]))
(2 rows)
regression=# load '/home/tgl/pgsql/advisor';
LOAD
regression=# explain select * from fooey order by unique2,unique1;
QUERY PLAN
----------------------------------------------------------------------------------------
Sort (cost=809.39..834.39 rows=10000 width=8)
Sort Key: unique2, unique1
-> Seq Scan on fooey (cost=0.00..145.00 rows=10000 width=8)
Plan with hypothetical indexes:
Index Scan using <hypothetical index> on fooey (cost=0.00..376.00 rows=10000 width=8)
(6 rows)
regression=# explain select * from fooey where unique2 in (1,2,3);
QUERY PLAN
------------------------------------------------------------------------------------
Seq Scan on fooey (cost=0.00..182.50 rows=3 width=8)
Filter: (unique2 = ANY ('{1,2,3}'::integer[]))
Plan with hypothetical indexes:
Bitmap Heap Scan on fooey (cost=12.78..22.49 rows=3 width=8)
Recheck Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
-> Bitmap Index Scan on <hypothetical index> (cost=0.00..12.77 rows=3 width=0)
Index Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
(8 rows)
#include "postgres.h"
#include "fmgr.h"
#include "commands/explain.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
PG_MODULE_MAGIC;
void _PG_init(void);
void _PG_fini(void);
static void my_get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
static const char *my_explain_get_index_name(Oid indexId);
/*
* Main hook to take over control during EXPLAIN of a query
*/
static void
my_ExplainOneQuery(Query *query, ExplainStmt *stmt, const char *queryString,
ParamListInfo params, TupOutputState *tstate)
{
Query *savequery = NULL;
PlannedStmt *plan;
/*
* Because planner scribbles on its input, we have to make a copy if
* we want to plan twice.
*/
if (!stmt->analyze)
savequery = copyObject(query);
/* plan the query */
plan = planner(query, 0, params);
/* run it (if needed) and produce output */
ExplainOnePlan(plan, params, stmt, tstate);
/* if not EXPLAIN ANALYZE, try it with hypothetical indexes */
if (!stmt->analyze)
{
PG_TRY();
{
PlannedStmt *hplan;
/* Enable special processing */
get_relation_info_hook = my_get_relation_info;
explain_get_index_name_hook = my_explain_get_index_name;
/* plan the query */
hplan = planner(savequery, 0, params);
/* if we got a better plan, print it */
if (hplan->planTree->total_cost < plan->planTree->total_cost)
{
do_text_output_oneline(tstate, ""); /* separator line */
do_text_output_oneline(tstate, "Plan with hypothetical indexes:");
ExplainOnePlan(hplan, params, stmt, tstate);
}
}
PG_CATCH();
{
/* Make sure hooks get cleared on error exit */
get_relation_info_hook = NULL;
explain_get_index_name_hook = NULL;
PG_RE_THROW();
}
PG_END_TRY();
get_relation_info_hook = NULL;
explain_get_index_name_hook = NULL;
}
}
/*
* Get control during planner's get_relation_info() function, which sets up
* a RelOptInfo struct based on the system catalog contents. We can modify
* the struct contents to cause the planner to work with a hypothetical
* situation rather than what's actually in the catalogs.
*
* This simplistic example looks for two-column indexes on any two columns
* (a,b), and sets up a hypothetical index in the other column order (b,a).
*/
static void
my_get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
RelOptInfo *rel)
{
ListCell *ilist;
/* Do nothing for an inheritance parent RelOptInfo */
if (inhparent)
return;
foreach(ilist, rel->indexlist)
{
IndexOptInfo *oldindex = (IndexOptInfo *) lfirst(ilist);
if (oldindex->ncolumns == 2)
{
IndexOptInfo *info;
int ncolumns = oldindex->ncolumns;
int i;
ListCell *lc;
info = makeNode(IndexOptInfo);
/* flat-copy as many fields as we can */
memcpy(info, oldindex, sizeof(IndexOptInfo));
/*
* In this toy example we just assign all hypothetical indexes
* OID 0, and the explain_get_index_name hook just prints
* <hypothetical index>. In a realistic situation we'd probably
* assume that OIDs smaller than, say, 100 are never the OID of
* any real index, allowing us to identify one of up to 100
* hypothetical indexes per plan. Then we'd need to save aside
* some state data that would let the explain hooks print info
* about the selected index.
*/
info->indexoid = InvalidOid;
/*
* Need to make opfamily array large enough to put a terminating
* zero at the end.
*/
info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
info->opfamily = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1));
/* initialize these to zeroes in case index is unordered */
info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
info->revsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);
/* Reverse the order of the per-column arrays */
for (i = 0; i < ncolumns; i++)
{
info->indexkeys[i] = oldindex->indexkeys[1 - i];
info->opfamily[i] = oldindex->opfamily[1 - i];
info->fwdsortop[i] = oldindex->fwdsortop[1 - i];
info->revsortop[i] = oldindex->revsortop[1 - i];
info->nulls_first[i] = oldindex->nulls_first[1 - i];
}
/* must reverse the order of the indexprs list, too */
info->indexprs = NIL;
foreach(lc, oldindex->indexprs)
{
Node *node = lfirst(lc);
info->indexprs = lcons(node, info->indexprs);
}
/*
* Put the phony IndexOptInfo on the front of the list, so that
* we don't get into an infinite loop.
*/
rel->indexlist = lcons(info, rel->indexlist);
}
}
}
/*
* Hook to let EXPLAIN print something for a nonexistent index
*
* This is too simplistic for real-world use.
*/
static const char *
my_explain_get_index_name(Oid indexId)
{
if (indexId == InvalidOid)
return "<hypothetical index>";
return NULL; /* allow default behavior */
}
/*
* _pg_init() - library load-time initialization
*
* DO NOT make this static nor change its name!
*/
void
_PG_init(void)
{
/* Get into the hooks we need to be in all the time */
ExplainOneQuery_hook = my_ExplainOneQuery;
}
/*
* _PG_fini() - library unload-time finalization
*
* DO NOT make this static nor change its name!
*/
void
_PG_fini(void)
{
/* Get out of all the hooks (just to be sure) */
ExplainOneQuery_hook = NULL;
get_relation_info_hook = NULL;
explain_get_index_name_hook = NULL;
}