Rewritten Index Advisor patch - Mailing list pgsql-patches

From Tom Lane
Subject Rewritten Index Advisor patch
Date
Msg-id 27516.1180053940@sss.pgh.pa.us
Whole thread Raw
Responses Re: Rewritten Index Advisor patch  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
Awhile back I complained that I didn't like the way that the index advisor
patch plugged into the system:
http://archives.postgresql.org/pgsql-hackers/2007-04/msg00346.php

Attached is a proposed replacement patch that keeps essentially all the
advisor logic outside the core backend, and uses the method I suggested of
modifying the result of get_relation_info() rather than installing phony
system-catalog entries.  Most of the patch bulk is actually just a small
refactoring of ExplainOnePlan's API to make it more convenient to call
from an advisor plugin.  I also added hooks to let an advisor work through
EXPLAIN, as I still maintain is a more useful behavior than doubling the
work involved in every planner call.  However, the planner() hook is still
there for those who are insistent.

To test the code, I made up a silly little proof-of-concept "advisor" that
just checks to see if 2-column indexes would be more helpful if the column
order were switched.  It's incomplete because I didn't do anything about
printing out a nice explanation of what the hypothetical index is.

Comments, objections?

            regards, tom lane

regression=# create table fooey as select unique1,unique2 from tenk1;
SELECT
regression=# create index fooeyi on fooey(unique1,unique2);
CREATE INDEX
regression=# analyze fooey;
ANALYZE
regression=# explain select * from fooey order by unique2,unique1;
                           QUERY PLAN
-----------------------------------------------------------------
 Sort  (cost=809.39..834.39 rows=10000 width=8)
   Sort Key: unique2, unique1
   ->  Seq Scan on fooey  (cost=0.00..145.00 rows=10000 width=8)
(3 rows)

regression=# explain select * from fooey where unique2 in (1,2,3);
                      QUERY PLAN
-------------------------------------------------------
 Seq Scan on fooey  (cost=0.00..182.50 rows=3 width=8)
   Filter: (unique2 = ANY ('{1,2,3}'::integer[]))
(2 rows)

regression=# load '/home/tgl/pgsql/advisor';
LOAD
regression=# explain select * from fooey order by unique2,unique1;
                                       QUERY PLAN
----------------------------------------------------------------------------------------
 Sort  (cost=809.39..834.39 rows=10000 width=8)
   Sort Key: unique2, unique1
   ->  Seq Scan on fooey  (cost=0.00..145.00 rows=10000 width=8)

 Plan with hypothetical indexes:
 Index Scan using <hypothetical index> on fooey  (cost=0.00..376.00 rows=10000 width=8)
(6 rows)

regression=# explain select * from fooey where unique2 in (1,2,3);
                                     QUERY PLAN
------------------------------------------------------------------------------------
 Seq Scan on fooey  (cost=0.00..182.50 rows=3 width=8)
   Filter: (unique2 = ANY ('{1,2,3}'::integer[]))

 Plan with hypothetical indexes:
 Bitmap Heap Scan on fooey  (cost=12.78..22.49 rows=3 width=8)
   Recheck Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
   ->  Bitmap Index Scan on <hypothetical index>  (cost=0.00..12.77 rows=3 width=0)
         Index Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
(8 rows)
#include "postgres.h"

#include "fmgr.h"
#include "commands/explain.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"


PG_MODULE_MAGIC;

void        _PG_init(void);
void        _PG_fini(void);

static void my_get_relation_info(PlannerInfo *root, Oid relationObjectId,
                                 bool inhparent, RelOptInfo *rel);
static const char *my_explain_get_index_name(Oid indexId);


/*
 * Main hook to take over control during EXPLAIN of a query
 */
static void
my_ExplainOneQuery(Query *query, ExplainStmt *stmt, const char *queryString,
                   ParamListInfo params, TupOutputState *tstate)
{
    Query    *savequery = NULL;
    PlannedStmt *plan;

    /*
     * Because planner scribbles on its input, we have to make a copy if
     * we want to plan twice.
     */
    if (!stmt->analyze)
        savequery = copyObject(query);

    /* plan the query */
    plan = planner(query, 0, params);

    /* run it (if needed) and produce output */
    ExplainOnePlan(plan, params, stmt, tstate);

    /* if not EXPLAIN ANALYZE, try it with hypothetical indexes */
    if (!stmt->analyze)
    {
        PG_TRY();
        {
            PlannedStmt *hplan;

            /* Enable special processing */
            get_relation_info_hook = my_get_relation_info;
            explain_get_index_name_hook = my_explain_get_index_name;

            /* plan the query */
            hplan = planner(savequery, 0, params);

            /* if we got a better plan, print it */
            if (hplan->planTree->total_cost < plan->planTree->total_cost)
            {
                do_text_output_oneline(tstate, ""); /* separator line */
                do_text_output_oneline(tstate, "Plan with hypothetical indexes:");
                ExplainOnePlan(hplan, params, stmt, tstate);
            }
        }
        PG_CATCH();
        {
            /* Make sure hooks get cleared on error exit */
            get_relation_info_hook = NULL;
            explain_get_index_name_hook = NULL;
            PG_RE_THROW();
        }
        PG_END_TRY();
        get_relation_info_hook = NULL;
        explain_get_index_name_hook = NULL;
    }
}


/*
 * Get control during planner's get_relation_info() function, which sets up
 * a RelOptInfo struct based on the system catalog contents.  We can modify
 * the struct contents to cause the planner to work with a hypothetical
 * situation rather than what's actually in the catalogs.
 *
 * This simplistic example looks for two-column indexes on any two columns
 * (a,b), and sets up a hypothetical index in the other column order (b,a).
 */
static void
my_get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
                     RelOptInfo *rel)
{
    ListCell   *ilist;

    /* Do nothing for an inheritance parent RelOptInfo */
    if (inhparent)
        return;

    foreach(ilist, rel->indexlist)
    {
        IndexOptInfo *oldindex = (IndexOptInfo *) lfirst(ilist);

        if (oldindex->ncolumns == 2)
        {
            IndexOptInfo *info;
            int            ncolumns = oldindex->ncolumns;
            int            i;
            ListCell   *lc;

            info = makeNode(IndexOptInfo);

            /* flat-copy as many fields as we can */
            memcpy(info, oldindex, sizeof(IndexOptInfo));

            /*
             * In this toy example we just assign all hypothetical indexes
             * OID 0, and the explain_get_index_name hook just prints
             * <hypothetical index>.  In a realistic situation we'd probably
             * assume that OIDs smaller than, say, 100 are never the OID of
             * any real index, allowing us to identify one of up to 100
             * hypothetical indexes per plan.  Then we'd need to save aside
             * some state data that would let the explain hooks print info
             * about the selected index.
             */
            info->indexoid = InvalidOid;

            /*
             * Need to make opfamily array large enough to put a terminating
             * zero at the end.
             */
            info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
            info->opfamily = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1));
            /* initialize these to zeroes in case index is unordered */
            info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
            info->revsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
            info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);

            /* Reverse the order of the per-column arrays */
            for (i = 0; i < ncolumns; i++)
            {
                info->indexkeys[i] = oldindex->indexkeys[1 - i];
                info->opfamily[i] = oldindex->opfamily[1 - i];
                info->fwdsortop[i] = oldindex->fwdsortop[1 - i];
                info->revsortop[i] = oldindex->revsortop[1 - i];
                info->nulls_first[i] = oldindex->nulls_first[1 - i];
            }

            /* must reverse the order of the indexprs list, too */
            info->indexprs = NIL;
            foreach(lc, oldindex->indexprs)
            {
                Node *node = lfirst(lc);

                info->indexprs = lcons(node, info->indexprs);
            }

            /*
             * Put the phony IndexOptInfo on the front of the list, so that
             * we don't get into an infinite loop.
             */
            rel->indexlist = lcons(info, rel->indexlist);
        }
    }
}


/*
 * Hook to let EXPLAIN print something for a nonexistent index
 *
 * This is too simplistic for real-world use.
 */
static const char *
my_explain_get_index_name(Oid indexId)
{
    if (indexId == InvalidOid)
        return "<hypothetical index>";

    return NULL;                /* allow default behavior */
}


/*
 * _pg_init()            - library load-time initialization
 *
 * DO NOT make this static nor change its name!
 */
void
_PG_init(void)
{
    /* Get into the hooks we need to be in all the time */
    ExplainOneQuery_hook = my_ExplainOneQuery;
}


/*
 * _PG_fini()            - library unload-time finalization
 *
 * DO NOT make this static nor change its name!
 */
void
_PG_fini(void)
{
    /* Get out of all the hooks (just to be sure) */
    ExplainOneQuery_hook = NULL;
    get_relation_info_hook = NULL;
    explain_get_index_name_hook = NULL;
}

Attachment

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: Configure template change to use SysV Semaphors on darwin
Next
From: Joachim Wieland
Date:
Subject: cluster test