Re: best way to fetch next/prev record based on index - Mailing list pgsql-performance

From Greg Stark
Subject Re: best way to fetch next/prev record based on index
Date
Msg-id 87isc8txg6.fsf@stark.xeocode.com
Whole thread Raw
In response to Re: best way to fetch next/prev record based on index  (Greg Stark <gsstark@mit.edu>)
List pgsql-performance
Greg Stark <gsstark@MIT.EDU> writes:

> Fixing it to write out complex boolean expressions wouldn't be too hard, but
> I'm not clear it would be worth it, since I suspect the end result would be as
> the comment indicates, to introduce a new runtime node.

Just to prove that it isn't really all that hard, I took a stab at doing this.
This is basically only my second attempt to write even trivial bits of server
backend code so I certainly don't suggest anyone try using this code.

In fact it doesn't quite compile, because I have a bit of confusion between
the char *oprname and List *opname variables. Now I could clear that up, but
I'm missing one piece of the puzzle. To make it work I do need a way to
construct a List *opname from ">" or "=" and I don't know how to do that.

I think that's all I'm missing, but perhaps in the morning I'll look at this
code and wonder "what was I thinking?!"


This approach won't get the optimizer to actually use an index for these
comparisons, but it will fix the semantics to match the spec. Later we can
either improve the optimizer to detect expressions like this (which I think
would be cooler since some users may write them by hand and not use the
row-expression approach, but I don't see how to do it), or introduce a new
run-time node and have the optimizer handle it. But at least we won't have to
worry about backwards-compatibility issues with the semantics changing.

Oh, I tried to stick to the style, but sometimes I couldn't help myself. I
suppose I would have to fix up the style the rest of the way if I got it
working and you wanted a patch to apply.


/*
 * Transform a "row op row" construct
 */
static Node *
make_row_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree)
{
    Node       *result = NULL;
    RowExpr    *lrow,
               *rrow;
    List       *largs,
               *rargs;
    char       *oprname;

    /* Inputs are untransformed RowExprs */
    lrow = (RowExpr *) transformExpr(pstate, ltree);
    rrow = (RowExpr *) transformExpr(pstate, rtree);
    Assert(IsA(lrow, RowExpr));
    Assert(IsA(rrow, RowExpr));
    largs = lrow->args;
    rargs = rrow->args;

    if (list_length(largs) != list_length(rargs))
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("unequal number of entries in row expression")));

    oprname = strVal(llast(opname));

    if (strcmp(oprname, "=") == 0)
    {
        result = make_row_op_simple(pstate, "=", largs, rargs);
    }

    else if (strcmp(oprname, "<>") == 0)
    {
        result = make_row_op_simple(pstate, "<>", largs, rargs);
    }

    else if ((strcmp(oprname, "<") == 0) ||
             (strcmp(oprname, ">") == 0))
    {
        result = make_row_op_complex(pstate, oprname, largs, rargs);
    }

    /* alternatively these last two could just create negated < and >
     * expressions. Which is better depends on whether the extra clause
     * confuses the optimizer more or less than having to push the NOTs down
     */

    else if (strcmp(oprname, ">=") == 0)
    {
        Node *branch = make_row_op_simple(pstate, "=", largs, rargs);
        result = make_row_op_complex(pstate, ">", largs, rargs);
        result = (Node *) makeBoolExpr(OR_EXPR, list_make2(result, branch));
    }

    else if (strcmp(oprname, "<=") == 0)
    {
        Node *branch = make_row_op_simple(pstate, "=", largs, rargs);
        result = make_row_op_complex(pstate, "<", largs, rargs);
        result = (Node *) makeBoolExpr(OR_EXPR, list_make2(result, branch));
    }


    else
    {
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("operator %s is not supported for row expressions",
                        oprname)));
    }

    return result;
}

/*
 * Handle something like
 * (A,B,C) = (X,Y,Z)
 * By constructing
 * (A=X) AND (B=Y) AND (C=Z)
 *
 */

static Node *
make_row_op_simple(ParseState *pstate, char *oprname,
                   List *largs, List *rargs)
{
    ListCell *l, *r;
    BoolExprType boolop;
    Node *result;

    boolop = strcmp(oprname, "<>")==0 ? OR_EXPR : AND_EXPR;

    forboth(l, largs, r, rargs)
    {
        Node    *larg = (Node *) lfirst(l);
        Node    *rarg = (Node *) lfirst(r);
        Node    *cmp;

        cmp = (Node *) make_op(pstate, opname, larg, rarg);
        cmp = coerce_to_boolean(pstate, cmp, "row comparison");
        if (result == NULL)
            result = cmp;
        else
            result = (Node *) makeBoolExpr(boolop,
                                           list_make2(result, cmp));
    }

    if (result == NULL)
    {
        /* zero-length rows?  Generate constant TRUE or FALSE */
        if (boolop == AND_EXPR)
            result = makeBoolConst(true, false);
        else
            result = makeBoolConst(false, false);
    }

    return result;
}


/*
 * Handles something like:
 * (A,B,C) > (X,Y,Z)
 *
 * By constructing something like:
 * ( ( A > X) OR (A=X AND B>Y) OR (A=X AND B=Y AND C>Z) )
 *
 */

static Node *
make_row_op_complex(ParseState *pstate, char *oprname,
                    List *largs, List *rargs)
{
    ListCell   *l, *outer_l,
               *r, *outer_r;
    Node *result;

    forboth(outer_l, largs, outer_r, rargs)
    {
        Node *outer_larg = (Node *) lfirst(outer_l);
        Node *outer_rarg = (Node *) lfirst(outer_r);
        Node *branch = NULL;
        Node *cmp;

        /* all leading elements have to be equal */
        forboth(l, largs, r, rargs)
        {
            Node    *larg = (Node *) lfirst(l);
            Node    *rarg = (Node *) lfirst(r);
            Node    *cmp;

            if (larg == outer_larg) {
                break;
            }

            cmp = (Node *) make_op(pstate, "=", larg, rarg);
            cmp = coerce_to_boolean(pstate, cmp, "row comparison");
            if (branch == NULL)
                branch = cmp;
            else
                branch = (Node *) makeBoolExpr(AND_EXPR,
                                               list_make2(branch, cmp));
        }

        /* trailing element has to be strictly greater or less than */

        cmp = (Node *) make_op(pstate, oprname, outer_larg, outer_rarg);
        cmp = coerce_to_boolean(pstate, cmp, "row comparison");
        branch = branch==NULL ? cmp : (Node *) makeBoolExpr(AND_EXPR, list_make2(branch, cmp));

        result = result==NULL ? branch : (Node *) makeBoolExpr(OR_EXPR, list_make2(result, branch));
    }

    if (result == NULL)
    {
        /* zero-length rows?  Generate constant FALSE */
        result = makeBoolConst(true, false);
    }

    return result;
}

--
greg

pgsql-performance by date:

Previous
From: Greg Stark
Date:
Subject: Re: best way to fetch next/prev record based on index
Next
From: "Merlin Moncure"
Date:
Subject: Re: best way to fetch next/prev record based on index