Re: GEQO: ERX - Mailing list pgsql-hackers
From | Adriano Lange |
---|---|
Subject | Re: GEQO: ERX |
Date | |
Msg-id | 4A132919.3040708@gmail.com Whole thread Raw |
In response to | Re: GEQO: ERX (Adriano Lange <alange0001@gmail.com>) |
List | pgsql-hackers |
Adriano Lange escreveu: > I implemented the 2PO algorithm last month but I didn't have much time > to do an extensive test and to comment all code. The code was posted in > this list in a previous thread. In that occasion, I was interested in a > kind of cache structure to avoid the constructing a complete RelOptInfo > from scratch every time when the cheapest total_cost must be calculated > (this occur in GEQO). > > I’m sending a patch for the 8.3 release. I forgot it. > > I also changed some GUC variables to facilitate some tests: > (remove) geqo > (remove) geqo_threshold > (add) ljqo_enable (bool) – activate Large Join Query Optimizer > (add) ljqo_algorithm {geqo|twopo} > (add) ljqo_threshold (int) – like geqo_threshold > (add) twopo_heuristic_states (bool) – initial heuristic states > (add) twopo_ii_stop (int) – II phase loop > (add) twopo_sa_phase (bool) – enable SA phase > (add) twopo_sa_initial_temperature (float) > (add) twopo_sa_temperature_reduction (float) > (add) twopo_sa_equilibrium (int) > > In my little tests, this algorithm seems equal or worse than geqo, > except when using heuristic in order to bias the initial state. Maybe > some tunings are needed but I prefer spend yet some time reading more > about the compressed annealing, cited in TODO list. Anyway, I think that > to build another annealing-like algorithm might be easier if some > structures and functions in 2PO source code are correct. > > Sincerely, > > Adriano Lange > Index: src/include/optimizer/ljqo.h =================================================================== --- src/include/optimizer/ljqo.h (revisão 0) +++ src/include/optimizer/ljqo.h (revisão 14) @@ -0,0 +1,39 @@ +/* + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * contributed by: + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * Adriano Lange * C3SL - Centro de Computação * + * adriano@c3sl.ufpr.br * Científica e Software Livre / * + * * Departamento de Informática / * + * * Universidade Federal do Paraná * + * * Curitiba, Brasil * + * * * + * * http://www.c3sl.ufpr.br * + * * * + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + */ + +#ifndef LJQO_H +#define LJQO_H + +#include "nodes/relation.h" + +typedef enum ljqo_algorithms { + ljqo_a_geqo, + ljqo_a_twopo, +} ljqo_algorithms; + +#define DEFAULT_LJQO_ENABLE true +#define MIN_LJQO_THRESHOLD 2 +#define DEFAULT_LJQO_THRESHOLD 10 +#define DEFAULT_LJQO_ALGORITHM ljqo_a_geqo +#define DEFAULT_LJQO_ALGORITHM_STR "geqo" + +const char * assign_ljqo_algorithm(const char *newval, bool doit); +RelOptInfo * ljqo(PlannerInfo *root, int levels_needed, List *initial_rels); + + +#endif /* LJQO_H */ Index: src/include/optimizer/twopo.h =================================================================== --- src/include/optimizer/twopo.h (revisão 0) +++ src/include/optimizer/twopo.h (revisão 14) @@ -0,0 +1,52 @@ +/* + * twopo.h + * Two Phase Optimization + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * contributed by: + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * Adriano Lange * C3SL - Centro de Computação * + * adriano@c3sl.ufpr.br * Científica e Software Livre / * + * * Departamento de Informática / * + * * Universidade Federal do Paraná * + * * Curitiba, Brasil * + * * * + * * http://www.c3sl.ufpr.br * + * * * + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * + * Implementation based on: + * Y. E. Ioannidis and Y. Kang, "Randomized algorithms for optimizing + * large join queries," SIGMOD Rec., vol. 19, no. 2, pp. 312–321, 1990. + */ + +#ifndef TWOPO_H +#define TWOPO_H + +#include "nodes/relation.h" + +#define DEFAULT_TWOPO_HEURISTIC_STATES true +#define DEFAULT_TWOPO_II_STOP 10 +#define MIN_TWOPO_II_STOP 1 +#define DEFAULT_TWOPO_SA_PHASE true +#define DEFAULT_TWOPO_SA_INITIAL_TEMPERATURE 0.2 +#define MIN_TWOPO_SA_INITIAL_TEMPERATURE 0.01 +#define DEFAULT_TWOPO_SA_TEMPERATURE_REDUCTION 0.95 +#define MIN_TWOPO_SA_TEMPERATURE_REDUCTION 0.1 +#define DEFAULT_TWOPO_SA_EQUILIBRIUM 16 +#define MIN_TWOPO_SA_EQUILIBRIUM 1 + +extern bool twopo_heuristic_states; +extern int twopo_ii_stop; +extern bool twopo_sa_phase; +extern double twopo_sa_initial_temperature; // T = X * cost(S0) +extern double twopo_sa_temperature_reduction; // Tnew = X * Told +extern int twopo_sa_equilibrium; // E * Joins + + +extern RelOptInfo *twopo(PlannerInfo *root, + int number_of_rels, List *initial_rels); + +#endif /* TWOPO_H */ Index: src/include/optimizer/paths.h =================================================================== --- src/include/optimizer/paths.h (revisão 2) +++ src/include/optimizer/paths.h (revisão 14) @@ -20,8 +20,8 @@ /* * allpaths.c */ -extern bool enable_geqo; -extern int geqo_threshold; +extern bool ljqo_enable; +extern int ljqo_threshold; /* Hook for plugins to replace standard_join_search() */ typedef RelOptInfo *(*join_search_hook_type) (PlannerInfo *root, Index: src/include/utils/guc_tables.h =================================================================== --- src/include/utils/guc_tables.h (revisão 2) +++ src/include/utils/guc_tables.h (revisão 14) @@ -55,6 +55,8 @@ QUERY_TUNING, QUERY_TUNING_METHOD, QUERY_TUNING_COST, + QUERY_TUNING_LJQO, + QUERY_TUNING_TWOPO, QUERY_TUNING_GEQO, QUERY_TUNING_OTHER, LOGGING, Index: src/backend/optimizer/twopo/twopo.c =================================================================== --- src/backend/optimizer/twopo/twopo.c (revisão 0) +++ src/backend/optimizer/twopo/twopo.c (revisão 14) @@ -0,0 +1,706 @@ +/* + * twopo.c + * Two Phase Optimization + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * contributed by: + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * Adriano Lange * C3SL - Centro de Computação * + * adriano@c3sl.ufpr.br * Científica e Software Livre / * + * * Departamento de Informática / * + * * Universidade Federal do Paraná * + * * Curitiba, Brasil * + * * * + * * http://www.c3sl.ufpr.br * + * * * + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * + * Implementation based on: + * Y. E. Ioannidis and Y. Kang, "Randomized algorithms for optimizing + * large join queries," SIGMOD Rec., vol. 19, no. 2, pp. 312–321, 1990. + */ + +#include "postgres.h" + +#include <math.h> + +#include "optimizer/twopo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/joininfo.h" +#include "parser/parsetree.h" +#include "utils/memutils.h" + +//#define TWOPO_DEBUG + +#define TWOPO_CACHE_PLANS + +#define nodeCost(node) node->rel->cheapest_total_path->total_cost + +#define swapValues(type,v1,v2) \ + { \ + type aux = v1; \ + v1 = v2; \ + v2 = aux; \ + } + +// heuristic initial states (see makeInitialState()) +bool twopo_heuristic_states = DEFAULT_TWOPO_HEURISTIC_STATES; +// number of initial states in Iterative Improvement (II) phase +int twopo_ii_stop = DEFAULT_TWOPO_II_STOP; +// enable Simulated Annealing (SA) phase +bool twopo_sa_phase = DEFAULT_TWOPO_SA_PHASE; +// SA initial temperature: T = X * cost( min_state_from_ii_phase ) +double twopo_sa_initial_temperature = DEFAULT_TWOPO_SA_INITIAL_TEMPERATURE; +// SA temperature reduction: Tnew = X * Told +double twopo_sa_temperature_reduction = DEFAULT_TWOPO_SA_TEMPERATURE_REDUCTION; +// SA inner loop equilibrium: for( i=0; i < E * Joins ; i++ ) +int twopo_sa_equilibrium = DEFAULT_TWOPO_SA_EQUILIBRIUM; + +/** + * treeNode: + * Optimizer's main struct. + * Represent either a base relation or a binary join operation. + * It has cache support (see joinNodes()). + */ +typedef struct treeNode { + RelOptInfo *rel; + List *parents; + struct treeNode *inner_child; + struct treeNode *outer_child; + // only for two-level nodes: (used in buildBushyTree()) + int head_idx; + int tail_idx; +} treeNode; + +/** + * tempCtx: + * Temporary memory context struct. + * Store main informations needed context switch. + */ +typedef struct tempCtx { + MemoryContext mycontext; + MemoryContext oldcxt; + int savelength; + struct HTAB *savehash; +} tempCtx; + +static treeNode* +createTreeNodes( int items ) +{ + return (treeNode*) palloc0(items * sizeof(treeNode)); +} + +static treeNode* +buildOneLevelNodes( List *initial_rels, int levels_needed ) +{ + int i = 0; + ListCell *x; + RelOptInfo *rel; + treeNode *oneLevelNodes = createTreeNodes( levels_needed ); + + foreach(x, initial_rels) + { + rel = (RelOptInfo *) lfirst(x); + oneLevelNodes[i++].rel = rel; + } + + return oneLevelNodes; +} + +static treeNode* +joinNodes( PlannerInfo *root, treeNode *inner_node, treeNode *outer_node ) +{ + treeNode *new_node = NULL; + RelOptInfo *jrel; + +# ifdef TWOPO_CACHE_PLANS + if ( inner_node->parents ) { + ListCell *x; + treeNode *node; + foreach( x, inner_node->parents ) + { + node = lfirst(x); + if( node->inner_child == outer_node + || node->outer_child == outer_node ) + { + new_node = node; + break; + } + } + } +# endif + + if ( ! new_node ) { + if( bms_overlap(inner_node->rel->relids, outer_node->rel->relids ) ){ + elog( ERROR, "joinNodes(): Overlap error!"); + } + jrel = make_join_rel(root, inner_node->rel, outer_node->rel); + if (jrel) { + set_cheapest( jrel ); + new_node = createTreeNodes(1); + new_node->rel = jrel; + new_node->inner_child = inner_node; + new_node->outer_child = outer_node; + inner_node->parents = lcons(new_node, + inner_node->parents); + outer_node->parents = lcons(new_node, + outer_node->parents); + } + + } + + return new_node; +} + +static List* +buildTwoLevelNodes( PlannerInfo *root, + treeNode *oneLevelNodes, int numOneLevelNodes) +{ + treeNode *new_node; + List *twolevelnodes = NULL; + int i,j; + RelOptInfo *rel1, + *rel2; + + for( i=0; i<numOneLevelNodes; i++ ) { + rel1 = oneLevelNodes[i].rel; + + for( j=i; j<numOneLevelNodes; j++ ) { + rel2 = oneLevelNodes[j].rel; + + if (!bms_overlap(rel1->relids, rel2->relids) && + (have_relevant_joinclause(root, rel1, rel2) || + have_join_order_restriction(root, rel1, rel2))) + { + new_node = joinNodes( root, oneLevelNodes+i, oneLevelNodes+j ); + if( new_node ){ + new_node->head_idx = i; + new_node->tail_idx = j; + twolevelnodes = lcons( new_node, twolevelnodes ); + } + } + } + if( ! oneLevelNodes[i].parents ) { + for( j=0; j<numOneLevelNodes; j++ ) { + if( i == j ) + continue; + rel2 = oneLevelNodes[j].rel; + + new_node = joinNodes( root, oneLevelNodes+i, oneLevelNodes+j ); + if( new_node ){ + new_node->head_idx = i; + new_node->tail_idx = j; + twolevelnodes = lcons( new_node, twolevelnodes ); + } + } + } + } + + return twolevelnodes; +} + +static inline int +find_root(int idx, int *parent_list) +{ + while( parent_list[idx] != idx ) + idx = parent_list[idx]; + + return idx; +} + +static void +join_trees(int *root1, int *root2, int *weight, int *parent, int *numTrees) +{ + if( weight[*root2]>weight[*root1] ){ + swapValues(int, *root1, *root2 ); + } + weight[*root1] += weight[*root2]; + parent[*root2] = parent[*root1]; + (*numTrees)--; +} + +static treeNode* +buildBushyTree(PlannerInfo *root, treeNode *leafNodes, int numLeaves, + treeNode **edgeList, int numEdges) +{ + treeNode **subtrees; /* partial plans of each tree */ + int i, + numSubtrees, /* number of trees */ + *parent, /* parent list. Used for tree detection */ + *weight, /* weight list. Used to decide the new root in join + * trees */ + last_join = -1; /* finally, point the index of final plan in + * subplan_list */ + + parent = (int*) palloc((numLeaves) * sizeof(int)); + weight = (int*) palloc((numLeaves) * sizeof(int)); + subtrees = (treeNode**) palloc((numLeaves) * sizeof(treeNode*)); + /* + * Initializing values... + */ + numSubtrees = numLeaves; + for (i=0; i < numLeaves; i++) + { + parent[i] = i; // todos os vértices são raízes de árvores + weight[i] = 1; // todas as árvores têm 1 vértice + subtrees[i] = NULL; + } + /* + * For each edge or while exists more that 1 sub-tree. + * Verify whether the edge belong to minimal spanning tree. + */ + for (i=0; i < numEdges && numSubtrees > 1; i++) // edge-by-edge loop + { + int root1, root2; + + /* + * Test the root of each relation in selected edge. + */ + root1 = find_root(edgeList[i]->head_idx, parent); + root2 = find_root(edgeList[i]->tail_idx, parent); + /* + * If both roots is not the same, the edge belong to the minimal + * spanning tree. Join the trees in parent[] and the execution plan + * in subplan_list[]. + */ + if (root1 != root2) + { + int other_root; + + /* + * Join two trees. root1 is the root of new tree. + */ + join_trees(&root1, &root2, weight, parent, &numSubtrees); + last_join = root1; + other_root = root2; + + /* + * Juntando planos de execução: + */ + if( ! subtrees[last_join] ){ + /* + * First join of tree. + */ + subtrees[last_join] = edgeList[i]; + } else if( ! subtrees[other_root] ) { + /* + * Left-deep join. + * Join one relation to a composed plan. + */ + treeNode *new_node; + new_node = joinNodes( root, subtrees[last_join], + leafNodes + other_root ); + if( new_node ){ + subtrees[last_join] = new_node; + } else { + elog(ERROR, "Não foi possível fazer left-deep join."); + } + } else { + /* + * Bushy-join. + * Join two composed plans. + */ + treeNode *new_node; + new_node = joinNodes(root, subtrees[last_join], + subtrees[other_root]); + if( new_node ){ + subtrees[last_join] = new_node; + } else { + elog(ERROR, "Não foi possível fazer bushy-join."); + } + } + } + } + + if( last_join == -1 ){ // exception + elog(ERROR,"Não foi possível gerar o plano."); + return NULL; + } + + return subtrees[last_join]; +} + +static void +randomState(treeNode **newState/*OUT*/, + treeNode **stateList/*IN*/, int stateSize/*IN*/) +{ + int i,j, + count = stateSize, + item; + treeNode **list; + + list = (treeNode**) palloc(stateSize * sizeof(treeNode*)); + for ( i=0; i<stateSize; i++ ){ + list[i] = stateList[i]; + } + for ( i=0; i<stateSize; i++ ){ + item = random()%count--; + newState[i] = list[item]; + for( j=item; j<count; j++ ){ + list[j] = list[j+1]; + } + } + pfree(list); +} + +static tempCtx* +createTemporaryContext( PlannerInfo *root ) +{ + tempCtx *ctx; + ctx = (tempCtx*) palloc(sizeof(tempCtx)); + + ctx->mycontext = AllocSetContextCreate(CurrentMemoryContext, + "TwoPO", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + ctx->oldcxt = MemoryContextSwitchTo(ctx->mycontext); + ctx->savelength = list_length(root->join_rel_list); + ctx->savehash = root->join_rel_hash; + + return ctx; +} + +static void +restoreOldContext( PlannerInfo *root, tempCtx *ctx, + treeNode **edgeList, int numEdges ) +{ + int i; + + root->join_rel_list = list_truncate(root->join_rel_list, + ctx->savelength); + root->join_rel_hash = ctx->savehash; + MemoryContextSwitchTo(ctx->oldcxt); + MemoryContextDelete(ctx->mycontext); + pfree(ctx); + + /* + * Cleaning parent nodes in edgeList deleted by MemoryContextDelete() + */ + for( i=0; i<numEdges; i++ ){ + edgeList[i]->parents = NULL; + } +} + +static void +neighbordState(treeNode** newState/*OUT*/, + treeNode** state/*IN*/, int stateSize/*IN*/) +{ + int i; + int item; + int method; + treeNode *aux; + + if( stateSize < 2 ) { + elog( ERROR, "neighbordState(): stateSize invalid ( < 2 )."); + } else if( stateSize == 2 ) { + newState[1] = state[0]; + newState[0] = state[1]; + } else { + for ( i=0; i<stateSize; i++ ){ + newState[i] = state[i] ; + } + + item = random() % stateSize; + method = random() % 2; + + switch( method ) { + case 0: // swap method + aux = newState[item]; + newState[item] = newState[(item+1)%stateSize]; + newState[(item+1)%stateSize] = aux; + break; + default: // 3cycle method (simple) + aux = newState[item]; + newState[item] = newState[(item+2)%stateSize]; + newState[(item+2)%stateSize] = newState[(item+1)%stateSize]; + newState[(item+1)%stateSize] = aux; + } + } +} + +static Cost +stateCost(PlannerInfo *root, treeNode *leafNodes, int numLeaves, + treeNode **state, int stateSize) +{ + treeNode *node; + node = buildBushyTree( root, leafNodes, numLeaves, + state, stateSize); + return nodeCost(node); +} + +static Cost +improveState(PlannerInfo *root/*IN*/, + treeNode *leafNodes/*IN*/, int numLeaves/*IN*/, + treeNode **currentState/*IN*/, int stateSize/*IN*/, + treeNode **newState/*OUT*/) +{ + treeNode **new_state; + treeNode **cheapest_state; + Cost new_cost; + Cost cheapest_cost; + int i; + int local_minimum = stateSize; + + new_state = (treeNode**) palloc(stateSize * sizeof(treeNode*)); + cheapest_state = (treeNode**) palloc(stateSize * sizeof(treeNode*)); + + for( i=0; i<stateSize; i++ ) + cheapest_state[i] = currentState[i]; + cheapest_cost = stateCost( root, leafNodes, numLeaves, + cheapest_state, stateSize); + + i = 0; + while( i < local_minimum ){ + neighbordState(new_state,cheapest_state,stateSize); + new_cost = stateCost( root, leafNodes, numLeaves, + new_state, stateSize); + if( new_cost < cheapest_cost ) + { + swapValues(treeNode**,cheapest_state,new_state); + cheapest_cost = new_cost; + i=0; + } else { + i++; + } + } + + for( i=0; i<stateSize; i++ ){ + newState[i] = cheapest_state[i]; + } + pfree(new_state); + pfree(cheapest_state); + + return cheapest_cost; +} + +static void +prepareOptimizer( PlannerInfo *root, int levels_needed, List *initial_rels, + treeNode **leafNodes/*OUT*/, + treeNode ***edgeList/*OUT*/, int *numEdges/*OUT*/) +{ + ListCell *x; + int i; + List *twoLevelNodesList; + treeNode *node; + + *leafNodes = buildOneLevelNodes(initial_rels,levels_needed); + + twoLevelNodesList = buildTwoLevelNodes(root, *leafNodes, levels_needed); + if( !twoLevelNodesList ) { + elog(ERROR, "prepareOptimizer(): No two-level joins found."); + } + + *numEdges = list_length( twoLevelNodesList ); + + *edgeList = (treeNode**) palloc((*numEdges) * sizeof(treeNode*)); + i = 0; + foreach( x, twoLevelNodesList ){ + node = lfirst(x); + (*edgeList)[i++] = node; + } +} + +static int +initialStateHeuristic_1(const void* xin, const void* yin) +{ + treeNode **x,**y; + Cost cx, cy; + + x=(treeNode**) xin; + y=(treeNode**) yin; + cx=nodeCost((*x)); + cy=nodeCost((*y)); + + if (cx > cy) + return 1; + else if (cx < cy) + return -1; + + return 0; +} + +static void +makeInitialState(treeNode **newState /*OUT*/, + treeNode **edgeList/*IN*/, int numEdges/*IN*/, int iteratorIndex/*IN*/ ) +{ + int i; + + if( twopo_heuristic_states && iteratorIndex == 0 ) { // initial state bias: + //sort edges using heuristic 1 + for( i=0; i<numEdges; i++ ) + newState[i] = edgeList[i]; + qsort(newState,numEdges,sizeof(treeNode**),initialStateHeuristic_1); + } else { // random states: + randomState( newState, edgeList, numEdges ); + } +} + +inline static bool +saProbability( Cost delta, double temperature ) +{ + double e = exp( - delta / temperature ); + int r = random() % 100; +# ifndef TWOPO_DEBUG + return r <= ((int)(100.0*e)); +# else + if ( r <= ((int)(100.0*e)) ) { + fprintf(stderr, " sa_prob_ok" ); + return true; + } + return false; +# endif +} + +#ifdef TWOPO_DEBUG + +static void +print_state(treeNode **edgeList, int numEdges, + treeNode *leafNodes, int numLeaves) +{ + int i; + for( i=0; i<numEdges; i++ ){ + fprintf(stderr, "(%d,%d) ", + (int)(edgeList[i]->inner_child - leafNodes), + (int)(edgeList[i]->outer_child - leafNodes)); + } + fprintf(stderr,"\n"); +} + +static void +verificar(treeNode **state, treeNode **edgeList, int numEdges) +{ + int i,j; + for( i=0; i<numEdges; i++ ){ + for( j=0; j<numEdges; j++ ){ + if( state[i] == edgeList[j] ) + break; + } + if( j >= numEdges ) + fprintf(stderr, " ERRO:edge_não_encontrado"); + } +} + +static void +verificar_iguais(treeNode **state, treeNode **edgeList, int numEdges) +{ + int i; + for( i=0; i<numEdges; i++ ){ + if( state[i] != edgeList[i] ) + return; + } + fprintf(stderr, " ERRO:states_iguais"); +} + +#endif + + +RelOptInfo * +twopo(PlannerInfo *root, int levels_needed, List *initial_rels) +{ + tempCtx *ctx; + treeNode *leafNodes; + treeNode **edgeList; + int numEdges; + + treeNode **min_state; + treeNode **new_state; + treeNode **improved_state; + Cost min_cost = 0; + Cost improved_cost = 0; + treeNode *node; + int i; + + prepareOptimizer(root, levels_needed, initial_rels, + &leafNodes, &edgeList, &numEdges); + + if( numEdges == 1 ) + return edgeList[0]->rel; + + min_state = (treeNode**) palloc(numEdges * sizeof(treeNode*)); + new_state = (treeNode**) palloc(numEdges * sizeof(treeNode*)); + improved_state = (treeNode**) palloc(numEdges * sizeof(treeNode*)); + + ///////////////// Temporary memory context area //////////////////////// + ctx = createTemporaryContext( root ); + + ////////////// II phase ////////////// + for( i=0; i<twopo_ii_stop; i++ ){ + makeInitialState( new_state, edgeList, numEdges, i ); + + improved_cost = improveState( root, leafNodes, levels_needed, + new_state, numEdges, improved_state ); + if( !i || min_cost > improved_cost ) { + swapValues( treeNode**, min_state, improved_state ); + min_cost = improved_cost; + } + } + ////////////// SA phase /////////////// + if( twopo_sa_phase ) { + double temperature = twopo_sa_initial_temperature * (double) min_cost; + int equilibrium = twopo_sa_equilibrium * numEdges; + int stage_count = 0; + Cost new_cost = 0; + Cost delta_cost; + +# ifdef TWOPO_DEBUG + fprintf(stderr, " min_cost:%.1lf", min_cost); +# endif + + improved_cost = min_cost; + for( i=0; i<numEdges; i++ ){ // setting S state + improved_state[i] = min_state[i]; + } + while( temperature >= 1 && stage_count < 5 ){ // frozen condition + + for( i=0; i<equilibrium; i++ ){ + neighbordState( new_state, improved_state, numEdges ); + new_cost = stateCost( root, leafNodes, levels_needed, + new_state, numEdges ); + delta_cost = new_cost - improved_cost; + + if( delta_cost <= 0 || saProbability(delta_cost,temperature) ){ +# ifdef TWOPO_DEBUG + verificar(new_state,edgeList,numEdges); + verificar_iguais(new_state,improved_state,numEdges); +# endif + + swapValues(treeNode**,new_state,improved_state); + improved_cost = new_cost; + + if( improved_cost < min_cost ){ + int j; + for( j=0; j<numEdges; j++ ) { // update min_state + min_state[j] = improved_state[j]; + } + min_cost = improved_cost; + stage_count = 0; +# ifdef TWOPO_DEBUG + fprintf(stderr, " sa_new_min_cost:%.2lf", min_cost); +# endif + } + } + } + + stage_count++; + temperature *= twopo_sa_temperature_reduction; //reducing temperature + } + } + + + restoreOldContext( root, ctx, edgeList, numEdges ); + //////////////// end of temporary memory context area ////////////////// + + // rebuild best state in correct memory context + node = buildBushyTree( root, leafNodes, levels_needed, + min_state, numEdges); + + pfree(min_state); + pfree(new_state); + pfree(improved_state); + + return node->rel; +} Index: src/backend/optimizer/twopo/Makefile =================================================================== --- src/backend/optimizer/twopo/Makefile (revisão 0) +++ src/backend/optimizer/twopo/Makefile (revisão 14) @@ -0,0 +1,30 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# +# Copyright (c) 1994, Regents of the University of California +# +# +# +#------------------------------------------------------------------------- + +subdir = src/backend/optimizer/twopo +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = twopo.o + +all: SUBSYS.o + +SUBSYS.o: $(OBJS) + $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) + +depend dep: + $(CC) -MM $(CFLAGS) *.c >depend + +clean: + rm -f SUBSYS.o $(OBJS) + +ifeq (depend,$(wildcard depend)) +include depend +endif Index: src/backend/optimizer/path/ljqo.c =================================================================== --- src/backend/optimizer/path/ljqo.c (revisão 0) +++ src/backend/optimizer/path/ljqo.c (revisão 14) @@ -0,0 +1,73 @@ +/* + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * contributed by: + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + * Adriano Lange * C3SL - Centro de Computação * + * adriano@c3sl.ufpr.br * Científica e Software Livre / * + * * Departamento de Informática / * + * * Universidade Federal do Paraná * + * * Curitiba, Brasil * + * * * + * * http://www.c3sl.ufpr.br * + * * * + *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + */ + +#define LJQO_DEBUG + +#include "postgres.h" + +#include "optimizer/ljqo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "parser/parsetree.h" + +// LJQO Optimizers +#include "optimizer/geqo.h" +#include "optimizer/twopo.h" + +static ljqo_algorithms ljqo_algorithm = DEFAULT_LJQO_ALGORITHM; + +const char * +assign_ljqo_algorithm(const char *newval, bool doit) +{ + if (pg_strcasecmp(newval, "geqo") == 0) + { + if (doit) + ljqo_algorithm = ljqo_a_geqo; + } + else if (pg_strcasecmp(newval, "twopo") == 0) + { + if (doit) + ljqo_algorithm = ljqo_a_twopo; + } + else + return NULL; + return newval; +} + +RelOptInfo * +ljqo(PlannerInfo *root, int levels_needed, List *initial_rels) +{ + RelOptInfo *ret = NULL; + switch( ljqo_algorithm ){ + case ljqo_a_geqo: +# ifdef LJQO_DEBUG + fprintf(stderr, "GEQO:"); +# endif + ret = geqo(root,levels_needed,initial_rels); + break; + case ljqo_a_twopo: +# ifdef LJQO_DEBUG + fprintf(stderr, "TwoPO:"); +# endif + ret = twopo(root,levels_needed,initial_rels); + break; + default: + elog(ERROR, "Large join query optimizer not defined."); + } + return ret; +} Index: src/backend/optimizer/path/allpaths.c =================================================================== --- src/backend/optimizer/path/allpaths.c (revisão 2) +++ src/backend/optimizer/path/allpaths.c (revisão 14) @@ -12,15 +12,15 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" +//#define OPTIMIZER_DEBUG #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif #include "optimizer/clauses.h" #include "optimizer/cost.h" -#include "optimizer/geqo.h" +#include "optimizer/ljqo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" @@ -32,11 +32,11 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" - /* These parameters are set by GUC */ -bool enable_geqo = false; /* just in case GUC doesn't set it */ -int geqo_threshold; +bool ljqo_enable = DEFAULT_LJQO_ENABLE; +int ljqo_threshold = DEFAULT_LJQO_THRESHOLD; + /* Hook for plugins to replace standard_join_search() */ join_search_hook_type join_search_hook = NULL; @@ -187,7 +187,7 @@ } #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + //debug_print_rel(root, rel); #endif } @@ -620,6 +620,8 @@ List *initial_rels; ListCell *jl; + RelOptInfo *ret; + /* * Count the number of child joinlist nodes. This is the depth of the * dynamic-programming algorithm we must employ to consider all ways of @@ -680,12 +682,18 @@ */ root->initial_rels = initial_rels; - if (join_search_hook) - return (*join_search_hook) (root, levels_needed, initial_rels); - else if (enable_geqo && levels_needed >= geqo_threshold) - return geqo(root, levels_needed, initial_rels); - else - return standard_join_search(root, levels_needed, initial_rels); + fprintf(stderr, "Calling optimizer (rels:%d) ", levels_needed); + if (join_search_hook) { + ret = (*join_search_hook) (root, levels_needed, initial_rels); + } else if ( ljqo_enable && levels_needed >= ljqo_threshold) { + ret = ljqo(root, levels_needed, initial_rels); + } else { + fprintf(stderr, "Standard:"); + ret = standard_join_search(root, levels_needed, initial_rels); + } + fprintf(stderr, " cost: %.2lf\n", ret->cheapest_total_path->total_cost); + + return ret; } } @@ -750,6 +758,11 @@ */ joinitems[lev] = join_search_one_level(root, lev, joinitems); +# ifdef OPTIMIZER_DEBUG + elog(LOG,"standard_join_search: itens em joinitem[%d]:%d", + lev,list_length(joinitems[lev])); +# endif + /* * Do cleanup work on each just-processed rel. */ @@ -761,7 +774,7 @@ set_cheapest(rel); #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + //debug_print_rel(root, rel); #endif } } @@ -1108,8 +1121,18 @@ #ifdef OPTIMIZER_DEBUG +static const char* +get_renation_name(PlannerInfo *root, int relid) +{ + RangeTblEntry *rte; + + Assert(relid <= list_length(root->parse->rtable)); + rte = rt_fetch(relid, root->parse->rtable); + return rte->eref->aliasname; +} + static void -print_relids(Relids relids) +print_relids(PlannerInfo *root, Relids relids) { Relids tmprelids; int x; @@ -1120,7 +1143,7 @@ { if (!first) printf(" "); - printf("%d", x); + printf("[%d]%s", x, get_renation_name(root, x)); first = false; } bms_free(tmprelids); @@ -1207,7 +1230,7 @@ if (path->parent) { printf("("); - print_relids(path->parent->relids); + print_relids(root, path->parent->relids); printf(") rows=%.0f", path->parent->rows); } printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); @@ -1258,7 +1281,7 @@ ListCell *l; printf("RELOPTINFO ("); - print_relids(rel->relids); + print_relids(root, rel->relids); printf("): rows=%.0f width=%d\n", rel->rows, rel->width); if (rel->baserestrictinfo) Index: src/backend/optimizer/path/Makefile =================================================================== --- src/backend/optimizer/path/Makefile (revisão 2) +++ src/backend/optimizer/path/Makefile (revisão 14) @@ -13,7 +13,8 @@ include $(top_builddir)/src/Makefile.global OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o + joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o \ + ljqo.o all: SUBSYS.o Index: src/backend/optimizer/Makefile =================================================================== --- src/backend/optimizer/Makefile (revisão 2) +++ src/backend/optimizer/Makefile (revisão 14) @@ -8,7 +8,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = geqo path plan prep util +SUBDIRS = geqo path plan prep util twopo SUBDIROBJS = $(SUBDIRS:%=%/SUBSYS.o) all: SUBSYS.o Index: src/backend/utils/misc/guc.c =================================================================== --- src/backend/utils/misc/guc.c (revisão 2) +++ src/backend/utils/misc/guc.c (revisão 14) @@ -40,6 +40,8 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include "optimizer/cost.h" +#include "optimizer/ljqo.h" +#include "optimizer/twopo.h" #include "optimizer/geqo.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" @@ -179,7 +181,9 @@ static const char *show_tcp_keepalives_count(void); static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source); static bool assign_maxconnections(int newval, bool doit, GucSource source); +static const char *assign_ljqo_algorithm_value(const char *newval, bool doit, GucSource source); + /* * GUC option variables that are exported from this module */ @@ -268,6 +272,7 @@ static int max_identifier_length; static int block_size; static bool integer_datetimes; +static char *ljqo_algorithm_string; /* should be static, but commands/variable.c needs to get at these */ char *role_string; @@ -344,6 +349,10 @@ gettext_noop("Query Tuning / Planner Method Configuration"), /* QUERY_TUNING_COST */ gettext_noop("Query Tuning / Planner Cost Constants"), + /* QUERY_TUNING_LJQO */ + gettext_noop("Query Tuning / Large Join Query Optimizer Selector"), + /* QUERY_TUNING_TWOPO */ + gettext_noop("Query Tuning / Two Phase Optimizer"), /* QUERY_TUNING_GEQO */ gettext_noop("Query Tuning / Genetic Query Optimizer"), /* QUERY_TUNING_OTHER */ @@ -509,6 +518,30 @@ true, NULL, NULL }, { + {"ljqo_enable", PGC_USERSET, QUERY_TUNING_LJQO, + gettext_noop("Enables the large join query optimization."), + NULL + }, + &ljqo_enable, + DEFAULT_LJQO_ENABLE, NULL, NULL + }, + { + {"twopo_heuristic_states", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: Enable heuristic initial states."), + NULL + }, + &twopo_heuristic_states, + DEFAULT_TWOPO_HEURISTIC_STATES, NULL, NULL + }, + { + {"twopo_sa_phase", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: Enable Simulated Annealing phase."), + NULL + }, + &twopo_sa_phase, + DEFAULT_TWOPO_SA_PHASE, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), gettext_noop("Child table scans will be skipped if their " @@ -518,15 +551,6 @@ false, NULL, NULL }, { - {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, - gettext_noop("Enables genetic query optimization."), - gettext_noop("This algorithm attempts to do planning without " - "exhaustive searching.") - }, - &enable_geqo, - true, NULL, NULL - }, - { /* Not for general use --- used by SET SESSION AUTHORIZATION */ {"is_superuser", PGC_INTERNAL, UNGROUPED, gettext_noop("Shows whether the current user is a superuser."), @@ -1152,14 +1176,31 @@ 8, 1, INT_MAX, NULL, NULL }, { - {"geqo_threshold", PGC_USERSET, QUERY_TUNING_GEQO, - gettext_noop("Sets the threshold of FROM items beyond which GEQO is used."), + {"ljqo_threshold", PGC_USERSET, QUERY_TUNING_LJQO, + gettext_noop("Sets the threshold of FROM items beyond which LJQO-selector is used."), NULL }, - &geqo_threshold, - 12, 2, INT_MAX, NULL, NULL + &ljqo_threshold, + DEFAULT_LJQO_THRESHOLD, MIN_LJQO_THRESHOLD, INT_MAX, NULL, NULL }, { + {"twopo_ii_stop", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: II phase, stop condition."), + NULL + }, + &twopo_ii_stop, + DEFAULT_TWOPO_II_STOP, MIN_TWOPO_II_STOP, INT_MAX, NULL, NULL + }, + { + {"twopo_sa_equilibrium", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: SA phase, equilibrium condition."), + NULL + }, + &twopo_sa_equilibrium, + DEFAULT_TWOPO_SA_EQUILIBRIUM, MIN_TWOPO_SA_EQUILIBRIUM, INT_MAX, + NULL, NULL + }, + { {"geqo_effort", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: effort is used to set the default for other GEQO parameters."), NULL @@ -1183,7 +1224,6 @@ &Geqo_generations, 0, 0, INT_MAX, NULL, NULL }, - { {"deadlock_timeout", PGC_SIGHUP, LOCK_MANAGEMENT, gettext_noop("Sets the time to wait on a lock before checking for deadlock."), @@ -1830,6 +1870,26 @@ }, { + {"twopo_sa_initial_temperature", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: SA phase, initial temperature."), + NULL + }, + &twopo_sa_initial_temperature, + DEFAULT_TWOPO_SA_INITIAL_TEMPERATURE, MIN_TWOPO_SA_INITIAL_TEMPERATURE, + DBL_MAX, NULL, NULL + }, + { + {"twopo_sa_temperature_reduction", PGC_USERSET, QUERY_TUNING_TWOPO, + gettext_noop("TwoPO: SA phase, temperature reduction."), + NULL + }, + &twopo_sa_temperature_reduction, + DEFAULT_TWOPO_SA_TEMPERATURE_REDUCTION, + MIN_TWOPO_SA_TEMPERATURE_REDUCTION, + DBL_MAX, NULL, NULL + }, + + { {"geqo_selection_bias", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: selective pressure within the population."), NULL @@ -2448,6 +2508,16 @@ "pg_catalog.simple", assignTSCurrentConfig, NULL }, + { + {"ljqo_algorithm", PGC_USERSET, QUERY_TUNING_LJQO, + gettext_noop("Sets default LJQO algorithm."), + NULL + }, + &ljqo_algorithm_string, + DEFAULT_LJQO_ALGORITHM_STR, assign_ljqo_algorithm_value, NULL + }, + + #ifdef USE_SSL { {"ssl_ciphers", PGC_POSTMASTER, CONN_AUTH_SECURITY, @@ -6932,6 +7002,13 @@ } static const char * +assign_ljqo_algorithm_value(const char *newval, bool doit, GucSource source) +{ + return assign_ljqo_algorithm( newval, doit ); +} + + +static const char * assign_backslash_quote(const char *newval, bool doit, GucSource source) { BackslashQuoteType bq; Index: src/backend/utils/misc/postgresql.conf.sample =================================================================== --- src/backend/utils/misc/postgresql.conf.sample (revisão 2) +++ src/backend/utils/misc/postgresql.conf.sample (revisão 14) @@ -207,15 +207,29 @@ #cpu_operator_cost = 0.0025 # same scale as above #effective_cache_size = 128MB +# - Large Join Query Optimizer Selector - + +#ljqo_enable = true +#ljqo_threshold = 12 +#ljqo_algorithm = geqo # geqo or twopo + # - Genetic Query Optimizer - -#geqo = on -#geqo_threshold = 12 #geqo_effort = 5 # range 1-10 #geqo_pool_size = 0 # selects default based on effort #geqo_generations = 0 # selects default based on effort #geqo_selection_bias = 2.0 # range 1.5-2.0 +# - Two Phase Optimizer + +#twopo_heuristic_states = true +#twopo_ii_stop = 10 +#twopo_sa_phase = true +#twopo_sa_initial_temperature = 0.1 +#twopo_sa_temperature_reduction = 0.95 +#twopo_sa_equilibrium = 16 + + # - Other Planner Options - #default_statistics_target = 10 # range 1-1000
pgsql-hackers by date: