Re: check for interrupts in set_rtable_names - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: check for interrupts in set_rtable_names |
Date | |
Msg-id | 16882.1447632852@sss.pgh.pa.us Whole thread Raw |
In response to | Re: check for interrupts in set_rtable_names (Jeff Janes <jeff.janes@gmail.com>) |
Responses |
Re: check for interrupts in set_rtable_names
(Tom Lane <tgl@sss.pgh.pa.us>)
|
List | pgsql-hackers |
Jeff Janes <jeff.janes@gmail.com> writes: > On Fri, Nov 13, 2015 at 3:13 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote: >> There's probably no reason not to do that, but I'd be much more interested >> in eliminating the slowness to begin with ... > I was thinking about that as well, but I don't think that would be > back-patchable, at least not the way I was envisioning it. > I was thinking of detecting bad cases (had to count to over 10 before > finding a novel name, more than 10 times) and then switching from an > object-local count, to a global count, for the numbers to add to the > name. But that would be a behavior change under some conditions. I experimented with using a hash table to avoid the O(N^2) behavior. This seems to work quite well, and I think it doesn't change the results (leastwise, it does not break the regression tests). It would be possible to do something similar to dodge the O(N^2) costs in make_colname_unique/colname_is_unique; but it would be a lot messier, and the tests I did suggest that it's fairly hard to get into a regime where those functions are a huge part of the runtime anyway, because we have limits on the numbers of columns. So I'm inclined to leave that alone. regards, tom lane diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 3388f7c..2b6de8f 100644 *** a/src/backend/utils/adt/ruleutils.c --- b/src/backend/utils/adt/ruleutils.c *************** *** 55,60 **** --- 55,61 ---- #include "utils/array.h" #include "utils/builtins.h" #include "utils/fmgroids.h" + #include "utils/hsearch.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/ruleutils.h" *************** typedef struct *** 267,272 **** --- 268,282 ---- #define deparse_columns_fetch(rangetable_index, dpns) \ ((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1)) + /* + * Entry in set_rtable_names' hash table + */ + typedef struct + { + char name[NAMEDATALEN]; /* Hash key --- must be first */ + int counter; /* Largest addition used so far for name */ + } NameHashEntry; + /* ---------- * Global data *************** static void print_function_rettype(Strin *** 312,319 **** static void print_function_trftypes(StringInfo buf, HeapTuple proctup); static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces, Bitmapset *rels_used); - static bool refname_is_unique(char *refname, deparse_namespace *dpns, - List *parent_namespaces); static void set_deparse_for_query(deparse_namespace *dpns, Query *query, List *parent_namespaces); static void set_simple_column_names(deparse_namespace *dpns); --- 322,327 ---- *************** static void *** 2676,2690 **** set_rtable_names(deparse_namespace *dpns, List *parent_namespaces, Bitmapset *rels_used) { ListCell *lc; - int rtindex = 1; dpns->rtable_names = NIL; foreach(lc, dpns->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); char *refname; if (rels_used && !bms_is_member(rtindex, rels_used)) { /* Ignore unreferenced RTE */ --- 2684,2744 ---- set_rtable_names(deparse_namespace *dpns, List *parent_namespaces, Bitmapset *rels_used) { + HASHCTL hash_ctl; + HTAB *names_hash; + NameHashEntry *hentry; + bool found; + int rtindex; ListCell *lc; dpns->rtable_names = NIL; + /* nothing more to do if empty rtable */ + if (dpns->rtable == NIL) + return; + + /* + * We use a hash table to hold known names, so that this process is O(N) + * not O(N^2) for N names. + */ + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = NAMEDATALEN; + hash_ctl.entrysize = sizeof(NameHashEntry); + hash_ctl.hcxt = CurrentMemoryContext; + names_hash = hash_create("set_rtable_names names", + list_length(dpns->rtable), + &hash_ctl, + HASH_ELEM | HASH_CONTEXT); + /* Preload the hash table with names appearing in parent_namespaces */ + foreach(lc, parent_namespaces) + { + deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, olddpns->rtable_names) + { + char *oldname = (char *) lfirst(lc2); + + if (oldname == NULL) + continue; + hentry = (NameHashEntry *) hash_search(names_hash, + oldname, + HASH_ENTER, + &found); + /* we do not complain about duplicate names in parent namespaces */ + hentry->counter = 0; + } + } + + /* Now we can scan the rtable */ + rtindex = 1; foreach(lc, dpns->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); char *refname; + /* Just in case this takes an unreasonable amount of time ... */ + CHECK_FOR_INTERRUPTS(); + if (rels_used && !bms_is_member(rtindex, rels_used)) { /* Ignore unreferenced RTE */ *************** set_rtable_names(deparse_namespace *dpns *** 2712,2767 **** } /* ! * If the selected name isn't unique, append digits to make it so */ ! if (refname && ! !refname_is_unique(refname, dpns, parent_namespaces)) { ! char *modname = (char *) palloc(strlen(refname) + 32); ! int i = 0; ! do { ! sprintf(modname, "%s_%d", refname, ++i); ! } while (!refname_is_unique(modname, dpns, parent_namespaces)); ! refname = modname; } dpns->rtable_names = lappend(dpns->rtable_names, refname); rtindex++; } - } - - /* - * refname_is_unique: is refname distinct from all already-chosen RTE names? - */ - static bool - refname_is_unique(char *refname, deparse_namespace *dpns, - List *parent_namespaces) - { - ListCell *lc; - - foreach(lc, dpns->rtable_names) - { - char *oldname = (char *) lfirst(lc); - - if (oldname && strcmp(oldname, refname) == 0) - return false; - } - foreach(lc, parent_namespaces) - { - deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc); - ListCell *lc2; - - foreach(lc2, olddpns->rtable_names) - { - char *oldname = (char *) lfirst(lc2); ! if (oldname && strcmp(oldname, refname) == 0) ! return false; ! } ! } ! return true; } /* --- 2766,2809 ---- } /* ! * If the selected name isn't unique, append digits to make it so, and ! * make a new hash entry for it once we've got a unique name. */ ! if (refname) { ! hentry = (NameHashEntry *) hash_search(names_hash, ! refname, ! HASH_ENTER, ! &found); ! if (found) ! { ! /* Name already in use, must choose a new one */ ! char *modname = (char *) palloc(strlen(refname) + 32); ! NameHashEntry *hentry2; ! do ! { ! sprintf(modname, "%s_%d", refname, ++(hentry->counter)); ! hentry2 = (NameHashEntry *) hash_search(names_hash, ! modname, ! HASH_ENTER, ! &found); ! } while (found); ! hentry2->counter = 0; /* init new hash entry */ ! refname = modname; ! } ! else { ! /* Name not previously used, need only initialize hentry */ ! hentry->counter = 0; ! } } dpns->rtable_names = lappend(dpns->rtable_names, refname); rtindex++; } ! hash_destroy(names_hash); } /*
pgsql-hackers by date: