In my first attached, applied patch, I have found a way to speed
relations lookups in pg_upgrade. I knew there was a way to optimize
this but it was not clear until my major cleanups. Instead of doing
effectively a nested loop join on old/new relations, I now order them
and use a 1:1 mergejoin. This should speed up pg_upgrade for many
relations.
The second patch removes a hack for toast relations that is unnecessary
now that we always preserve pg_class.oid. The old code preserved
relfilenodes for non-toast tables and oids for toast relations, which
was obviously confusing and non-optimal.
Remember, this code was not originally written by me but someone at
EnteprriseDB, so I didn't fully understand it until now.
I love removing functions!
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
commit 002c105a0706bd1c1e939fe0f47ecdceeae6c52d
Author: Bruce Momjian <bruce@momjian.us>
Date: Sat Jan 8 13:44:44 2011 -0500
In pg_upgrade, remove functions that did sequential array scans looking
up relations, but rather order old/new relations and use the same array
index value for both. This should speed up pg_upgrade for databases
with many relations.
diff --git a/contrib/pg_upgrade/function.c b/contrib/pg_upgrade/function.c
index cb9576a..d7c790c 100644
*** /tmp/UF8KGb_function.c Sat Jan 8 13:46:55 2011
--- /tmp/4GeXcc_function.c Sat Jan 8 13:46:55 2011
***************
*** 13,25 ****
/*
! * install_support_functions_in_db()
*
* pg_upgrade requires some support functions that enable it to modify
* backend behavior.
*/
void
! install_support_functions_in_db(const char *db_name)
{
PGconn *conn = connectToServer(&new_cluster, db_name);
--- 13,25 ----
/*
! * install_support_functions_in_new_db()
*
* pg_upgrade requires some support functions that enable it to modify
* backend behavior.
*/
void
! install_support_functions_in_new_db(const char *db_name)
{
PGconn *conn = connectToServer(&new_cluster, db_name);
*************** install_support_functions_in_db(const ch
*** 87,93 ****
void
! uninstall_support_functions(void)
{
int dbnum;
--- 87,93 ----
void
! uninstall_support_functions_from_new_cluster(void)
{
int dbnum;
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
index 50e4de2..c805a04 100644
*** /tmp/KMuVfb_info.c Sat Jan 8 13:46:55 2011
--- /tmp/gznt7a_info.c Sat Jan 8 13:46:55 2011
*************** static void create_rel_filename_map(cons
*** 21,30 ****
const DbInfo *old_db, const DbInfo *new_db,
const RelInfo *old_rel, const RelInfo *new_rel,
FileNameMap *map);
- static RelInfo *relarr_lookup_rel_name(ClusterInfo *cluster, RelInfoArr *rel_arr,
- const char *nspname, const char *relname);
- static RelInfo *relarr_lookup_rel_oid(ClusterInfo *cluster, RelInfoArr *rel_arr,
- Oid oid);
/*
--- 21,26 ----
*************** gen_db_file_maps(DbInfo *old_db, DbInfo
*** 42,59 ****
int relnum;
int num_maps = 0;
maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
old_db->rel_arr.nrels);
for (relnum = 0; relnum < old_db->rel_arr.nrels; relnum++)
{
RelInfo *old_rel = &old_db->rel_arr.rels[relnum];
! RelInfo *new_rel;
!
! /* old/new relation names always match */
! new_rel = relarr_lookup_rel_name(&new_cluster, &new_db->rel_arr,
! old_rel->nspname, old_rel->relname);
create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
old_rel, new_rel, maps + num_maps);
num_maps++;
--- 38,59 ----
int relnum;
int num_maps = 0;
+ if (old_db->rel_arr.nrels != new_db->rel_arr.nrels)
+ pg_log(PG_FATAL, "old and new databases \"%s\" have a different number of relations\n",
+ old_db->db_name);
+
maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
old_db->rel_arr.nrels);
for (relnum = 0; relnum < old_db->rel_arr.nrels; relnum++)
{
RelInfo *old_rel = &old_db->rel_arr.rels[relnum];
! RelInfo *new_rel = &old_db->rel_arr.rels[relnum];
+ if (old_rel->reloid != new_rel->reloid)
+ pg_log(PG_FATAL, "mismatch of relation id: database \"%s\", old relid %d, new relid %d\n",
+ old_db->db_name, old_rel->reloid, new_rel->reloid);
+
create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
old_rel, new_rel, maps + num_maps);
num_maps++;
*************** get_db_infos(ClusterInfo *cluster)
*** 153,159 ****
"FROM pg_catalog.pg_database d "
" LEFT OUTER JOIN pg_catalog.pg_tablespace t "
" ON d.dattablespace = t.oid "
! "WHERE d.datallowconn = true");
i_datname = PQfnumber(res, "datname");
i_oid = PQfnumber(res, "oid");
--- 153,161 ----
"FROM pg_catalog.pg_database d "
" LEFT OUTER JOIN pg_catalog.pg_tablespace t "
" ON d.dattablespace = t.oid "
! "WHERE d.datallowconn = true "
! /* we don't preserve pg_database.oid so we sort by name */
! "ORDER BY 2");
i_datname = PQfnumber(res, "datname");
i_oid = PQfnumber(res, "oid");
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 258,264 ****
"GROUP BY c.oid, n.nspname, c.relname, c.relfilenode,"
" c.reltoastrelid, t.spclocation, "
" n.nspname "
! "ORDER BY t.spclocation, n.nspname, c.relname;",
FirstNormalObjectId,
/* does pg_largeobject_metadata need to be migrated? */
(GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
--- 260,267 ----
"GROUP BY c.oid, n.nspname, c.relname, c.relfilenode,"
" c.reltoastrelid, t.spclocation, "
" n.nspname "
! /* we preserve pg_class.oid so we sort by it to match old/new */
! "ORDER BY 1;",
FirstNormalObjectId,
/* does pg_largeobject_metadata need to be migrated? */
(GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 308,393 ****
dbinfo->rel_arr.rels = relinfos;
dbinfo->rel_arr.nrels = num_rels;
- dbinfo->rel_arr.last_relname_lookup = 0;
- }
-
-
- /*
- * dbarr_lookup_db()
- *
- * Returns the pointer to the DbInfo structure
- */
- DbInfo *
- dbarr_lookup_db(DbInfoArr *db_arr, const char *db_name)
- {
- int dbnum;
-
- for (dbnum = 0; dbnum < db_arr->ndbs; dbnum++)
- {
- if (strcmp(db_arr->dbs[dbnum].db_name, db_name) == 0)
- return &db_arr->dbs[dbnum];
- }
-
- return NULL;
- }
-
-
- /*
- * relarr_lookup_rel_name()
- *
- * Searches "relname" in rel_arr. Returns the *real* pointer to the
- * RelInfo structure.
- */
- static RelInfo *
- relarr_lookup_rel_name(ClusterInfo *cluster, RelInfoArr *rel_arr,
- const char *nspname, const char *relname)
- {
- int relnum;
-
- /* Test next lookup first, for speed */
- if (rel_arr->last_relname_lookup + 1 < rel_arr->nrels &&
- strcmp(rel_arr->rels[rel_arr->last_relname_lookup + 1].nspname, nspname) == 0 &&
- strcmp(rel_arr->rels[rel_arr->last_relname_lookup + 1].relname, relname) == 0)
- {
- rel_arr->last_relname_lookup++;
- return &rel_arr->rels[rel_arr->last_relname_lookup];
- }
-
- for (relnum = 0; relnum < rel_arr->nrels; relnum++)
- {
- if (strcmp(rel_arr->rels[relnum].nspname, nspname) == 0 &&
- strcmp(rel_arr->rels[relnum].relname, relname) == 0)
- {
- rel_arr->last_relname_lookup = relnum;
- return &rel_arr->rels[relnum];
- }
- }
- pg_log(PG_FATAL, "Could not find %s.%s in %s cluster\n",
- nspname, relname, CLUSTER_NAME(cluster));
- return NULL;
- }
-
-
- /*
- * relarr_lookup_rel_oid()
- *
- * Returns a pointer to the RelInfo structure for the
- * given oid or NULL if the desired entry cannot be
- * found.
- */
- static RelInfo *
- relarr_lookup_rel_oid(ClusterInfo *cluster, RelInfoArr *rel_arr, Oid oid)
- {
- int relnum;
-
- for (relnum = 0; relnum < rel_arr->nrels; relnum++)
- {
- if (rel_arr->rels[relnum].reloid == oid)
- return &rel_arr->rels[relnum];
- }
- pg_log(PG_FATAL, "Could not find %d in %s cluster\n",
- oid, CLUSTER_NAME(cluster));
- return NULL;
}
--- 311,316 ----
*************** free_rel_arr(RelInfoArr *rel_arr)
*** 396,402 ****
{
pg_free(rel_arr->rels);
rel_arr->nrels = 0;
- rel_arr->last_relname_lookup = 0;
}
--- 319,324 ----
diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
index a428a03..485412d 100644
*** /tmp/oNcxDc_pg_upgrade.c Sat Jan 8 13:46:55 2011
--- /tmp/YfgMvc_pg_upgrade.c Sat Jan 8 13:46:55 2011
*************** prepare_new_databases(void)
*** 229,235 ****
* Install support functions in the database accessed by
* GLOBALS_DUMP_FILE because it can preserve pg_authid.oid.
*/
! install_support_functions_in_db(os_info.user);
/*
* We have to create the databases first so we can install support
--- 229,235 ----
* Install support functions in the database accessed by
* GLOBALS_DUMP_FILE because it can preserve pg_authid.oid.
*/
! install_support_functions_in_new_db(os_info.user);
/*
* We have to create the databases first so we can install support
*************** prepare_new_databases(void)
*** 244,249 ****
--- 244,250 ----
GLOBALS_DUMP_FILE, log_opts.filename);
check_ok();
+ /* we load this to get a current list of databases */
get_db_and_rel_infos(&new_cluster);
stop_postmaster(false, false);
*************** create_new_objects(void)
*** 266,272 ****
/* skip db we already installed */
if (strcmp(new_db->db_name, os_info.user) != 0)
! install_support_functions_in_db(new_db->db_name);
}
check_ok();
--- 267,273 ----
/* skip db we already installed */
if (strcmp(new_db->db_name, os_info.user) != 0)
! install_support_functions_in_new_db(new_db->db_name);
}
check_ok();
*************** create_new_objects(void)
*** 279,289 ****
DB_DUMP_FILE, log_opts.filename);
check_ok();
! /* regenerate now that we have db schemas */
dbarr_free(&new_cluster.dbarr);
get_db_and_rel_infos(&new_cluster);
! uninstall_support_functions();
stop_postmaster(false, false);
}
--- 280,290 ----
DB_DUMP_FILE, log_opts.filename);
check_ok();
! /* regenerate now that we have objects in the databases */
dbarr_free(&new_cluster.dbarr);
get_db_and_rel_infos(&new_cluster);
! uninstall_support_functions_from_new_cluster();
stop_postmaster(false, false);
}
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
index acd453b..18cbc19 100644
*** /tmp/iyC1Eb_pg_upgrade.h Sat Jan 8 13:46:55 2011
--- /tmp/kjwEac_pg_upgrade.h Sat Jan 8 13:46:55 2011
*************** typedef struct
*** 77,83 ****
{
RelInfo *rels;
int nrels;
- int last_relname_lookup; /* cache of last lookup location */
} RelInfoArr;
/*
--- 77,82 ----
*************** void check_hard_link(void);
*** 321,328 ****
/* function.c */
! void install_support_functions_in_db(const char *db_name);
! void uninstall_support_functions(void);
void get_loadable_libraries(void);
void check_loadable_libraries(void);
--- 320,327 ----
/* function.c */
! void install_support_functions_in_new_db(const char *db_name);
! void uninstall_support_functions_from_new_cluster(void);
void get_loadable_libraries(void);
void check_loadable_libraries(void);
*************** void check_loadable_libraries(void);
*** 331,338 ****
FileNameMap *gen_db_file_maps(DbInfo *old_db,
DbInfo *new_db, int *nmaps, const char *old_pgdata,
const char *new_pgdata);
! void get_db_and_rel_infos(ClusterInfo *cluster);
! DbInfo *dbarr_lookup_db(DbInfoArr *db_arr, const char *db_name);
void dbarr_free(DbInfoArr *db_arr);
void print_maps(FileNameMap *maps, int n,
const char *dbName);
--- 330,336 ----
FileNameMap *gen_db_file_maps(DbInfo *old_db,
DbInfo *new_db, int *nmaps, const char *old_pgdata,
const char *new_pgdata);
! void get_db_and_rel_infos(ClusterInfo *cluster);
void dbarr_free(DbInfoArr *db_arr);
void print_maps(FileNameMap *maps, int n,
const char *dbName);
diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c
index b30f5e0..ca96407 100644
*** /tmp/0UKbOa_relfilenode.c Sat Jan 8 13:46:55 2011
--- /tmp/eAIxgb_relfilenode.c Sat Jan 8 13:46:55 2011
*************** char scandir_file_pattern[MAXPGPATH];
*** 29,50 ****
* physically link the databases.
*/
const char *
! transfer_all_new_dbs(DbInfoArr *olddb_arr,
! DbInfoArr *newdb_arr, char *old_pgdata, char *new_pgdata)
{
int dbnum;
const char *msg = NULL;
prep_status("Restoring user relation files\n");
! for (dbnum = 0; dbnum < newdb_arr->ndbs; dbnum++)
{
! DbInfo *new_db = &newdb_arr->dbs[dbnum];
! DbInfo *old_db = dbarr_lookup_db(olddb_arr, new_db->db_name);
FileNameMap *mappings;
int n_maps;
pageCnvCtx *pageConverter = NULL;
n_maps = 0;
mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
new_pgdata);
--- 29,57 ----
* physically link the databases.
*/
const char *
! transfer_all_new_dbs(DbInfoArr *old_db_arr,
! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata)
{
int dbnum;
const char *msg = NULL;
prep_status("Restoring user relation files\n");
! if (old_db_arr->ndbs != new_db_arr->ndbs)
! pg_log(PG_FATAL, "old and new clusters have a different number of databases\n");
!
! for (dbnum = 0; dbnum < old_db_arr->ndbs; dbnum++)
{
! DbInfo *old_db = &old_db_arr->dbs[dbnum];
! DbInfo *new_db = &new_db_arr->dbs[dbnum];
FileNameMap *mappings;
int n_maps;
pageCnvCtx *pageConverter = NULL;
+ if (strcmp(old_db->db_name, new_db->db_name) != 0)
+ pg_log(PG_FATAL, "old and new databases have a different names: old \"%s\", new \"%s\"\n",
+ old_db->db_name, new_db->db_name);
+
n_maps = 0;
mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
new_pgdata);
commit a60b32b3dcb4d49a60c96558ce405764ac17c799
Author: Bruce Momjian <bruce@momjian.us>
Date: Sat Jan 8 08:01:12 2011 -0500
In pg_upgrade, remove unnecessary separate handling of toast tables now
that we restore by oid; they can be handled like regular tables when
creating the file mapping structure.
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
index 4360d39..50e4de2 100644
*** /tmp/W2sFHd_info.c Sat Jan 8 13:46:40 2011
--- /tmp/2POKce_info.c Sat Jan 8 13:46:40 2011
*************** gen_db_file_maps(DbInfo *old_db, DbInfo
*** 50,59 ****
RelInfo *old_rel = &old_db->rel_arr.rels[relnum];
RelInfo *new_rel;
- /* toast tables are handled by their parents */
- if (strcmp(old_rel->nspname, "pg_toast") == 0)
- continue;
-
/* old/new relation names always match */
new_rel = relarr_lookup_rel_name(&new_cluster, &new_db->rel_arr,
old_rel->nspname, old_rel->relname);
--- 50,55 ----
*************** gen_db_file_maps(DbInfo *old_db, DbInfo
*** 61,99 ****
create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
old_rel, new_rel, maps + num_maps);
num_maps++;
-
- /*
- * So much for mapping this relation; now we need a mapping
- * for its corresponding toast relation and toast index, if any.
- */
- if (old_rel->toastrelid > 0)
- {
- char old_name[MAXPGPATH], new_name[MAXPGPATH];
- RelInfo *old_toast, *new_toast;
-
- /* use the toast relids from the rel_arr for lookups */
- old_toast = relarr_lookup_rel_oid(&old_cluster, &old_db->rel_arr,
- old_rel->toastrelid);
- new_toast = relarr_lookup_rel_oid(&new_cluster, &new_db->rel_arr,
- new_rel->toastrelid);
-
- create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
- old_toast, new_toast, maps + num_maps);
- num_maps++;
-
- /* toast indexes are the same, except with an "_index" suffix */
- snprintf(old_name, sizeof(old_name), "%s_index", old_toast->relname);
- snprintf(new_name, sizeof(new_name), "%s_index", new_toast->relname);
-
- old_toast = relarr_lookup_rel_name(&old_cluster, &old_db->rel_arr,
- "pg_toast", old_name);
- new_toast = relarr_lookup_rel_name(&new_cluster, &new_db->rel_arr,
- "pg_toast", new_name);
-
- create_rel_filename_map(old_pgdata, new_pgdata, old_db,
- new_db, old_toast, new_toast, maps + num_maps);
- num_maps++;
- }
}
*nmaps = num_maps;
--- 57,62 ----