Re: pg_upgrade with parallel tablespace copying - Mailing list pgsql-hackers
From | Bruce Momjian |
---|---|
Subject | Re: pg_upgrade with parallel tablespace copying |
Date | |
Msg-id | 20130109135826.GF8789@momjian.us Whole thread Raw |
In response to | pg_upgrade with parallel tablespace copying (Bruce Momjian <bruce@momjian.us>) |
List | pgsql-hackers |
Slightly modified patch applied. This is my last planned pg_upgrade change for 9.3. --------------------------------------------------------------------------- On Mon, Jan 7, 2013 at 10:51:21PM -0500, Bruce Momjian wrote: > Pg_upgrade by default (without --link) copies heap/index files from the > old to new cluster. This patch implements parallel heap/index file > copying in pg_upgrade using the --jobs option. It uses the same > infrastructure used for pg_upgrade parallel dump/restore. Here are the > performance results: > > --- seconds --- > GB git patched > 2 62.09 63.75 > 4 95.93 107.22 > 8 194.96 195.29 > 16 494.38 348.93 > 32 983.28 644.23 > 64 2227.73 1244.08 > 128 4735.83 2547.09 > > Because of the kernel cache, you only see a big win when the amount of > copy data exceeds the kernel cache. For testing, I used a 24GB, 16-core > machine with two magnetic disks with one tablespace on each. Using more > tablespaces would yield larger improvements. My test script is > attached. > > I consider this patch ready for application. This is the last > pg_upgrade performance improvement idea I am considering. > > -- > Bruce Momjian <bruce@momjian.us> http://momjian.us > EnterpriseDB http://enterprisedb.com > > + It's impossible for everything to be true. + > diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c > new file mode 100644 > index 59f8fd0..1780788 > *** a/contrib/pg_upgrade/check.c > --- b/contrib/pg_upgrade/check.c > *************** create_script_for_old_cluster_deletion(c > *** 606,612 **** > fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata)); > > /* delete old cluster's alternate tablespaces */ > ! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++) > { > /* > * Do the old cluster's per-database directories share a directory > --- 606,612 ---- > fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata)); > > /* delete old cluster's alternate tablespaces */ > ! 
for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) > { > /* > * Do the old cluster's per-database directories share a directory > *************** create_script_for_old_cluster_deletion(c > *** 621,634 **** > /* remove PG_VERSION? */ > if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) > fprintf(script, RM_CMD " %s%s%cPG_VERSION\n", > ! fix_path_separator(os_info.tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix), > PATH_SEPARATOR); > > for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++) > { > fprintf(script, RMDIR_CMD " %s%s%c%d\n", > ! fix_path_separator(os_info.tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix), > PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid); > } > --- 621,634 ---- > /* remove PG_VERSION? */ > if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) > fprintf(script, RM_CMD " %s%s%cPG_VERSION\n", > ! fix_path_separator(os_info.old_tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix), > PATH_SEPARATOR); > > for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++) > { > fprintf(script, RMDIR_CMD " %s%s%c%d\n", > ! fix_path_separator(os_info.old_tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix), > PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid); > } > *************** create_script_for_old_cluster_deletion(c > *** 640,646 **** > * or a version-specific subdirectory. > */ > fprintf(script, RMDIR_CMD " %s%s\n", > ! fix_path_separator(os_info.tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix)); > } > > --- 640,646 ---- > * or a version-specific subdirectory. > */ > fprintf(script, RMDIR_CMD " %s%s\n", > ! 
fix_path_separator(os_info.old_tablespaces[tblnum]), > fix_path_separator(old_cluster.tablespace_suffix)); > } > > diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c > new file mode 100644 > index 0c11ff8..7fd4584 > *** a/contrib/pg_upgrade/info.c > --- b/contrib/pg_upgrade/info.c > *************** create_rel_filename_map(const char *old_ > *** 106,125 **** > * relation belongs to the default tablespace, hence relfiles should > * exist in the data directories. > */ > ! snprintf(map->old_dir, sizeof(map->old_dir), "%s/base/%u", old_data, > ! old_db->db_oid); > ! snprintf(map->new_dir, sizeof(map->new_dir), "%s/base/%u", new_data, > ! new_db->db_oid); > } > else > { > /* relation belongs to a tablespace, so use the tablespace location */ > ! snprintf(map->old_dir, sizeof(map->old_dir), "%s%s/%u", old_rel->tablespace, > ! old_cluster.tablespace_suffix, old_db->db_oid); > ! snprintf(map->new_dir, sizeof(map->new_dir), "%s%s/%u", new_rel->tablespace, > ! new_cluster.tablespace_suffix, new_db->db_oid); > } > > /* > * old_relfilenode might differ from pg_class.oid (and hence > * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL. > --- 106,130 ---- > * relation belongs to the default tablespace, hence relfiles should > * exist in the data directories. > */ > ! strlcpy(map->old_tablespace, old_data, sizeof(map->old_tablespace)); > ! strlcpy(map->new_tablespace, new_data, sizeof(map->new_tablespace)); > ! strlcpy(map->old_tablespace_suffix, "/base", sizeof(map->old_tablespace_suffix)); > ! strlcpy(map->new_tablespace_suffix, "/base", sizeof(map->new_tablespace_suffix)); > } > else > { > /* relation belongs to a tablespace, so use the tablespace location */ > ! strlcpy(map->old_tablespace, old_rel->tablespace, sizeof(map->old_tablespace)); > ! strlcpy(map->new_tablespace, new_rel->tablespace, sizeof(map->new_tablespace)); > ! strlcpy(map->old_tablespace_suffix, old_cluster.tablespace_suffix, > ! sizeof(map->old_tablespace_suffix)); > ! 
strlcpy(map->new_tablespace_suffix, new_cluster.tablespace_suffix, > ! sizeof(map->new_tablespace_suffix)); > } > > + map->old_db_oid = old_db->db_oid; > + map->new_db_oid = new_db->db_oid; > + > /* > * old_relfilenode might differ from pg_class.oid (and hence > * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL. > diff --git a/contrib/pg_upgrade/parallel.c b/contrib/pg_upgrade/parallel.c > new file mode 100644 > index 8ea36bc..d157511 > *** a/contrib/pg_upgrade/parallel.c > --- b/contrib/pg_upgrade/parallel.c > *************** typedef struct { > *** 34,44 **** > char log_file[MAXPGPATH]; > char opt_log_file[MAXPGPATH]; > char cmd[MAX_STRING]; > ! } thread_arg; > > ! thread_arg **thread_args; > > ! DWORD win32_exec_prog(thread_arg *args); > > #endif > > --- 34,57 ---- > char log_file[MAXPGPATH]; > char opt_log_file[MAXPGPATH]; > char cmd[MAX_STRING]; > ! } exec_thread_arg; > > ! typedef struct { > ! DbInfoArr *old_db_arr; > ! DbInfoArr *new_db_arr; > ! char old_pgdata[MAXPGPATH]; > ! char new_pgdata[MAXPGPATH]; > ! char old_tablespace[MAXPGPATH]; > ! } transfer_thread_arg; > > ! exec_thread_arg **exec_thread_args; > ! transfer_thread_arg **transfer_thread_args; > ! > ! /* track current thread_args struct so reap_child() can be used for all cases */ > ! void **cur_thread_args; > ! > ! DWORD win32_exec_prog(exec_thread_arg *args); > ! DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args); > > #endif > > *************** parallel_exec_prog(const char *log_file, > *** 58,64 **** > pid_t child; > #else > HANDLE child; > ! thread_arg *new_arg; > #endif > > va_start(args, fmt); > --- 71,77 ---- > pid_t child; > #else > HANDLE child; > ! exec_thread_arg *new_arg; > #endif > > va_start(args, fmt); > *************** parallel_exec_prog(const char *log_file, > *** 71,77 **** > else > { > /* parallel */ > ! > /* harvest any dead children */ > while (reap_child(false) == true) > ; > --- 84,92 ---- > else > { > /* parallel */ > ! #ifdef WIN32 > ! 
cur_thread_args = (void **)exec_thread_args; > ! #endif > /* harvest any dead children */ > while (reap_child(false) == true) > ; > *************** parallel_exec_prog(const char *log_file, > *** 100,106 **** > int i; > > thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE)); > ! thread_args = pg_malloc(user_opts.jobs * sizeof(thread_arg *)); > > /* > * For safety and performance, we keep the args allocated during > --- 115,121 ---- > int i; > > thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE)); > ! exec_thread_args = pg_malloc(user_opts.jobs * sizeof(exec_thread_arg *)); > > /* > * For safety and performance, we keep the args allocated during > *************** parallel_exec_prog(const char *log_file, > *** 108,118 **** > * in a thread different from the one that allocated it. > */ > for (i = 0; i < user_opts.jobs; i++) > ! thread_args[i] = pg_malloc(sizeof(thread_arg)); > } > > /* use first empty array element */ > ! new_arg = thread_args[parallel_jobs-1]; > > /* Can only pass one pointer into the function, so use a struct */ > strcpy(new_arg->log_file, log_file); > --- 123,133 ---- > * in a thread different from the one that allocated it. > */ > for (i = 0; i < user_opts.jobs; i++) > ! exec_thread_args[i] = pg_malloc(sizeof(exec_thread_arg)); > } > > /* use first empty array element */ > ! new_arg = exec_thread_args[parallel_jobs-1]; > > /* Can only pass one pointer into the function, so use a struct */ > strcpy(new_arg->log_file, log_file); > *************** parallel_exec_prog(const char *log_file, > *** 134,140 **** > > #ifdef WIN32 > DWORD > ! win32_exec_prog(thread_arg *args) > { > int ret; > > --- 149,155 ---- > > #ifdef WIN32 > DWORD > ! 
win32_exec_prog(exec_thread_arg *args) > { > int ret; > > *************** win32_exec_prog(thread_arg *args) > *** 147,152 **** > --- 162,273 ---- > > > /* > + * parallel_transfer_all_new_dbs > + * > + * This has the same API as transfer_all_new_dbs, except it does parallel execution > + * by transfering multiple tablespaces in parallel > + */ > + void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, > + char *old_pgdata, char *new_pgdata, > + char *old_tablespace) > + { > + #ifndef WIN32 > + pid_t child; > + #else > + HANDLE child; > + transfer_thread_arg *new_arg; > + #endif > + > + if (user_opts.jobs <= 1) > + /* throw_error must be true to allow jobs */ > + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL); > + else > + { > + /* parallel */ > + #ifdef WIN32 > + cur_thread_args = (void **)transfer_thread_args; > + #endif > + /* harvest any dead children */ > + while (reap_child(false) == true) > + ; > + > + /* must we wait for a dead child? 
*/ > + if (parallel_jobs >= user_opts.jobs) > + reap_child(true); > + > + /* set this before we start the job */ > + parallel_jobs++; > + > + /* Ensure stdio state is quiesced before forking */ > + fflush(NULL); > + > + #ifndef WIN32 > + child = fork(); > + if (child == 0) > + { > + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, > + old_tablespace); > + /* if we take another exit path, it will be non-zero */ > + /* use _exit to skip atexit() functions */ > + _exit(0); > + } > + else if (child < 0) > + /* fork failed */ > + pg_log(PG_FATAL, "could not create worker process: %s\n", strerror(errno)); > + #else > + if (thread_handles == NULL) > + { > + int i; > + > + thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE)); > + transfer_thread_args = pg_malloc(user_opts.jobs * sizeof(transfer_thread_arg *)); > + > + /* > + * For safety and performance, we keep the args allocated during > + * the entire life of the process, and we don't free the args > + * in a thread different from the one that allocated it. 
> + */ > + for (i = 0; i < user_opts.jobs; i++) > + transfer_thread_args[i] = pg_malloc(sizeof(transfer_thread_arg)); > + } > + > + /* use first empty array element */ > + new_arg = transfer_thread_args[parallel_jobs-1]; > + > + /* Can only pass one pointer into the function, so use a struct */ > + new_arg->old_db_arr = old_db_arr; > + new_arg->new_db_arr = new_db_arr; > + strcpy(new_arg->old_pgdata, old_pgdata); > + strcpy(new_arg->new_pgdata, new_pgdata); > + strcpy(new_arg->old_tablespace, old_tablespace); > + > + child = (HANDLE) _beginthreadex(NULL, 0, (void *) win32_transfer_all_new_dbs, > + new_arg, 0, NULL); > + if (child == 0) > + pg_log(PG_FATAL, "could not create worker thread: %s\n", strerror(errno)); > + > + thread_handles[parallel_jobs-1] = child; > + #endif > + } > + > + return; > + } > + > + > + #ifdef WIN32 > + DWORD > + win32_transfer_all_new_dbs(transfer_thread_arg *args) > + { > + transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, args->old_pgdata, > + args->new_pgdata, args->old_tablespace); > + > + /* terminates thread */ > + return 0; > + } > + #endif > + > + > + /* > + * collect status from a completed worker child > */ > bool > *************** reap_child(bool wait_for_child) > *** 195,201 **** > /* Move last slot into dead child's position */ > if (thread_num != parallel_jobs - 1) > { > ! thread_arg *tmp_args; > > thread_handles[thread_num] = thread_handles[parallel_jobs - 1]; > > --- 316,322 ---- > /* Move last slot into dead child's position */ > if (thread_num != parallel_jobs - 1) > { > ! void *tmp_args; > > thread_handles[thread_num] = thread_handles[parallel_jobs - 1]; > > *************** reap_child(bool wait_for_child) > *** 205,213 **** > * reused by the next created thread. Instead, the new thread > * will use the arg struct of the thread that just died. > */ > ! tmp_args = thread_args[thread_num]; > ! thread_args[thread_num] = thread_args[parallel_jobs - 1]; > ! 
thread_args[parallel_jobs - 1] = tmp_args; > } > #endif > > --- 326,334 ---- > * reused by the next created thread. Instead, the new thread > * will use the arg struct of the thread that just died. > */ > ! tmp_args = cur_thread_args[thread_num]; > ! cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 1]; > ! cur_thread_args[parallel_jobs - 1] = tmp_args; > } > #endif > > diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c > new file mode 100644 > index 70c749d..85997e5 > *** a/contrib/pg_upgrade/pg_upgrade.c > --- b/contrib/pg_upgrade/pg_upgrade.c > *************** main(int argc, char **argv) > *** 133,139 **** > if (user_opts.transfer_mode == TRANSFER_MODE_LINK) > disable_old_cluster(); > > ! transfer_all_new_dbs(&old_cluster.dbarr, &new_cluster.dbarr, > old_cluster.pgdata, new_cluster.pgdata); > > /* > --- 133,139 ---- > if (user_opts.transfer_mode == TRANSFER_MODE_LINK) > disable_old_cluster(); > > ! transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr, > old_cluster.pgdata, new_cluster.pgdata); > > /* > diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h > new file mode 100644 > index c1a2f53..d5c3fa9 > *** a/contrib/pg_upgrade/pg_upgrade.h > --- b/contrib/pg_upgrade/pg_upgrade.h > *************** typedef struct > *** 134,141 **** > */ > typedef struct > { > ! char old_dir[MAXPGPATH]; > ! char new_dir[MAXPGPATH]; > > /* > * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes > --- 134,145 ---- > */ > typedef struct > { > ! char old_tablespace[MAXPGPATH]; > ! char new_tablespace[MAXPGPATH]; > ! char old_tablespace_suffix[MAXPGPATH]; > ! char new_tablespace_suffix[MAXPGPATH]; > ! Oid old_db_oid; > ! 
Oid new_db_oid; > > /* > * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes > *************** typedef struct > *** 276,283 **** > const char *progname; /* complete pathname for this program */ > char *exec_path; /* full path to my executable */ > char *user; /* username for clusters */ > ! char **tablespaces; /* tablespaces */ > ! int num_tablespaces; > char **libraries; /* loadable libraries */ > int num_libraries; > ClusterInfo *running_cluster; > --- 280,287 ---- > const char *progname; /* complete pathname for this program */ > char *exec_path; /* full path to my executable */ > char *user; /* username for clusters */ > ! char **old_tablespaces; /* tablespaces */ > ! int num_old_tablespaces; > char **libraries; /* loadable libraries */ > int num_libraries; > ClusterInfo *running_cluster; > *************** void get_sock_dir(ClusterInfo *cluster, > *** 398,406 **** > /* relfilenode.c */ > > void get_pg_database_relfilenode(ClusterInfo *cluster); > ! void transfer_all_new_dbs(DbInfoArr *olddb_arr, > ! DbInfoArr *newdb_arr, char *old_pgdata, char *new_pgdata); > ! > > /* tablespace.c */ > > --- 402,412 ---- > /* relfilenode.c */ > > void get_pg_database_relfilenode(ClusterInfo *cluster); > ! void transfer_all_new_tablespaces(DbInfoArr *old_db_arr, > ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata); > ! void transfer_all_new_dbs(DbInfoArr *old_db_arr, > ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, > ! char *old_tablespace); > > /* tablespace.c */ > > *************** void old_8_3_invalidate_bpchar_pattern_o > *** 464,472 **** > char *old_8_3_create_sequence_script(ClusterInfo *cluster); > > /* parallel.c */ > ! void parallel_exec_prog(const char *log_file, const char *opt_log_file, > const char *fmt,...) > __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); > ! > ! bool reap_child(bool wait_for_child); > > --- 470,480 ---- > char *old_8_3_create_sequence_script(ClusterInfo *cluster); > > /* parallel.c */ > ! 
void parallel_exec_prog(const char *log_file, const char *opt_log_file, > const char *fmt,...) > __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); > ! void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, > ! char *old_pgdata, char *new_pgdata, > ! char *old_tablespace); > ! bool reap_child(bool wait_for_child); > > diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c > new file mode 100644 > index 9d0d5a0..cfdd39c > *** a/contrib/pg_upgrade/relfilenode.c > --- b/contrib/pg_upgrade/relfilenode.c > *************** > *** 16,42 **** > > > static void transfer_single_new_db(pageCnvCtx *pageConverter, > ! FileNameMap *maps, int size); > static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, > const char *suffix); > > > /* > * transfer_all_new_dbs() > * > * Responsible for upgrading all database. invokes routines to generate mappings and then > * physically link the databases. > */ > void > ! transfer_all_new_dbs(DbInfoArr *old_db_arr, > ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata) > { > int old_dbnum, > new_dbnum; > > - pg_log(PG_REPORT, "%s user relation files\n", > - user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying"); > - > /* Scan the old cluster databases and transfer their files */ > for (old_dbnum = new_dbnum = 0; > old_dbnum < old_db_arr->ndbs; > --- 16,81 ---- > > > static void transfer_single_new_db(pageCnvCtx *pageConverter, > ! FileNameMap *maps, int size, char *old_tablespace); > static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, > const char *suffix); > > > /* > + * transfer_all_new_tablespaces() > + * > + * Responsible for upgrading all database. invokes routines to generate mappings and then > + * physically link the databases. 
> + */ > + void > + transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, > + char *old_pgdata, char *new_pgdata) > + { > + pg_log(PG_REPORT, "%s user relation files\n", > + user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying"); > + > + /* > + * Transfering files by tablespace is tricky because a single database > + * can use multiple tablespaces. For non-parallel mode, we just pass a > + * NULL tablespace path, which matches all tablespaces. In parallel mode, > + * we pass the default tablespace and all user-created tablespaces > + * and let those operations happen in parallel. > + */ > + if (user_opts.jobs <= 1) > + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, > + new_pgdata, NULL); > + else > + { > + int tblnum; > + > + for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) > + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, > + new_pgdata, os_info.old_tablespaces[tblnum]); > + /* reap all children */ > + while (reap_child(true) == true) > + ; > + } > + > + end_progress_output(); > + check_ok(); > + > + return; > + } > + > + > + /* > * transfer_all_new_dbs() > * > * Responsible for upgrading all database. invokes routines to generate mappings and then > * physically link the databases. > */ > void > ! transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, > ! char *old_pgdata, char *new_pgdata, char *old_tablespace) > { > int old_dbnum, > new_dbnum; > > /* Scan the old cluster databases and transfer their files */ > for (old_dbnum = new_dbnum = 0; > old_dbnum < old_db_arr->ndbs; > *************** transfer_all_new_dbs(DbInfoArr *old_db_a > *** 75,89 **** > #ifdef PAGE_CONVERSION > pageConverter = setupPageConverter(); > #endif > ! 
transfer_single_new_db(pageConverter, mappings, n_maps); > > pg_free(mappings); > } > } > > - end_progress_output(); > - check_ok(); > - > return; > } > > --- 114,126 ---- > #ifdef PAGE_CONVERSION > pageConverter = setupPageConverter(); > #endif > ! transfer_single_new_db(pageConverter, mappings, n_maps, > ! old_tablespace); > > pg_free(mappings); > } > } > > return; > } > > *************** get_pg_database_relfilenode(ClusterInfo > *** 125,131 **** > */ > static void > transfer_single_new_db(pageCnvCtx *pageConverter, > ! FileNameMap *maps, int size) > { > int mapnum; > bool vm_crashsafe_match = true; > --- 162,168 ---- > */ > static void > transfer_single_new_db(pageCnvCtx *pageConverter, > ! FileNameMap *maps, int size, char *old_tablespace) > { > int mapnum; > bool vm_crashsafe_match = true; > *************** transfer_single_new_db(pageCnvCtx *pageC > *** 140,157 **** > > for (mapnum = 0; mapnum < size; mapnum++) > { > ! /* transfer primary file */ > ! transfer_relfile(pageConverter, &maps[mapnum], ""); > ! > ! /* fsm/vm files added in PG 8.4 */ > ! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) > { > ! /* > ! * Copy/link any fsm and vm files, if they exist > ! */ > ! transfer_relfile(pageConverter, &maps[mapnum], "_fsm"); > ! if (vm_crashsafe_match) > ! transfer_relfile(pageConverter, &maps[mapnum], "_vm"); > } > } > } > --- 177,198 ---- > > for (mapnum = 0; mapnum < size; mapnum++) > { > ! if (old_tablespace == NULL || > ! strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) > { > ! /* transfer primary file */ > ! transfer_relfile(pageConverter, &maps[mapnum], ""); > ! > ! /* fsm/vm files added in PG 8.4 */ > ! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) > ! { > ! /* > ! * Copy/link any fsm and vm files, if they exist > ! */ > ! transfer_relfile(pageConverter, &maps[mapnum], "_fsm"); > ! if (vm_crashsafe_match) > ! transfer_relfile(pageConverter, &maps[mapnum], "_vm"); > ! 
} > } > } > } > *************** transfer_relfile(pageCnvCtx *pageConvert > *** 187,196 **** > else > snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); > > ! snprintf(old_file, sizeof(old_file), "%s/%u%s%s", map->old_dir, > ! map->old_relfilenode, type_suffix, extent_suffix); > ! snprintf(new_file, sizeof(new_file), "%s/%u%s%s", map->new_dir, > ! map->new_relfilenode, type_suffix, extent_suffix); > > /* Is it an extent, fsm, or vm file? */ > if (type_suffix[0] != '\0' || segno != 0) > --- 228,239 ---- > else > snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); > > ! snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", map->old_tablespace, > ! map->old_tablespace_suffix, map->old_db_oid, map->old_relfilenode, > ! type_suffix, extent_suffix); > ! snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", map->new_tablespace, > ! map->new_tablespace_suffix, map->new_db_oid, map->new_relfilenode, > ! type_suffix, extent_suffix); > > /* Is it an extent, fsm, or vm file? */ > if (type_suffix[0] != '\0' || segno != 0) > *************** transfer_relfile(pageCnvCtx *pageConvert > *** 239,241 **** > --- 282,285 ---- > > return; > } > + > diff --git a/contrib/pg_upgrade/tablespace.c b/contrib/pg_upgrade/tablespace.c > new file mode 100644 > index a93c517..321738d > *** a/contrib/pg_upgrade/tablespace.c > --- b/contrib/pg_upgrade/tablespace.c > *************** init_tablespaces(void) > *** 23,29 **** > set_tablespace_directory_suffix(&old_cluster); > set_tablespace_directory_suffix(&new_cluster); > > ! if (os_info.num_tablespaces > 0 && > strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0) > pg_log(PG_FATAL, > "Cannot upgrade to/from the same system catalog version when\n" > --- 23,29 ---- > set_tablespace_directory_suffix(&old_cluster); > set_tablespace_directory_suffix(&new_cluster); > > ! 
if (os_info.num_old_tablespaces > 0 && > strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0) > pg_log(PG_FATAL, > "Cannot upgrade to/from the same system catalog version when\n" > *************** get_tablespace_paths(void) > *** 57,72 **** > > res = executeQueryOrDie(conn, "%s", query); > > ! if ((os_info.num_tablespaces = PQntuples(res)) != 0) > ! os_info.tablespaces = (char **) pg_malloc( > ! os_info.num_tablespaces * sizeof(char *)); > else > ! os_info.tablespaces = NULL; > > i_spclocation = PQfnumber(res, "spclocation"); > > ! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++) > ! os_info.tablespaces[tblnum] = pg_strdup( > PQgetvalue(res, tblnum, i_spclocation)); > > PQclear(res); > --- 57,72 ---- > > res = executeQueryOrDie(conn, "%s", query); > > ! if ((os_info.num_old_tablespaces = PQntuples(res)) != 0) > ! os_info.old_tablespaces = (char **) pg_malloc( > ! os_info.num_old_tablespaces * sizeof(char *)); > else > ! os_info.old_tablespaces = NULL; > > i_spclocation = PQfnumber(res, "spclocation"); > > ! for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) > ! os_info.old_tablespaces[tblnum] = pg_strdup( > PQgetvalue(res, tblnum, i_spclocation)); > > PQclear(res); > diff --git a/doc/src/sgml/pgupgrade.sgml b/doc/src/sgml/pgupgrade.sgml > new file mode 100644 > index 53781e4..3e5d548 > *** a/doc/src/sgml/pgupgrade.sgml > --- b/doc/src/sgml/pgupgrade.sgml > *************** NET STOP pgsql-8.3 (<productname>Postgr > *** 342,351 **** > > <para> > The <option>--jobs</> option allows multiple CPU cores to be used > ! to dump and reload database schemas in parallel; a good place to > ! start is the number of CPU cores on the server. This option can > ! dramatically reduce the time to upgrade a multi-database server > ! running on a multiprocessor machine. > </para> > > <para> > --- 342,353 ---- > > <para> > The <option>--jobs</> option allows multiple CPU cores to be used > ! 
for file copy operations and to dump and reload database schemas in > ! parallel; a good place to start is the number of CPU cores on the > ! server, or the number of tablespaces if not using the > ! <option>--link</> option. This option can dramatically reduce the > ! time to upgrade a multi-database server running on a multiprocessor > ! machine. > </para> > > <para> > : > > . traprm > > export QUIET=$((QUIET + 1)) > > > /rtmp/out > > OLDREL=9.2 > NEWREL=9.3 > BRANCH=tablespace > SSD="f" > > export PGOPTIONS="-c synchronous_commit=off" > > for CYCLES in 2 4 8 16 32 64 128 > do > echo "$CYCLES" >> /rtmp/out > for JOBLIMIT in 1 2 > do > cd /pgsql/$REL > pgsw $BRANCH > cd - > tools/setup $OLDREL $NEWREL > sleep 2 > [ "$SSD" = "f" ] && tools/mv_to_archive > > # need for +16k > for CONFIG in /u/pgsql.old/data/postgresql.conf /u/pgsql/data/postgresql.conf > do > pipe sed 's/#max_locks_per_transaction = 64/max_locks_per_transaction = 64000/' "$CONFIG" > pipe sed 's/shared_buffers = 128MB/shared_buffers = 1GB/' "$CONFIG" > done > > pgstart -w /u/pgsql.old/data > for DIR in /archive/tmp/t1 /backup0/tmp/t2 > do rm -rf "$DIR" > mkdir "$DIR" > chown postgres "$DIR" > chmod 0700 "$DIR" > done > > echo "CREATE TABLESPACE t1 LOCATION '/archive/tmp/t1';" | sql --echo-all test > echo "CREATE TABLESPACE t2 LOCATION '/backup0/tmp/t2';" | sql --echo-all test > > for SPNO in $(jot 2) > do > for TBLNO in $(jot $(($CYCLES / 2)) ) > do > echo "CREATE TABLE test${SPNO}_${TBLNO} (x TEXT) TABLESPACE t$SPNO;" > echo "ALTER TABLE test${SPNO}_${TBLNO} ALTER COLUMN x SET STORAGE EXTERNAL;" > echo "INSERT INTO test${SPNO}_${TBLNO} SELECT repeat('x', 999000000);" > done | > PGOPTIONS="-c synchronous_commit=off" sql --single-transaction --echo-all test > done > pgstop /u/pgsql.old/data > sleep 2 > # clear cache > echo 3 > /proc/sys/vm/drop_caches > # allow system to repopulate > sleep 15 > /usr/bin/time --output=/rtmp/out --append --format '%e' tools/upgrade -j $JOBLIMIT || exit > sleep 2 > done 
> done > > bell > > -- > Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) > To make changes to your subscription: > http://www.postgresql.org/mailpref/pgsql-hackers -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
pgsql-hackers by date: