From 09a261ef837420aaceef9d352a3626cf5b126797 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 9 Mar 2025 03:06:51 +0200 Subject: [PATCH v1] revert: reindexdb: Add the index-level REINDEX with multiple jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit reverts 47f99a407d. The code was written assuming that running run_reindex_command() with async == true can schedule a number of queries for a connection. That's not true, and the second query sent using run_reindex_command() will wait for the completion of the previous one. That makes this feature almost useless. However, there is still an ability to call index-level reindex in parallel mode. It would issue a warning and fallback to serial mode, as it might already be in some user scripts. Reported-by: Álvaro Herrera Discussion: https://postgr.es/m/202503071820.j25zn3lo4hvn%40alvherre.pgsql --- doc/src/sgml/ref/reindexdb.sgml | 3 +- src/bin/scripts/reindexdb.c | 162 +++++------------------------ src/bin/scripts/t/090_reindexdb.pl | 8 +- 3 files changed, 31 insertions(+), 142 deletions(-) diff --git a/doc/src/sgml/ref/reindexdb.sgml b/doc/src/sgml/ref/reindexdb.sgml index 98c3333228f..a877439dc5b 100644 --- a/doc/src/sgml/ref/reindexdb.sgml +++ b/doc/src/sgml/ref/reindexdb.sgml @@ -179,7 +179,8 @@ PostgreSQL documentation setting is high enough to accommodate all connections. - Note that this option is incompatible with the option. + Note that this option is incompatible with the + and options. diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c index b00c8112869..5ddd83f3828 100644 --- a/src/bin/scripts/reindexdb.c +++ b/src/bin/scripts/reindexdb.c @@ -205,8 +205,23 @@ main(int argc, char *argv[]) setup_cancel_handler(NULL); - if (concurrentCons > 1 && syscatalog) - pg_fatal("cannot use multiple jobs to reindex system catalogs"); + if (concurrentCons > 1) + { + /* + * Index-level REINDEX is not supported with multiple jobs as we + * cannot control the concurrent processing of multiple indexes + * depending on the same relation. + */ + if (indexes.head != NULL) + { + pg_log_warning("cannot use multiple jobs to reindex indexes, " + "fallback to single job"); + concurrentCons = 1; + } + + if (syscatalog) + pg_fatal("cannot use multiple jobs to reindex system catalogs"); + } if (alldb) { @@ -246,7 +261,7 @@ main(int argc, char *argv[]) if (indexes.head != NULL) reindex_one_database(&cparams, REINDEX_INDEX, &indexes, progname, echo, verbose, - concurrently, concurrentCons, tablespace); + concurrently, 1, tablespace); if (tables.head != NULL) reindex_one_database(&cparams, REINDEX_TABLE, &tables, @@ -276,16 +291,12 @@ reindex_one_database(ConnParams *cparams, ReindexType type, { PGconn *conn; SimpleStringListCell *cell; - SimpleStringListCell *indices_tables_cell = NULL; bool parallel = concurrentCons > 1; SimpleStringList *process_list = user_list; - SimpleStringList *indices_tables_list = NULL; ReindexType process_type = type; ParallelSlotArray *sa; bool failed = false; int items_count = 0; - char *prev_index_table_name = NULL; - ParallelSlot *free_slot = NULL; conn = connectDatabase(cparams, progname, echo, false, true); @@ -361,36 +372,8 @@ reindex_one_database(ConnParams *cparams, ReindexType type, } break; - case REINDEX_INDEX: - Assert(user_list != NULL); - - /* - * Build a list of relations from the indices. This will - * accordingly reorder the list of indices too. - */ - indices_tables_list = get_parallel_object_list(conn, process_type, - user_list, echo); - - /* - * Bail out if nothing to process. 'user_list' was modified - * in-place, so check if it has at least one cell. - */ - if (user_list->head == NULL) - { - PQfinish(conn); - return; - } - - /* - * Assuming 'user_list' is not empty, 'indices_tables_list' - * shouldn't be empty as well. - */ - Assert(indices_tables_list != NULL); - indices_tables_cell = indices_tables_list->head; - - break; - case REINDEX_SYSTEM: + case REINDEX_INDEX: /* not supported */ Assert(false); break; @@ -431,7 +414,7 @@ reindex_one_database(ConnParams *cparams, ReindexType type, do { const char *objname = cell->val; - bool need_new_slot = true; + ParallelSlot *free_slot = NULL; if (CancelRequested) { @@ -439,33 +422,14 @@ reindex_one_database(ConnParams *cparams, ReindexType type, goto finish; } - /* - * For parallel index-level REINDEX, the indices of the same table are - * ordered together and they are to be processed by the same job. So, - * we don't switch the job as soon as the index belongs to the same - * table as the previous one. - */ - if (parallel && process_type == REINDEX_INDEX) + free_slot = ParallelSlotsGetIdle(sa, NULL); + if (!free_slot) { - if (prev_index_table_name != NULL && - strcmp(prev_index_table_name, indices_tables_cell->val) == 0) - need_new_slot = false; - prev_index_table_name = indices_tables_cell->val; - indices_tables_cell = indices_tables_cell->next; - } - - if (need_new_slot) - { - free_slot = ParallelSlotsGetIdle(sa, NULL); - if (!free_slot) - { - failed = true; - goto finish; - } - - ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); + failed = true; + goto finish; } + ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); run_reindex_command(free_slot->connection, process_type, objname, echo, verbose, concurrently, true, tablespace); @@ -482,12 +446,6 @@ finish: pg_free(process_list); } - if (indices_tables_list) - { - simple_string_list_destroy(indices_tables_list); - pg_free(indices_tables_list); - } - ParallelSlotsTerminate(sa); pfree(sa); @@ -705,61 +663,8 @@ get_parallel_object_list(PGconn *conn, ReindexType type, } break; - case REINDEX_INDEX: - { - SimpleStringListCell *cell; - - Assert(user_list != NULL); - - /* - * Straight-forward index-level REINDEX is not supported with - * multiple jobs as we cannot control the concurrent - * processing of multiple indexes depending on the same - * relation. But we can extract the appropriate table name - * for the index and put REINDEX INDEX commands into different - * jobs, according to the parent tables. - * - * We will order the results to group the same tables - * together. We fetch index names as well to build a new list - * of them with matching order. - */ - appendPQExpBufferStr(&catalog_query, - "SELECT t.relname, n.nspname, i.relname\n" - "FROM pg_catalog.pg_index x\n" - "JOIN pg_catalog.pg_class t ON t.oid = x.indrelid\n" - "JOIN pg_catalog.pg_class i ON i.oid = x.indexrelid\n" - "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = t.relnamespace\n" - "WHERE x.indexrelid OPERATOR(pg_catalog.=) ANY(ARRAY['"); - - for (cell = user_list->head; cell; cell = cell->next) - { - if (cell != user_list->head) - appendPQExpBufferStr(&catalog_query, "', '"); - - appendQualifiedRelation(&catalog_query, cell->val, conn, echo); - } - - /* - * Order tables by the size of its greatest index. Within the - * table, order indexes by their sizes. - */ - appendPQExpBufferStr(&catalog_query, - "']::pg_catalog.regclass[])\n" - "ORDER BY max(i.relpages) OVER \n" - " (PARTITION BY n.nspname, t.relname),\n" - " n.nspname, t.relname, i.relpages;\n"); - - /* - * We're going to re-order the user_list to match the order of - * tables. So, empty the user_list to fill it from the query - * result. - */ - simple_string_list_destroy(user_list); - user_list->head = user_list->tail = NULL; - } - break; - case REINDEX_SYSTEM: + case REINDEX_INDEX: case REINDEX_TABLE: Assert(false); break; @@ -791,21 +696,6 @@ get_parallel_object_list(PGconn *conn, ReindexType type, simple_string_list_append(tables, buf.data); resetPQExpBuffer(&buf); - - if (type == REINDEX_INDEX) - { - /* - * For index-level REINDEX, rebuild the list of indexes to match - * the order of tables list. - */ - appendPQExpBufferStr(&buf, - fmtQualifiedIdEnc(PQgetvalue(res, i, 1), - PQgetvalue(res, i, 2), - PQclientEncoding(conn))); - - simple_string_list_append(user_list, buf.data); - resetPQExpBuffer(&buf); - } } termPQExpBuffer(&buf); PQclear(res); diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl index 378f8ad7a58..2c4b641c9b2 100644 --- a/src/bin/scripts/t/090_reindexdb.pl +++ b/src/bin/scripts/t/090_reindexdb.pl @@ -263,11 +263,9 @@ $node->safe_psql( CREATE SCHEMA s1; CREATE TABLE s1.t1(id integer); CREATE INDEX ON s1.t1(id); - CREATE INDEX i1 ON s1.t1(id); CREATE SCHEMA s2; CREATE TABLE s2.t2(id integer); CREATE INDEX ON s2.t2(id); - CREATE INDEX i2 ON s2.t2(id); -- empty schema CREATE SCHEMA s3; |); @@ -279,11 +277,11 @@ $node->command_ok( [ 'reindexdb', '--jobs' => '2', - '--index' => 's1.i1', - '--index' => 's2.i2', + '--index' => 's1.t1_id_idx', + '--index' => 's2.t2_id_idx', 'postgres', ], - 'parallel reindexdb for indices'); + 'failover for parallel reindexdb for indices'); # Note that the ordering of the commands is not stable, so the second # command for s2.t2 is not checked after. $node->issues_sql_like( -- 2.39.5 (Apple Git-154)