From 7d6ad6345b97d6b9588dcdab39fab614c8cb6ece Mon Sep 17 00:00:00 2001 From: David Zhang Date: Thu, 17 Nov 2022 12:22:16 -0800 Subject: [PATCH 2/4] support global unique index create --- src/backend/access/nbtree/nbtsort.c | 364 ++++++++++++++++++++++- src/backend/commands/indexcmds.c | 158 ++++++++++ src/backend/storage/file/sharedfileset.c | 10 + src/backend/utils/sort/tuplesort.c | 55 +++- src/include/commands/defrem.h | 2 + src/include/nodes/execnodes.h | 5 + src/include/nodes/parsenodes.h | 4 + src/include/storage/sharedfileset.h | 1 + src/include/utils/tuplesort.h | 10 + src/test/regress/expected/indexing.out | 12 + src/test/regress/sql/indexing.sql | 12 + 11 files changed, 622 insertions(+), 11 deletions(-) diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 501e011ce1..deb399e36a 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -71,6 +71,7 @@ #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) #define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xA000000000000005) #define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000006) +#define PARALLEL_KEY_TUPLESORT_GLOBAL UINT64CONST(0xA000000000000007) /* * DISABLE_LEADER_PARTICIPATION disables the leader's participation in @@ -110,6 +111,7 @@ typedef struct BTShared bool nulls_not_distinct; bool isconcurrent; int scantuplesortstates; + bool isglobal; /* * workersdonecv is used to monitor the progress of workers. All parallel @@ -197,6 +199,7 @@ typedef struct BTLeader Snapshot snapshot; WalUsage *walusage; BufferUsage *bufferusage; + Sharedsort *sharedsortglobal; } BTLeader; /* @@ -226,6 +229,16 @@ typedef struct BTBuildState * BTBuildState. Workers have their own spool and spool2, though.) 
*/ BTLeader *btleader; + + /* + * global unique index related parameters + */ + BTSpool *spoolglobal; /* spoolglobal is used on global unique index + * build in parallel */ + bool global_index; /* true if index is global */ + int globalIndexPart; /* partition number indication */ + int nparts; /* number of partitions involved in global + * unique index build in parallel */ } BTBuildState; /* @@ -291,8 +304,29 @@ static void _bt_leader_participate_as_worker(BTBuildState *buildstate); static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, - bool progress); + bool progress, Sharedsort *sharedsortglobal, + bool isworker); +static BTSpool *global_btspool; +Sharedsort *global_sharedsort; + +static void +btinit_global_spool(Relation heap, Relation index, BTBuildState *buildstate) +{ + elog(DEBUG2, "%s: init global index spool", __FUNCTION__); + global_btspool = (BTSpool *) palloc0(sizeof(BTSpool)); + global_btspool->heap = heap; + global_btspool->index = index; + global_btspool->isunique = buildstate->isunique; + + global_btspool->sortstate = + tuplesort_begin_index_btree(heap, index, buildstate->isunique, + buildstate->nulls_not_distinct, + maintenance_work_mem, NULL, + false); + + tuplesort_mark_global_sort(global_btspool->sortstate); +} /* * btbuild() -- build a new btree index. 
@@ -318,22 +352,149 @@ btbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.indtuples = 0; buildstate.btleader = NULL; + if (indexInfo->ii_Global_index && indexInfo->ii_Unique) + { + /* copy global unique index related parameters to buildstate */ + buildstate.global_index = indexInfo->ii_Global_index; + buildstate.globalIndexPart = indexInfo->ii_GlobalIndexPart; + buildstate.nparts = indexInfo->ii_Nparts; + } + else + { + /* disable global unique check */ + buildstate.global_index = false; + buildstate.globalIndexPart = 0; + buildstate.nparts = 0; + } + /* * We expect to be called exactly once for any index relation. If that's - * not the case, big trouble's what we have. + * not the case, big trouble's what we have unless you set + * buildGlobalSpool to true, which only builds the global spool for global + * uniqueness check and not physical index tuples */ - if (RelationGetNumberOfBlocks(index) != 0) + if (indexInfo->ii_BuildGlobalSpool == false && RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo); + if (indexInfo->ii_ParallelWorkers > 0) + { + /* + * global uniqueness check in parallel build case + */ + if (indexInfo->ii_Global_index && indexInfo->ii_Unique && global_btspool) + { + /* + * indexInfo->ii_GlobalIndexPart <= 0 indicates the first and + * intermediate partition to build index on. For parallel global + * unique index build, we need to clean up global_btspool + * structure to prevent resource leak + */ + if (indexInfo->ii_GlobalIndexPart <= 0) + { + _bt_spooldestroy(global_btspool); + global_btspool = NULL; + } + + /* + * indexInfo->ii_GlobalIndexPart > 0 indicates the last partition + * to build index on. 
For parallel global unique index build, we + * need to call tuplesort_performsort to merge all the tapes + * created from previous partition build runs and do a final + * sorting to determine global uniqueness + */ + if (indexInfo->ii_GlobalIndexPart > 0) + { + IndexTuple itup; + + elog(DEBUG2, "last partitioned to build global index in parallel. Perform merge run and " + "uniqueness check now..."); + tuplesort_performsort(buildstate.spoolglobal->sortstate); + + /* + * this loop checks for uniqueness after all tapes have been + * merged. If a duplicate is found, we will error out. + */ + while ((itup = tuplesort_getindextuple(buildstate.spoolglobal->sortstate, + true)) != NULL) + { + /* + * simply checking for global uniqueness, nothing to do + * here + */ + } + + /* + * no global uniqueness violation is found at this point, + * remove all the tapes (temp files) and destroy resources and + * continue to build the actual index. + */ + _bt_spooldestroy(buildstate.spoolglobal); + _bt_spooldestroy(global_btspool); + global_btspool = NULL; + if (global_sharedsort) + { + pfree(global_sharedsort); + global_sharedsort = NULL; + } + } + } + } + else + { + /* + * global uniqueness check in serial build case + */ + if (indexInfo->ii_GlobalIndexPart > 0 && indexInfo->ii_Global_index && + indexInfo->ii_Unique && global_btspool) + { + /* + * indexInfo->ii_GlobalIndexPart > 0 indicates the last partition + * to build index on. For serial global unique index build, we + * call tuplesort_performsort on global_btspool->sortstate which + * should contain index tuples from all partitions. If a duplicate + * is found, we will error out. + */ + elog(DEBUG2, "last partitioned to build global index serially. Sorting global_btspool for " + "uniqueness check now..."); + tuplesort_performsort(global_btspool->sortstate); + + /* + * no global uniqueness violation is found at this point, destroy + * global_btspool structure and continue to build the actual + * index. 
+ */ + _bt_spooldestroy(global_btspool); + global_btspool = NULL; + } + } + /* - * Finish the build by (1) completing the sort of the spool file, (2) - * inserting the sorted tuples into btree pages and (3) building the upper - * levels. Finally, it may also be necessary to end use of parallelism. + * if indexInfo->ii_BuildGlobalSpool is set, we will not continue to build + * the actual index. This is used during a new partition attach, where we + * just want to populate the global_btspool from current partitions + * without building the actual indexes (because they exist already). Then, + * we take that global_btspool and sort it with the tuples in newly + * attached partition to determine if attach would violate global + * uniqueness check */ - _bt_leafbuild(buildstate.spool, buildstate.spool2); + if (indexInfo->ii_BuildGlobalSpool) + { + elog(DEBUG2, "ii_BuildGlobalSpool is set. Skip building actual index content"); + } + else + { + /* + * Finish the build by (1) completing the sort of the spool file, (2) + * inserting the sorted tuples into btree pages and (3) building the + * upper levels. Finally, it may also be necessary to end use of + * parallelism. 
+ */ + _bt_leafbuild(buildstate.spool, buildstate.spool2); + } + _bt_spooldestroy(buildstate.spool); if (buildstate.spool2) _bt_spooldestroy(buildstate.spool2); @@ -374,6 +535,7 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool)); SortCoordinate coordinate = NULL; double reltuples = 0; + SortCoordinate coordinateglobal = NULL; /* * We size the sort area as maintenance_work_mem rather than work_mem to @@ -395,8 +557,40 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, /* Attempt to launch parallel worker scan when required */ if (indexInfo->ii_ParallelWorkers > 0) + { + /* + * while building the last partition table in parallel global unique + * index build, we need to allocate a primary spoolglobal, which will + * be used in the leader process later to "take over" all tapes + * created by previous partition runs + */ + if (buildstate->isunique && buildstate->global_index && + buildstate->globalIndexPart > 0) + { + elog(DEBUG2, "init primary spoolglobal in last partition for parallel index build"); + buildstate->spoolglobal = (BTSpool *) palloc0(sizeof(BTSpool)); + buildstate->spoolglobal->heap = heap; + buildstate->spoolglobal->index = index; + buildstate->spoolglobal->isunique = indexInfo->ii_Unique; + } _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent, indexInfo->ii_ParallelWorkers); + } + else + { + /* + * in serial global unique index build, we just need to allocate a + * single global_btspool structure at the first partition build. 
The + * rest of the partitions will add their index tuples to this single + * global spool structure + */ + if (buildstate->isunique && buildstate->global_index && + buildstate->globalIndexPart < 0) + { + elog(DEBUG2, "init new global_btspool for serial index build"); + btinit_global_spool(heap, index, buildstate); + } + } /* * If parallel build requested and at least one worker process was @@ -409,6 +603,22 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, coordinate->nParticipants = buildstate->btleader->nparticipanttuplesorts; coordinate->sharedsort = buildstate->btleader->sharedsort; + + /* + * set up a coordinator state for primary spoolglobal if we are doing + * global unique index build in parallel. We do this at the last + * partition create in parallel mode + */ + if (buildstate->isunique && buildstate->global_index && buildstate->globalIndexPart > 0) + { + elog(DEBUG2, "set up coordinate state for primary spoolglobal for " + "parallel index build case"); + coordinateglobal = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinateglobal->isWorker = false; + coordinateglobal->nParticipants = + tuplesort_get_curr_workers(buildstate->btleader->sharedsortglobal); + coordinateglobal->sharedsort = buildstate->btleader->sharedsortglobal; + } } /* @@ -438,6 +648,23 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, maintenance_work_mem, coordinate, TUPLESORT_NONE); + /* + * initialize primary spoolglobal if we are doing global unique index + * build in parallel. 
We do this at the last partition create in parallel + * mode + */ + if (buildstate->btleader && buildstate->isunique && + buildstate->global_index && buildstate->globalIndexPart > 0) + { + elog(DEBUG2, "tuplesort_begin_index_btree for primary spoolglobal for " + "parallel index build case"); + buildstate->spoolglobal->sortstate = + tuplesort_begin_index_btree(heap, index, buildstate->isunique, + buildstate->nulls_not_distinct, + maintenance_work_mem, coordinateglobal, + false); + } + /* * If building a unique index, put dead tuples in a second spool to keep * them out of the uniqueness check. We expect that the second spool (for @@ -487,6 +714,19 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, reltuples = _bt_parallel_heapscan(buildstate, &indexInfo->ii_BrokenHotChain); + /* + * all parallel workers should finish at this point, make backup of + * sharedsortglobal, which is needed to persist the logical tape and temp + * file information for next partition build when building global unique + * index in parallel + */ + if (buildstate->btleader && buildstate->global_index && + buildstate->isunique) + { + elog(DEBUG2, "all workers finished, backup sharedsortglobal"); + tuplesort_copy_sharedsort2(global_sharedsort, global_btspool->sortstate); + } + /* * Set the progress target for the next phase. 
Reset the block number * values set by table_index_build_scan @@ -599,7 +839,11 @@ _bt_build_callback(Relation index, * processing */ if (tupleIsAlive || buildstate->spool2 == NULL) + { _bt_spool(buildstate->spool, tid, values, isnull); + if (buildstate->global_index && buildstate->isunique && global_btspool) + _bt_spool(global_btspool, tid, values, isnull); + } else { /* dead tuples are put into spool2 */ @@ -1458,9 +1702,11 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) Snapshot snapshot; Size estbtshared; Size estsort; + Size estsortglobal = 0; BTShared *btshared; Sharedsort *sharedsort; Sharedsort *sharedsort2; + Sharedsort *sharedsortglobal; BTSpool *btspool = buildstate->spool; BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader)); WalUsage *walusage; @@ -1504,6 +1750,13 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) estsort = tuplesort_estimate_shared(scantuplesortstates); shm_toc_estimate_chunk(&pcxt->estimator, estsort); + if (buildstate->isunique && buildstate->global_index) + { + /* global unique index case will estimate 1 more sharedsort struct */ + estsortglobal = tuplesort_estimate_shared(scantuplesortstates * buildstate->nparts); + shm_toc_estimate_chunk(&pcxt->estimator, estsortglobal); + } + /* * Unique case requires a second spool, and so we may have to account for * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2 @@ -1571,6 +1824,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btshared->havedead = false; btshared->indtuples = 0.0; btshared->brokenhotchain = false; + btshared->isglobal = buildstate->global_index; table_parallelscan_initialize(btspool->heap, ParallelTableScanFromBTShared(btshared), snapshot); @@ -1603,6 +1857,43 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2); } + if (buildstate->isunique && 
buildstate->global_index) + { + /* global unique index case will allocate 1 more sharedsort struct */ + sharedsortglobal = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsortglobal); + if (!sharedsortglobal) + elog(ERROR, "failed to allocate shared memory space"); + + if (buildstate->globalIndexPart == -1) + { + elog(DEBUG2, "initialize and make a copy of sharedsortglobal for first time"); + tuplesort_initialize_shared(sharedsortglobal, scantuplesortstates * buildstate->nparts, NULL); + + /* save a copy of sharedsortglobal */ + global_sharedsort = (Sharedsort *) palloc(estsortglobal); + tuplesort_copy_sharedsort(global_sharedsort, sharedsortglobal); + } + else if (buildstate->globalIndexPart == 0 || buildstate->globalIndexPart == 1) + { + elog(DEBUG2, "restore the copy of sharedsortglobal for subsequent processing"); + tuplesort_copy_sharedsort(sharedsortglobal, global_sharedsort); + + /* register for cleanup at the last partition index build */ + if (buildstate->globalIndexPart == 1) + { + tuplesort_register_cleanup_callback(sharedsortglobal, pcxt->seg); + } + } + else + { + elog(ERROR, "invalid global index partition value %d", buildstate->globalIndexPart); + } + + shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_GLOBAL, sharedsortglobal); + } + else + sharedsortglobal = NULL; + /* Store query string for workers */ if (debug_query_string) { @@ -1636,6 +1927,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btleader->snapshot = snapshot; btleader->walusage = walusage; btleader->bufferusage = bufferusage; + btleader->sharedsortglobal = sharedsortglobal; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -1780,7 +2072,8 @@ _bt_leader_participate_as_worker(BTBuildState *buildstate) /* Perform work common to all participants */ _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared, btleader->sharedsort, btleader->sharedsort2, - sortmem, 
true, btleader->sharedsortglobal, + false); #ifdef BTREE_BUILD_STATS if (log_btree_build_stats) @@ -1803,6 +2096,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) BTShared *btshared; Sharedsort *sharedsort; Sharedsort *sharedsort2; + Sharedsort *sharedsortglobal; Relation heapRel; Relation indexRel; LOCKMODE heapLockmode; @@ -1878,13 +2172,26 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) tuplesort_attach_shared(sharedsort2, seg); } + if (btshared->isunique && btshared->isglobal) + { + sharedsortglobal = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_GLOBAL, false); + tuplesort_attach_shared(sharedsortglobal, seg); + elog(DEBUG2, "worker %d processing global unique index", MyProcPid); + } + else + { + sharedsortglobal = NULL; + elog(DEBUG2, "worker %d processing regular index", MyProcPid); + } + /* Prepare to track buffer usage during parallel execution */ InstrStartParallelQuery(); /* Perform sorting of spool, and possibly a spool2 */ sortmem = maintenance_work_mem / btshared->scantuplesortstates; _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort, - sharedsort2, sortmem, false); + sharedsort2, sortmem, false, + sharedsortglobal, true); /* Report WAL/buffer usage during parallel execution */ bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); @@ -1919,7 +2226,8 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, - Sharedsort *sharedsort2, int sortmem, bool progress) + Sharedsort *sharedsort2, int sortmem, bool progress, + Sharedsort *sharedsortglobal, bool isworker) { SortCoordinate coordinate; BTBuildState buildstate; @@ -1965,6 +2273,28 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, false); } + + /* global index */ + if (sharedsortglobal) + { + SortCoordinate coordinate3; + + global_btspool = (BTSpool *) palloc0(sizeof(BTSpool)); + global_btspool->heap = 
btspool->heap; + global_btspool->index = btspool->index; + global_btspool->isunique = btspool->isunique; + + coordinate3 = palloc0(sizeof(SortCoordinateData)); + coordinate3->isWorker = true; + coordinate3->nParticipants = -1; + coordinate3->sharedsort = sharedsortglobal; + global_btspool->sortstate = + tuplesort_begin_index_btree(global_btspool->heap, global_btspool->index, global_btspool->isunique, + btspool->nulls_not_distinct, sortmem, coordinate3, + false); + } + + /* Fill in buildstate for _bt_build_callback() */ buildstate.isunique = btshared->isunique; buildstate.nulls_not_distinct = btshared->nulls_not_distinct; @@ -1975,6 +2305,12 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, buildstate.indtuples = 0; buildstate.btleader = NULL; + if (btshared->isglobal && btshared->isunique) + { + /* fill global unique index related parameters in buildstate */ + buildstate.global_index = btshared->isglobal; + } + /* Join parallel scan */ indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; @@ -1997,6 +2333,11 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, tuplesort_performsort(btspool2->sortstate); } + if (global_btspool) + { + tuplesort_performsort(global_btspool->sortstate); + } + /* * Done. Record ambuild statistics, and whether we encountered a broken * HOT chain. 
@@ -2018,4 +2359,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, tuplesort_end(btspool->sortstate); if (btspool2) tuplesort_end(btspool2->sortstate); + + if (global_btspool && isworker) + tuplesort_end(global_btspool->sortstate); } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 099bf68e77..51f6b46c99 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1145,6 +1145,13 @@ DefineIndex(Oid relationId, flags |= INDEX_CREATE_PARTITIONED; if (stmt->primary) flags |= INDEX_CREATE_IS_PRIMARY; + /* copy the partition indication -1 = first, 0 = N/A, 1 = last */ + if (stmt->global_index) + { + indexInfo->ii_Global_index = stmt->global_index; + indexInfo->ii_GlobalIndexPart = stmt->globalIndexPart; + indexInfo->ii_Nparts = stmt->nparts; + } /* * If the table is partitioned, and recursion was declined but partitions @@ -1380,6 +1387,24 @@ DefineIndex(Oid relationId, bool found_whole_row; ListCell *lc; + if (i == nparts - 1 && stmt->global_index) + { + elog(DEBUG2, "mark as last partitioned to scan"); + childStmt->globalIndexPart = 1; + } + + if (i == 0 && stmt->global_index) + { + elog(DEBUG2, "mark as first partitioned to scan"); + childStmt->globalIndexPart = -1; + } + + if (stmt->global_index) + { + childStmt->nparts = nparts; + elog(DEBUG2, "total partitions to build global index %d", childStmt->nparts); + } + /* * We can't use the same index name for the child index, * so clear idxname to let the recursive invocation choose @@ -4367,3 +4392,136 @@ set_indexsafe_procflags(void) ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; LWLockRelease(ProcArrayLock); } + +bool +PopulateGlobalSpool(Relation idxRel, Relation heapRel, IndexStmt *stmt) +{ + IndexInfo *idxinfo; + int numberOfKeyAttributes; + int numberOfAttributes; + List *allIndexParams; + Oid accessMethodId; + Form_pg_am accessMethodForm; + char *accessMethodName; + HeapTuple tuple; + bool concurrent; + Oid 
*typeObjectId; + Oid *collationObjectId; + Oid *classObjectId; + int16 *coloptions; + IndexAmRoutine *amRoutine; + bool amcanorder; + Oid root_save_userid; + int root_save_sec_context; + int root_save_nestlevel; + + root_save_nestlevel = NewGUCNestLevel(); + + if (stmt->concurrent && get_rel_persistence(RelationGetRelid(heapRel)) != RELPERSISTENCE_TEMP) + concurrent = true; + else + concurrent = false; + + allIndexParams = list_concat_copy(stmt->indexParams, + stmt->indexIncludingParams); + + numberOfKeyAttributes = list_length(stmt->indexParams); + numberOfAttributes = list_length(allIndexParams); + + accessMethodName = stmt->accessMethod; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + if (!HeapTupleIsValid(tuple)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + accessMethodName))); + } + + accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); + accessMethodId = accessMethodForm->oid; + amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); + + GetUserIdAndSecContext(&root_save_userid, &root_save_sec_context); + SetUserIdAndSecContext(heapRel->rd_rel->relowner, + root_save_sec_context | SECURITY_RESTRICTED_OPERATION); + + idxinfo = makeIndexInfo(numberOfAttributes, + numberOfKeyAttributes, + accessMethodId, + NIL, /* expressions, NIL for now */ + make_ands_implicit((Expr *) stmt->whereClause), + stmt->unique, + stmt->nulls_not_distinct, + !concurrent, + concurrent); + + typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16)); + amcanorder = amRoutine->amcanorder; + + pfree(amRoutine); + ReleaseSysCache(tuple); + + ComputeIndexAttrs(idxinfo, + typeObjectId, collationObjectId, classObjectId, + coloptions, allIndexParams, + stmt->excludeOpNames, 
RelationGetRelid(heapRel), + accessMethodName, accessMethodId, + amcanorder, stmt->isconstraint, root_save_userid, + root_save_sec_context, &root_save_nestlevel); + + /* Fill global unique index related parameters */ + idxinfo->ii_GlobalIndexPart = stmt->globalIndexPart; + idxinfo->ii_BuildGlobalSpool = true; + idxinfo->ii_Nparts = stmt->nparts; + idxinfo->ii_Global_index = stmt->global_index; + + /* + * Determine worker process details for parallel CREATE INDEX. Currently, + * only btree has support for parallel builds. + */ + if (IsNormalProcessingMode() && idxRel->rd_rel->relam == BTREE_AM_OID) + { + idxinfo->ii_ParallelWorkers = + plan_create_index_workers(RelationGetRelid(heapRel), + RelationGetRelid(idxRel)); + } + + idxRel->rd_indam->ambuild(heapRel, idxRel, + idxinfo); + + return true; +} + +void +ChangeRelKind(Relation idxRel, char kind) +{ + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + + /* + * Get a writable copy of the pg_class tuple for the given relation. + */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(idxRel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(idxRel)); + + classform = (Form_pg_class) GETSTRUCT(tuple); + + classform->relkind = kind; + idxRel->rd_rel->relkind = kind; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + + heap_freetuple(tuple); + + table_close(pg_class, RowExclusiveLock); +} diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c index 0782f50ba6..d82d1abcc6 100644 --- a/src/backend/storage/file/sharedfileset.c +++ b/src/backend/storage/file/sharedfileset.c @@ -91,6 +91,16 @@ SharedFileSetDeleteAll(SharedFileSet *fileset) FileSetDeleteAll(&fileset->fs); } +/* + * Register cleanup callback of an already initialized fileset. 
+ */ +void +SharedFileSetRegisterCleanupCallback(SharedFileSet *fileset, dsm_segment *seg) +{ + /* Register our cleanup callback. */ + if (seg) + on_dsm_detach(seg, SharedFileSetOnDetach, PointerGetDatum(fileset)); +} /* * Callback function that will be invoked when this backend detaches from a * DSM segment holding a SharedFileSet that it has created or attached to. If diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 416f02ba3c..ef613e2fe2 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -328,6 +328,7 @@ struct Tuplesortstate */ int64 abbrevNext; /* Tuple # at which to next check * applicability */ + bool isglobalsort; /* * Resource snapshot for time of sort start. @@ -2190,7 +2191,14 @@ mergeruns(Tuplesortstate *state) /* Tell logtape.c we won't be writing anymore */ LogicalTapeSetForgetFreeSpace(state->tapeset); /* Initialize for the final merge pass */ - beginmerge(state); + if (state->isglobalsort) + { + elog(DEBUG2, "global unique index final merge run..."); + mergeonerun(state); + } + else + beginmerge(state); + state->status = TSS_FINALMERGE; return; } @@ -3002,6 +3010,51 @@ tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) SharedFileSetAttach(&shared->fileset, seg); } +void +tuplesort_mark_global_sort(Tuplesortstate *state) +{ + state->isglobalsort = true; +} + +void +tuplesort_copy_sharedsort(Sharedsort *shared1, Sharedsort *shared2) +{ + if (!shared1 || !shared2) + elog(ERROR, "%s: cannot do sharedsort copy due to bad input", __FUNCTION__); + + shared1->currentWorker = shared2->currentWorker; + shared1->mutex = shared2->mutex; + shared1->nTapes = shared2->nTapes; + shared1->fileset = shared2->fileset; + shared1->workersFinished = shared2->workersFinished; + memcpy(shared1->tapes, shared2->tapes, sizeof(TapeShare) * shared2->nTapes); +} + +void +tuplesort_copy_sharedsort2(Sharedsort *shared1, Tuplesortstate *state) +{ + if (!shared1 || !state) + elog(ERROR, 
"%s: cannot do sharedsort copy due to bad input", __FUNCTION__); + + shared1->currentWorker = state->shared->currentWorker; + shared1->mutex = state->shared->mutex; + shared1->nTapes = state->shared->nTapes; + shared1->fileset = state->shared->fileset; + shared1->workersFinished = state->shared->workersFinished; + memcpy(shared1->tapes, state->shared->tapes, sizeof(TapeShare) * state->shared->nTapes); +} + +int +tuplesort_get_curr_workers(Sharedsort *shared) +{ + return shared->currentWorker; +} + +void tuplesort_register_cleanup_callback(Sharedsort *shared, dsm_segment *seg) +{ + SharedFileSetRegisterCleanupCallback(&shared->fileset, seg); +} + /* * worker_get_identifier - Assign and return ordinal identifier for worker * diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 1d3ce246c9..1593357d5d 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -47,6 +47,8 @@ extern bool CheckIndexCompatible(Oid oldId, extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); extern Oid ResolveOpClass(List *opclass, Oid attrType, const char *accessMethodName, Oid accessMethodId); +extern bool PopulateGlobalSpool(Relation ixsRel, Relation heapRel, IndexStmt *stmt); +extern void ChangeRelKind(Relation idxRel, char kind); /* commands/functioncmds.c */ extern ObjectAddress CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 01b1727fc0..c2c8039d3d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -197,6 +197,11 @@ typedef struct IndexInfo int ii_ParallelWorkers; Oid ii_Am; void *ii_AmCache; + bool ii_Global_index; /* true if index is global */ + int ii_GlobalIndexPart; /* partition number indication */ + bool ii_BuildGlobalSpool; /* indicate to build global spool only */ + int ii_Nparts; /* num partitions for global index build in + * parallel */ MemoryContext ii_Context; } IndexInfo; diff --git 
a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 880dd6011a..8febb506f0 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2992,6 +2992,10 @@ typedef struct IndexStmt bool reset_default_tblspc; /* reset default_tablespace prior to * executing */ bool global_index; /* true if index is global */ + int globalIndexPart; /* partition number indication */ + int nparts; /* num partitions for global index build in + * parallel */ + } IndexStmt; /* ---------------------- diff --git a/src/include/storage/sharedfileset.h b/src/include/storage/sharedfileset.h index b1cde36d0b..50580d5140 100644 --- a/src/include/storage/sharedfileset.h +++ b/src/include/storage/sharedfileset.h @@ -33,5 +33,6 @@ typedef struct SharedFileSet extern void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg); extern void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg); extern void SharedFileSetDeleteAll(SharedFileSet *fileset); +extern void SharedFileSetRegisterCleanupCallback(SharedFileSet *fileset, dsm_segment *seg); #endif diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 15f2a4a795..ccc389c1f4 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -247,6 +247,16 @@ typedef struct elog(ERROR, "unexpected end of data"); \ } while(0) +extern void tuplesort_mark_global_sort(Tuplesortstate *state); + +extern void tuplesort_copy_sharedsort(Sharedsort *shared1, Sharedsort *shared2); + +extern void tuplesort_copy_sharedsort2(Sharedsort *shared1, Tuplesortstate *state); + +extern int tuplesort_get_curr_workers(Sharedsort *shared); + +extern void tuplesort_register_cleanup_callback(Sharedsort *shared, dsm_segment *seg); + /* * We provide multiple interfaces to what is essentially the same code, * since different callers have different data to be sorted and want to diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index 
eed20063c1..30a3ce0f20 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -1461,3 +1461,15 @@ Partitions: gidxpart1_b_idx, drop index gidx_u; drop table gidxpart; +-- Test the cross-partition uniqueness with non-partition key with global unique index +create table gidxpart (a int, b int, c text) partition by range (a); +create table gidxpart1 partition of gidxpart for values from (0) to (100000); +create table gidxpart2 partition of gidxpart for values from (100000) to (199999); +insert into gidxpart (a, b, c) values (42, 572814, 'inserted first on gidxpart1'); +insert into gidxpart (a, b, c) values (150000, 572814, 'inserted second on gidxpart2'); +create unique index on gidxpart (b) global; -- should fail +ERROR: could not create unique index "gidxpart1_b_idx" +DETAIL: Key (b)=(572814) is duplicated. +delete from gidxpart where a = 150000 and b = 572814; +create unique index on gidxpart (b) global; +drop table gidxpart; diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 2169f28e69..84dde4df93 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -771,3 +771,15 @@ select relname, relhasindex, relkind from pg_class where relname like '%gidx%' o \d+ gidx_u drop index gidx_u; drop table gidxpart; + +-- Test the cross-partition uniqueness with non-partition key with global unique index +create table gidxpart (a int, b int, c text) partition by range (a); +create table gidxpart1 partition of gidxpart for values from (0) to (100000); +create table gidxpart2 partition of gidxpart for values from (100000) to (199999); +insert into gidxpart (a, b, c) values (42, 572814, 'inserted first on gidxpart1'); +insert into gidxpart (a, b, c) values (150000, 572814, 'inserted second on gidxpart2'); +create unique index on gidxpart (b) global; -- should fail +delete from gidxpart where a = 150000 and b = 572814; +create unique index on gidxpart (b) global; +drop 
table gidxpart; + -- 2.17.1