WIP: Page space reservation (pgupgrade) - Mailing list pgsql-hackers
From | Zdenek Kotala |
---|---|
Subject | WIP: Page space reservation (pgupgrade) |
Date | |
Msg-id | 491619D2.2090602@sun.com Whole thread Raw |
Responses |
Re: WIP: Page space reservation (pgupgrade)
|
List | pgsql-hackers |
Attached patch allows to setup storage parameter for space reservation. I use reloptions capability for it. You can use it: CREATE TABLE test(id int) with (reservedspace=10); The idea is to reduce freespace value about reservedspace on places where PageGet(Heap)FreeSpace is called. I need perform more tests on this patch, however I need feedback if it is reasonable way. It seems to me that the patch could be backported without any problem. I have still following doubts/questions: 1) GiST - gist uses gistnospace() function to check correct amount of space. unfortunately, it does not have information about Relation. The function is called from: gistContinueInsert(), gistplacetopage(), and gistVacuumUpdate(). It seems to me that better that modify this function should be modified these callers (exclude gistContinueInsert see 2) 2) WAL - I do not modify freespace during WAL replay. I think that when reservedspace is set, then WAL record cannot break a space reservation. 3) vacuum - PageGetFreeSpaceWithFillFactor It look likes that vacuum uses fill factor to check possible free space on page, but it does not try to free space on page to satisfy fill factor criteria. Is it correct or I'm wrong? Thanks for your comments. Zdenek -- Zdenek Kotala Sun Microsystems Prague, Czech Republic http://sun.com/postgresql diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/common/reloptions.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/common/reloptions.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/common/reloptions.c 2008-11-08 23:19:47.795930570 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/common/reloptions.c 2008-11-08 23:19:47.910535376 +0100 *************** *** 286,330 **** default_reloptions(Datum reloptions, bool validate, int minFillfactor, int defaultFillfactor) { ! static const char *const default_keywords[1] = {"fillfactor"}; ! char *values[1]; ! int fillfactor; StdRdOptions *result; ! parseRelOptions(reloptions, 1, default_keywords, values, validate); /* * If no options, we can just return NULL rather than doing anything. * (defaultFillfactor is thus not used, but we require callers to pass it * anyway since we would need it if more options were added.) */ ! if (values[0] == NULL) return NULL; ! if (!parse_int(values[0], &fillfactor, 0, NULL)) { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("fillfactor must be an integer: \"%s\"", ! values[0]))); ! return NULL; } ! if (fillfactor < minFillfactor || fillfactor > 100) { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("fillfactor=%d is out of range (should be between %d and 100)", ! fillfactor, minFillfactor))); ! return NULL; } result = (StdRdOptions *) palloc(sizeof(StdRdOptions)); SET_VARSIZE(result, sizeof(StdRdOptions)); result->fillfactor = fillfactor; return (bytea *) result; } --- 286,360 ---- default_reloptions(Datum reloptions, bool validate, int minFillfactor, int defaultFillfactor) { ! static const char *const default_keywords[2] = {"fillfactor","reservedspace"}; ! char *values[2]; ! int fillfactor=defaultFillfactor; ! int reservedspace=0; StdRdOptions *result; ! parseRelOptions(reloptions, 2, default_keywords, values, validate); /* * If no options, we can just return NULL rather than doing anything. * (defaultFillfactor is thus not used, but we require callers to pass it * anyway since we would need it if more options were added.) */ ! if ((values[0] == NULL) && (values[1] == NULL)) return NULL; ! /* fill factor */ ! if (values[0] != NULL) { ! if (!parse_int(values[0], &fillfactor, 0, NULL)) ! { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("fillfactor must be an integer: \"%s\"", ! values[0]))); ! return NULL; ! } ! ! if (fillfactor < minFillfactor || fillfactor > 100) ! { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("fillfactor=%d is out of range (should be between %d and 100)", ! fillfactor, minFillfactor))); ! return NULL; ! } } ! /* reserved space */ ! if (values[1] != NULL) { ! if (!parse_int(values[1], &reservedspace, 0, NULL)) ! { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("reservedspace must be an integer: \"%s\"", ! values[1]))); ! return NULL; ! } ! ! if (reservedspace < 0 || reservedspace > BLCKSZ/4) ! { ! if (validate) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("reservedspace=%d is out of range (should be between 0 and %d)", ! reservedspace, BLCKSZ/4))); ! return NULL; ! } } result = (StdRdOptions *) palloc(sizeof(StdRdOptions)); SET_VARSIZE(result, sizeof(StdRdOptions)); result->fillfactor = fillfactor; + result->reservedspace = reservedspace; return (bytea *) result; } diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/gin/ginentrypage.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/gin/ginentrypage.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/gin/ginentrypage.c 2008-11-08 23:19:47.799408407 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/gin/ginentrypage.c 2008-11-08 23:19:47.911560880 +0100 *************** *** 314,320 **** itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } ! if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData)) return true; return false; --- 314,321 ---- itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } ! if (PageGetFreeSpace(page) - RelationGetReservedSpace(btree->index) + itupsz ! >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData)) return true; return false; diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashinsert.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashinsert.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashinsert.c 2008-11-08 23:19:47.802614678 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashinsert.c 2008-11-08 23:19:47.912878140 +0100 *************** *** 106,112 **** Assert(pageopaque->hasho_bucket == bucket); /* Do the insertion */ ! while (PageGetFreeSpace(page) < itemsz) { /* * no space on this page; check for an overflow page --- 106,112 ---- Assert(pageopaque->hasho_bucket == bucket); /* Do the insertion */ ! while (PageGetFreeSpace(page)-RelationGetReservedSpace(rel) < itemsz) { /* * no space on this page; check for an overflow page *************** *** 138,144 **** page = BufferGetPage(buf); /* should fit now, given test above */ ! Assert(PageGetFreeSpace(page) >= itemsz); } pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE); --- 138,144 ---- page = BufferGetPage(buf); /* should fit now, given test above */ ! Assert(PageGetFreeSpace(page)-RelationGetReservedSpace(rel) >= itemsz); } pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashovfl.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashovfl.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashovfl.c 2008-11-08 23:19:47.810863015 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashovfl.c 2008-11-08 23:19:47.915444493 +0100 *************** *** 656,662 **** * Walk up the bucket chain, looking for a page big enough for * this item. Exit if we reach the read page. */ ! while (PageGetFreeSpace(wpage) < itemsz) { Assert(!PageIsEmpty(wpage)); --- 656,662 ---- * Walk up the bucket chain, looking for a page big enough for * this item. Exit if we reach the read page. */ ! while (PageGetFreeSpace(wpage)-RelationGetReservedSpace(rel) < itemsz) { Assert(!PageIsEmpty(wpage)); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashpage.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashpage.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/hash/hashpage.c 2008-11-08 23:19:47.821028802 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/hash/hashpage.c 2008-11-08 23:19:47.918694933 +0100 *************** *** 856,862 **** itemsz = IndexTupleDSize(*itup); itemsz = MAXALIGN(itemsz); ! if (PageGetFreeSpace(npage) < itemsz) { /* write out nbuf and drop lock, but keep pin */ _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK); --- 856,862 ---- itemsz = IndexTupleDSize(*itup); itemsz = MAXALIGN(itemsz); ! if (PageGetFreeSpace(npage)-RelationGetReservedSpace(rel) < itemsz) { /* write out nbuf and drop lock, but keep pin */ _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/heap/heapam.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/heapam.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/heap/heapam.c 2008-11-08 23:19:47.868737614 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/heapam.c 2008-11-08 23:19:47.930477950 +0100 *************** *** 2593,2599 **** HeapTupleHasExternal(newtup) || newtup->t_len > TOAST_TUPLE_THRESHOLD); ! pagefree = PageGetHeapFreeSpace(page); newtupsize = MAXALIGN(newtup->t_len); --- 2593,2599 ---- HeapTupleHasExternal(newtup) || newtup->t_len > TOAST_TUPLE_THRESHOLD); ! pagefree = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation); newtupsize = MAXALIGN(newtup->t_len); *************** *** 2658,2664 **** /* Re-acquire the lock on the old tuple's page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* Re-check using the up-to-date free space */ ! pagefree = PageGetHeapFreeSpace(page); if (newtupsize > pagefree) { /* --- 2658,2664 ---- /* Re-acquire the lock on the old tuple's page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* Re-check using the up-to-date free space */ ! pagefree = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation); if (newtupsize > pagefree) { /* diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/heap/hio.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/hio.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/heap/hio.c 2008-11-08 23:19:47.874146098 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/hio.c 2008-11-08 23:19:47.932167154 +0100 *************** *** 269,275 **** * we're done. */ page = BufferGetPage(buffer); ! pageFreeSpace = PageGetHeapFreeSpace(page); if (len + saveFreeSpace <= pageFreeSpace) { /* use this page as future insert target, too */ --- 269,275 ---- * we're done. */ page = BufferGetPage(buffer); ! pageFreeSpace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation); if (len + saveFreeSpace <= pageFreeSpace) { /* use this page as future insert target, too */ *************** *** 362,368 **** PageInit(page, BufferGetPageSize(buffer), 0); ! if (len > PageGetHeapFreeSpace(page)) { /* We should not get here given the test at the top */ elog(PANIC, "tuple is too big: size %lu", (unsigned long) len); --- 362,368 ---- PageInit(page, BufferGetPageSize(buffer), 0); ! if (len > PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation)) { /* We should not get here given the test at the top */ elog(PANIC, "tuple is too big: size %lu", (unsigned long) len); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/heap/pruneheap.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/pruneheap.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/heap/pruneheap.c 2008-11-08 23:19:47.877897804 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/pruneheap.c 2008-11-08 23:19:47.933351529 +0100 *************** *** 100,106 **** HEAP_DEFAULT_FILLFACTOR); minfree = Max(minfree, BLCKSZ / 10); ! if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) { /* OK, try to get exclusive buffer lock */ if (!ConditionalLockBufferForCleanup(buffer)) --- 100,106 ---- HEAP_DEFAULT_FILLFACTOR); minfree = Max(minfree, BLCKSZ / 10); ! if (PageIsFull(page) || PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation) < minfree) { /* OK, try to get exclusive buffer lock */ if (!ConditionalLockBufferForCleanup(buffer)) *************** *** 112,118 **** * prune. (We needn't recheck PageIsPrunable, since no one else could * have pruned while we hold pin.) */ ! if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) { /* OK to prune (though not to remove redirects) */ (void) heap_page_prune(relation, buffer, OldestXmin, false, true); --- 112,118 ---- * prune. (We needn't recheck PageIsPrunable, since no one else could * have pruned while we hold pin.) */ ! if (PageIsFull(page) || PageGetHeapFreeSpace(page)-RelationGetReservedSpace(relation) < minfree) { /* OK to prune (though not to remove redirects) */ (void) heap_page_prune(relation, buffer, OldestXmin, false, true); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/heap/rewriteheap.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/rewriteheap.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/heap/rewriteheap.c 2008-11-08 23:19:47.881338490 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/heap/rewriteheap.c 2008-11-08 23:19:47.934390304 +0100 *************** *** 600,606 **** /* Now we can check to see if there's enough free space already. */ if (state->rs_buffer_valid) { ! pageFreeSpace = PageGetHeapFreeSpace(page); if (len + saveFreeSpace > pageFreeSpace) { --- 600,606 ---- /* Now we can check to see if there's enough free space already. */ if (state->rs_buffer_valid) { ! pageFreeSpace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(state->rs_new_rel); if (len + saveFreeSpace > pageFreeSpace) { diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/nbtree/nbtinsert.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/nbtree/nbtinsert.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/nbtree/nbtinsert.c 2008-11-08 23:19:47.891455345 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/nbtree/nbtinsert.c 2008-11-08 23:19:47.938184262 +0100 *************** *** 445,451 **** */ movedright = false; vacuumed = false; ! while (PageGetFreeSpace(page) < itemsz) { Buffer rbuf; --- 445,451 ---- */ movedright = false; vacuumed = false; ! while (PageGetFreeSpace(page)-RelationGetReservedSpace(rel) < itemsz) { Buffer rbuf; *************** *** 463,469 **** */ vacuumed = true; ! if (PageGetFreeSpace(page) >= itemsz) break; /* OK, now we have enough space */ } --- 463,469 ---- */ vacuumed = true; ! if (PageGetFreeSpace(page)-RelationGetReservedSpace(rel) >= itemsz) break; /* OK, now we have enough space */ } *************** *** 579,585 **** * so this comparison is correct even though we appear to be accounting * only for the item and not for its line pointer. */ ! if (PageGetFreeSpace(page) < itemsz) { bool is_root = P_ISROOT(lpageop); bool is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(lpageop); --- 579,585 ---- * so this comparison is correct even though we appear to be accounting * only for the item and not for its line pointer. */ ! if (PageGetFreeSpace(page)-RelationGetReservedSpace(rel) < itemsz) { bool is_root = P_ISROOT(lpageop); bool is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(lpageop); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/access/nbtree/nbtsort.c pgsql_spacereserve.c901d4bb8cca/src/backend/access/nbtree/nbtsort.c *** pgsql_spacereserve.9e46c188067f/src/backend/access/nbtree/nbtsort.c 2008-11-08 23:19:47.894508546 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/access/nbtree/nbtsort.c 2008-11-08 23:19:47.941241651 +0100 *************** *** 462,468 **** nblkno = state->btps_blkno; last_off = state->btps_lastoff; ! pgspc = PageGetFreeSpace(npage); itupsz = IndexTupleDSize(*itup); itupsz = MAXALIGN(itupsz); --- 462,468 ---- nblkno = state->btps_blkno; last_off = state->btps_lastoff; ! pgspc = PageGetFreeSpace(npage)-RelationGetReservedSpace(wstate->index); itupsz = IndexTupleDSize(*itup); itupsz = MAXALIGN(itupsz); diff -Nrc pgsql_spacereserve.9e46c188067f/src/backend/commands/vacuumlazy.c pgsql_spacereserve.c901d4bb8cca/src/backend/commands/vacuumlazy.c *** pgsql_spacereserve.9e46c188067f/src/backend/commands/vacuumlazy.c 2008-11-08 23:19:47.898479299 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/backend/commands/vacuumlazy.c 2008-11-08 23:19:47.945127252 +0100 *************** *** 343,349 **** PageInit(page, BufferGetPageSize(buf), 0); empty_pages++; } ! freespace = PageGetHeapFreeSpace(page); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); --- 343,349 ---- PageInit(page, BufferGetPageSize(buf), 0); empty_pages++; } ! freespace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(onerel); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); *************** *** 354,360 **** if (PageIsEmpty(page)) { empty_pages++; ! freespace = PageGetHeapFreeSpace(page); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, blkno, freespace); continue; --- 354,360 ---- if (PageIsEmpty(page)) { empty_pages++; ! freespace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(onerel); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, blkno, freespace); continue; *************** *** 524,530 **** vacuumed_pages++; } ! freespace = PageGetHeapFreeSpace(page); /* Remember the location of the last page with nonremovable tuples */ if (hastup) --- 524,530 ---- vacuumed_pages++; } ! freespace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(onerel); /* Remember the location of the last page with nonremovable tuples */ if (hastup) *************** *** 626,632 **** /* Now that we've compacted the page, record its available space */ page = BufferGetPage(buf); ! freespace = PageGetHeapFreeSpace(page); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, tblk, freespace); --- 626,632 ---- /* Now that we've compacted the page, record its available space */ page = BufferGetPage(buf); ! freespace = PageGetHeapFreeSpace(page)-RelationGetReservedSpace(onerel); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, tblk, freespace); diff -Nrc pgsql_spacereserve.9e46c188067f/src/include/utils/rel.h pgsql_spacereserve.c901d4bb8cca/src/include/utils/rel.h *** pgsql_spacereserve.9e46c188067f/src/include/utils/rel.h 2008-11-08 23:19:47.900403231 +0100 --- pgsql_spacereserve.c901d4bb8cca/src/include/utils/rel.h 2008-11-08 23:19:47.946913616 +0100 *************** *** 214,219 **** --- 214,220 ---- { int32 vl_len_; /* varlena header (do not touch directly!) */ int fillfactor; /* page fill factor in percent (0..100) */ + int reservedspace; /* page reserved space for in-place upgrade in bytes */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 *************** *** 228,233 **** --- 229,242 ---- ((StdRdOptions *) (relation)->rd_options)->fillfactor : (defaultff)) /* + * RelationGetReservedSpace + * Returns the relation's reserved space. + */ + #define RelationGetReservedSpace(relation) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->reservedspace : 0) + + /* * RelationGetTargetPageUsage * Returns the relation's desired space usage per page in bytes. */
pgsql-hackers by date: