Thread: Re: [HACKERS] pg_freespacemap question
Tom Lane wrote: > Mark Kirkwood <markir@paradise.net.nz> writes: > >>>Good points! I had not noticed this test case. Probably NULL is better > > >>Would setting it to 'BLCKSZ - (fixed index header stuff)' be better, > > > No, I don't think so, because that will just make it harder to recognize > what's what (remember that BLCKSZ isn't really a constant, and the index > overhead is not the same for all AMs either). The point here is that > for indexes the FSM tracks whole-page availability, not the amount of > free space within pages. So I think NULL is a reasonable representation > of that. Using NULL will make it easy to filter the results if you want > to see only heap-page data or only index-page data, whereas it will be > very hard to do that if the view adopts an ultimately-artificial > convention about the amount of available space on an index page. > Right - after suggesting it I realized that coding the different index overhead for each possible AM would have been ... difficult :-). A patch is attached to implement the NULL free bytes and other recommendations: 1/ Index free bytes set to NULL 2/ Comment added to the README briefly mentioning the index business 3/ Columns reordered more logically 4/ 'Blockid' column removed 5/ Free bytes column renamed to just 'bytes' instead of 'blockfreebytes' Now 5/ was only hinted at, but seemed worth doing while I was there (hopefully I haven't made it too terse now....). cheers Mark Index: pg_freespacemap.c =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.c,v retrieving revision 1.2 diff -c -r1.2 pg_freespacemap.c *** pg_freespacemap.c 14 Feb 2006 15:03:59 -0000 1.2 --- pg_freespacemap.c 9 Mar 2006 03:38:10 -0000 *************** *** 12,18 **** #include "storage/freespace.h" #include "utils/relcache.h" ! #define NUM_FREESPACE_PAGES_ELEM 6 #if defined(WIN32) || defined(__CYGWIN__) /* Need DLLIMPORT for some things that are not so marked in main headers */ --- 12,18 ---- #include "storage/freespace.h" #include "utils/relcache.h" ! #define NUM_FREESPACE_PAGES_ELEM 5 #if defined(WIN32) || defined(__CYGWIN__) /* Need DLLIMPORT for some things that are not so marked in main headers */ *************** *** 29,40 **** typedef struct { - uint32 blockid; - uint32 relfilenode; uint32 reltablespace; uint32 reldatabase; uint32 relblocknumber; ! uint32 blockfreebytes; } FreeSpacePagesRec; --- 29,40 ---- typedef struct { uint32 reltablespace; uint32 reldatabase; + uint32 relfilenode; uint32 relblocknumber; ! uint32 bytes; ! bool isindex; } FreeSpacePagesRec; *************** *** 91,107 **** /* Construct a tuple to return. */ tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false); ! TupleDescInitEntry(tupledesc, (AttrNumber) 1, "blockid", ! INT4OID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relblocknumber", INT8OID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 6, "blockfreebytes", INT4OID, -1, 0); /* Generate attribute metadata needed later to produce tuples */ --- 91,105 ---- /* Construct a tuple to return. */ tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false); ! TupleDescInitEntry(tupledesc, (AttrNumber) 1, "reltablespace", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reldatabase", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 4, "relblocknumber", INT8OID, -1, 0); ! TupleDescInitEntry(tupledesc, (AttrNumber) 5, "bytes", INT4OID, -1, 0); /* Generate attribute metadata needed later to produce tuples */ *************** *** 129,135 **** fctx->values[2] = (char *) palloc(3 * sizeof(uint32) + 1); fctx->values[3] = (char *) palloc(3 * sizeof(uint32) + 1); fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1); - fctx->values[5] = (char *) palloc(3 * sizeof(uint32) + 1); /* Return to original context when allocating transient memory */ --- 127,132 ---- *************** *** 158,169 **** for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { - fctx->record[i].blockid = i; - fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page); ! fctx->record[i].blockfreebytes = 0; /* index.*/ page++; i++; --- 155,166 ---- for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; + fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page); ! fctx->record[i].bytes = 0; ! fctx->record[i].isindex = true; page++; i++; *************** *** 178,189 **** for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { - fctx->record[i].blockid = i; - fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; fctx->record[i].relblocknumber = FSMPageGetPageNum(page); ! fctx->record[i].blockfreebytes = FSMPageGetSpace(page); page++; i++; --- 175,186 ---- for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; + fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].relblocknumber = FSMPageGetPageNum(page); ! fctx->record[i].bytes = FSMPageGetSpace(page); ! fctx->record[i].isindex = false; page++; i++; *************** *** 209,227 **** if (funcctx->call_cntr < funcctx->max_calls) { uint32 i = funcctx->call_cntr; - sprintf(fctx->values[0], "%u", fctx->record[i].blockid); - sprintf(fctx->values[1], "%u", fctx->record[i].relfilenode); - sprintf(fctx->values[2], "%u", fctx->record[i].reltablespace); - sprintf(fctx->values[3], "%u", fctx->record[i].reldatabase); - sprintf(fctx->values[4], "%u", fctx->record[i].relblocknumber); - sprintf(fctx->values[5], "%u", fctx->record[i].blockfreebytes); /* Build and return the tuple. */ ! tuple = BuildTupleFromCStrings(funcctx->attinmeta, fctx->values); result = HeapTupleGetDatum(tuple); --- 206,246 ---- if (funcctx->call_cntr < funcctx->max_calls) { uint32 i = funcctx->call_cntr; + char *values[NUM_FREESPACE_PAGES_ELEM]; + int j; + /* + * Use a temporary values array, initially pointing to fctx->values, + * so it can be reassigned w/o losing the storage for subsequent + * calls. + */ + for (j = 0; j < NUM_FREESPACE_PAGES_ELEM; j++) + { + values[j] = fctx->values[j]; + } + + + sprintf(values[0], "%u", fctx->record[i].reltablespace); + sprintf(values[1], "%u", fctx->record[i].reldatabase); + sprintf(values[2], "%u", fctx->record[i].relfilenode); + sprintf(values[3], "%u", fctx->record[i].relblocknumber); + /* + * Set (free) bytes to NULL for an index relation. + */ + if (fctx->record[i].isindex == true) + { + values[4] = NULL; + } + else + { + sprintf(values[4], "%u", fctx->record[i].bytes); + } /* Build and return the tuple. */ ! tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); result = HeapTupleGetDatum(tuple); Index: pg_freespacemap.sql.in =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.sql.in,v retrieving revision 1.2 diff -c -r1.2 pg_freespacemap.sql.in *** pg_freespacemap.sql.in 27 Feb 2006 16:09:48 -0000 1.2 --- pg_freespacemap.sql.in 9 Mar 2006 03:42:15 -0000 *************** *** 11,17 **** -- Create a view for convenient access. CREATE VIEW pg_freespacemap AS SELECT P.* FROM pg_freespacemap() AS P ! (blockid int4, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber int8, blockfreebytes int4); -- Don't want these to be available at public. REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC; --- 11,17 ---- -- Create a view for convenient access. CREATE VIEW pg_freespacemap AS SELECT P.* FROM pg_freespacemap() AS P ! (reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber int8, bytes int4); -- Don't want these to be available at public. REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC; Index: README.pg_freespacemap =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/README.pg_freespacemap,v retrieving revision 1.1 diff -c -r1.1 README.pg_freespacemap *** README.pg_freespacemap 12 Feb 2006 03:55:53 -0000 1.1 --- README.pg_freespacemap 9 Mar 2006 03:43:16 -0000 *************** *** 34,45 **** Column | references | Description ----------------+----------------------+------------------------------------ - blockid | | Id, 1.. max_fsm_pages - relfilenode | pg_class.relfilenode | Refilenode of the relation. reltablespace | pg_tablespace.oid | Tablespace oid of the relation. reldatabase | pg_database.oid | Database for the relation. relblocknumber | | Offset of the page in the relation. ! blockfreebytes | | Free bytes in the block/page. There is one row for each page in the free space map. --- 34,45 ---- Column | references | Description ----------------+----------------------+------------------------------------ reltablespace | pg_tablespace.oid | Tablespace oid of the relation. reldatabase | pg_database.oid | Database for the relation. + relfilenode | pg_class.relfilenode | Refilenode of the relation. relblocknumber | | Offset of the page in the relation. ! bytes | | Free bytes in the block/page, or NULL ! | | for an index page (see below). There is one row for each page in the free space map. *************** *** 47,52 **** --- 47,55 ---- Because the map is shared by all the databases, there are pages from relations not belonging to the current database. + The free space map can contain pages for btree indexes if they were emptied + by a vacuum process. The bytes field is set to NULL in this case. + When the pg_freespacemap view is accessed, internal free space map locks are taken, and a copy of the map data is made for the view to display. This ensures that the view produces a consistent set of results, while not *************** *** 58,91 **** ------------- regression=# \d pg_freespacemap ! View "public.pg_freespacemap" Column | Type | Modifiers ! ---------------+---------+----------- ! blockid | integer | ! relfilenode | oid | reltablespace | oid | reldatabase | oid | relblocknumber | bigint | ! blockfreebytes | integer | View definition: ! SELECT p.blockid, p.relfilenode, p.reltablespace, p.reldatabase, p.relblocknumber, p.blockfreebytes ! FROM pg_freespacemap() p(blockid integer, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber bigint,blockfreebytes integer); ! regression=# SELECT c.relname, m.relblocknumber, m.blockfreebytes FROM pg_freespacemap m INNER JOIN pg_class c ON c.relfilenode = m.relfilenode LIMIT 10; ! relname | relblocknumber | blockfreebytes ! ------------------------+----------------+---------------- ! sql_features | 5 | 2696 ! sql_implementation_info | 0 | 7104 ! sql_languages | 0 | 8016 ! sql_packages | 0 | 7376 ! sql_sizing | 0 | 6032 ! pg_authid | 0 | 7424 ! pg_toast_2618 | 13 | 4588 ! pg_toast_2618 | 12 | 1680 ! pg_toast_2618 | 10 | 1436 ! pg_toast_2618 | 7 | 1136 (10 rows) regression=# --- 61,93 ---- ------------- regression=# \d pg_freespacemap ! View "public.pg_freespacemap" Column | Type | Modifiers ! ----------------+---------+----------- reltablespace | oid | reldatabase | oid | + relfilenode | oid | relblocknumber | bigint | ! bytes | integer | View definition: ! SELECT p.reltablespace, p.reldatabase, p.relfilenode, p.relblocknumber, p.bytes ! FROM pg_freespacemap() p(reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber bigint, bytes integer); ! regression=# SELECT c.relname, m.relblocknumber, m.bytes FROM pg_freespacemap m INNER JOIN pg_class c ON c.relfilenode = m.relfilenode LIMIT 10; ! relname | relblocknumber | bytes ! ------------------------+----------------+-------- ! sql_features | 5 | 2696 ! sql_implementation_info | 0 | 7104 ! sql_languages | 0 | 8016 ! sql_packages | 0 | 7376 ! sql_sizing | 0 | 6032 ! pg_authid | 0 | 7424 ! pg_toast_2618 | 13 | 4588 ! pg_toast_2618 | 12 | 1680 ! pg_toast_2618 | 10 | 1436 ! pg_toast_2618 | 7 | 1136 (10 rows) regression=#
Mark, I have tried your patches and it worked great. Thanks. -- Tatsuo Ishii SRA OSS, Inc. Japan > Tom Lane wrote: > > Mark Kirkwood <markir@paradise.net.nz> writes: > > > >>>Good points! I had not noticed this test case. Probably NULL is better > > > > > >>Would setting it to 'BLCKSZ - (fixed index header stuff)' be better, > > > > > > No, I don't think so, because that will just make it harder to recognize > > what's what (remember that BLCKSZ isn't really a constant, and the index > > overhead is not the same for all AMs either). The point here is that > > for indexes the FSM tracks whole-page availability, not the amount of > > free space within pages. So I think NULL is a reasonable representation > > of that. Using NULL will make it easy to filter the results if you want > > to see only heap-page data or only index-page data, whereas it will be > > very hard to do that if the view adopts an ultimately-artificial > > convention about the amount of available space on an index page. > > > > Right - after suggesting it I realized that coding the different index > overhead for each possible AM would have been ... difficult :-). A patch > is attached to implement the NULL free bytes and other recommendations: > > 1/ Index free bytes set to NULL > 2/ Comment added to the README briefly mentioning the index business > 3/ Columns reordered more logically > 4/ 'Blockid' column removed > 5/ Free bytes column renamed to just 'bytes' instead of 'blockfreebytes' > > Now 5/ was only hinted at, but seemed worth doing while I was there > (hopefully I haven't made it too terse now....). > > cheers > > Mark >
BTW, I noticed difference of outputs from pg_freespacemap and pgstattuple. I ran pgbench and inspected "accounts" table by using these tools. pg_freespacemap: sum of bytes: 250712 pgstattuple: free_space: 354880 Shouldn't they be identical? -- Tatsuo Ishii SRA OSS, Inc. Japan
Tatsuo Ishii wrote: > BTW, I noticed difference of outputs from pg_freespacemap and > pgstattuple. > > I ran pgbench and inspected "accounts" table by using these tools. > > pg_freespacemap: > sum of bytes: 250712 > > pgstattuple: > free_space: 354880 > > Shouldn't they be identical? I would have thought so - unless there are not enough pages left in the FSM... pg_freespacemap is reporting on what gets into the FSM - so provided I haven't put a bug in there somewhere (!) - we need to look at how VACUUM reports free space to the FSM.... cheers Mark
> Tatsuo Ishii wrote: >> BTW, I noticed difference of outputs from pg_freespacemap and >> pgstattuple. >> >> I ran pgbench and inspected "accounts" table by using these tools. >> >> pg_freespacemap: >> sum of bytes: 250712 >> >> pgstattuple: >> free_space: 354880 >> >> Shouldn't they be identical? No, because (a) pgbench vacuums at the start of the run not the end, and (b) vacuum/fsm disregard pages with "uselessly small" amounts of free space (less than the average tuple size, IIRC). I do notice a rather serious shortcoming of pg_freespacemap in its current incarnation, which is that it *only* shows you the per-page free space data, and not any of the information that would let you determine what the FSM is doing to filter the raw data. The per-relation avgRequest and lastPageCount fields would be interesting for instance. Perhaps there should be a second view with one row per relation to carry the appropriate data. regards, tom lane
Tom Lane wrote: >>Tatsuo Ishii wrote: >> >>>BTW, I noticed difference of outputs from pg_freespacemap and >>>pgstattuple. >>> >>>I ran pgbench and inspected "accounts" table by using these tools. >>> >>>pg_freespacemap: >>>sum of bytes: 250712 >>> >>>pgstattuple: >>>free_space: 354880 >>> >>>Shouldn't they be identical? > > > vacuum/fsm disregard pages with "uselessly small" amounts of > free space (less than the average tuple size, IIRC). Ah - that what I was seeing! Thanks. > > I do notice a rather serious shortcoming of pg_freespacemap in its > current incarnation, which is that it *only* shows you the per-page free > space data, and not any of the information that would let you determine > what the FSM is doing to filter the raw data. The per-relation > avgRequest and lastPageCount fields would be interesting for instance. > Perhaps there should be a second view with one row per relation to > carry the appropriate data. > Ok - I did wonder about 2 views, but was unsure if the per-relation stuff was interesting. Given that it looks like it is interesting, I'll see about getting a second view going. Cheers Mark
> > Tatsuo Ishii wrote: > >> BTW, I noticed difference of outputs from pg_freespacemap and > >> pgstattuple. > >> > >> I ran pgbench and inspected "accounts" table by using these tools. > >> > >> pg_freespacemap: > >> sum of bytes: 250712 > >> > >> pgstattuple: > >> free_space: 354880 > >> > >> Shouldn't they be identical? > > No, because (a) pgbench vacuums at the start of the run not the end, I ran VACUUM after pbench run and still got the differece. > and (b) vacuum/fsm disregard pages with "uselessly small" amounts of > free space (less than the average tuple size, IIRC). That sounds strange to me. Each record of accounts tables is actually exactly same, i.e fixed size. So it should be possible that UPDATE reuses any free spaces made by previous UPDATE. If FSM neglects those free spaces "because they are uselessly small", then the unrecycled pages are getting grow even if they are regulary VACUUMed, no? > I do notice a rather serious shortcoming of pg_freespacemap in its > current incarnation, which is that it *only* shows you the per-page free > space data, and not any of the information that would let you determine > what the FSM is doing to filter the raw data. The per-relation > avgRequest and lastPageCount fields would be interesting for instance. > Perhaps there should be a second view with one row per relation to > carry the appropriate data. -- Tatsuo Ishii SRA OSS, Inc. Japan
Tatsuo Ishii <ishii@sraoss.co.jp> writes: > That sounds strange to me. Each record of accounts tables is actually > exactly same, i.e fixed size. So it should be possible that UPDATE > reuses any free spaces made by previous UPDATE. If FSM neglects those > free spaces "because they are uselessly small", then the unrecycled > pages are getting grow even if they are regulary VACUUMed, no? The point here is that if tuples require 50 bytes, and there are 20 bytes free on a page, pgstattuple counts 20 free bytes while FSM ignores the page. Recording that space in the FSM will not improve matters, it'll just risk pushing out FSM records for pages that do have useful amounts of free space. regards, tom lane
> The point here is that if tuples require 50 bytes, and there are 20 > bytes free on a page, pgstattuple counts 20 free bytes while FSM > ignores the page. Recording that space in the FSM will not improve > matters, it'll just risk pushing out FSM records for pages that do > have useful amounts of free space. Maybe an overloaded pgstattuple function that allows you to request FSM behavior? Chris
Christopher Kings-Lynne wrote: >> The point here is that if tuples require 50 bytes, and there are 20 >> bytes free on a page, pgstattuple counts 20 free bytes while FSM >> ignores the page. Recording that space in the FSM will not improve >> matters, it'll just risk pushing out FSM records for pages that do >> have useful amounts of free space. > > > > Maybe an overloaded pgstattuple function that allows you to request FSM > behavior? > That's a nice idea - could also do equivalently by adding an extra column "usable_free_space" or some such, and calculating this using FSM logic. Cheers Mark
Mark Kirkwood wrote: > Christopher Kings-Lynne wrote: > >>The point here is that if tuples require 50 bytes, and there are 20 > >>bytes free on a page, pgstattuple counts 20 free bytes while FSM > >>ignores the page. Recording that space in the FSM will not improve > >>matters, it'll just risk pushing out FSM records for pages that do > >>have useful amounts of free space. > > > >Maybe an overloaded pgstattuple function that allows you to request FSM > >behavior? > > That's a nice idea - could also do equivalently by adding an extra > column "usable_free_space" or some such, and calculating this using FSM > logic. The current pgstattuple function scans the whole table, so I don't think this is a good idea. Re: the overloaded function, I think the behaviors are different enough to merit a separate function, with a different name. -- Alvaro Herrera http://www.CommandPrompt.com/ The PostgreSQL Company - Command Prompt, Inc.
Mark Kirkwood wrote: > Tom Lane wrote: > >> >> I do notice a rather serious shortcoming of pg_freespacemap in its >> current incarnation, which is that it *only* shows you the per-page free >> space data, and not any of the information that would let you determine >> what the FSM is doing to filter the raw data. The per-relation >> avgRequest and lastPageCount fields would be interesting for instance. >> Perhaps there should be a second view with one row per relation to >> carry the appropriate data. >> > > Ok - I did wonder about 2 views, but was unsure if the per-relation > stuff was interesting. Given that it looks like it is interesting, I'll > see about getting a second view going. > This patch implements the second view for FSM relations. I have renamed the functions and views to be: pg_freespacemap_relations pg_freespacemap_pages This patch depends on the previous one (which was called simply 'pg_freespacemap.patch'). Cheers Mark
Attachment
Patch applied. Thanks. --------------------------------------------------------------------------- Mark Kirkwood wrote: > Tom Lane wrote: > > Mark Kirkwood <markir@paradise.net.nz> writes: > > > >>>Good points! I had not noticed this test case. Probably NULL is better > > > > > >>Would setting it to 'BLCKSZ - (fixed index header stuff)' be better, > > > > > > No, I don't think so, because that will just make it harder to recognize > > what's what (remember that BLCKSZ isn't really a constant, and the index > > overhead is not the same for all AMs either). The point here is that > > for indexes the FSM tracks whole-page availability, not the amount of > > free space within pages. So I think NULL is a reasonable representation > > of that. Using NULL will make it easy to filter the results if you want > > to see only heap-page data or only index-page data, whereas it will be > > very hard to do that if the view adopts an ultimately-artificial > > convention about the amount of available space on an index page. > > > > Right - after suggesting it I realized that coding the different index > overhead for each possible AM would have been ... difficult :-). A patch > is attached to implement the NULL free bytes and other recommendations: > > 1/ Index free bytes set to NULL > 2/ Comment added to the README briefly mentioning the index business > 3/ Columns reordered more logically > 4/ 'Blockid' column removed > 5/ Free bytes column renamed to just 'bytes' instead of 'blockfreebytes' > > Now 5/ was only hinted at, but seemed worth doing while I was there > (hopefully I haven't made it too terse now....). > > cheers > > Mark > > > Index: pg_freespacemap.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.c,v > retrieving revision 1.2 > diff -c -r1.2 pg_freespacemap.c > *** pg_freespacemap.c 14 Feb 2006 15:03:59 -0000 1.2 > --- pg_freespacemap.c 9 Mar 2006 03:38:10 -0000 > *************** > *** 12,18 **** > #include "storage/freespace.h" > #include "utils/relcache.h" > > ! #define NUM_FREESPACE_PAGES_ELEM 6 > > #if defined(WIN32) || defined(__CYGWIN__) > /* Need DLLIMPORT for some things that are not so marked in main headers */ > --- 12,18 ---- > #include "storage/freespace.h" > #include "utils/relcache.h" > > ! #define NUM_FREESPACE_PAGES_ELEM 5 > > #if defined(WIN32) || defined(__CYGWIN__) > /* Need DLLIMPORT for some things that are not so marked in main headers */ > *************** > *** 29,40 **** > typedef struct > { > > - uint32 blockid; > - uint32 relfilenode; > uint32 reltablespace; > uint32 reldatabase; > uint32 relblocknumber; > ! uint32 blockfreebytes; > > } FreeSpacePagesRec; > > --- 29,40 ---- > typedef struct > { > > uint32 reltablespace; > uint32 reldatabase; > + uint32 relfilenode; > uint32 relblocknumber; > ! uint32 bytes; > ! bool isindex; > > } FreeSpacePagesRec; > > *************** > *** 91,107 **** > > /* Construct a tuple to return. */ > tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 1, "blockid", > ! INT4OID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relblocknumber", > INT8OID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 6, "blockfreebytes", > INT4OID, -1, 0); > > /* Generate attribute metadata needed later to produce tuples */ > --- 91,105 ---- > > /* Construct a tuple to return. */ > tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 1, "reltablespace", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reldatabase", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 3, "relfilenode", > OIDOID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 4, "relblocknumber", > INT8OID, -1, 0); > ! TupleDescInitEntry(tupledesc, (AttrNumber) 5, "bytes", > INT4OID, -1, 0); > > /* Generate attribute metadata needed later to produce tuples */ > *************** > *** 129,135 **** > fctx->values[2] = (char *) palloc(3 * sizeof(uint32) + 1); > fctx->values[3] = (char *) palloc(3 * sizeof(uint32) + 1); > fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1); > - fctx->values[5] = (char *) palloc(3 * sizeof(uint32) + 1); > > > /* Return to original context when allocating transient memory */ > --- 127,132 ---- > *************** > *** 158,169 **** > for (nPages = 0; nPages < fsmrel->storedPages; nPages++) > { > > - fctx->record[i].blockid = i; > - fctx->record[i].relfilenode = fsmrel->key.relNode; > fctx->record[i].reltablespace = fsmrel->key.spcNode; > fctx->record[i].reldatabase = fsmrel->key.dbNode; > fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page); > ! fctx->record[i].blockfreebytes = 0; /* index.*/ > > page++; > i++; > --- 155,166 ---- > for (nPages = 0; nPages < fsmrel->storedPages; nPages++) > { > > fctx->record[i].reltablespace = fsmrel->key.spcNode; > fctx->record[i].reldatabase = fsmrel->key.dbNode; > + fctx->record[i].relfilenode = fsmrel->key.relNode; > fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page); > ! fctx->record[i].bytes = 0; > ! fctx->record[i].isindex = true; > > page++; > i++; > *************** > *** 178,189 **** > > for (nPages = 0; nPages < fsmrel->storedPages; nPages++) > { > - fctx->record[i].blockid = i; > - fctx->record[i].relfilenode = fsmrel->key.relNode; > fctx->record[i].reltablespace = fsmrel->key.spcNode; > fctx->record[i].reldatabase = fsmrel->key.dbNode; > fctx->record[i].relblocknumber = FSMPageGetPageNum(page); > ! fctx->record[i].blockfreebytes = FSMPageGetSpace(page); > > page++; > i++; > --- 175,186 ---- > > for (nPages = 0; nPages < fsmrel->storedPages; nPages++) > { > fctx->record[i].reltablespace = fsmrel->key.spcNode; > fctx->record[i].reldatabase = fsmrel->key.dbNode; > + fctx->record[i].relfilenode = fsmrel->key.relNode; > fctx->record[i].relblocknumber = FSMPageGetPageNum(page); > ! fctx->record[i].bytes = FSMPageGetSpace(page); > ! fctx->record[i].isindex = false; > > page++; > i++; > *************** > *** 209,227 **** > if (funcctx->call_cntr < funcctx->max_calls) > { > uint32 i = funcctx->call_cntr; > > > - sprintf(fctx->values[0], "%u", fctx->record[i].blockid); > - sprintf(fctx->values[1], "%u", fctx->record[i].relfilenode); > - sprintf(fctx->values[2], "%u", fctx->record[i].reltablespace); > - sprintf(fctx->values[3], "%u", fctx->record[i].reldatabase); > - sprintf(fctx->values[4], "%u", fctx->record[i].relblocknumber); > - sprintf(fctx->values[5], "%u", fctx->record[i].blockfreebytes); > > > > /* Build and return the tuple. */ > ! tuple = BuildTupleFromCStrings(funcctx->attinmeta, fctx->values); > result = HeapTupleGetDatum(tuple); > > > --- 206,246 ---- > if (funcctx->call_cntr < funcctx->max_calls) > { > uint32 i = funcctx->call_cntr; > + char *values[NUM_FREESPACE_PAGES_ELEM]; > + int j; > > + /* > + * Use a temporary values array, initially pointing to fctx->values, > + * so it can be reassigned w/o losing the storage for subsequent > + * calls. > + */ > + for (j = 0; j < NUM_FREESPACE_PAGES_ELEM; j++) > + { > + values[j] = fctx->values[j]; > + } > + > + > + sprintf(values[0], "%u", fctx->record[i].reltablespace); > + sprintf(values[1], "%u", fctx->record[i].reldatabase); > + sprintf(values[2], "%u", fctx->record[i].relfilenode); > + sprintf(values[3], "%u", fctx->record[i].relblocknumber); > > > + /* > + * Set (free) bytes to NULL for an index relation. > + */ > + if (fctx->record[i].isindex == true) > + { > + values[4] = NULL; > + } > + else > + { > + sprintf(values[4], "%u", fctx->record[i].bytes); > + } > > > /* Build and return the tuple. */ > ! tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); > result = HeapTupleGetDatum(tuple); > > > Index: pg_freespacemap.sql.in > =================================================================== > RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.sql.in,v > retrieving revision 1.2 > diff -c -r1.2 pg_freespacemap.sql.in > *** pg_freespacemap.sql.in 27 Feb 2006 16:09:48 -0000 1.2 > --- pg_freespacemap.sql.in 9 Mar 2006 03:42:15 -0000 > *************** > *** 11,17 **** > -- Create a view for convenient access. > CREATE VIEW pg_freespacemap AS > SELECT P.* FROM pg_freespacemap() AS P > ! (blockid int4, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber int8, blockfreebytes int4); > > -- Don't want these to be available at public. > REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC; > --- 11,17 ---- > -- Create a view for convenient access. > CREATE VIEW pg_freespacemap AS > SELECT P.* FROM pg_freespacemap() AS P > ! (reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber int8, bytes int4); > > -- Don't want these to be available at public. > REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC; > Index: README.pg_freespacemap > =================================================================== > RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/README.pg_freespacemap,v > retrieving revision 1.1 > diff -c -r1.1 README.pg_freespacemap > *** README.pg_freespacemap 12 Feb 2006 03:55:53 -0000 1.1 > --- README.pg_freespacemap 9 Mar 2006 03:43:16 -0000 > *************** > *** 34,45 **** > > Column | references | Description > ----------------+----------------------+------------------------------------ > - blockid | | Id, 1.. max_fsm_pages > - relfilenode | pg_class.relfilenode | Refilenode of the relation. > reltablespace | pg_tablespace.oid | Tablespace oid of the relation. > reldatabase | pg_database.oid | Database for the relation. > relblocknumber | | Offset of the page in the relation. > ! blockfreebytes | | Free bytes in the block/page. > > > There is one row for each page in the free space map. > --- 34,45 ---- > > Column | references | Description > ----------------+----------------------+------------------------------------ > reltablespace | pg_tablespace.oid | Tablespace oid of the relation. > reldatabase | pg_database.oid | Database for the relation. > + relfilenode | pg_class.relfilenode | Refilenode of the relation. > relblocknumber | | Offset of the page in the relation. > ! bytes | | Free bytes in the block/page, or NULL > ! | | for an index page (see below). > > > There is one row for each page in the free space map. > *************** > *** 47,52 **** > --- 47,55 ---- > Because the map is shared by all the databases, there are pages from > relations not belonging to the current database. > > + The free space map can contain pages for btree indexes if they were emptied > + by a vacuum process. The bytes field is set to NULL in this case. > + > When the pg_freespacemap view is accessed, internal free space map locks are > taken, and a copy of the map data is made for the view to display. > This ensures that the view produces a consistent set of results, while not > *************** > *** 58,91 **** > ------------- > > regression=# \d pg_freespacemap > ! View "public.pg_freespacemap" > Column | Type | Modifiers > ! ---------------+---------+----------- > ! blockid | integer | > ! relfilenode | oid | > reltablespace | oid | > reldatabase | oid | > relblocknumber | bigint | > ! blockfreebytes | integer | > View definition: > ! SELECT p.blockid, p.relfilenode, p.reltablespace, p.reldatabase, p.relblocknumber, p.blockfreebytes > ! FROM pg_freespacemap() p(blockid integer, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber bigint,blockfreebytes integer); > > ! regression=# SELECT c.relname, m.relblocknumber, m.blockfreebytes > FROM pg_freespacemap m INNER JOIN pg_class c > ON c.relfilenode = m.relfilenode LIMIT 10; > ! relname | relblocknumber | blockfreebytes > ! ------------------------+----------------+---------------- > ! sql_features | 5 | 2696 > ! sql_implementation_info | 0 | 7104 > ! sql_languages | 0 | 8016 > ! sql_packages | 0 | 7376 > ! sql_sizing | 0 | 6032 > ! pg_authid | 0 | 7424 > ! pg_toast_2618 | 13 | 4588 > ! pg_toast_2618 | 12 | 1680 > ! pg_toast_2618 | 10 | 1436 > ! pg_toast_2618 | 7 | 1136 > (10 rows) > > regression=# > --- 61,93 ---- > ------------- > > regression=# \d pg_freespacemap > ! View "public.pg_freespacemap" > Column | Type | Modifiers > ! ----------------+---------+----------- > reltablespace | oid | > reldatabase | oid | > + relfilenode | oid | > relblocknumber | bigint | > ! bytes | integer | > View definition: > ! SELECT p.reltablespace, p.reldatabase, p.relfilenode, p.relblocknumber, p.bytes > ! FROM pg_freespacemap() p(reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber bigint, bytes integer); > > ! regression=# SELECT c.relname, m.relblocknumber, m.bytes > FROM pg_freespacemap m INNER JOIN pg_class c > ON c.relfilenode = m.relfilenode LIMIT 10; > ! relname | relblocknumber | bytes > ! ------------------------+----------------+-------- > ! sql_features | 5 | 2696 > ! sql_implementation_info | 0 | 7104 > ! sql_languages | 0 | 8016 > ! sql_packages | 0 | 7376 > ! sql_sizing | 0 | 6032 > ! pg_authid | 0 | 7424 > ! pg_toast_2618 | 13 | 4588 > ! pg_toast_2618 | 12 | 1680 > ! pg_toast_2618 | 10 | 1436 > ! pg_toast_2618 | 7 | 1136 > (10 rows) > > regression=# > > > > ---------------------------(end of broadcast)--------------------------- > TIP 9: In versions below 8.0, the planner will ignore your desire to > choose an index scan if your joining column's datatypes do not > match -- Bruce Momjian http://candle.pha.pa.us EnterpriseDB http://www.enterprisedb.com + If your life is a hard drive, Christ can be your backup. +
Patch applied. Thanks. --------------------------------------------------------------------------- Mark Kirkwood wrote: > Mark Kirkwood wrote: > > Tom Lane wrote: > > > >> > >> I do notice a rather serious shortcoming of pg_freespacemap in its > >> current incarnation, which is that it *only* shows you the per-page free > >> space data, and not any of the information that would let you determine > >> what the FSM is doing to filter the raw data. The per-relation > >> avgRequest and lastPageCount fields would be interesting for instance. > >> Perhaps there should be a second view with one row per relation to > >> carry the appropriate data. > >> > > > > Ok - I did wonder about 2 views, but was unsure if the per-relation > > stuff was interesting. Given that it looks like it is interesting, I'll > > see about getting a second view going. > > > > This patch implements the second view for FSM relations. I have renamed > the functions and views to be: > > pg_freespacemap_relations > pg_freespacemap_pages > > This patch depends on the previous one (which was called simply > 'pg_freespacemap.patch'). > > Cheers > > Mark [ application/gzip is not supported, skipping... ] > > ---------------------------(end of broadcast)--------------------------- > TIP 4: Have you searched our list archives? > > http://archives.postgresql.org -- Bruce Momjian http://candle.pha.pa.us EnterpriseDB http://www.enterprisedb.com + If your life is a hard drive, Christ can be your backup. +