From 2a530ea1306c291a40fff4042a0b1a5755dcefc9 Mon Sep 17 00:00:00 2001 From: kommih Date: Thu, 1 Nov 2018 12:00:10 +1100 Subject: [PATCH 3/3] First draft of pluggable-storage documentation --- doc/src/sgml/{indexam.sgml => am.sgml} | 590 ++++++++++++++++++++- doc/src/sgml/catalogs.sgml | 5 +- doc/src/sgml/config.sgml | 24 + doc/src/sgml/filelist.sgml | 2 +- doc/src/sgml/postgres.sgml | 2 +- doc/src/sgml/ref/create_access_method.sgml | 12 +- doc/src/sgml/ref/create_table.sgml | 18 +- doc/src/sgml/ref/create_table_as.sgml | 14 + doc/src/sgml/release-9.6.sgml | 2 +- doc/src/sgml/xindex.sgml | 2 +- 10 files changed, 640 insertions(+), 31 deletions(-) rename doc/src/sgml/{indexam.sgml => am.sgml} (78%) diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/am.sgml similarity index 78% rename from doc/src/sgml/indexam.sgml rename to doc/src/sgml/am.sgml index beb99d1831..dc13bc1073 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/am.sgml @@ -1,16 +1,20 @@ - + - - Index Access Method Interface Definition + + Access Method Interface Definition This chapter defines the interface between the core - PostgreSQL system and index access - methods, which manage individual index types. The core system - knows nothing about indexes beyond what is specified here, so it is - possible to develop entirely new index types by writing add-on code. + PostgreSQL system and access + methods, which manage individual INDEX + and TABLE types. The core system knows nothing + about these access methods beyond what is specified here, so it is + possible to develop entirely new access method types by writing add-on code. - + + + Overview of Index access methods + All indexes in PostgreSQL are what are known technically as secondary indexes; that is, the index is @@ -42,8 +46,8 @@ dead tuples are reclaimed (by vacuuming) when the dead tuples themselves are reclaimed. - - + + Basic API Structure for Indexes @@ -217,9 +221,9 @@ typedef struct IndexAmRoutine conditions. 
- + - + Index Access Method Functions @@ -709,9 +713,11 @@ amparallelrescan (IndexScanDesc scan); the beginning. - + + + - + Index Scanning @@ -864,9 +870,9 @@ amparallelrescan (IndexScanDesc scan); if its internal implementation is unsuited to one API or the other. - + - + Index Locking Considerations @@ -978,9 +984,9 @@ amparallelrescan (IndexScanDesc scan); reduce the frequency of such transaction cancellations. - + - + Index Uniqueness Checks @@ -1127,9 +1133,9 @@ amparallelrescan (IndexScanDesc scan); - + - + Index Cost Estimation Functions @@ -1376,5 +1382,549 @@ cost_qual_eval(&index_qual_cost, path->indexquals, root); Examples of cost estimator functions can be found in src/backend/utils/adt/selfuncs.c. + + + + Overview of Table access methods + + + In PostgreSQL, tables are the primary data store. + Each table is stored as its own physical relation and so + is described by an entry in the pg_class catalog. + The contents of a table are entirely under the control of its access method. + (All the access methods furthermore use the standard page layout described in + .) + + + + Table access method API + + + Each table access method is described by a row in the + pg_am + system catalog. The pg_am entry + specifies a name and a handler function for the access + method. These entries can be created and deleted using the + and + SQL commands. + + + + A table access method handler function must be declared to accept a + single argument of type internal and to return the + pseudo-type table_am_handler. The argument is a dummy value that + simply serves to prevent handler functions from being called directly from + SQL commands. The result of the function must be a palloc'd struct of + type TableAmRoutine, which contains everything + that the core code needs to know to make use of the table access method.
+ The TableAmRoutine struct, also called the access + method's API struct, includes fields specifying assorted + fixed properties of the access method, such as whether it can support + bitmap scans. More importantly, it contains pointers to support + functions for the access method, which do all of the real work to access + tables. These support functions are plain C functions and are not + visible or callable at the SQL level. The support functions are described + in . + + + + The structure TableAmRoutine is defined thus: + +typedef struct TableAmRoutine +{ + NodeTag type; + + SlotCallbacks_function slot_callbacks; + + SnapshotSatisfies_function snapshot_satisfies; + SnapshotSatisfiesUpdate_function snapshot_satisfiesUpdate; + SnapshotSatisfiesVacuum_function snapshot_satisfiesVacuum; + + /* Operations on physical tuples */ + TupleInsert_function tuple_insert; + TupleInsertSpeculative_function tuple_insert_speculative; + TupleCompleteSpeculative_function tuple_complete_speculative; + TupleUpdate_function tuple_update; + TupleDelete_function tuple_delete; + TupleFetchRowVersion_function tuple_fetch_row_version; + TupleLock_function tuple_lock; + MultiInsert_function multi_insert; + TupleGetLatestTid_function tuple_get_latest_tid; + TupleFetchFollow_function tuple_fetch_follow; + + GetTupleData_function get_tuple_data; + + RelationVacuum_function relation_vacuum; + RelationScanAnalyzeNextBlock_function scan_analyze_next_block; + RelationScanAnalyzeNextTuple_function scan_analyze_next_tuple; + RelationCopyForCluster_function relation_copy_for_cluster; + RelationSync_function relation_sync; + + /* Operations on relation scans */ + ScanBegin_function scan_begin; + ScanSetlimits_function scansetlimits; + ScanGetnextSlot_function scan_getnextslot; + + BitmapPagescan_function scan_bitmap_pagescan; + BitmapPagescanNext_function scan_bitmap_pagescan_next; + + SampleScanNextBlock_function scan_sample_next_block; + SampleScanNextTuple_function scan_sample_next_tuple; + + 
ScanEnd_function scan_end; + ScanRescan_function scan_rescan; + ScanUpdateSnapshot_function scan_update_snapshot; + + BeginIndexFetchTable_function begin_index_fetch; + EndIndexFetchTable_function reset_index_fetch; + EndIndexFetchTable_function end_index_fetch; + + + IndexBuildRangeScan_function index_build_range_scan; + IndexValidateScan_function index_validate_scan; + + CreateInitFork_function CreateInitFork; +} TableAmRoutine; + + + + + An individual table is defined by a + pg_class + entry that describes it as a physical relation. + + + + + + Table Access Method Functions + + + The table construction and maintenance functions that a table access + method must provide in TableAmRoutine are: + + + + +TupleTableSlotOps * +slot_callbacks (Relation relation); + + API to access the slot-specific methods. + The following methods are available: + TTSOpsVirtual, + TTSOpsHeapTuple, + TTSOpsMinimalTuple, + TTSOpsBufferTuple. + + + + +bool +snapshot_satisfies (TupleTableSlot *slot, Snapshot snapshot); + + API to check whether the provided slot is visible to the current + transaction according to the snapshot. + + + + +Oid +tuple_insert (Relation rel, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate); + + API to insert the tuple and provide the ItemPointerData + where the tuple is successfully inserted. + + + + +Oid +tuple_insert_speculative (Relation rel, + TupleTableSlot *slot, + CommandId cid, + int options, + BulkInsertState bistate, + uint32 specToken); + + API to insert the tuple with a speculative token. This API is similar + to tuple_insert, with additional speculative + information. + + + + +void +tuple_complete_speculative (Relation rel, + TupleTableSlot *slot, + uint32 specToken, + bool succeeded); + + API to complete the state of the tuple inserted by the API tuple_insert_speculative + with the successful completion of the index insert.
+ + + + +HTSU_Result +tuple_update (Relation relation, + ItemPointer otid, + TupleTableSlot *slot, + CommandId cid, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + LockTupleMode *lockmode, + bool *update_indexes); + + API to update the existing tuple with new data. + + + + + +HTSU_Result +tuple_delete (Relation relation, + ItemPointer tid, + CommandId cid, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + bool changingPart); + + API to delete the existing tuple. + + + + + +bool +tuple_fetch_row_version (Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + Relation stats_relation); + + API to fetch and store the Buffered Heap tuple in the provided slot + based on the ItemPointer. + + + + + +HTSU_Result +tuple_lock (Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + CommandId cid, + LockTupleMode mode, + LockWaitPolicy wait_policy, + uint8 flags, + HeapUpdateFailureData *hufd); + + API to lock the tuple specified by the ItemPointer and fetch the newest version of + its tuple and TID. + + + + + +void +multi_insert (Relation relation, TupleTableSlot **slots, int nslots, + CommandId cid, int options, BulkInsertState bistate); + + API to insert multiple tuples at a time into the relation. + + + + + +void +tuple_get_latest_tid (Relation relation, + Snapshot snapshot, + ItemPointer tid); + + API to get the latest TID of the tuple with the given ItemPointer. + + + + + +bool +tuple_fetch_follow (struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead); + + API to get all the tuples of the page that satisfy the ItemPointer. + + + + + +tuple_data +get_tuple_data (TupleTableSlot *slot, tuple_data_flags flags); + + API to return the internal structure members of the HeapTuple.
+ + + + + +void +relation_vacuum (Relation onerel, int options, + struct VacuumParams *params, BufferAccessStrategy bstrategy); + + API to perform vacuum for one heap relation. + + + + + +void +scan_analyze_next_block (TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy); + + API to fill the scan descriptor with the buffer of the specified block. + + + + + +bool +scan_analyze_next_tuple (TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, TupleTableSlot *slot); + + API to analyze the block and fill the buffered heap tuple in the slot and also + provide the live and dead rows. + + + + + +void +relation_copy_for_cluster (Relation NewHeap, Relation OldHeap, Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, + double *num_tuples, double *tups_vacuumed, double *tups_recently_dead); + + API to copy one relation to another relation using either an index scan or a table scan. + + + + + +void +relation_sync (Relation relation); + + API to sync the relation to disk, useful for the cases where no WAL is written. + + + + + +TableScanDesc +scan_begin (Relation relation, + Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); + + API to start the relation scan for the provided relation and return the + TableScanDesc structure. + + + + + +void +scansetlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks); + + API to set the relation scan range limits. + + + + + +TupleTableSlot * +scan_getnextslot (TableScanDesc scan, + ScanDirection direction, TupleTableSlot *slot); + + API to fill the next visible tuple from the relation scan in the provided slot + and return it.
+ + + + + +bool +scan_bitmap_pagescan (TableScanDesc scan, + TBMIterateResult *tbmres); + + API to scan the relation and fill the scan description bitmap with valid item pointers + for the specified block. + + + + + +bool +scan_bitmap_pagescan_next (TableScanDesc scan, + TupleTableSlot *slot); + + API to fill the buffered heap tuple data from the bitmap scanned item pointers and store + it in the provided slot. + + + + + +bool +scan_sample_next_block (TableScanDesc scan, struct SampleScanState *scanstate); + + API to scan the relation and fill the scan description bitmap with valid item pointers + for the specified block provided by the sample method. + + + + + +bool +scan_sample_next_tuple (TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot); + + API to fill the buffered heap tuple data from the bitmap scanned item pointers based on the sample + method and store it in the provided slot. + + + + + +void +scan_end (TableScanDesc scan); + + API to end the relation scan. + + + + + +void +scan_rescan (TableScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); + + API to restart the relation scan with provided data. + + + + + +void +scan_update_snapshot (TableScanDesc scan, Snapshot snapshot); + + API to update the relation scan with the new snapshot. + + + + +IndexFetchTableData * +begin_index_fetch (Relation relation); + + API to prepare the IndexFetchTableData for the relation. + + + + +void +reset_index_fetch (struct IndexFetchTableData* data); + + API to reset the prepared internal members of the IndexFetchTableData. + + + + +void +end_index_fetch (struct IndexFetchTableData* data); + + API to clear and free the IndexFetchTableData. 
+ + + + +double +index_build_range_scan (Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + BlockNumber start_blockno, + BlockNumber end_blockno, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan); + + API to perform the table scan over the bounded range specified by the caller + and insert the qualifying records into the index using the provided callback + function pointer. + + + + +void +index_validate_scan (Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + struct ValidateIndexState *state); + + API to perform the table scan and insert the qualifying records into the index. + This API is similar to index_build_range_scan. It + is used in the scenario of a concurrent index build. + + + + + + Table scanning + + + + + + + Table insert/update/delete + + + + + + + Table locking + + + + + + + Table vacuum + + + + + + + Table fetch + + + + + + diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0179deea2e..f0c8037bbc 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -587,8 +587,9 @@ The catalog pg_am stores information about relation access methods. There is one row for each access method supported by the system. - Currently, only indexes have access methods. The requirements for index - access methods are discussed in detail in . + Currently, only INDEX and TABLE have + access methods. The requirements for access methods are discussed in detail + in .
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f11b8f724c..8765d7c57c 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6585,6 +6585,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + default_table_access_method (string) + + default_table_access_method configuration parameter + + + + + This variable specifies the default table access method used to create + objects (tables and materialized views) when a CREATE command does + not explicitly specify an access method. + + + + The value is either the name of a table access method, or an empty string + to specify using the default table access method of the current database. + If the value does not match the name of any existing table access method, + PostgreSQL will automatically use the default + table access method of the current database. + + + + + default_tablespace (string) diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 48ac14a838..99a6496502 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -90,7 +90,7 @@ - + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 0070603fc3..3e66ae9c8a 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -251,7 +251,7 @@ &tablesample-method; &custom-scan; &geqo; - &indexam; + &am; &generic-wal; &btree; &gist; diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml index 851c5e63be..256914022a 100644 --- a/doc/src/sgml/ref/create_access_method.sgml +++ b/doc/src/sgml/ref/create_access_method.sgml @@ -61,7 +61,8 @@ CREATE ACCESS METHOD name This clause specifies the type of access method to define. - Only INDEX is supported at present. + Only INDEX and TABLE + are supported at present.
@@ -76,9 +77,12 @@ CREATE ACCESS METHOD name declared to take a single argument of type internal, and its return type depends on the type of access method; for INDEX access methods, it must - be index_am_handler. The C-level API that the handler - function must implement varies depending on the type of access method. - The index access method API is described in . + be index_am_handler and for TABLE + access methods, it must be table_am_handler. + The C-level API that the handler function must implement varies + depending on the type of access method. The index access method API + is described in and the table access method + API is described in . diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 10428f8ff0..87e0f01ab2 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -29,6 +29,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI ] ) [ INHERITS ( parent_table [, ... ] ) ] [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -40,6 +41,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ) ] [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -51,6 +53,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ) ] { FOR VALUES partition_bound_spec | DEFAULT } [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... 
] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -955,7 +958,7 @@ WITH ( MODULUS numeric_literal, REM The access method must support amgettuple (see ); at present this means GIN + linkend="index-access-methods"/>); at present this means GIN cannot be used. Although it's allowed, there is little point in using B-tree or hash indexes with an exclusion constraint, because this does nothing that an ordinary unique constraint doesn't do better. @@ -1138,6 +1141,19 @@ WITH ( MODULUS numeric_literal, REM + + USING method + + + This clause specifies the optional access method for the new table; + see for more information. + If this option is not specified, then the default table access method + is chosen for the new table. See + for more information. + + + + WITH ( storage_parameter [= value] [, ... ] ) diff --git a/doc/src/sgml/ref/create_table_as.sgml b/doc/src/sgml/ref/create_table_as.sgml index 527138e787..2acf52d2f5 100644 --- a/doc/src/sgml/ref/create_table_as.sgml +++ b/doc/src/sgml/ref/create_table_as.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name [ (column_name [, ...] ) ] + [ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -120,6 +121,19 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + USING method + + + This clause specifies the optional access method for the new table; + see for more information. + If this option is not specified, then the default table access method + is chosen for the new table. See + for more information. + + + + WITH ( storage_parameter [= value] [, ...
] ) diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml index acb6a88b31..68c79db4b5 100644 --- a/doc/src/sgml/release-9.6.sgml +++ b/doc/src/sgml/release-9.6.sgml @@ -10081,7 +10081,7 @@ This commit is also listed under libpq and PL/pgSQL 2016-08-13 [ed0097e4f] Add SQL-accessible functions for inspecting index AM pro --> - Restructure index access + Restructure index access method API to hide most of it at the C level (Alexander Korotkov, Andrew Gierth) diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 9446f8b836..4fa821160c 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -36,7 +36,7 @@ described in pg_am. It is possible to add a new index access method by writing the necessary code and then creating an entry in pg_am — but that is - beyond the scope of this chapter (see ). + beyond the scope of this chapter (see ). -- 2.18.0.windows.1