From ce3daeeb1b213b1a2efb5965fc9aa082608cd591 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 11 Mar 2022 11:37:29 +0300 Subject: [PATCH v52 5/7] Use 64-bit XIDs - change TransactionId to 64bit - disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remains 32bit -- now 32bit xid is named ShortTransactionId - heap page format is changed to contain xid and multixact base value, tuple's xmin and xmax are offsets from. -- xid_base and multi_base are stored as a page special data. PageHeader remains unmodified. - in-memory tuple (HeapTuple) were enriched with precalulated 64bit xmin/xmax. Authors: - Alexander Korotkov - Teodor Sigaev - Nikita Glukhov - Maxim Orlov - Pavel Borisov - Yura Sokolov - Aleksander Alekseev Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com --- contrib/amcheck/verify_heapam.c | 119 +- contrib/amcheck/verify_nbtree.c | 2 +- contrib/hstore/hstore_io.c | 2 + contrib/pageinspect/Makefile | 3 +- contrib/pageinspect/btreefuncs.c | 18 +- contrib/pageinspect/expected/btree.out | 4 +- contrib/pageinspect/expected/hash_1.out | 166 +++ .../pageinspect/expected/oldextversions.out | 10 +- contrib/pageinspect/expected/page.out | 28 +- contrib/pageinspect/heapfuncs.c | 9 +- .../pageinspect/pageinspect--1.12--1.13.sql | 145 ++ contrib/pageinspect/pageinspect--1.5.sql | 2 + contrib/pageinspect/rawpage.c | 35 +- contrib/pageinspect/sql/btree.sql | 3 +- contrib/pg_surgery/heap_surgery.c | 17 +- contrib/pg_visibility/pg_visibility.c | 7 +- contrib/pgrowlocks/pgrowlocks.c | 2 +- contrib/pgstattuple/pgstatapprox.c | 2 + contrib/pgstattuple/pgstatindex.c | 2 +- .../postgres_fdw/expected/postgres_fdw.out | 55 +- contrib/postgres_fdw/postgres_fdw.c | 9 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +- src/backend/access/common/heaptuple.c | 10 +- src/backend/access/common/reloptions.c | 
12 +- src/backend/access/hash/hashvalidate.c | 5 +- src/backend/access/heap/heapam.c | 1228 +++++++++++++++-- src/backend/access/heap/heapam_handler.c | 62 +- src/backend/access/heap/heapam_visibility.c | 173 +-- src/backend/access/heap/heaptoast.c | 3 + src/backend/access/heap/hio.c | 36 +- src/backend/access/heap/pruneheap.c | 92 +- src/backend/access/heap/rewriteheap.c | 101 +- src/backend/access/heap/vacuumlazy.c | 150 +- src/backend/access/nbtree/nbtpage.c | 2 + src/backend/access/nbtree/nbtsplitloc.c | 16 +- src/backend/access/rmgrdesc/gistdesc.c | 4 +- src/backend/access/rmgrdesc/heapdesc.c | 32 + src/backend/access/rmgrdesc/nbtdesc.c | 4 +- src/backend/access/rmgrdesc/xactdesc.c | 6 +- src/backend/access/rmgrdesc/xlogdesc.c | 2 +- src/backend/access/transam/clog.c | 19 +- src/backend/access/transam/commit_ts.c | 19 - src/backend/access/transam/multixact.c | 624 +-------- src/backend/access/transam/slru.c | 11 +- src/backend/access/transam/subtrans.c | 9 +- src/backend/access/transam/transam.c | 18 +- src/backend/access/transam/twophase.c | 53 +- src/backend/access/transam/varsup.c | 177 +-- src/backend/access/transam/xact.c | 35 +- src/backend/access/transam/xlog.c | 9 +- src/backend/access/transam/xloginsert.c | 7 + src/backend/access/transam/xlogreader.c | 34 - src/backend/bootstrap/bootstrap.c | 2 +- src/backend/catalog/heap.c | 8 +- src/backend/catalog/pg_inherits.c | 2 +- src/backend/commands/async.c | 2 +- src/backend/commands/dbcommands.c | 9 +- src/backend/commands/indexcmds.c | 6 +- src/backend/commands/sequence.c | 30 +- src/backend/commands/vacuum.c | 22 +- src/backend/executor/execExprInterp.c | 1 + src/backend/executor/execUtils.c | 1 + src/backend/executor/nodeModifyTable.c | 1 + src/backend/executor/spi.c | 1 + src/backend/nodes/gen_node_support.pl | 6 +- src/backend/optimizer/util/plancat.c | 2 +- src/backend/postmaster/autovacuum.c | 72 +- src/backend/replication/logical/decode.c | 24 +- src/backend/replication/logical/proto.c | 50 +- 
.../replication/logical/reorderbuffer.c | 17 +- src/backend/replication/logical/snapbuild.c | 4 +- src/backend/replication/logical/worker.c | 2 +- src/backend/replication/walreceiver.c | 28 +- src/backend/replication/walsender.c | 73 +- src/backend/statistics/extended_stats.c | 1 + src/backend/storage/buffer/Makefile | 3 +- src/backend/storage/buffer/bufmgr.c | 136 +- src/backend/storage/buffer/heap_convert.c | 549 ++++++++ src/backend/storage/buffer/meson.build | 1 + src/backend/storage/ipc/procarray.c | 182 +-- src/backend/storage/ipc/sinvaladt.c | 4 +- src/backend/storage/ipc/standby.c | 4 +- src/backend/storage/lmgr/lmgr.c | 14 +- src/backend/storage/lmgr/lock.c | 4 +- src/backend/storage/lmgr/predicate.c | 31 +- src/backend/storage/lmgr/proc.c | 12 +- src/backend/storage/page/bufpage.c | 254 +++- src/backend/utils/adt/enum.c | 2 +- src/backend/utils/adt/jsonfuncs.c | 2 + src/backend/utils/adt/lockfuncs.c | 9 +- src/backend/utils/adt/pgstatfuncs.c | 1 + src/backend/utils/adt/rowtypes.c | 12 + src/backend/utils/adt/xid.c | 37 +- src/backend/utils/adt/xid8funcs.c | 83 +- src/backend/utils/cache/catcache.c | 1 + src/backend/utils/cache/relcache.c | 3 +- src/backend/utils/fmgr/fmgr.c | 4 +- src/backend/utils/misc/guc_tables.c | 164 +-- src/backend/utils/misc/help_config.c | 8 +- src/backend/utils/misc/pg_controldata.c | 2 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/backend/utils/sort/tuplesortvariants.c | 14 +- src/backend/utils/time/combocid.c | 20 +- src/backend/utils/time/snapmgr.c | 23 +- src/bin/pg_amcheck/t/004_verify_heapam.pl | 185 ++- src/bin/pg_controldata/pg_controldata.c | 2 +- src/bin/pg_dump/pg_dump.c | 27 +- src/bin/pg_dump/pg_dump.h | 8 +- src/bin/pg_resetwal/pg_resetwal.c | 55 +- src/bin/pg_resetwal/t/001_basic.pl | 18 +- src/bin/pg_upgrade/Makefile | 1 + src/bin/pg_upgrade/check.c | 149 +- src/bin/pg_upgrade/controldata.c | 17 +- src/bin/pg_upgrade/file.c | 99 +- src/bin/pg_upgrade/meson.build | 1 + src/bin/pg_upgrade/pg_upgrade.c 
| 144 +- src/bin/pg_upgrade/pg_upgrade.h | 34 +- src/bin/pg_upgrade/relfilenumber.c | 34 +- src/bin/pg_upgrade/segresize.c | 586 ++++++++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 18 + src/bin/pg_upgrade/version.c | 104 +- src/bin/pg_waldump/pg_waldump.c | 2 +- src/bin/pg_waldump/t/001_basic.pl | 3 +- src/include/access/ginblock.h | 11 +- src/include/access/gist.h | 2 +- src/include/access/heapam.h | 24 +- src/include/access/heapam_xlog.h | 31 +- src/include/access/heaptoast.h | 11 +- src/include/access/htup.h | 19 +- src/include/access/htup_details.h | 279 +++- src/include/access/multixact.h | 11 +- src/include/access/nbtree.h | 10 + src/include/access/reloptions.h | 2 +- src/include/access/rewriteheap.h | 4 +- src/include/access/rmgrlist.h | 1 + src/include/access/slru.h | 10 +- src/include/access/tableam.h | 2 +- src/include/access/transam.h | 92 +- src/include/access/tupmacs.h | 3 +- src/include/access/xact.h | 13 +- src/include/access/xloginsert.h | 1 + src/include/access/xlogreader.h | 4 - src/include/access/xlogrecord.h | 5 +- src/include/c.h | 23 +- src/include/catalog/catversion.h | 3 +- src/include/catalog/pg_amproc.dat | 4 +- src/include/catalog/pg_operator.dat | 8 +- src/include/catalog/pg_proc.dat | 12 +- src/include/catalog/pg_type.dat | 4 +- src/include/catalog/pg_type.h | 5 + src/include/commands/vacuum.h | 22 +- src/include/fmgr.h | 2 + src/include/nodes/pg_list.h | 4 + src/include/pg_config.h.in | 3 + src/include/port/pg_lfind.h | 163 ++- src/include/postgres.h | 9 +- src/include/postmaster/autovacuum.h | 4 +- src/include/storage/buf_internals.h | 5 +- src/include/storage/bufmgr.h | 6 + src/include/storage/bufpage.h | 232 +++- src/include/storage/itemid.h | 2 + src/include/storage/lock.h | 14 +- src/include/storage/proc.h | 7 +- src/include/utils/combocid.h | 2 +- src/include/utils/rel.h | 12 +- src/include/utils/xid8.h | 4 +- src/pl/plperl/plperl.c | 4 +- src/pl/plpgsql/src/pl_comp.c | 4 +- src/pl/plpgsql/src/pl_exec.c | 2 + 
src/pl/plpython/plpy_procedure.c | 4 +- src/pl/tcl/pltcl.c | 4 +- src/test/Makefile | 3 +- src/test/meson.build | 1 + src/test/modules/test_lfind/test_lfind.c | 30 +- .../perl/PostgreSQL/Test/AdjustUpgrade.pm | 4 + src/test/recovery/t/003_recovery_targets.pl | 2 +- src/test/recovery/t/039_end_of_wal.pl | 24 +- src/test/regress/expected/indirect_toast.out | 8 + src/test/regress/expected/insert.out | 16 +- src/test/regress/expected/opr_sanity.out | 6 +- src/test/regress/expected/select_views.out | 86 +- src/test/regress/expected/txid.out | 8 +- src/test/regress/expected/type_sanity.out | 5 +- src/test/regress/expected/xid.out | 22 +- src/test/regress/expected/xid64.out | 92 ++ src/test/regress/parallel_schedule | 2 +- src/test/regress/regress.c | 292 ++++ src/test/regress/sql/indirect_toast.sql | 11 + src/test/regress/sql/insert.sql | 17 +- src/test/regress/sql/select_views.sql | 2 +- src/test/regress/sql/type_sanity.sql | 5 +- src/test/regress/sql/xid.sql | 2 +- src/test/regress/sql/xid64.sql | 84 ++ src/test/xid-64/Makefile | 22 + src/test/xid-64/README | 16 + src/test/xid-64/meson.build | 16 + src/test/xid-64/t/002_test_gucs.pl | 79 ++ src/test/xid-64/t/003_test_integrity.pl | 58 + src/test/xid-64/t/004_test_relminmxid.pl | 90 ++ src/test/xid-64/t/005_stream_subxact.pl | 100 ++ src/test/xid-64/t/006_zeropage.pl | 33 + src/test/xid-64/t/007_first_multi.pl | 83 ++ src/tools/pgindent/typedefs.list | 4 +- 203 files changed, 6800 insertions(+), 2566 deletions(-) create mode 100644 contrib/pageinspect/expected/hash_1.out create mode 100644 contrib/pageinspect/pageinspect--1.12--1.13.sql create mode 100644 src/backend/storage/buffer/heap_convert.c create mode 100644 src/bin/pg_upgrade/segresize.c create mode 100644 src/test/regress/expected/xid64.out create mode 100644 src/test/regress/sql/xid64.sql create mode 100644 src/test/xid-64/Makefile create mode 100644 src/test/xid-64/README create mode 100644 src/test/xid-64/meson.build create mode 100644 
src/test/xid-64/t/002_test_gucs.pl create mode 100644 src/test/xid-64/t/003_test_integrity.pl create mode 100644 src/test/xid-64/t/004_test_relminmxid.pl create mode 100644 src/test/xid-64/t/005_stream_subxact.pl create mode 100644 src/test/xid-64/t/006_zeropage.pl create mode 100644 src/test/xid-64/t/007_first_multi.pl diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 00a65b5448..d109ccd66b 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -17,6 +17,7 @@ #include "access/multixact.h" #include "access/toast_internals.h" #include "access/visibilitymap.h" +#include "catalog/catalog.h" #include "catalog/pg_am.h" #include "funcapi.h" #include "miscadmin.h" @@ -85,7 +86,7 @@ typedef struct HeapCheckContext * from them. */ FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */ - TransactionId next_xid; /* 32-bit version of next_fxid */ + TransactionId next_xid; /* 64-bit version of next_fxid */ TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */ FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed * relative to next_fxid */ @@ -126,6 +127,7 @@ typedef struct HeapCheckContext uint16 lp_len; uint16 lp_off; HeapTupleHeader tuphdr; + HeapTupleData tuple; int natts; /* Values for iterating over attributes within the tuple */ @@ -169,8 +171,6 @@ static bool check_tuple_visibility(HeapCheckContext *ctx, static void report_corruption(HeapCheckContext *ctx, char *msg); static void report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta, char *msg); -static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, - const HeapCheckContext *ctx); static void update_cached_xid_range(HeapCheckContext *ctx); static void update_cached_mxid_range(HeapCheckContext *ctx); static XidBoundsViolation check_mxid_in_range(MultiXactId mxid, @@ -394,7 +394,7 @@ verify_heapam(PG_FUNCTION_ARGS) update_cached_xid_range(&ctx); update_cached_mxid_range(&ctx); 
ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid; - ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx); + ctx.relfrozenfxid = FullTransactionIdFromXid(ctx.relfrozenxid); ctx.relminmxid = ctx.rel->rd_rel->relminmxid; if (TransactionIdIsNormal(ctx.relfrozenxid)) @@ -555,6 +555,12 @@ verify_heapam(PG_FUNCTION_ARGS) ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid); ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr); + ctx.tuple.t_data = ctx.tuphdr; + ctx.tuple.t_len = ItemIdGetLength(ctx.itemid); + ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel); + HeapTupleCopyXidsFromPage(ctx.buffer, &ctx.tuple, ctx.page, + IsToastRelation(ctx.rel)); + /* Ok, ready to check this next tuple */ check_tuple(&ctx, &xmin_commit_status_ok[ctx.offnum], @@ -588,6 +594,8 @@ verify_heapam(PG_FUNCTION_ARGS) TransactionId curr_xmax; TransactionId next_xmin; OffsetNumber nextoffnum = successor[ctx.offnum]; + HeapTupleData curr_tup; + HeapTupleData next_tup; /* * The current line pointer may not have a successor, either @@ -650,9 +658,13 @@ verify_heapam(PG_FUNCTION_ARGS) if (ItemIdIsRedirected(next_lp)) continue; curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp); - curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup); + curr_tup.t_data = curr_htup; + HeapTupleCopyXidsFromPage(ctx.buffer, &curr_tup, ctx.page, false); + curr_xmax = HeapTupleGetUpdateXidAny(&curr_tup); next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp); - next_xmin = HeapTupleHeaderGetXmin(next_htup); + next_tup.t_data = next_htup; + HeapTupleCopyXidsFromPage(ctx.buffer, &next_tup, ctx.page, false); + next_xmin = HeapTupleGetXmin(&next_tup); if (!TransactionIdIsValid(curr_xmax) || !TransactionIdEquals(curr_xmax, next_xmin)) continue; @@ -706,7 +718,7 @@ verify_heapam(PG_FUNCTION_ARGS) * xmin. This should be safe because the xmin itself can't have * changed, only its commit status. 
*/ - curr_xmin = HeapTupleHeaderGetXmin(curr_htup); + curr_xmin = HeapTupleGetXmin(&curr_tup); if (xmin_commit_status_ok[ctx.offnum] && xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS && xmin_commit_status_ok[nextoffnum] && @@ -904,7 +916,7 @@ check_tuple_header(HeapCheckContext *ctx) { HeapTupleHeader tuphdr = ctx->tuphdr; uint16 infomask = tuphdr->t_infomask; - TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr); + TransactionId curr_xmax = HeapTupleGetUpdateXidAny(&ctx->tuple); bool result = true; unsigned expected_hoff; @@ -1022,13 +1034,14 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, XidCommitStatus xmin_status; XidCommitStatus xvac_status; XidCommitStatus xmax_status; + HeapTuple tuple = &ctx->tuple; HeapTupleHeader tuphdr = ctx->tuphdr; ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */ *xmin_commit_status_ok = false; /* have not yet proven otherwise */ /* If xmin is normal, it should be within valid range */ - xmin = HeapTupleHeaderGetXmin(tuphdr); + xmin = HeapTupleGetXmin(tuple); switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: @@ -1042,19 +1055,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("xmin %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmin %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmin %llu precedes relation freeze threshold %llu", (unsigned long long) xmin, - (unsigned long long) 
U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; } @@ -1080,19 +1093,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes oldest valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -1146,19 +1159,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) 
XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes oldest valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -1235,7 +1248,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of * avoiding possibly-bogus complaints about missing TOAST entries. */ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (check_mxid_valid_in_rel(xmax, ctx)) { case XID_INVALID: @@ -1294,7 +1307,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, * We already checked above that this multixact is within limits for * this table. Now check the update xid from this multixact. 
*/ - xmax = HeapTupleGetUpdateXid(tuphdr); + xmax = HeapTupleGetUpdateXid(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1306,19 +1319,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("update xid %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return true; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("update xid %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return true; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("update xid %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return true; case XID_BOUNDS_OK: break; @@ -1358,7 +1371,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, } /* xmax is an XID, not a MXID. Sanity check it. 
*/ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1368,19 +1381,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("xmax %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmax %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmax %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: break; @@ -1859,50 +1872,6 @@ check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok, ctx->attnum = -1; } -/* - * Convert a TransactionId into a FullTransactionId using our cached values of - * the valid transaction ID range. It is the caller's responsibility to have - * already updated the cached values, if necessary. 
- */ -static FullTransactionId -FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx) -{ - uint64 nextfxid_i; - int32 diff; - FullTransactionId fxid; - - Assert(TransactionIdIsNormal(ctx->next_xid)); - Assert(FullTransactionIdIsNormal(ctx->next_fxid)); - Assert(XidFromFullTransactionId(ctx->next_fxid) == ctx->next_xid); - - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - nextfxid_i = U64FromFullTransactionId(ctx->next_fxid); - - /* compute the 32bit modulo difference */ - diff = (int32) (ctx->next_xid - xid); - - /* - * In cases of corruption we might see a 32bit xid that is before epoch 0. - * We can't represent that as a 64bit xid, due to 64bit xids being - * unsigned integers, without the modulo arithmetic of 32bit xid. There's - * no really nice way to deal with that, but it works ok enough to use - * FirstNormalFullTransactionId in that case, as a freshly initdb'd - * cluster already has a newer horizon. - */ - if (diff > 0 && (nextfxid_i - FirstNormalTransactionId) < (int64) diff) - { - Assert(EpochFromFullTransactionId(ctx->next_fxid) == 0); - fxid = FirstNormalFullTransactionId; - } - else - fxid = FullTransactionIdFromU64(nextfxid_i - diff); - - Assert(FullTransactionIdIsNormal(fxid)); - return fxid; -} - /* * Update our cached range of valid transaction IDs. 
*/ @@ -1916,8 +1885,8 @@ update_cached_xid_range(HeapCheckContext *ctx) LWLockRelease(XidGenLock); /* And compute alternate versions of the same */ + ctx->oldest_fxid = FullTransactionIdFromXid(ctx->oldest_xid); ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid); - ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx); } /* @@ -2016,7 +1985,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, } /* Check if the xid is within bounds */ - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); + fxid = FullTransactionIdFromXid(xid); if (!fxid_in_cached_range(fxid, ctx)) { /* @@ -2025,7 +1994,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, * performed the full xid conversion, reconvert. */ update_cached_xid_range(ctx); - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); } if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid)) @@ -2049,8 +2017,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, *status = XID_COMMITTED; LWLockAcquire(XactTruncationLock, LW_SHARED); clog_horizon = - FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid, - ctx); + FullTransactionIdFromXid(ShmemVariableCache->oldestClogXid); if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid)) { if (TransactionIdIsCurrentTransactionId(xid)) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index bcff849aa9..860fc10cfb 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -571,7 +571,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * avoid this. 
*/ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple), snapshot->xmin)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 999ddad76d..f7e2fa847e 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -914,6 +914,7 @@ hstore_from_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroXids(&tuple); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); @@ -1067,6 +1068,7 @@ hstore_populate_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroXids(&tuple); } /* diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index 95e030b396..446721018e 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -13,7 +13,8 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \ +DATA = pageinspect--1.12--1.13.sql \ + pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \ pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index afa1947fad..a810be4530 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -122,6 +122,9 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->page_size = PageGetPageSize(page); + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_level = opaque->btpo_level; + /* page type (flags) */ if (P_ISDELETED(opaque)) { @@ -143,11 +146,18 @@ 
GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) FullTransactionId safexid = BTPageGetDeleteXid(page); elog(DEBUG2, "deleted page from block %u has safexid %llu", - blkno, (unsigned long long) U64FromFullTransactionId(safexid)); + blkno, (unsigned long long) XidFromFullTransactionId(safexid)); } else - elog(DEBUG2, "deleted page from block %u has safexid %llu", - blkno, (unsigned long long) opaque->btpo_level); + { + ShortTransactionId safexid = BTP_GET_XACT(opaque); + + stat->btpo_prev = 0; + stat->btpo_level = 0; + + elog(DEBUG2, "deleted page from block %u has safexid %u", + blkno, safexid); + } /* Don't interpret BTDeletedPageData as index tuples */ maxoff = InvalidOffsetNumber; @@ -162,9 +172,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->type = 'i'; /* btpage opaque data */ - stat->btpo_prev = opaque->btpo_prev; stat->btpo_next = opaque->btpo_next; - stat->btpo_level = opaque->btpo_level; stat->btpo_flags = opaque->btpo_flags; stat->btpo_cycleid = opaque->btpo_cycleid; diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 0aa5d73322..f5b05dbc06 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -207,8 +207,8 @@ SELECT bt_page_items('aaa'::bytea); ERROR: invalid page size -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); -ERROR: input page is not a valid btree page +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); ERROR: input page is not a valid btree page \set VERBOSITY default diff --git a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out new file mode 100644 index 0000000000..5e64eb9260 --- /dev/null +++ b/contrib/pageinspect/expected/hash_1.out @@ 
-0,0 +1,166 @@ +CREATE TABLE test_hash (a int, b text); +INSERT INTO test_hash VALUES (1, 'one'); +CREATE INDEX test_hash_a_idx ON test_hash USING hash (a); +\x +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 ]--+--------- +hash_page_type | metapage + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5)); +-[ RECORD 1 ]--+------- +hash_page_type | bitmap + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); +ERROR: invalid block number +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); +ERROR: invalid overflow block number 0 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); +ERROR: invalid overflow block number 1 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2); +ERROR: invalid overflow block number 2 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3); +ERROR: invalid overflow block number 3 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); +ERROR: invalid overflow block number 4 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); +ERROR: invalid overflow block number 5 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 
]-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------- +magic | 105121344 +version | 4 +ntuples | 1 +bsize | 8156 +bmsize | 4096 +bmshift | 15 +maxbucket | 3 +highmask | 7 +lowmask | 3 +ovflpoint | 2 +firstfree | 0 +nmaps | 1 +procid | 450 +spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} + +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 1)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 2)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 3)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 4)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash meta page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, 
+hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 0 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 1 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---+----------- +live_items | 1 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 2 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 3 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---------- 
+itemoffset | 1 +ctid | (0,1) +data | 2389907270 + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +DROP TABLE test_hash; diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out index f5c4b61bd7..00323d392d 100644 --- a/contrib/pageinspect/expected/oldextversions.out +++ b/contrib/pageinspect/expected/oldextversions.out @@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1); -- pagesize in pageinspect >= 1.10. ALTER EXTENSION pageinspect UPDATE TO '1.9'; \df page_header - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ - public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ + public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func (1 row) SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | 
version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) DROP TABLE test1; diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index 80ddb45a60..631b6c7be4 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -49,7 +49,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; @@ -70,19 +70,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- - 0: 254 + - 1: 254 + - 3: 254 + - 7: 254 + - 15: 254 + - 31: 254 + - 63: 254 + - 127: 254 + - 255: 254 + - 511: 254 + - 1023: 254 + - 2047: 254 + - 4095: 254 + + 0: 253 + + 1: 253 + + 3: 253 + + 7: 253 + + 15: 253 + + 31: 253 + + 63: 253 + + 127: 253 + + 255: 253 + + 511: 253 + + 1023: 253 + + 2047: 253 + + 4095: 253 + fp_next_slot: 0 + (1 row) diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 0f0252558c..5687f469a8 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -163,7 +163,7 @@ heap_page_items(PG_FUNCTION_ARGS) inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; - inter_call_data->page = VARDATA(raw_page); + inter_call_data->page = get_page_from_raw(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; @@ -211,6 +211,7 @@ heap_page_items(PG_FUNCTION_ARGS) lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { + HeapTupleData tup; HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; @@ -218,9 +219,11 @@ heap_page_items(PG_FUNCTION_ARGS) /* Extract information from the tuple header 
*/ tuphdr = (HeapTupleHeader) PageGetItem(page, id); + tup.t_data = tuphdr; + HeapTupleCopyXidsFromPage(InvalidBuffer, &tup, page, false); - values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); - values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); + values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup)); + values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); diff --git a/contrib/pageinspect/pageinspect--1.12--1.13.sql b/contrib/pageinspect/pageinspect--1.12--1.13.sql new file mode 100644 index 0000000000..a2e0232a10 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.12--1.13.sql @@ -0,0 +1,145 @@ +/* contrib/pageinspect/pageinspect--1.12--1.13.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.13'" to load this file. \quit + +-- +-- gist_page_opaque_info() +-- +DROP FUNCTION gist_page_opaque_info(bytea); +CREATE FUNCTION gist_page_opaque_info(IN page bytea, + OUT lsn pg_lsn, + OUT nsn pg_lsn, + OUT rightlink bigint, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'gist_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + + +-- +-- gist_page_items_bytea() +-- +DROP FUNCTION gist_page_items_bytea(bytea); +CREATE FUNCTION gist_page_items_bytea(IN page bytea, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT key_data bytea) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items_bytea' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- gist_page_items() +-- +DROP FUNCTION gist_page_items(bytea, regclass); +CREATE FUNCTION gist_page_items(IN page bytea, + IN index_oid regclass, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT keys text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items' +LANGUAGE C STRICT PARALLEL SAFE; + 
+-- +-- get_raw_page() +-- +DROP FUNCTION get_raw_page(text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, int4); +CREATE FUNCTION get_raw_page(text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +DROP FUNCTION get_raw_page(text, text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, text, int4); +CREATE FUNCTION get_raw_page(text, text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- page_checksum() +-- +DROP FUNCTION page_checksum(IN page bytea, IN blkno int8); +DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4); +CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8) +RETURNS smallint +AS 'MODULE_PATHNAME', 'page_checksum_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_metap() +-- +DROP FUNCTION bt_metap(text); +CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int8, + OUT level int8, + OUT fastroot int8, + OUT fastlevel int8, + OUT last_cleanup_num_delpages int8, + OUT last_cleanup_num_tuples float8, + OUT allequalimage boolean) +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_stats() +-- +DROP FUNCTION bt_page_stats(text, int8); +DROP FUNCTION IF EXISTS bt_page_stats(text, int4); +CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo_level int8, + OUT btpo_flags int4) +AS 'MODULE_PATHNAME', 'bt_page_stats_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_items() +-- +DROP FUNCTION bt_page_items(text, int8); +DROP FUNCTION IF EXISTS bt_page_items(text, int4); +CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars 
bool, + OUT data text, + OUT dead boolean, + OUT htid tid, + OUT tids tid[]) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_page_items_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_page_items() +-- +DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass); +CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, + OUT itemoffset int, + OUT blknum int8, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'brin_page_items' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql index 1e40c3c97e..fdbd2995a2 100644 --- a/contrib/pageinspect/pageinspect--1.5.sql +++ b/contrib/pageinspect/pageinspect--1.5.sql @@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea, OUT special smallint, OUT pagesize smallint, OUT version smallint, + OUT xid_base xid, + OUT multi_base xid, OUT prune_xid xid) AS 'MODULE_PATHNAME', 'page_header' LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index b25a63cbd6..5e6b8c6cfa 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/relation.h" +#include "commands/sequence.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -251,8 +252,9 @@ page_header(PG_FUNCTION_ARGS) Datum result; HeapTuple tuple; - Datum values[9]; - bool nulls[9]; + Datum values[11]; + bool nulls[11]; + bool is_toast; Page page; PageHeader pageheader; @@ -314,12 +316,37 @@ page_header(PG_FUNCTION_ARGS) } values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); - values[8] = TransactionIdGetDatum(pageheader->pd_prune_xid); + is_toast = PageGetSpecialSize(page) == + MAXALIGN(sizeof(ToastPageSpecialData)); + values[8] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page, + 
is_toast)); /* Build and return the tuple. */ - memset(nulls, 0, sizeof(nulls)); + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) + { + /* Heap page */ + HeapPageSpecial pageSpecial = HeapPageGetSpecial((Page) page); + + values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + values[10] = TransactionIdGetDatum(pageSpecial->pd_multi_base); + } + else if (PageGetSpecialSize(page) == MAXALIGN(sizeof(ToastPageSpecialData))) + { + /* TOAST page */ + ToastPageSpecial pageSpecial = ToastPageGetSpecial((Page) page); + + values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + nulls[10] = true; + } + else + { + /* Double xmax page */ + nulls[9] = true; + nulls[10] = true; + } + tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql index 102ebdefe3..87f202fb9f 100644 --- a/contrib/pageinspect/sql/btree.sql +++ b/contrib/pageinspect/sql/btree.sql @@ -51,7 +51,8 @@ SELECT bt_page_items(get_raw_page('test1_b_gist', 0)); SELECT bt_page_items('aaa'::bytea); -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); \set VERBOSITY default diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c index 4308d1933b..bb57e5dd9c 100644 --- a/contrib/pg_surgery/heap_surgery.c +++ b/contrib/pg_surgery/heap_surgery.c @@ -15,6 +15,7 @@ #include "access/heapam.h" #include "access/visibilitymap.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/pg_am_d.h" #include "catalog/pg_proc_d.h" #include "miscadmin.h" @@ -272,11 +273,20 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) else { 
HeapTupleHeader htup; + HeapTupleData tuple; + bool is_toast; Assert(heap_force_opt == HEAP_FORCE_FREEZE); + is_toast = IsToastRelation(rel); + htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = htup; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast); + /* * Reset all visibility-related fields of the tuple. This * logic should mimic heap_execute_freeze_tuple(), but we @@ -284,8 +294,11 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) * potentially-garbled data is left behind. */ ItemPointerSet(&htup->t_ctid, blkno, curoff); - HeapTupleHeaderSetXmin(htup, FrozenTransactionId); - HeapTupleHeaderSetXmax(htup, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tuple, FrozenTransactionId, + is_toast); + HeapTupleAndHeaderSetXmax(page, &tuple, InvalidTransactionId, + is_toast); + if (htup->t_infomask & HEAP_MOVED) { if (htup->t_infomask & HEAP_MOVED_OFF) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 2a4acfd1ee..f6d574a5c1 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -14,6 +14,7 @@ #include "access/htup_details.h" #include "access/visibilitymap.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/pg_type.h" #include "catalog/storage_xlog.h" #include "funcapi.h" @@ -650,6 +651,8 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = relid; + HeapTupleCopyXidsFromPage(buffer, &tuple, page, + IsToastRelation(rel)); /* * If we're checking whether the page is all-visible, we expect @@ -693,7 +696,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) */ if (check_frozen) { - if (heap_tuple_needs_eventual_freeze(tuple.t_data)) + if 
(heap_tuple_needs_eventual_freeze(&tuple)) record_corrupt_item(items, &tuple.t_self); } } @@ -756,7 +759,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer) * be set here. So just check the xmin. */ - xmin = HeapTupleHeaderGetXmin(tup->t_data); + xmin = HeapTupleGetXmin(tup); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index dea76d8dcb..5616ea64f5 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -130,7 +130,7 @@ pgrowlocks(PG_FUNCTION_ARGS) htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), hscan->rs_cbuf); - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; /* diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index f601dc6121..40a45727c1 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -19,6 +19,7 @@ #include "access/transam.h" #include "access/visibilitymap.h" #include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_am_d.h" #include "commands/vacuum.h" @@ -153,6 +154,7 @@ statapprox_heap(Relation rel, output_type *stat) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, IsToastRelation(rel)); /* * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 8e5a4d6a66..55ef89d8e2 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -631,7 +631,7 @@ pgstathashindex(PG_FUNCTION_ARGS) metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = 
HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; - stats.space_per_page = metap->hashm_bsize; + stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData)); _hash_relbuf(rel, metabuf); /* Get the current relation length */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index c988745b92..152f3295c8 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -4915,16 +4915,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------- - Update on public.ft2 - Output: c1, c2, c3, c4, c5, c6, c7, c8 - -> Foreign Update on public.ft2 - Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 -(4 rows) +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 + Sort Key: t.c1 + CTE t + -> Update on public.ft2 + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8 + -> Foreign Update on public.ft2 + Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 + -> CTE Scan on t + Output: t.c1, 
t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 +(10 rows) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+--------------------+------------------------------+--------------------------+----+------------+----- 7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo @@ -5044,16 +5052,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down - QUERY PLAN --------------------------------------------------------------------------------------------- - Delete on public.ft2 - Output: c1, c4 - -> Foreign Delete on public.ft2 - Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 -(4 rows) + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c4 + Sort Key: t.c1 + CTE t + -> Delete on public.ft2 + Output: ft2.c1, ft2.c4 + -> Foreign Delete on public.ft2 + Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 + -> CTE Scan on t + Output: t.c1, t.c4 +(10 rows) -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; c1 | c4 ------+------------------------------ 5 | Tue Jan 06 00:00:00 1970 PST @@ -6314,7 +6330,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; 1218 | 818 | ggg_trig_update | | | (--; | ft2 | (1 
row) -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+------------------------+------------------------------+--------------------------+----+------------+----- 8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index e9144beb62..0ffb8b61be 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4818,8 +4818,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, * Note: no need to care about tableoid here because it will be * initialized in ExecProcessReturning(). */ - HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId); + HeapTupleSetXmin(resultTup, InvalidTransactionId); + HeapTupleSetXmax(resultTup, InvalidTransactionId); HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId); } @@ -7640,6 +7640,7 @@ make_tuple_from_result_row(PGresult *res, */ if (ctid) tuple->t_self = tuple->t_data->t_ctid = *ctid; + HeapTupleSetZeroXids(tuple); /* * Stomp on the xmin, xmax, and cmin fields from the tuple created by @@ -7649,8 +7650,8 @@ make_tuple_from_result_row(PGresult *res, * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. 
*/ - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(tuple, InvalidTransactionId); + HeapTupleSetXmin(tuple, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* Clean up */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index cb40540702..4b1c43637c 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1442,16 +1442,20 @@ EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND 
ft1.c1 % 10 = 2; -- can be pushed down DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; @@ -1558,7 +1562,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *; INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; -- Test errors thrown on remote side during update ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index c52d40dce0..7413304ddd 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -737,10 +737,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) result = PointerGetDatum(&(tup->t_self)); break; case MinTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup)); break; case MaxTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: @@ -785,6 +785,7 @@ heap_copytuple(HeapTuple tuple) newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); return newTuple; @@ -811,6 +812,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest) dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; + HeapTupleCopyXids(dest, src); dest->t_data = 
(HeapTupleHeader) palloc(src->t_len); memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len); } @@ -1174,6 +1176,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tuple); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); @@ -1258,6 +1261,7 @@ heap_modify_tuple(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); return newTuple; } @@ -1321,6 +1325,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); return newTuple; } @@ -1561,6 +1566,7 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup) result->t_len = len; ItemPointerSetInvalid(&(result->t_self)); result->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(result); result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2)); diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index c852b1fb74..0498115c92 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1922,17 +1922,17 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)}, {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)}, - {"autovacuum_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)}, - 
{"autovacuum_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)}, - {"autovacuum_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)}, - {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)}, - {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)}, - {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 24bab58499..3fd0c4609d 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -317,11 +317,10 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) * INTERNAL and allowing any such function seems too scary. 
*/ if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) && - (argtype == DATEOID || - argtype == XIDOID || argtype == CIDOID)) + (argtype == DATEOID || argtype == CIDOID)) /* okay, allowed use of hashint4() */ ; else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) && - (argtype == XID8OID)) + (argtype == XID8OID || argtype == XIDOID)) /* okay, allowed use of hashint8() */ ; else if ((funcid == F_TIMESTAMP_HASH || funcid == F_TIMESTAMP_HASH_EXTENDED) && diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index b6ee64d856..a33cbc207b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -52,11 +52,14 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/namespace.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" #include "port/pg_bitutils.h" +#include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" @@ -74,7 +77,7 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, - TransactionId xid, CommandId cid, int options); + CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, @@ -114,6 +117,8 @@ static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy); +static bool heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi); /* @@ -462,6 +467,8 @@ heapgetpage(TableScanDesc sscan, BlockNumber block) loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp); loctup.t_len = ItemIdGetLength(lpp); + 
HeapTupleCopyXidsFromPage(buffer, &loctup, page, + IsToastRelation(scan->rs_base.rs_rd)); ItemPointerSet(&(loctup.t_self), block, lineoff); if (all_visible) @@ -473,7 +480,16 @@ heapgetpage(TableScanDesc sscan, BlockNumber block) &loctup, buffer, snapshot); if (valid) - scan->rs_vistuples[ntup++] = lineoff; + { + scan->rs_vistuples[ntup] = lineoff; + /* + * Since there is no lock futher and xmin or xmax may be + * changed while base shift, copy them here. + */ + scan->rs_xmin[ntup] = loctup.t_xmin; + scan->rs_xmax[ntup] = loctup.t_xmax; + ++ntup; + } } LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -777,6 +793,8 @@ continue_page: tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyXidsFromPage(scan->rs_cbuf, tuple, page, + IsToastRelation(scan->rs_base.rs_rd)); ItemPointerSet(&(tuple->t_self), block, lineoff); visible = HeapTupleSatisfiesVisibility(tuple, @@ -867,6 +885,9 @@ heapgettup_pagemode(HeapScanDesc scan, linesleft = scan->rs_cindex; /* lineindex now references the next or previous visible tid */ + tuple->t_xmin = scan->rs_xmin[scan->rs_cindex]; + tuple->t_xmax = scan->rs_xmax[scan->rs_cindex]; + goto continue_page; } @@ -895,6 +916,8 @@ continue_page: tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp); tuple->t_len = ItemIdGetLength(lpp); + tuple->t_xmin = scan->rs_xmin[lineindex]; + tuple->t_xmax = scan->rs_xmax[lineindex]; ItemPointerSet(&(tuple->t_self), block, lineoff); /* skip any tuples that don't match the scan key */ @@ -1403,6 +1426,7 @@ heap_fetch(Relation relation, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, tuple, page, IsToastRelation(relation)); /* * check tuple visibility, then release lock @@ -1411,7 +1435,7 @@ heap_fetch(Relation relation, if (valid) PredicateLockTID(relation, &(tuple->t_self), snapshot, - HeapTupleHeaderGetXmin(tuple->t_data)); + 
HeapTupleGetXmin(tuple)); HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot); @@ -1488,6 +1512,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Assert(TransactionIdIsValid(RecentXmin)); Assert(BufferGetBlockNumber(buffer) == blkno); + heapTuple->t_self = *tid; + /* Scan through possible multiple members of HOT-chain */ for (;;) { @@ -1523,6 +1549,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); heapTuple->t_len = ItemIdGetLength(lp); heapTuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, heapTuple, page, + IsToastRelation(relation)); ItemPointerSet(&heapTuple->t_self, blkno, offnum); /* @@ -1537,7 +1565,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) + HeapTupleGetXmin(heapTuple))) break; /* @@ -1558,7 +1586,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, { ItemPointerSetOffsetNumber(tid, offnum); PredicateLockTID(relation, &heapTuple->t_self, snapshot, - HeapTupleHeaderGetXmin(heapTuple->t_data)); + HeapTupleGetXmin(heapTuple)); if (all_dead) *all_dead = false; return true; @@ -1593,7 +1621,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, blkno); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; - prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + prev_xmax = HeapTupleGetUpdateXidAny(heapTuple); } else break; /* end of chain */ @@ -1679,13 +1707,14 @@ heap_get_latest_tid(TableScanDesc sscan, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); /* * After following a t_ctid link, we 
might arrive at an unrelated * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp))) { UnlockReleaseBuffer(buffer); break; @@ -1704,7 +1733,7 @@ heap_get_latest_tid(TableScanDesc sscan, * If there's a valid t_ctid link, follow it, else we're done. */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(tp.t_data) || + HeapTupleIsOnlyLocked(&tp) || HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { @@ -1713,7 +1742,7 @@ heap_get_latest_tid(TableScanDesc sscan, } ctid = tp.t_data->t_ctid; - priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&tp); UnlockReleaseBuffer(buffer); } /* end of loop */ } @@ -1738,7 +1767,7 @@ heap_get_latest_tid(TableScanDesc sscan, static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { - Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid)); + Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) @@ -1805,6 +1834,31 @@ ReleaseBulkInsertStatePin(BulkInsertState bistate) bistate->last_free = InvalidBlockNumber; } +/* + * Add xid_base and multi base to the WAL record. + * + * WAL record must being constructed. 
+ */ +static inline void +xlog_register_base(Page page, bool is_toast, TransactionId *xid_base, + TransactionId *multi_base) +{ + if (is_toast) + { + *xid_base = ToastPageGetSpecial(page)->pd_xid_base; + *multi_base = InvalidTransactionId; + } + else + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + *xid_base = special->pd_xid_base; + *multi_base = special->pd_multi_base; + } + + XLogRegisterData((char *) xid_base, sizeof(*xid_base)); + XLogRegisterData((char *) multi_base, sizeof(*multi_base)); +} /* * heap_insert - insert tuple into a heap @@ -1844,7 +1898,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. */ - heaptup = heap_prepare_insert(relation, tup, xid, cid, options); + heaptup = heap_prepare_insert(relation, tup, cid, options); /* * Find buffer to insert this tuple into. If the page is all visible, @@ -1872,6 +1926,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber); + heap_page_prepare_for_xid(relation, buffer, xid, false); + HeapTupleSetXmin(heaptup, xid); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -1909,6 +1966,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, Page page = BufferGetPage(buffer); uint8 info = XLOG_HEAP_INSERT; int bufflags = 0; + TransactionId xid_base, + multi_base; /* * If this is a catalog, we need to transmit combo CIDs to properly @@ -1947,12 +2006,17 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, { xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; bufflags |= REGBUF_KEEP_DATA; - - if (IsToastRelation(relation)) - xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION; } + if (IsToastRelation(relation)) + xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION; + XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + 
xlog_register_base(page, IsToastRelation(relation), &xid_base, + &multi_base); + XLogRegisterData((char *) &xlrec, SizeOfHeapInsert); xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; @@ -2014,7 +2078,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * that in any case, the header fields are also set in the original tuple. */ static HeapTuple -heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, +heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { /* @@ -2031,12 +2095,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - HeapTupleHeaderSetXmin(tup->t_data, xid); + HeapTupleSetXmin(tup, InvalidTransactionId); if (options & HEAP_INSERT_FROZEN) - HeapTupleHeaderSetXminFrozen(tup->t_data); + HeapTupleHeaderStoreXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); - HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ + HeapTupleSetXmax(tup, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* @@ -2128,8 +2192,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL); slots[i]->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slots[i]->tts_tableOid; - heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid, - options); + heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options); } /* @@ -2204,6 +2267,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) all_frozen_set = true; + heap_page_prepare_for_xid(relation, buffer, xid, false); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2211,6 +2276,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int 
ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. */ + HeapTupleSetXmin(heaptuples[ndone], xid); RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); /* @@ -2227,6 +2293,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; + HeapTupleSetXmin(heaptup, xid); RelationPutHeapTuple(relation, buffer, heaptup, false); /* @@ -2272,6 +2339,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, char *scratchptr = scratch.data; bool init; int bufflags = 0; + TransactionId xid_base, + multi_base; /* * If the page was previously empty, we can reinit the page @@ -2362,6 +2431,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + xlog_register_base(page, IsToastRelation(relation), &xid_base, + &multi_base); + XLogRegisterData((char *) xlrec, tupledata - scratch.data); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); @@ -2569,6 +2643,7 @@ heap_delete(Relation relation, ItemPointer tid, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); l1: @@ -2600,7 +2675,7 @@ l1: uint16 infomask; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tp.t_data); + xwait = HeapTupleGetRawXmax(&tp); infomask = tp.t_data->t_infomask; /* @@ -2639,6 +2714,10 @@ l1: NULL); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyXidsFromPage(buffer, &tp, page, + IsToastRelation(relation)); + /* * If xwait had just locked the tuple then some other xact * could update this tuple before we get to this point. 
Check @@ -2649,7 +2728,7 @@ l1: */ if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; } @@ -2676,6 +2755,10 @@ l1: XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyXidsFromPage(buffer, &tp, page, + IsToastRelation(relation)); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. @@ -2686,7 +2769,7 @@ l1: */ if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; @@ -2700,7 +2783,7 @@ l1: */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tp.t_data)) + HeapTupleIsOnlyLocked(&tp)) result = TM_Ok; else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) result = TM_Updated; @@ -2730,9 +2813,9 @@ l1: if (result != TM_Ok) { tmfd->ctid = tp.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&tp); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); + tmfd->cmax = HeapTupleGetCmax(&tp); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -2755,7 +2838,7 @@ l1: CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer)); /* replace cid with a combo CID if necessary */ - HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo); + HeapTupleAdjustCmax(&tp, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so @@ -2773,11 +2856,20 @@ l1: */ 
MultiXactIdSetOldestMember(); - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); +#ifdef USE_ASSERT_CHECKING + if (IsToastRelation(relation)) + Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0); +#endif + + heap_page_prepare_for_xid(relation, buffer, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); + START_CRIT_SECTION(); /* @@ -2787,7 +2879,7 @@ l1: * the subsequent page pruning will be a no-op and the hint will be * cleared. */ - PageSetPrunable(page, xid); + PageSetPrunable(page, xid, IsToastRelation(relation)); if (PageIsAllVisible(page)) { @@ -2803,7 +2895,7 @@ l1: tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); - HeapTupleHeaderSetXmax(tp.t_data, new_xmax); + HeapTupleAndHeaderSetXmax(page, &tp, new_xmax, IsToastRelation(relation)); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; @@ -2842,6 +2934,8 @@ l1: tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); xlrec.xmax = new_xmax; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION; if (old_key_tuple != NULL) { @@ -2999,7 +3093,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; - Page page; + Page page, + newpage; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, @@ -3026,6 +3121,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, infomask_new_tuple, infomask2_new_tuple; + Assert(!IsToastRelation(relation)); Assert(ItemPointerIsValid(otid)); /* Cheap, simplistic check that the tuple matches the 
rel's rowtype. */ @@ -3097,6 +3193,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* the new tuple is ready, except for this: */ newtup->t_tableOid = RelationGetRelid(relation); @@ -3190,7 +3287,7 @@ l2: */ /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; /* @@ -3241,6 +3338,7 @@ l2: checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* * If xwait had just locked the tuple then some other xact @@ -3249,7 +3347,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup), xwait)) goto l2; } @@ -3275,7 +3373,7 @@ l2: * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) - update_xact = HeapTupleGetUpdateXid(oldtup.t_data); + update_xact = HeapTupleGetUpdateXid(&oldtup); else update_xact = InvalidTransactionId; @@ -3322,7 +3420,7 @@ l2: XLTW_Update); checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. 
@@ -3330,7 +3428,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, - HeapTupleHeaderGetRawXmax(oldtup.t_data))) + HeapTupleGetRawXmax(&oldtup))) goto l2; /* Otherwise check if it committed or aborted */ @@ -3369,9 +3467,9 @@ l2: if (result != TM_Ok) { tmfd->ctid = oldtup.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); + tmfd->cmax = HeapTupleGetCmax(&oldtup); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -3404,6 +3502,7 @@ l2: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); goto l2; } @@ -3413,7 +3512,7 @@ l2: * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, @@ -3432,7 +3531,7 @@ l2: (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else - xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xmax_new_tuple = HeapTupleGetRawXmax(&oldtup); if (!TransactionIdIsValid(xmax_new_tuple)) { @@ -3465,17 +3564,15 @@ l2: */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); - HeapTupleHeaderSetXmin(newtup->t_data, xid); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; - HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple); /* * Replace cid with a combo CID if necessary. 
Note that we already put * the plain cid into the new tuple. */ - HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); + HeapTupleAdjustCmax(&oldtup, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit @@ -3505,7 +3602,7 @@ l2: newtupsize = MAXALIGN(newtup->t_len); - if (need_toast || newtupsize > pagefree) + if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { TransactionId xmax_lock_old_tuple; uint16 infomask_lock_old_tuple, @@ -3530,7 +3627,7 @@ l2: * updating, because the potentially created multixact would otherwise * be wrong. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, @@ -3539,6 +3636,10 @@ l2: Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple)); + heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple, + (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); + START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ @@ -3547,9 +3648,9 @@ l2: HeapTupleClearHotUpdated(&oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_lock_old_tuple, false); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ @@ -3632,7 +3733,11 @@ l2: */ for (;;) { - if (newtupsize > pagefree) + /* + * We can't fit new tuple to "double xmax" page, since it's + * impossible to set xmin there. + */ + if (newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { /* It doesn't fit, must use RelationGetBufferForTuple. 
*/ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, @@ -3666,6 +3771,9 @@ l2: break; } } + + /* Copy possibly updated xid base to old tuple after relocking */ + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); } else { @@ -3737,6 +3845,33 @@ l2: id_has_external, &old_key_copied); + newpage = BufferGetPage(newbuf); + + /* + * Prepare pages for the current xid, that witten to the new tuple's Xmax + * and old page's pd_prune_xid. + */ + heap_page_prepare_for_xid(relation, buffer, xid, false); + if (newbuf != buffer) + heap_page_prepare_for_xid(relation, newbuf, xid, false); + + /* Prepare pages for tuple's Xmax */ + heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple, + (infomask_old_tuple & HEAP_XMAX_IS_MULTI) != 0); + heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple, + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0); + + /* Copy possibly updated Xid bases to the both tuples. */ + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); + + /* + * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already shifted. + */ + HeapTupleAndHeaderSetXmin(newpage, heaptup, xid, + IsToastRelation(relation)); + HeapTupleAndHeaderSetXmax(newpage, heaptup, xmax_new_tuple, + IsToastRelation(relation)); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -3752,7 +3887,7 @@ l2: * not to optimize for aborts. Note that heap_xlog_update must be kept in * sync if this decision changes. */ - PageSetPrunable(page, xid); + PageSetPrunable(page, xid, false); if (use_hot_update) { @@ -3779,10 +3914,11 @@ l2: oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... 
and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_old_tuple, false); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = heaptup->t_self; @@ -3836,6 +3972,18 @@ l2: END_CRIT_SECTION(); + if (newtup != heaptup) + { + /* + * Set new tuple's Xmin/Xmax only after both xid base preparations. + * Old tuple's Xmin/Xmax were already shifted because old tuple is on + * the page. + */ + Assert(!IsToastRelation(relation)); + HeapTupleAndHeaderSetXmin(newpage, heaptup, xid, false); + HeapTupleAndHeaderSetXmax(newpage, newtup, xmax_new_tuple, false); + } + if (newbuf != buffer) LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -4183,6 +4331,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); l3: result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); @@ -4209,7 +4358,7 @@ l3: ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); + xwait = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); @@ -4367,11 +4516,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, 
tuple, page, false); /* * Make sure it's still an appropriate lock, else start over. @@ -4380,7 +4531,7 @@ l3: * now need to follow the update chain to lock the new * versions. */ - if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) && + if (!HeapTupleIsOnlyLocked(tuple) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; @@ -4407,6 +4558,7 @@ l3: !HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* * Make sure it's still an appropriate lock, else start over. @@ -4435,8 +4587,10 @@ l3: * meantime, start over. */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4447,10 +4601,11 @@ l3: else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; /* otherwise, we're good */ @@ -4475,8 +4630,10 @@ l3: { /* ... 
but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); @@ -4497,6 +4654,7 @@ l3: if (require_sleep && (result == TM_Updated || result == TM_Deleted)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } else if (require_sleep) @@ -4522,6 +4680,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } @@ -4548,6 +4707,8 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, + false); goto failed; } break; @@ -4588,6 +4749,8 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, + false); goto failed; } break; @@ -4614,11 +4777,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* * xwait is done, but if xwait had just locked the tuple then some @@ -4626,7 +4791,7 @@ l3: * Check for xmax change, and start over if so. 
*/ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4654,7 +4819,7 @@ l3: if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple->t_data)) + HeapTupleIsOnlyLocked(tuple)) result = TM_Ok; else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) result = TM_Updated; @@ -4680,9 +4845,9 @@ failed: Assert(result != TM_Updated || !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)); tmfd->ctid = tuple->t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(tuple); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); + tmfd->cmax = HeapTupleGetCmax(tuple); else tmfd->cmax = InvalidCommandId; goto out_locked; @@ -4702,10 +4867,11 @@ failed: LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto l3; } - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); old_infomask = tuple->t_data->t_infomask; /* @@ -4727,6 +4893,10 @@ failed: GetCurrentTransactionId(), mode, false, &xid, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, *buffer, xid, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + START_CRIT_SECTION(); /* @@ -4745,7 +4915,8 @@ failed: tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); - HeapTupleHeaderSetXmax(tuple->t_data, xid); + Assert(!IsToastRelation(relation)); + HeapTupleAndHeaderSetXmax(page, tuple, xid, false); /* * Make sure there is no forward chain link in 
t_ctid. Note that in the @@ -5339,12 +5510,19 @@ l4: LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); } + /* + * Copy xid base after buffer relocking, it could have changed since + * heap_fetch(). + */ + HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf), + IsToastRelation(rel)); + /* * Check the tuple XMIN against prior XMAX, if any. If we reached the * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), + !TransactionIdEquals(HeapTupleGetXmin(&mytup), priorXmax)) { result = TM_Ok; @@ -5356,7 +5534,7 @@ l4: * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. */ - if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data))) + if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup))) { result = TM_Ok; goto out_locked; @@ -5364,7 +5542,7 @@ l4: old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; - xmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + xmax = HeapTupleGetRawXmax(&mytup); /* * If this tuple version has been updated or locked by some concurrent @@ -5377,7 +5555,7 @@ l4: TransactionId rawxmax; bool needwait; - rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + rawxmax = HeapTupleGetRawXmax(&mytup); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; @@ -5518,14 +5696,25 @@ l4: VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; +#ifdef USE_ASSERT_CHECKING + if (IsToastRelation(rel)) + Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0); +#endif + + heap_page_prepare_for_xid(rel, buf, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf), + IsToastRelation(rel)); + START_CRIT_SECTION(); /* ... 
and set them */ - HeapTupleHeaderSetXmax(mytup.t_data, new_xmax); mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; + Assert(!IsToastRelation(rel)); + HeapTupleAndHeaderSetXmax(BufferGetPage(buf), &mytup, new_xmax, false); MarkBufferDirty(buf); @@ -5559,14 +5748,14 @@ next: if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || - HeapTupleHeaderIsOnlyLocked(mytup.t_data)) + HeapTupleIsOnlyLocked(&mytup)) { result = TM_Ok; goto out_locked; } /* tail recursion */ - priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); } @@ -5769,12 +5958,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid) tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); /* * Sanity check that the tuple really is a speculatively inserted tuple, * inserted by us. 
*/ - if (tp.t_data->t_choice.t_heap.t_xmin != xid) + if (HeapTupleGetRawXmin(&tp) != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); @@ -5803,7 +5993,9 @@ heap_abort_speculative(Relation relation, ItemPointer tid) prune_xid = relation->rd_rel->relfrozenxid; else prune_xid = TransactionXmin; - PageSetPrunable(page, prune_xid); + Assert(TransactionIdIsValid(prune_xid)); + heap_page_prepare_for_xid(relation, buffer, prune_xid, false); + PageSetPrunable(page, prune_xid, IsToastRelation(relation)); /* store transaction information of xact deleting the tuple */ tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); @@ -5812,9 +6004,12 @@ heap_abort_speculative(Relation relation, ItemPointer tid) /* * Set the tuple header xmin to InvalidTransactionId. This makes the * tuple immediately invisible everyone. (In particular, to any - * transactions waiting on the speculative token, woken up later.) + * transactions waiting on the speculative token, woken up later.) Don't + * need to reload xid base from page because InvalidTransactionId doesn't + * require xid base to be valid. */ - HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tp, InvalidTransactionId, + IsToastRelation(relation)); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; @@ -5833,6 +6028,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid) XLogRecPtr recptr; xlrec.flags = XLH_DELETE_IS_SUPER; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION; xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); @@ -6101,7 +6298,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * been pruned away instead, since updater XID is < OldestXmin). 
* Just remove xmax. */ - if (TransactionIdDidCommit(update_xact)) + if (!TransactionIdDidAbort(update_xact)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("multixact %llu contains non-aborted update XID %llu from before removable cutoff %llu", @@ -6199,7 +6396,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * even member XIDs >= OldestXmin often won't be kept by second pass. */ nnewmembers = 0; - newmembers = palloc(sizeof(MultiXactMember) * nmembers); + newmembers = palloc0(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; @@ -6385,7 +6582,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * then caller had better have an exclusive lock on it already. */ bool -heap_prepare_freeze_tuple(HeapTupleHeader tuple, +heap_prepare_freeze_tuple(HeapTuple htup, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen) @@ -6397,8 +6594,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, replace_xmax = false, freeze_xmax = false; TransactionId xid; + HeapTupleHeader tuple = htup->t_data; - frz->xmax = HeapTupleHeaderGetRawXmax(tuple); + frz->xmax = HeapTupleGetRawXmax(htup); frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; frz->frzflags = 0; @@ -6409,7 +6607,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * will become frozen iff our freeze plan is executed by caller (could be * neither). */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (!TransactionIdIsNormal(xid)) xmin_already_frozen = true; else @@ -6551,6 +6749,15 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, /* MultiXactId processing forces freezing (barring FRM_NOOP case) */ Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax)); } + else if ((tuple->t_infomask & HEAP_XMAX_INVALID) && + TransactionIdIsNormal(xid)) + { + /* + * To reset xmax without reading clog. 
+ * This prevent excess growth of xmax. + */ + freeze_xmax = true; + } else if (TransactionIdIsNormal(xid)) { /* Raw xmax is normal XID */ @@ -6572,7 +6779,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED; } - else if (!TransactionIdIsValid(xid)) + else if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) { /* Raw xmax is InvalidTransactionId XID */ Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0); @@ -6642,7 +6849,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * Does this tuple force caller to freeze the entire page? */ pagefrz->freeze_required = - heap_tuple_should_freeze(tuple, cutoffs, + heap_tuple_should_freeze(htup, cutoffs, &pagefrz->NoFreezePageRelfrozenXid, &pagefrz->NoFreezePageRelminMxid); } @@ -6661,18 +6868,32 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * in private storage (which is what CLUSTER and friends do). */ static inline void -heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz) +heap_execute_freeze_tuple(HeapTuple htup, HeapTupleFreeze *frz) { - HeapTupleHeaderSetXmax(tuple, frz->xmax); + HeapTupleHeader tuple = htup->t_data; + + tuple->t_infomask = frz->t_infomask; + tuple->t_infomask2 = frz->t_infomask2; + + HeapTupleSetXmax(htup, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); if (frz->frzflags & XLH_INVALID_XVAC) HeapTupleHeaderSetXvac(tuple, InvalidTransactionId); +} - tuple->t_infomask = frz->t_infomask; - tuple->t_infomask2 = frz->t_infomask2; +static inline void +heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup, + HeapTupleFreeze *frz, bool is_toast) +{ + HeapTupleData tuple; + + tuple.t_data = htup; + heap_execute_freeze_tuple(&tuple, frz); + + HeapTupleHeaderStoreXmax(page, &tuple, is_toast); } /* @@ -6709,34 +6930,31 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, { HeapTupleFreeze *frz = tuples + 
i; ItemId itemid = PageGetItemId(page, frz->offset); - HeapTupleHeader htup; + HeapTupleData tuple; - htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buffer, &tuple, page, IsToastRelation(rel)); /* Deliberately avoid relying on tuple hint bits here */ if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED) { - TransactionId xmin = HeapTupleHeaderGetRawXmin(htup); + TransactionId xmin = HeapTupleGetXmin(&tuple); - Assert(!HeapTupleHeaderXminFrozen(htup)); + Assert(!HeapTupleHeaderXminFrozen(tuple.t_data)); if (unlikely(!TransactionIdDidCommit(xmin))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("uncommitted xmin %llu needs to be frozen", (unsigned long long) xmin))); } - - /* - * TransactionIdDidAbort won't work reliably in the presence of XIDs - * left behind by transactions that were in progress during a crash, - * so we can only check that xmax didn't commit - */ if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED) { - TransactionId xmax = HeapTupleHeaderGetRawXmax(htup); + TransactionId xmax = HeapTupleGetRawXmax(&tuple); Assert(TransactionIdIsNormal(xmax)); - if (unlikely(TransactionIdDidCommit(xmax))) + if (unlikely(!TransactionIdDidAbort(xmax))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("cannot freeze non-aborted xmax %llu", @@ -6753,7 +6971,8 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, HeapTupleHeader htup; htup = (HeapTupleHeader) PageGetItem(page, itemid); - heap_execute_freeze_tuple(htup, frz); + heap_execute_freeze_tuple_page(page, htup, frz, + IsToastRelation(rel)); } MarkBufferDirty(buffer); @@ -6764,7 +6983,7 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, xl_heap_freeze_plan plans[MaxHeapTuplesPerPage]; OffsetNumber offsets[MaxHeapTuplesPerPage]; int nplans; - xl_heap_freeze_page xlrec; + 
xl_heap_freeze_page xlrec = {0}; XLogRecPtr recptr; /* Prepare deduplicated representation for use in WAL record */ @@ -6773,6 +6992,8 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, xlrec.snapshotConflictHorizon = snapshotConflictHorizon; xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel); xlrec.nplans = nplans; + if (IsToastRelation(rel)) + xlrec.flags = XLH_FREEZE_PAGE_ON_TOAST_RELATION; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage); @@ -6941,7 +7162,7 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, * Useful for callers like CLUSTER that perform their own WAL logging. */ bool -heap_freeze_tuple(HeapTupleHeader tuple, +heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff) { @@ -7118,10 +7339,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask) * checking the hint bits. */ TransactionId -HeapTupleGetUpdateXid(HeapTupleHeader tuple) +HeapTupleGetUpdateXid(HeapTuple tuple) { - return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple), - tuple->t_infomask); + return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple), + tuple->t_data->t_infomask); } /* @@ -7347,15 +7568,18 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, * will eventually require freezing (if tuple isn't removed by pruning first). */ bool -heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) +heap_tuple_needs_eventual_freeze(HeapTuple htup) { TransactionId xid; + HeapTupleHeader tuple; + + tuple = htup->t_data; /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. 
*/ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) return true; @@ -7366,13 +7590,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { MultiXactId multi; - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); if (MultiXactIdIsValid(multi)) return true; } else { - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) return true; } @@ -7402,17 +7626,18 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * point that it fully commits to not freezing the tuple/page in question. */ bool -heap_tuple_should_freeze(HeapTupleHeader tuple, +heap_tuple_should_freeze(HeapTuple htup, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid) { TransactionId xid; MultiXactId multi; + HeapTupleHeader tuple = htup->t_data; bool freeze = false; /* First deal with xmin */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) { Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); @@ -7426,9 +7651,9 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, xid = InvalidTransactionId; multi = InvalidMultiXactId; if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); else - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) { @@ -7439,6 +7664,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) freeze = true; } + else if ((tuple->t_infomask & HEAP_XMAX_INVALID) && + TransactionIdIsNormal(xid)) + { + /* + * To reset xmax without reading clog. 
+ */ + freeze = true; + } else if (!MultiXactIdIsValid(multi)) { /* xmax is a permanent XID or invalid MultiXactId/XID */ @@ -7510,14 +7743,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, * caller's WAL record) by REDO routine when it replays caller's operation. */ void -HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, +HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple, TransactionId *snapshotConflictHorizon) { - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple); - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + TransactionId xmin = HeapTupleGetXmin(tuple); + TransactionId xmax = HeapTupleGetUpdateXidAny(tuple); + TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data); - if (tuple->t_infomask & HEAP_MOVED) + if (tuple->t_data->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*snapshotConflictHorizon, xvac)) *snapshotConflictHorizon = xvac; @@ -7529,8 +7762,8 @@ HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, * * Look for a committed hint bit, or if no xmin bit is set, check clog. 
*/ - if (HeapTupleHeaderXminCommitted(tuple) || - (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin))) + if (HeapTupleHeaderXminCommitted(tuple->t_data) || + (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *snapshotConflictHorizon)) @@ -7878,7 +8111,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) for (;;) { ItemId lp; - HeapTupleHeader htup; + HeapTupleData htup; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -7915,16 +8148,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) if (!ItemIdIsNormal(lp)) break; - htup = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_len = ItemIdGetLength(lp); + HeapTupleCopyXidsFromPage(buf, &htup, page, IsToastRelation(rel)); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax)) break; - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&htup, &snapshotConflictHorizon); /* @@ -7933,13 +8168,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) * chain (they get their own index entries) -- just move on to * next htid from index AM caller. 
*/ - if (!HeapTupleHeaderIsHotUpdated(htup)) + if (!HeapTupleHeaderIsHotUpdated(htup.t_data)) break; /* Advance to next HOT chain member */ - Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno); - offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno); + offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid); + priorXmax = HeapTupleGetUpdateXidAny(&htup); } /* Enable further/final shrinking of deltids for caller */ @@ -8382,6 +8617,8 @@ log_heap_update(Relation reln, Buffer oldbuf, bool all_visible_cleared, bool new_all_visible_cleared) { xl_heap_update xlrec; + TransactionId xid_base, + multi_base; xl_heap_header xlhdr; xl_heap_header xlhdr_idx; uint8 info; @@ -8490,13 +8727,13 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); - xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data); + xlrec.old_xmax = HeapTupleGetRawXmax(oldtup); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); - xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); + xlrec.new_xmax = HeapTupleGetRawXmax(newtup); bufflags = REGBUF_STANDARD; if (init) @@ -8508,6 +8745,17 @@ log_heap_update(Relation reln, Buffer oldbuf, if (oldbuf != newbuf) XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD); + if (info & XLOG_HEAP_INIT_PAGE) + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + Assert(!IsToastRelation(reln)); + xid_base = special->pd_xid_base; + multi_base = special->pd_multi_base; + XLogRegisterData((char *) &xid_base, sizeof(xid_base)); + XLogRegisterData((char *) &multi_base, sizeof(multi_base)); + } + XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate); /* @@ -8620,8 +8868,8 @@ log_heap_new_cid(Relation 
relation, HeapTuple tup) { Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID)); Assert(!HeapTupleHeaderXminInvalid(hdr)); - xlrec.cmin = HeapTupleHeaderGetCmin(hdr); - xlrec.cmax = HeapTupleHeaderGetCmax(hdr); + xlrec.cmin = HeapTupleGetCmin(tup); + xlrec.cmax = HeapTupleGetCmax(tup); xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr); } /* No combo CID, so only cmin or cmax can be set by this TX */ @@ -8825,7 +9073,9 @@ heap_xlog_prune(XLogReaderState *record) heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, - nowunused, nunused); + nowunused, nunused, + (xlrec->flags & XLH_PRUNE_REPAIR_FRAGMENTATION) != 0, + (xlrec->flags & XLH_PRUNE_ON_TOAST_RELATION) != 0); /* * Note: we don't worry about updating the page's prunability hints. @@ -9121,7 +9371,8 @@ heap_xlog_freeze_page(XLogReaderState *record) lp = PageGetItemId(page, offset); tuple = (HeapTupleHeader) PageGetItem(page, lp); - heap_execute_freeze_tuple(tuple, &frz); + heap_execute_freeze_tuple_page(page, tuple, &frz, + (xlrec->flags & XLH_FREEZE_PAGE_ON_TOAST_RELATION) != 0); } } @@ -9193,6 +9444,8 @@ heap_xlog_delete(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); if (PageGetMaxOffsetNumber(page) >= xlrec->offnum) @@ -9208,14 +9461,19 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); + tuple.t_data = htup; + if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); else - HeapTupleHeaderSetXmin(htup, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tuple, InvalidTransactionId, + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate 
for pruning */ - PageSetPrunable(page, XLogRecGetXid(record)); + PageSetPrunable(page, XLogRecGetXid(record), + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); @@ -9236,7 +9494,7 @@ static void heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record); + xl_heap_insert *xlrec; Buffer buffer; Page page; union @@ -9252,6 +9510,20 @@ heap_xlog_insert(XLogReaderState *record) BlockNumber blkno; ItemPointerData target_tid; XLogRedoAction action; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(record); + TransactionId xid_base = InvalidTransactionId; + TransactionId multi_base = InvalidTransactionId; + + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_insert *) rec_data; XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno); ItemPointerSetBlockNumber(&target_tid, blkno); @@ -9276,11 +9548,28 @@ heap_xlog_insert(XLogReaderState *record) * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. 
*/ - if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + if (isinit) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + + if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) + { + PageInit(page, BufferGetPageSize(buffer), + sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + PageInit(page, BufferGetPageSize(buffer), + sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + action = BLK_NEEDS_REDO; } else @@ -9289,6 +9578,7 @@ heap_xlog_insert(XLogReaderState *record) { Size datalen; char *data; + HeapTupleData tuple; page = BufferGetPage(buffer); @@ -9312,7 +9602,9 @@ heap_xlog_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), + (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; @@ -9372,12 +9664,22 @@ heap_xlog_multi_insert(XLogReaderState *record) int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; + TransactionId xid_base = InvalidTransactionId, + multi_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. 
*/ - xlrec = (xl_heap_multi_insert *) XLogRecGetData(record); + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + xlrec = (xl_heap_multi_insert *) rec_data; XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno); @@ -9404,7 +9706,22 @@ heap_xlog_multi_insert(XLogReaderState *record) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + + if ((xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + action = BLK_NEEDS_REDO; } else @@ -9425,6 +9742,7 @@ heap_xlog_multi_insert(XLogReaderState *record) { OffsetNumber offnum; xl_multi_insert_tuple *xlhdr; + HeapTupleData tuple; /* * If we're reinitializing the page, the tuples are stored in @@ -9455,7 +9773,9 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), + false); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); @@ -9503,8 +9823,8 @@ static void heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); RelFileLocator rlocator; + xl_heap_update *xlrec; BlockNumber oldblk; BlockNumber newblk; ItemPointerData 
newtid; @@ -9528,6 +9848,20 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Size freespace = 0; XLogRedoAction oldaction; XLogRedoAction newaction; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(record); + TransactionId xid_base = InvalidTransactionId, + multi_base = InvalidTransactionId; + + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_update *) rec_data; /* initialize to keep the compiler quiet */ oldtup.t_data = NULL; @@ -9574,6 +9908,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) &obuffer); if (oldaction == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(obuffer); offnum = xlrec->old_offnum; if (PageGetMaxOffsetNumber(page) >= offnum) @@ -9586,6 +9922,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) oldtup.t_data = htup; oldtup.t_len = ItemIdGetLength(lp); + /* Toast tuples are never updated. */ + HeapTupleCopyXidsFromPage(obuffer, &oldtup, page, false); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; @@ -9595,13 +9933,15 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->old_xmax, false); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; /* Mark the page as a candidate for pruning */ - PageSetPrunable(page, XLogRecGetXid(record)); + /* Toast tuples are never updated. 
*/ + PageSetPrunable(page, XLogRecGetXid(record), false); if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); @@ -9618,11 +9958,18 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) nbuffer = obuffer; newaction = oldaction; } - else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + else if (isinit) { + HeapPageSpecial special; + nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); - PageInit(page, BufferGetPageSize(nbuffer), 0); + + /* Toast tuples are never updated. */ + PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; newaction = BLK_NEEDS_REDO; } else @@ -9650,6 +9997,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) char *recdata_end; Size datalen; Size tuplen; + HeapTupleData tuple; recdata = XLogRecGetBlockData(record, 0, &datalen); recdata_end = recdata + datalen; @@ -9728,9 +10076,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), false); HeapTupleHeaderSetCmin(htup, FirstCommandId); - HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->new_xmax, false); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9841,6 +10190,8 @@ heap_xlog_lock(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = (Page) BufferGetPage(buffer); offnum = xlrec->offnum; @@ -9869,7 +10220,9 @@ heap_xlog_lock(XLogReaderState *record) BufferGetBlockNumber(buffer), offnum); } - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false); 
HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -9914,6 +10267,8 @@ heap_xlog_lock_updated(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); offnum = xlrec->offnum; @@ -9929,7 +10284,8 @@ heap_xlog_lock_updated(XLogReaderState *record) htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -10077,6 +10433,10 @@ heap_mask(char *pagedata, BlockNumber blkno) mask_page_lsn_and_checksum(page); mask_page_hint_bits(page); + + /* Ignore prune_xid (it's like a hint-bit) */ + HeapPageSetPruneXid(page, InvalidTransactionId, false); + mask_unused_space(page); for (off = 1; off <= PageGetMaxOffsetNumber(page); off++) @@ -10192,14 +10552,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, case HEAPTUPLE_LIVE: if (visible) return; - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_DELETE_IN_PROGRESS: if (visible) - xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); + xid = HeapTupleGetUpdateXidAny(tuple); else - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); if (TransactionIdPrecedes(xid, TransactionXmin)) { @@ -10209,7 +10569,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, } break; case HEAPTUPLE_INSERT_IN_PROGRESS: - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_DEAD: Assert(!visible); @@ -10247,3 +10607,567 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, CheckForSerializableConflictOut(relation, xid, snapshot); } + +static void 
+xid_min_max(ShortTransactionId *min, ShortTransactionId *max, + ShortTransactionId xid, + bool *found) +{ + Assert(TransactionIdIsNormal(xid)); + Assert(xid <= MaxShortTransactionId); + + if (!*found) + { + *min = *max = xid; + *found = true; + } + else + { + *min = Min(*min, xid); + *max = Max(*max, xid); + } +} + +/* + * Find minimum and maximum short transaction ids which occurs in the page. + * + * Works for multi and non multi transaction. Which is defined by "multi" + * argument. + */ +static bool +heap_page_xid_min_max(Page page, bool multi, + ShortTransactionId *min, ShortTransactionId *max, + bool is_toast) +{ + bool found; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + maxoff = PageGetMaxOffsetNumber(page); + found = false; + + Assert(!multi || !is_toast); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + if (!multi) + { + /* + * For non multi transactions we should see inside the tuple for + * update transaction. 
+ */ + Assert(!is_toast || !(htup->t_infomask & HEAP_XMAX_IS_MULTI)); + + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + xid_min_max(min, max, htup->t_choice.t_heap.t_xmin, &found); + } + + if ((htup->t_infomask & HEAP_XMAX_IS_MULTI) && + (!(htup->t_infomask & HEAP_XMAX_LOCK_ONLY))) + { + TransactionId update_xid; + ShortTransactionId xid; + + Assert(!is_toast); + update_xid = MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup), + htup->t_infomask); + xid = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, + update_xid); + + xid_min_max(min, max, xid, &found); + } + } + + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != ((htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0)) + continue; + + xid_min_max(min, max, htup->t_choice.t_heap.t_xmax, &found); + } + + Assert(!found || (*min > InvalidTransactionId && *max <= MaxShortTransactionId)); + + return found; +} + +/* + * Shift xid base in the page. WAL-logged if buffer is specified. 
+ */ +static void +heap_page_shift_base(Relation relation, Buffer buffer, Page page, + bool multi, int64 delta, bool is_toast) +{ + TransactionId *xid_base, + *multi_base; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + Assert(IsBufferLockedExclusive(buffer)); + + START_CRIT_SECTION(); + + if (is_toast) + { + Assert(!multi); + xid_base = &ToastPageGetSpecial(page)->pd_xid_base; + multi_base = NULL; + } + else + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + xid_base = &special->pd_xid_base; + multi_base = &special->pd_multi_base; + } + + /* Iterate over page items */ + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* Apply xid shift to heap tuple */ + if (!multi) + { + /* shift xmin */ + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmin -= delta; + } + } + + /* shift xmax */ + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != (bool) (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + continue; + + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + + /* Apply xid shift to base as well */ + if (!multi) + *xid_base += delta; + else + *multi_base += delta; + + if (BufferIsValid(buffer)) + MarkBufferDirty(buffer); + + /* Write WAL record if needed */ + if (relation && RelationNeedsWAL(relation) && maxoff != 0) + { + XLogRecPtr recptr; + xl_heap_base_shift xlrec; + + xlrec.delta = delta; + xlrec.multi = 
multi; + xlrec.flags = 0; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_BASE_SHIFT_ON_TOAST_RELATION; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); +} + +/* + * Freeze xids in the single heap page. Useful when we can't fit new xid even + * with base shift. + */ +static void +freeze_single_heap_page(Relation relation, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + OffsetNumber offnum, + maxoff; + HeapTupleData tuple; + int nfrozen = 0; + HeapTupleFreeze *frozen; + TransactionId FreezeXid; + GlobalVisState *vistest; + ItemId itemid; + bool totally_frozen; + VacuumParams params = {0}; + struct VacuumCutoffs cutoffs = {0}; + HeapPageFreeze pagefrz; + PruneResult presult; + + vacuum_get_cutoffs(relation, ¶ms, &cutoffs); + FreezeXid = cutoffs.relfrozenxid; /* ??? cutoffs.FreezeLimit; */ + pagefrz.freeze_required = true; + pagefrz.FreezePageRelfrozenXid = cutoffs.FreezeLimit; + pagefrz.FreezePageRelminMxid = cutoffs.MultiXactCutoff; + pagefrz.NoFreezePageRelfrozenXid = cutoffs.FreezeLimit; + pagefrz.NoFreezePageRelminMxid = cutoffs.MultiXactCutoff; + + vistest = GlobalVisTestFor(relation); + + heap_page_prune(relation, buffer, vistest, &presult, &offnum, false); + if (presult.ndeleted > presult.nnewlpdead) + pgstat_update_heap_dead_tuples(relation, + presult.ndeleted - presult.nnewlpdead); + + /* + * Now scan the page to collect vacuumable items and check for tuples + * requiring freezing. + */ + maxoff = PageGetMaxOffsetNumber(page); + frozen = palloc(sizeof(HeapTupleFreeze) * MaxHeapTuplesPerPage); + + /* + * Note: If you change anything in the loop below, also look at + * heap_page_is_all_visible to see if that needs to be changed. 
+ */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, &tuple, page, + IsToastRelation(relation)); + + /* + * Each non-removable tuple must be checked to see if it needs + * freezing. Note we already have exclusive buffer lock. + */ + if (heap_prepare_freeze_tuple(&tuple, &cutoffs, &pagefrz, + &frozen[nfrozen], &totally_frozen)) + frozen[nfrozen++].offset = offnum; + } + + /* + * If we froze any tuples, mark the buffer dirty, and write a WAL record + * recording the changes. We must log the changes to be crash-safe + * against future truncation of CLOG. + */ + if (nfrozen > 0) + heap_freeze_execute_prepared(relation, buffer, FreezeXid, frozen, + nfrozen); + + pfree(frozen); +} + +/* + * Check if xid still fits on a page with given base and delta. + */ +static inline bool +is_delta_fits_heap_page(TransactionId xid, TransactionId base, int64 delta) +{ + return xid >= base + delta + FirstNormalTransactionId && + xid <= base + delta + MaxShortTransactionId; +} + +/* + * Check if xid fits on a page with given base. + */ +static inline bool +is_xid_fits_heap_page(TransactionId xid, TransactionId base) +{ + return xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId; +} + +/* + * Check if delta fits on a page. + * + * If delta does not fits, never return. + */ +static void +heap_page_check_delta(Buffer buffer, + TransactionId xid, TransactionId base, + ShortTransactionId min, ShortTransactionId max, + int64 delta, int64 *freeDelta, int64 *requiredDelta) +{ + BufferDesc *buf; + char *path; + BackendId backend; + + Assert((freeDelta == NULL) == (requiredDelta == NULL)); + + /* + * If delta fits the page, we good to go ... 
+ */ + if (is_delta_fits_heap_page(xid, base, delta)) + return; + + /* + * ... otherwise handle the error. + */ + if (buffer == InvalidBuffer) + return; + + if (BufferIsLocal(buffer)) + { + buf = GetLocalBufferDescriptor(-buffer - 1); + backend = MyBackendId; + } + else + { + buf = GetBufferDescriptor(buffer - 1); + backend = InvalidBackendId; + } + + path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend, + buf->tag.forkNum); + + if (freeDelta == NULL) + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) delta, + path, buf->tag.blockNum); + + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, freeDelta = %lld, requiredDelta = %lld, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) *freeDelta, (long long) *requiredDelta, + (long long) delta, + path, buf->tag.blockNum); +} + +/* + * Shift page base. + */ +static void +heap_page_apply_delta(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi, + TransactionId base, int64 delta, bool is_toast) +{ + Assert(is_delta_fits_heap_page(xid, base, delta)); + + heap_page_shift_base(relation, buffer, page, multi, delta, is_toast); + +#ifdef USE_ASSERT_CHECKING + if (is_toast) + { + Assert(!multi); + base = ToastPageGetSpecial(page)->pd_xid_base; + } + else + base = multi ? HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + + Assert(is_xid_fits_heap_page(xid, base)); +#endif /* USE_ASSERT_CHECKING */ +} + +/* + * Try to fit xid on a page. 
+ */ +static int +heap_page_try_prepare_for_xid(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi, bool is_toast) +{ + TransactionId base; + ShortTransactionId min = InvalidTransactionId, + max = InvalidTransactionId; + int64 delta, + freeDelta, + requiredDelta; + + if (is_toast) + { + Assert(!multi); + base = ToastPageGetSpecial(page)->pd_xid_base; + } + else + base = multi ? HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + + /* If xid fits the page no action needed. */ + if (is_xid_fits_heap_page(xid, base)) + return 0; + + /* No items on the page? */ + if (!heap_page_xid_min_max(page, multi, &min, &max, is_toast)) + { + delta = (int64) (xid - FirstNormalTransactionId) - (int64) base; + heap_page_check_delta(buffer, xid, base, min, max, delta, NULL, NULL); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, delta, + is_toast); + return 0; + } + + /* Can we just shift base on the page? */ + if (xid < base + FirstNormalTransactionId) + { + freeDelta = MaxShortTransactionId - max; + requiredDelta = (base + FirstNormalTransactionId) - xid; + /* Shouldn't consider setting base less than 0 */ + freeDelta = Min(freeDelta, base); + + if (requiredDelta > freeDelta) + return -1; + + delta = -(freeDelta + requiredDelta) / 2; + } + else + { + freeDelta = min - FirstNormalTransactionId; + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta > freeDelta) + return -1; + + delta = (freeDelta + requiredDelta) / 2; + } + + heap_page_check_delta(buffer, xid, base, min, max, + delta, &freeDelta, &requiredDelta); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, + delta, is_toast); + + return 0; +} + +static void +heap_xlog_base_shift(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record); + Buffer buffer; + Page page; + BlockNumber blkno; + RelFileLocator target_node; + + 
XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi, + xlrec->delta, + xlrec->flags & XLH_BASE_SHIFT_ON_TOAST_RELATION); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +/* + * Ensure that given xid fits base of given page. + */ +static bool +heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi) +{ + Page page = BufferGetPage(buffer); + int res; + + /* "Double xmax" page format doesn't require any preparation */ + if (HeapPageIsDoubleXmax(page)) + return false; + + if (!TransactionIdIsNormal(xid)) + return false; + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi, + IsToastRelation(relation)); + if (res != -1) + return res == 1; + + /* Have to try freeing the page... */ + freeze_single_heap_page(relation, buffer); + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi, + IsToastRelation(relation)); + if (res != -1) + return res == 1; + + elog(ERROR, "could not fit xid into page"); + + return false; +} + +/* + * Ensure that given xid fits base of given page. 
+ */ +void +rewrite_page_prepare_for_xid(Page page, HeapTuple tup, bool is_toast) +{ + TransactionId xid; + int res; + + /* xmin */ + xid = HeapTupleGetXmin(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + false, is_toast); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } + + /* xmax */ + xid = HeapTupleGetRawXmax(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI, + is_toast); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } +} + +void +heap3_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info & XLOG_HEAP_OPMASK) + { + case XLOG_HEAP3_BASE_SHIFT: + heap_xlog_base_shift(record); + break; + default: + elog(PANIC, "heap3_redo: unknown op code %u", info); + } +} diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 0ed612e244..7d02aa52bb 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -424,7 +424,7 @@ tuple_lock_retry: * changes in an existing tuple, except to invalid or * frozen, and neither of those can match priorXmax.) */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -484,7 +484,7 @@ tuple_lock_retry: * variable instead of doing HeapTupleHeaderGetXmin again. */ if (TransactionIdIsCurrentTransactionId(priorXmax) && - HeapTupleHeaderGetCmin(tuple->t_data) >= cid) + HeapTupleGetCmin(tuple) >= cid) { tmfd->xmax = priorXmax; @@ -492,7 +492,7 @@ tuple_lock_retry: * Cmin is the problematic value, so store that. See * above. 
*/ - tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data); + tmfd->cmax = HeapTupleGetCmin(tuple); ReleaseBuffer(buffer); return TM_SelfModified; } @@ -518,7 +518,7 @@ tuple_lock_retry: /* * As above, if xmin isn't what we're expecting, do nothing. */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -549,7 +549,7 @@ tuple_lock_retry: /* updated, so look at the updated row */ *tid = tuple->t_data->t_ctid; /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + priorXmax = HeapTupleGetUpdateXidAny(tuple); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } @@ -865,7 +865,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * case we had better copy it. */ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ @@ -877,7 +877,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * Similar situation to INSERT_IN_PROGRESS case. 
*/ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ @@ -1062,6 +1062,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, targtuple->t_tableOid = RelationGetRelid(scan->rs_rd); targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, targtuple, targpage, + IsToastRelation(scan->rs_rd)); switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, hscan->rs_cbuf)) @@ -1097,7 +1099,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * numbers we report to the cumulative stats system to make * this come out right.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple))) { sample_it = true; *liverows += 1; @@ -1128,7 +1130,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * but not the post-image. We also get sane results if the * concurrent transaction never commits. */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple))) *deadrows += 1; else { @@ -1377,7 +1379,8 @@ heapam_index_build_range_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); root_blkno = hscan->rs_cblock; @@ -1470,7 +1473,7 @@ heapam_index_build_range_scan(Relation heapRelation, * before commit there. 
Give a warning if neither case * applies. */ - xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + xwait = HeapTupleGetXmin(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1529,7 +1532,7 @@ heapam_index_build_range_scan(Relation heapRelation, break; } - xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + xwait = HeapTupleGetUpdateXidAny(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1674,7 +1677,8 @@ heapam_index_build_range_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); } @@ -1840,7 +1844,8 @@ heapam_index_validate_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); memset(in_index, 0, sizeof(in_index)); @@ -2180,7 +2185,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, ItemPointerSet(&tid, block, offnum); if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, &heapTuple, NULL, true)) - hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); + { + hscan->rs_vistuples[ntup] = ItemPointerGetOffsetNumber(&tid); + hscan->rs_xmin[ntup] = heapTuple.t_xmin; + hscan->rs_xmax[ntup] = heapTuple.t_xmax; + ++ntup; + } } } else @@ -2205,13 +2215,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp); loctup.t_len = ItemIdGetLength(lp); loctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, &loctup, page, + IsToastRelation(scan->rs_rd)); ItemPointerSet(&loctup.t_self, block, offnum); valid = 
HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) { - hscan->rs_vistuples[ntup++] = offnum; + hscan->rs_vistuples[ntup] = offnum; + hscan->rs_xmin[ntup] = loctup.t_xmin; + hscan->rs_xmax[ntup] = loctup.t_xmax; + ++ntup; PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot, - HeapTupleHeaderGetXmin(loctup.t_data)); + HeapTupleGetXmin(&loctup)); } HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, buffer, snapshot); @@ -2250,6 +2265,8 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp); hscan->rs_ctup.t_len = ItemIdGetLength(lp); hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + hscan->rs_ctup.t_xmin = hscan->rs_xmin[hscan->rs_cindex]; + hscan->rs_ctup.t_xmax = hscan->rs_xmax[hscan->rs_cindex]; ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset); pgstat_count_heap_fetch(scan->rs_rd); @@ -2390,8 +2407,17 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple->t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + if (pagemode) + { + tuple->t_xmin = InvalidTransactionId; + tuple->t_xmax = InvalidTransactionId; + } + else + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, tuple, page, + IsToastRelation(scan->rs_rd)); + + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); if (all_visible) visible = true; diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index a716001341..26fa51e6eb 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -220,7 +220,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) 
/* xid invalid */ return true; @@ -232,7 +232,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -244,7 +244,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -254,11 +254,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -287,7 +287,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -302,17 +302,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) 
return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -330,7 +330,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; } @@ -419,7 +419,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. */ - else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple))) + else if (!TransactionIdIsValid(HeapTupleGetXmin(htup))) return false; } @@ -509,9 +509,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= curcid) + if (HeapTupleGetCmin(htup) >= curcid) return TM_Invisible; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -521,7 +521,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleHeaderGetRawXmax(tuple); + xmax = HeapTupleGetRawXmax(htup); /* * Careful here: even though this tuple was created by our own @@ -552,7 +552,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -560,21 +560,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + if 
(MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; return TM_Ok; } else { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -582,16 +582,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, return TM_Ok; } - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return TM_Invisible; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -625,17 +625,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return TM_BeingModified; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); return TM_Ok; } - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); if (!TransactionIdIsValid(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; } @@ -644,13 +644,13 @@ 
HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; if (TransactionIdDidCommit(xmax)) @@ -666,7 +666,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, * what about the other members? */ - if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * There's no member, even just a locker, alive anymore, so we can @@ -683,20 +683,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return TM_BeingModified; - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return TM_BeingModified; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -714,7 +714,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) return TM_Updated; /* updated by other */ else @@ 
-797,7 +797,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -809,7 +809,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -821,7 +821,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -831,7 +831,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * Return the speculative token to caller. Caller can worry about @@ -847,13 +847,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Assert(snapshot->speculativeToken != 0); } - snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple); + snapshot->xmin = HeapTupleGetRawXmin(htup); /* XXX shouldn't we fall through to look at xmax? 
*/ return true; /* in insertion by other */ } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -882,7 +882,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -900,21 +900,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) { if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) - snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple); + snapshot->xmax = HeapTupleGetRawXmax(htup); return true; } - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -932,7 +932,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; /* updated by other */ } @@ -1011,9 +1011,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= 
snapshot->curcid) + if (HeapTupleGetCmin(htup) >= snapshot->curcid) return false; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -1026,7 +1026,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1034,13 +1034,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* updating subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) return true; - else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + else if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* updated after scan started */ else return false; /* updated before scan started */ } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1048,16 +1048,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return true; } - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -1070,7 +1070,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { /* xmin is committed, but maybe not according to our snapshot */ if 
(!HeapTupleHeaderXminFrozen(tuple) && - XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; /* treat as still in progress */ } @@ -1089,14 +1089,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ @@ -1111,18 +1111,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1132,12 +1132,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } else { /* xmax is committed, but maybe not according to our snapshot */ - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), 
snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; /* treat as still in progress */ } @@ -1252,21 +1252,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de return HEAPTUPLE_DEAD; } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple)) + HeapTupleIsOnlyLocked(htup)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * It'd be possible to discern between INSERT/DELETE in progress @@ -1278,9 +1278,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* @@ -1322,14 +1322,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * possibly be running; otherwise have to check. 
*/ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && - MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); @@ -1347,7 +1347,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - TransactionId xmax = HeapTupleGetUpdateXid(tuple); + TransactionId xmax = HeapTupleGetUpdateXid(htup); /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); @@ -1370,7 +1370,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de *dead_after = xmax; return HEAPTUPLE_RECENTLY_DEAD; } - else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * Not in Progress, Not Committed, so either Aborted or crashed. @@ -1384,11 +1384,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); else { /* @@ -1410,7 +1410,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * Deleter committed, allow caller to check if it was recent enough that * some open transactions could still see the tuple. 
*/ - *dead_after = HeapTupleHeaderGetRawXmax(tuple); + *dead_after = HeapTupleGetRawXmax(htup); return HEAPTUPLE_RECENTLY_DEAD; } @@ -1506,7 +1506,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) /* Deleter committed, so tuple is dead if the XID is old enough. */ return GlobalVisTestIsRemovableXid(vistest, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } /* @@ -1519,8 +1519,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) * at the top of this file. */ bool -HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) +HeapTupleIsOnlyLocked(HeapTuple htup) { + HeapTupleHeader tuple = htup->t_data; TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ @@ -1531,7 +1532,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return true; /* invalid xmax means no update */ - if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) return true; /* @@ -1542,7 +1543,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. 
*/ - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1590,8 +1591,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { HeapTupleHeader tuple = htup->t_data; - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); + TransactionId xmin = HeapTupleGetXmin(htup); + TransactionId xmax = HeapTupleGetRawXmax(htup); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); @@ -1691,7 +1692,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); } /* check if it's one of our txids, toplevel is also in there */ diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index 52ecd45654..3d0e62ba64 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, result_tuple->t_len = new_tuple_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; + HeapTupleCopyXids(result_tuple, newtup); new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; @@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) */ new_tuple->t_self = tup->t_self; new_tuple->t_tableOid = tup->t_tableOid; + HeapTupleCopyXids(new_tuple, tup); new_tuple->t_data->t_choice = tup->t_data->t_choice; new_tuple->t_data->t_ctid = tup->t_data->t_ctid; @@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup, ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = tup; + HeapTupleSetZeroXids(&tmptup); /* * Break down the tuple into fields. 
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index ccc4c6966a..9e6a0a3d5d 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -19,6 +19,7 @@ #include "access/hio.h" #include "access/htup_details.h" #include "access/visibilitymap.h" +#include "catalog/catalog.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" @@ -59,6 +60,9 @@ RelationPutHeapTuple(Relation relation, /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); + HeapTupleHeaderStoreXmin(pageHeader, tuple, IsToastRelation(relation)); + HeapTupleHeaderStoreXmax(pageHeader, tuple, IsToastRelation(relation)); + offnum = PageAddItem(pageHeader, (Item) tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, true); @@ -361,7 +365,17 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, first_block, RelationGetRelationName(relation)); - PageInit(page, BufferGetPageSize(buffer), 0); + if (IsToastRelation(relation)) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + else + { + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + MarkBufferDirty(buffer); /* @@ -394,7 +408,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, if (use_fsm && i >= not_in_fsm_pages) { Size freespace = BufferGetPageSize(victim_buffers[i]) - - SizeOfPageHeaderData; + SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData)); RecordPageWithFreeSpace(relation, curBlock, freespace); } @@ -685,6 +699,9 @@ loop: /* * Now we can check to see if there's enough free space here. If so, * we're done. + * + * "Double xmax" page is not suitable for any new tuple, since xmin + * can't be set there. 
*/ page = BufferGetPage(buffer); @@ -696,12 +713,23 @@ loop: */ if (PageIsNew(page)) { - PageInit(page, BufferGetPageSize(buffer), 0); + if (IsToastRelation(relation)) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + else + { + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + MarkBufferDirty(buffer); } pageFreeSpace = PageGetHeapFreeSpace(page); - if (targetFreeSpace <= pageFreeSpace) + if (targetFreeSpace <= pageFreeSpace && + !HeapPageIsDoubleXmax(page)) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index c5f1abd95a..9ab46bfc31 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -70,6 +70,17 @@ static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum); static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); static void page_verify_redirects(Page page); +static inline bool +XidFitsPage(Page page, TransactionId xid, bool is_toast) +{ + TransactionId base; + + base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + return xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId; +} /* * Optionally prune and repair fragmentation in the specified page. @@ -104,7 +115,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * determining the appropriate horizon is a waste if there's no prune_xid * (i.e. no updates/deletes left potentially dead tuples around). 
*/ - prune_xid = ((PageHeader) page)->pd_prune_xid; + prune_xid = HeapPageGetPruneXidNoAssert(page, IsToastRelation(relation)); + if (!TransactionIdIsValid(prune_xid)) return; @@ -148,7 +160,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) { PruneResult presult; - heap_page_prune(relation, buffer, vistest, &presult, NULL); + heap_page_prune(relation, buffer, vistest, &presult, NULL, false); /* * Report the number of tuples reclaimed to pgstats. This is @@ -204,7 +216,8 @@ void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, PruneResult *presult, - OffsetNumber *off_loc) + OffsetNumber *off_loc, + bool repairFragmentation) { Page page = BufferGetPage(buffer); BlockNumber blockno = BufferGetBlockNumber(buffer); @@ -278,6 +291,8 @@ heap_page_prune(Relation relation, Buffer buffer, htup = (HeapTupleHeader) PageGetItem(page, itemid); tup.t_data = htup; tup.t_len = ItemIdGetLength(itemid); + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); ItemPointerSet(&(tup.t_self), blockno, offnum); /* @@ -333,13 +348,17 @@ heap_page_prune(Relation relation, Buffer buffer, heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused); + prstate.nowunused, prstate.nunused, + repairFragmentation, + IsToastRelation(relation)); /* * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. 
*/ - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + if (XidFitsPage(page, prstate.new_prune_xid, IsToastRelation(relation))) + HeapPageSetPruneXid(page, prstate.new_prune_xid, + IsToastRelation(relation)); /* * Also clear the "page is full" flag, since there's no point in @@ -362,6 +381,13 @@ heap_page_prune(Relation relation, Buffer buffer, xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon; xlrec.nredirected = prstate.nredirected; xlrec.ndead = prstate.ndead; + xlrec.flags = 0; + + if (IsToastRelation(relation)) + xlrec.flags |= XLH_PRUNE_ON_TOAST_RELATION; + + if (repairFragmentation) + xlrec.flags |= XLH_PRUNE_REPAIR_FRAGMENTATION; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapPrune); @@ -402,10 +428,12 @@ heap_page_prune(Relation relation, Buffer buffer, * point in repeating the prune/defrag process until something else * happens to the page. */ - if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid || + bool is_toast = IsToastRelation(relation); + + if (HeapPageGetPruneXid(page, is_toast) != prstate.new_prune_xid || PageIsFull(page)) { - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + HeapPageSetPruneXid(page, prstate.new_prune_xid, is_toast); PageClearFull(page); MarkBufferDirtyHint(buffer, true); } @@ -485,6 +513,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, OffsetNumber chainitems[MaxHeapTuplesPerPage]; int nchain = 0, i; + HeapTupleData tup; + + tup.t_tableOid = RelationGetRelid(prstate->rel); rootlp = PageGetItemId(dp, rootoffnum); @@ -496,6 +527,12 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, Assert(htsv[rootoffnum] != -1); htup = (HeapTupleHeader) PageGetItem(dp, rootlp); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(rootlp); + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum); + HeapTupleCopyXidsFromPage(buffer, &tup, dp, + IsToastRelation(prstate->rel)); + if (HeapTupleHeaderIsHeapOnly(htup)) { /* @@ -520,7 +557,7 @@ 
heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&tup, &prstate->snapshotConflictHorizon); ndeleted++; } @@ -586,11 +623,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, Assert(ItemIdIsNormal(lp)); htup = (HeapTupleHeader) PageGetItem(dp, lp); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(lp); + HeapTupleCopyXidsFromPage(buffer, &tup, dp, + IsToastRelation(prstate->rel)); + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); + /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax)) break; /* @@ -617,7 +660,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: @@ -627,7 +670,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, * that the page is reconsidered for pruning in future. 
*/ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_LIVE: @@ -656,7 +699,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, if (tupdead) { latestdead = offnum; - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&tup, &prstate->snapshotConflictHorizon); } else if (!recent_dead) @@ -678,7 +721,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } /* @@ -795,7 +838,9 @@ void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused) + OffsetNumber *nowunused, int nunused, + bool repairFragmentation, + bool is_toast) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; @@ -919,7 +964,8 @@ heap_page_prune_execute(Buffer buffer, * Finally, repair any fragmentation, and update the page's hint bit about * whether it has free pointers. */ - PageRepairFragmentation(page); + if (repairFragmentation) + PageRepairFragmentation(page, is_toast); /* * Now that the page has been modified, assert that redirect items still @@ -991,7 +1037,8 @@ page_verify_redirects(Page page) * and reused by a completely unrelated tuple. 
*/ void -heap_get_root_tuples(Page page, OffsetNumber *root_offsets) +heap_get_root_tuples(Relation relation, Buffer buffer, Page page, + OffsetNumber *root_offsets) { OffsetNumber offnum, maxoff; @@ -1006,6 +1053,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; + HeapTupleData tup; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) @@ -1014,6 +1062,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); /* * Check if this tuple is part of a HOT-chain rooted at some other @@ -1035,7 +1086,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } else { @@ -1074,9 +1125,12 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) break; htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup))) break; /* Remember the root line pointer for this item */ @@ -1090,7 +1144,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup)); nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } } } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 424958912c..2b485cdc72 100644 --- a/src/backend/access/heap/rewriteheap.c +++ 
b/src/backend/access/heap/rewriteheap.c @@ -378,6 +378,7 @@ rewrite_heap_tuple(RewriteState state, &old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); + HeapTupleCopyXids(new_tuple, old_tuple); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= @@ -387,7 +388,7 @@ rewrite_heap_tuple(RewriteState state, * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ - heap_freeze_tuple(new_tuple->t_data, + heap_freeze_tuple(new_tuple, state->rs_old_rel->rd_rel->relfrozenxid, state->rs_old_rel->rd_rel->relminmxid, state->rs_freeze_xid, @@ -403,7 +404,7 @@ rewrite_heap_tuple(RewriteState state, * If the tuple has been updated, check the old-to-new mapping hash table. */ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && + HeapTupleIsOnlyLocked(old_tuple)) && !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) @@ -411,7 +412,7 @@ rewrite_heap_tuple(RewriteState state, OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); + hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) @@ -484,7 +485,7 @@ rewrite_heap_tuple(RewriteState state, * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. 
*/ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple), state->rs_oldest_xmin)) { /* @@ -493,7 +494,7 @@ UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(new_tuple); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -581,7 +582,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple) bool found; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(old_tuple); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -617,6 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) Size len; OffsetNumber newoff; HeapTuple heaptup; + TransactionId xmin; + bool immutable_tuple; /* * If the new tuple is too big for storage or contains already toasted @@ -651,9 +654,19 @@ raw_heap_insert(RewriteState state, HeapTuple tup) len = MAXALIGN(heaptup->t_len); /* be conservative */ /* - * If we're gonna fail for oversize tuple, do it right away + * Due to the update to 64-bit xids, the maximum plain tuple size was decreased by adding + * PageSpecial to a heap page. Pages with tuples that became too large to fit + * should remain in Double Xmax format (read only). Inserting plain tuples with + * size over the new MaxHeapTupleSize is prohibited anyway, but vacuum full will + * transfer such a page to a rebuilt relation unmodified. */ - if (len > MaxHeapTupleSize) + immutable_tuple = len <= MaxHeapTupleSize_32 && len > MaxHeapTupleSize; + + /* + * If we're gonna fail for oversize tuple, do it right away. But allow processing of + * immutable_tuple (see above). 
+ */ + if (len > MaxHeapTupleSize && !immutable_tuple) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("row is too big: size %zu, maximum size %zu", @@ -702,10 +715,42 @@ if (!state->rs_buffer_valid) { /* Initialize a new empty page */ - PageInit(page, BLCKSZ, 0); + if (immutable_tuple) + /* Initialize DoubleXmax page */ + PageInit(page, BLCKSZ, 0); + else + { + Size special_size; + + special_size = IsToastRelation(state->rs_new_rel) ? + sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + PageInit(page, BLCKSZ, special_size); + } state->rs_buffer_valid = true; } + rewrite_page_prepare_for_xid(page, heaptup, + IsToastRelation(state->rs_new_rel)); + + /* + * Tuple with HEAP_XMIN_FROZEN in t_infomask should have xmin set + * to FrozenTransactionId to avoid these tuples being treated like normal ones. + */ + xmin = HeapTupleGetXmin(heaptup); + HeapTupleSetXmin(heaptup, xmin); + + /* + * Tuples on a DoubleXmax page could not appear modified after they had been + * frozen by pg_upgrade. Just check this to be safe. 
+ */ + Assert(!immutable_tuple || xmin == FrozenTransactionId); + + if (!immutable_tuple) + HeapTupleAndHeaderSetXmin(page, heaptup, xmin, false); + + HeapTupleHeaderStoreXmax(page, heaptup, false); + /* And now we can insert the tuple into the page */ newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len, InvalidOffsetNumber, false, true); @@ -986,19 +1031,24 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid, */ if (!found) { - char path[MAXPGPATH]; - Oid dboid; + char path[MAXPGPATH]; + Oid dboid; + TransactionId current_xid; if (state->rs_old_rel->rd_rel->relisshared) dboid = InvalidOid; else dboid = MyDatabaseId; + current_xid = GetCurrentTransactionId(); snprintf(path, MAXPGPATH, "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, dboid, relid, LSN_FORMAT_ARGS(state->rs_begin_lsn), - xid, GetCurrentTransactionId()); + (uint32) (xid >> 32), + (uint32) xid, + (uint32) (current_xid >> 32), + (uint32) current_xid); dclist_init(&src->mappings); src->off = 0; @@ -1045,9 +1095,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, if (!state->rs_logical_rewrite) return; - xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + xmin = HeapTupleGetXmin(new_tuple); /* use *GetUpdateXid to correctly deal with multixacts */ - xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data); + xmax = HeapTupleGetUpdateXidAny(new_tuple); /* * Log the mapping iff the tuple has been created recently. 
@@ -1111,14 +1161,19 @@ heap_xlog_logical_rewrite(XLogReaderState *r) xl_heap_rewrite_mapping *xlrec; uint32 len; char *data; + TransactionId xid; xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r); + xid = XLogRecGetXid(r); snprintf(path, MAXPGPATH, "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, xlrec->mapped_db, xlrec->mapped_rel, LSN_FORMAT_ARGS(xlrec->start_lsn), - xlrec->mapped_xid, XLogRecGetXid(r)); + (uint32) (xlrec->mapped_xid >> 32), + (uint32) xlrec->mapped_xid, + (uint32) (xid >> 32), + (uint32) xid); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY); @@ -1213,10 +1268,12 @@ CheckPointLogicalRewriteHeap(void) Oid dboid; Oid relid; XLogRecPtr lsn; - TransactionId rewrite_xid; - TransactionId create_xid; - uint32 hi, - lo; + uint32 lsn_hi, + lsn_lo, + rewrite_xid_hi, + rewrite_xid_lo, + create_xid_hi, + create_xid_lo; PGFileType de_type; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -1234,10 +1291,12 @@ CheckPointLogicalRewriteHeap(void) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) + &dboid, &relid, &lsn_hi, &lsn_lo, + &rewrite_xid_hi, &rewrite_xid_lo, + &create_xid_hi, &create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - lsn = ((uint64) hi) << 32 | lo; + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 3623f13b07..3afd41b9b9 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -46,6 +46,7 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "catalog/index.h" +#include "catalog/catalog.h" #include "catalog/storage.h" #include "commands/dbcommands.h" #include "commands/progress.h" @@ -259,7 +260,6 @@ static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static void lazy_vacuum_heap_rel(LVRelState *vacrel); static int 
lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer vmbuffer); -static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void lazy_cleanup_all_indexes(LVRelState *vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, @@ -496,7 +496,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * ensure that parallel VACUUM won't be attempted at all when relfrozenxid * is already dangerously old.) */ - lazy_check_wraparound_failsafe(vacrel); dead_items_alloc(vacrel, params->nworkers); /* @@ -613,7 +612,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, WalUsage walusage; StringInfoData buf; char *msgfmt; - int32 diff; + int64 diff; int64 PageHitOp = VacuumPageHit - StartPageHit, PageMissOp = VacuumPageMiss - StartPageMiss, PageDirtyOp = VacuumPageDirty - StartPageDirty; @@ -666,16 +665,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, orig_rel_pages == 0 ? 100.0 : 100.0 * vacrel->scanned_pages / orig_rel_pages); appendStringInfo(&buf, - _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"), + _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %llu\n"), (long long) vacrel->tuples_deleted, (long long) vacrel->new_rel_tuples, - (long long) vacrel->recently_dead_tuples); + (long long) vacrel->recently_dead_tuples, + (unsigned long long) vacrel->cutoffs.OldestXmin); if (vacrel->missed_dead_tuples > 0) appendStringInfo(&buf, _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"), (long long) vacrel->missed_dead_tuples, vacrel->missed_dead_pages); - diff = (int32) (ReadNextTransactionId() - + diff = (int64) (ReadNextTransactionId() - vacrel->cutoffs.OldestXmin); appendStringInfo(&buf, _("removable cutoff: %llu, which was %lld XIDs old when operation ended\n"), @@ -683,7 +683,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, (long long) diff); if 
(frozenxid_updated) { - diff = (int32) (vacrel->NewRelfrozenXid - + diff = (int64) (vacrel->NewRelfrozenXid - vacrel->cutoffs.relfrozenxid); appendStringInfo(&buf, _("new relfrozenxid: %llu, which is %lld XIDs ahead of previous value\n"), @@ -692,7 +692,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, } if (minmulti_updated) { - diff = (int32) (vacrel->NewRelminMxid - + diff = (int64) (vacrel->NewRelminMxid - vacrel->cutoffs.relminmxid); appendStringInfo(&buf, _("new relminmxid: %llu, which is %lld MXIDs ahead of previous value\n"), @@ -894,18 +894,6 @@ lazy_scan_heap(LVRelState *vacrel) vacuum_delay_point(); - /* - * Regularly check if wraparound failsafe should trigger. - * - * There is a similar check inside lazy_vacuum_all_indexes(), but - * relfrozenxid might start to look dangerously old before we reach - * that point. This check also provides failsafe coverage for the - * one-pass strategy, and the two-pass strategy with the index_cleanup - * param set to 'off'. - */ - if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) - lazy_check_wraparound_failsafe(vacrel); - /* * Consider if we definitely have enough space to process TIDs on page * already. If we are close to overrunning the available space for @@ -1448,7 +1436,14 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0) { - freespace = BLCKSZ - SizeOfPageHeaderData; + Size special_size; + + special_size = IsToastRelation(vacrel->rel) ? 
+ sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + freespace = BufferGetPageSize(buf) + - SizeOfPageHeaderData + - special_size; RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); } @@ -1552,6 +1547,7 @@ lazy_scan_prune(LVRelState *vacrel, OffsetNumber offnum, maxoff; ItemId itemid; + HeapTupleData tuple; PruneResult presult; int tuples_frozen, lpdead_items, @@ -1590,7 +1586,7 @@ lazy_scan_prune(LVRelState *vacrel, * lpdead_items's final value can be thought of as the number of tuples * that were deleted from indexes. */ - heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum); + heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum, true); /* * Now scan the page to collect LP_DEAD items and check for tuples @@ -1650,6 +1646,11 @@ lazy_scan_prune(LVRelState *vacrel, Assert(ItemIdIsNormal(itemid)); htup = (HeapTupleHeader) PageGetItem(page, itemid); + ItemPointerSet(&(tuple.t_self), blkno, offnum); + tuple.t_data = htup; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, IsToastRelation(rel)); /* * The criteria for counting a tuple as live in this block need to @@ -1702,7 +1703,7 @@ lazy_scan_prune(LVRelState *vacrel, * The inserter definitely committed. But is it old enough * that everyone sees it as committed? 
*/ - xmin = HeapTupleHeaderGetXmin(htup); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->cutoffs.OldestXmin)) { @@ -1756,7 +1757,7 @@ lazy_scan_prune(LVRelState *vacrel, prunestate->hastup = true; /* page makes rel truncation unsafe */ /* Tuple with storage -- consider need to freeze */ - if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz, + if (heap_prepare_freeze_tuple(&tuple, &vacrel->cutoffs, &pagefrz, &frozen[tuples_frozen], &totally_frozen)) { /* Save prepared freeze plan for later */ @@ -1956,7 +1957,6 @@ lazy_scan_noprune(LVRelState *vacrel, live_tuples, recently_dead_tuples, missed_dead_tuples; - HeapTupleHeader tupleheader; TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; @@ -2002,8 +2002,13 @@ lazy_scan_noprune(LVRelState *vacrel, } *hastup = true; /* page prevents rel truncation */ - tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); - if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs, + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); + ItemPointerSet(&(tuple.t_self), blkno, offnum); + if (heap_tuple_should_freeze(&tuple, &vacrel->cutoffs, &NoFreezePageRelfrozenXid, &NoFreezePageRelminMxid)) { @@ -2039,6 +2044,8 @@ lazy_scan_noprune(LVRelState *vacrel, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, buf)) @@ -2322,13 +2329,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) Assert(vacrel->do_index_vacuuming); Assert(vacrel->do_index_cleanup); - /* 
Precheck for XID wraparound emergencies */ - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- don't even start an index scan */ - return false; - } - /* * Report that we are now vacuuming indexes and the number of indexes to * vacuum. @@ -2352,12 +2352,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED, idx + 1); - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- end current index scan */ - allindexes = false; - break; - } } } else @@ -2365,13 +2359,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) /* Outsource everything to parallel variant */ parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples, vacrel->num_index_scans); - - /* - * Do a postcheck to consider applying wraparound failsafe now. Note - * that parallel VACUUM only gets the precheck and this postcheck. - */ - if (lazy_check_wraparound_failsafe(vacrel)) - allindexes = false; } /* @@ -2612,68 +2599,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, return index; } -/* - * Trigger the failsafe to avoid wraparound failure when vacrel table has a - * relfrozenxid and/or relminmxid that is dangerously far in the past. - * Triggering the failsafe makes the ongoing VACUUM bypass any further index - * vacuuming and heap vacuuming. Truncating the heap is also bypassed. - * - * Any remaining work (work that VACUUM cannot just bypass) is typically sped - * up when the failsafe triggers. VACUUM stops applying any cost-based delay - * that it started out with. - * - * Returns true when failsafe has been triggered. 
- */ -static bool -lazy_check_wraparound_failsafe(LVRelState *vacrel) -{ - /* Don't warn more than once per VACUUM */ - if (VacuumFailsafeActive) - return true; - - if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs))) - { - const int progress_index[] = { - PROGRESS_VACUUM_INDEXES_TOTAL, - PROGRESS_VACUUM_INDEXES_PROCESSED - }; - int64 progress_val[2] = {0, 0}; - - VacuumFailsafeActive = true; - - /* - * Abandon use of a buffer access strategy to allow use of all of - * shared buffers. We assume the caller who allocated the memory for - * the BufferAccessStrategy will free it. - */ - vacrel->bstrategy = NULL; - - /* Disable index vacuuming, index cleanup, and heap rel truncation */ - vacrel->do_index_vacuuming = false; - vacrel->do_index_cleanup = false; - vacrel->do_rel_truncate = false; - - /* Reset the progress counters */ - pgstat_progress_update_multi_param(2, progress_index, progress_val); - - ereport(WARNING, - (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans", - vacrel->dbname, vacrel->relnamespace, vacrel->relname, - vacrel->num_index_scans), - errdetail("The table's relfrozenxid or relminmxid is too far in the past."), - errhint("Consider increasing configuration parameter maintenance_work_mem or autovacuum_work_mem.\n" - "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); - - /* Stop applying cost limits from this point on */ - VacuumCostActive = false; - VacuumCostBalance = 0; - - return true; - } - - return false; -} - /* * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. 
*/ @@ -3319,7 +3244,8 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); - + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, buf)) { @@ -3339,7 +3265,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * The inserter definitely committed. But is it old enough * that everyone sees it as committed? */ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->cutoffs.OldestXmin)) { @@ -3355,7 +3281,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, /* Check whether this tuple is already frozen or not */ if (all_visible && *all_frozen && - heap_tuple_needs_eventual_freeze(tuple.t_data)) + heap_tuple_needs_eventual_freeze(&tuple)) *all_frozen = false; } break; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index b7660a459e..726edb24a3 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -549,6 +549,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), @@ -652,6 +653,7 @@ _bt_gettrueroot(Relation rel) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index 85834c3dd7..fd985f9282 100644 --- 
a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -140,6 +140,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft, perfectpenalty, leaffillfactor; + int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY; FindSplitData state; FindSplitStrat strategy; ItemId itemid; @@ -153,6 +154,7 @@ _bt_findsplitloc(Relation rel, opaque = BTPageGetOpaque(origpage); maxoff = PageGetMaxOffsetNumber(origpage); + maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY)); /* Total free space available on a btree page, after fixed overhead */ leftspace = rightspace = @@ -214,6 +216,18 @@ _bt_findsplitloc(Relation rel, itemid = PageGetItemId(origpage, offnum); itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); +#ifdef USE_ASSERT_CHECKING + + /* + * Ending of rightmost tuple on a page can be shifted relative to left + * boundary of BTPageOpaqueData due to conversion from EE96, which + * used different BTPageOpaqueData layout. It is only checked in the + * assert below. + */ + if (maxTupleEnd < ItemIdGetTupleEnd(itemid)) + maxTupleEnd = ItemIdGetTupleEnd(itemid); +#endif + /* * When item offset number is not newitemoff, neither side of the * split can be newitem. Record a split after the previous data item @@ -248,7 +262,7 @@ _bt_findsplitloc(Relation rel, * (Though only when it's possible that newitem will end up alone on new * right page.) 
*/ - Assert(olddataitemstoleft == olddataitemstotal); + Assert(olddataitemstoleft + ((PageHeader) origpage)->pd_special - maxTupleEnd == olddataitemstotal); if (newitemoff > maxoff) _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0); diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index baac2a65b3..fd9ca7df94 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -29,7 +29,7 @@ out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) appendStringInfo(buf, "rel %u/%u/%u; blk %u; snapshotConflictHorizon %llu", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, xlrec->block, - (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon)); + (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon)); } static void @@ -51,7 +51,7 @@ static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { appendStringInfo(buf, "deleteXid %llu; downlink %u", - (unsigned long long) U64FromFullTransactionId(xlrec->deleteXid), + (unsigned long long) XidFromFullTransactionId(xlrec->deleteXid), xlrec->downlinkOffset); } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 1bf2c1ab85..4b6d6c3904 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -308,6 +308,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record) } } +void +heap3_desc(StringInfo buf, XLogReaderState *record) +{ + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + info &= XLOG_HEAP_OPMASK; + if (info == XLOG_HEAP3_BASE_SHIFT) + { + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; + + appendStringInfo(buf, "%s delta %lld ", + xlrec->multi ? 
"MultiXactId" : "XactId", + (long long) xlrec->delta); + } +} + const char * heap_identify(uint8 info) { @@ -391,3 +408,18 @@ heap2_identify(uint8 info) return id; } + +const char * +heap3_identify(uint8 info) +{ + const char *id = NULL; + + switch (info & ~XLR_INFO_MASK) + { + case XLOG_HEAP3_BASE_SHIFT: + id = "BASE_SHIFT"; + break; + } + + return id; +} diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index f26664f32d..f42de19c87 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -95,7 +95,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "left: %u, right: %u, level: %u, safexid: %llu, ", xlrec->leftsib, xlrec->rightsib, xlrec->level, - (unsigned long long) U64FromFullTransactionId(xlrec->safexid)); + (unsigned long long) XidFromFullTransactionId(xlrec->safexid)); appendStringInfo(buf, "leafleft: %u, leafright: %u, leaftopparent: %u", xlrec->leafleftsib, xlrec->leafrightsib, xlrec->leaftopparent); @@ -115,7 +115,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "rel: %u/%u/%u, snapshotConflictHorizon: %llu", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, - (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon)); + (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon)); break; } case XLOG_BTREE_META_CLEANUP: diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index 6b3e8fe9e5..dfc7741e79 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -110,7 +110,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); @@ 
-205,7 +206,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed) { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 48fb5022e0..108991680d 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -53,7 +53,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", - (unsigned long long) U64FromFullTransactionId(checkpoint->nextXid), + (unsigned long long) XidFromFullTransactionId(checkpoint->nextXid), checkpoint->nextOid, (unsigned long long) checkpoint->nextMulti, (unsigned long long) checkpoint->nextMultiOffset, diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 0e75dd2ca0..e96b739c8c 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -302,7 +302,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, * sub-XIDs and all of the XIDs for which we're adjusting clog should be * on the same page. Check those conditions, too. */ - if (all_xact_same_page && xid == MyProc->xid && + if (all_xact_same_page && xid == pg_atomic_read_u64(&MyProc->xid) && nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT && nsubxids == MyProc->subxidStatus.count && (nsubxids == 0 || @@ -920,24 +920,11 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) SimpleLruTruncate(XactCtl, cutoffPage); } - /* * Decide whether a CLOG page number is "older" for truncation purposes. * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() - * would get weird about permanent xact IDs. 
So, offset both such that xid1, - xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset - is relevant to page 0 and to the page preceding page 0. - * - * The page containing oldestXact-2^31 is the important edge case. The - * portion of that page equaling or following oldestXact-2^31 is expendable, - * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is - * the first XID of a page and segment, the entire page and segment is - * expendable, and we could truncate the segment. Recognizing that case would - * require making oldestXact, not just the page containing oldestXact, - * available to this callback. The benefit would be rare and small, so we - * don't optimize that edge case. + * With 64-bit XIDs this function is just "<", but we keep it as a function in order + * for its call sites to remain the same as in vanilla. */ static bool CLOGPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index f157854e70..36582e5b4f 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -908,25 +908,6 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* * Decide whether a commitTS page number is "older" for truncation purposes. * Analogous to CLOGPagePrecedes(). - * - * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This - * introduces differences compared to CLOG and the other SLRUs having (1 << - * 31) % per_page == 0. This function never tests exactly - * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, - * there are two possible counts of page boundaries between oldestXact and the - * latest XID assigned, depending on whether oldestXact is within the first - * 128 entries of its page. Since this function doesn't know the location of - * oldestXact within page2, it returns false for one page that actually is - * expendable.
This is a wider (yet still negligible) version of the - * truncation opportunity that CLOGPagePrecedes() cannot recognize. - * - * For the sake of a worked example, number entries with decimal values such - * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of - * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, - * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, - * because entry=2.85 is the border that toggles whether entries precede the - * last entry of the oldestXact page. While page 2 is expendable at - * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool CommitTsPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 4c30525250..c14efe56f6 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -112,15 +112,15 @@ ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) #define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) -#define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* * The situation for members is a bit more complex: we store one byte of * additional flag bits for each TransactionId. To do this without getting - * into alignment issues, we store four bytes of flags, and then the - * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and - * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups - * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and + * into alignment issues, we store eight bytes of flags, and then the + * corresponding 8 Xids. Each such 9-word (72-byte) set we call a "group", and + * are stored as a whole in pages. 
Thus, with 8kB BLCKSZ, we keep 113 groups + * per page. This wastes 56 bytes per page, but that's OK -- simplicity (and * performance) trumps space efficiency here. * * Note that the "offset" macros work with byte offset, not array indexes, so @@ -132,7 +132,7 @@ #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* how many full bytes of flags are there in a group? */ -#define MULTIXACT_FLAGBYTES_PER_GROUP 4 +#define MULTIXACT_FLAGBYTES_PER_GROUP 8 #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) /* size in bytes of a complete group */ @@ -142,22 +142,9 @@ #define MULTIXACT_MEMBERS_PER_PAGE \ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP) -/* - * Because the number of items per page is not a divisor of the last item - * number (member 0xFFFFFFFF), the last segment does not use the maximum number - * of pages, and moreover the last used page therein does not use the same - * number of items as previous pages. (Another way to say it is that the - * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page - * has some empty space after that item.) - * - * This constant is the number of members in the last page of the last segment. 
- */ -#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \ - ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1)) - /* page in which a member is to be found */ #define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE) -#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* Location (byte offset within page) of flag word for a given member */ #define MXOffsetToFlagsOffset(xid) \ @@ -216,22 +203,8 @@ typedef struct MultiXactStateData MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; - /* - * Oldest multixact offset that is potentially referenced by a multixact - * referenced by a relation. We don't always know this value, so there's - * a flag here to indicate whether or not we currently do. - */ - MultiXactOffset oldestOffset; - bool oldestOffsetKnown; - /* support for anti-wraparound measures */ MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - - /* support for members anti-wraparound measures */ - MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */ /* * Per-backend data starts here. 
We have two arrays stored in the area @@ -362,9 +335,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); -static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, - MultiXactOffset start, uint32 distance); -static bool SetOffsetVacuumLimit(bool is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, @@ -398,6 +368,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); + /* memset members array because with 64-bit xids it has a padding hole */ + MemSet(members, 0, sizeof(members)); + /* * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs * are still running. In typical usage, xid2 will be our own XID and the @@ -513,7 +486,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) * end of the loop. 
*/ newMembers = (MultiXactMember *) - palloc(sizeof(MultiXactMember) * (nmembers + 1)); + palloc0(sizeof(MultiXactMember) * (nmembers + 1)); for (i = 0, j = 0; i < nmembers; i++) { @@ -528,7 +501,6 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) newMembers[j].xid = xid; newMembers[j++].status = status; - newMulti = MultiXactIdCreateFromMembers(j, newMembers); pfree(members); @@ -905,8 +877,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; - uint32 *flagsptr; - uint32 flagsval; + uint64 *flagsptr; + uint64 flagsval; int bshift; int flagsoff; int memberoff; @@ -929,12 +901,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, *memberptr = members[i].xid; - flagsptr = (uint32 *) + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; - flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); - flagsval |= (members[i].status << bshift); + flagsval &= ~((uint64) ((1ULL << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); + flagsval |= ((uint64) members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; @@ -987,8 +959,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * If we're past multiVacLimit or the safe threshold for member storage * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. - * If we're past multiWarnLimit, start issuing warnings. - * If we're past multiStopLimit, refuse to create new MultiXactIds. * * Note these are pretty much the same protections in GetNewTransactionId. *---------- @@ -1002,41 +972,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. 
*/ - MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit; - MultiXactId multiStopLimit = MultiXactState->multiStopLimit; - MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit; - Oid oldest_datoid = MultiXactState->oldestMultiXactDB; LWLockRelease(MultiXactGenLock); - if (IsUnderPostmaster && - !MultiXactIdPrecedes(result, multiStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* - * Immediately kick autovacuum into action as we're already in - * ERROR territory. - */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* * To avoid swamping the postmaster with signals, we issue the autovac * request only once per 64K multis generated. 
This still gives @@ -1045,31 +983,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) if (IsUnderPostmaster && (result % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (!MultiXactIdPrecedes(result, multiWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datname, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datoid, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); result = MultiXactState->nextMXact; @@ -1094,78 +1007,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) else *offset = nextOffset; - /*---------- - * Protect against overrun of the members space as well, with the - * following rules: - * - * If we're past offsetStopLimit, refuse to generate more multis. - * If we're close to offsetStopLimit, emit a warning. - * - * Arbitrarily, we start emitting warnings when we're 20 segments or less - * from offsetStopLimit. - * - * Note we haven't updated the shared state yet, so if we fail at this - * point, the multixact ID we grabbed can still be used by the next guy. 
- * - * Note that there is no point in forcing autovacuum runs here: the - * multixact freeze settings would have to be reduced for that to have any - * effect. - *---------- - */ -#define OFFSET_WARN_SEGMENTS 20 - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, - nmembers)) - { - /* see comment in the corresponding offsets wraparound case */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("multixact \"members\" limit exceeded"), - errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.", - "This command would create a multixact with %u members, but the remaining space is only enough for %u members.", - MultiXactState->offsetStopLimit - nextOffset - 1, - nmembers, - MultiXactState->offsetStopLimit - nextOffset - 1), - errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", - MultiXactState->oldestMultiXactDB))); - } - - /* - * Check whether we should kick autovacuum into action, to prevent members - * wraparound. NB we use a much larger window to trigger autovacuum than - * just the warning limit. The warning is just a measure of last resort - - * this is in line with GetNewTransactionId's behaviour. - */ - if (!MultiXactState->oldestOffsetKnown || - (MultiXactState->nextOffset - MultiXactState->oldestOffset - > MULTIXACT_MEMBER_SAFE_THRESHOLD)) - { - /* - * To avoid swamping the postmaster with signals, we issue the autovac - * request only when crossing a segment boundary. With default - * compilation settings that's roughly after 50k members. This still - * gives plenty of chances before we get into real trouble. 
- */ - if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) != - (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT)) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - } - - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, - nextOffset, - nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) - ereport(WARNING, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used", - "database with OID %u must be vacuumed before %d more multixact members are used", - MultiXactState->offsetStopLimit - nextOffset + nmembers, - MultiXactState->oldestMultiXactDB, - MultiXactState->offsetStopLimit - nextOffset + nmembers), - errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings."))); - ExtendMultiXactMember(nextOffset, nmembers); /* @@ -1358,7 +1199,10 @@ retry: offptr += entryno; offset = *offptr; - Assert(offset != 0); + if (offset == 0) + ereport(ERROR, + (errmsg("found invalid zero offset in multixact %llu", + (unsigned long long) multi))); /* * Use the same increment rule as GetNewMultiXactId(), that is, don't @@ -1405,7 +1249,7 @@ retry: LWLockRelease(MultiXactOffsetSLRULock); - ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember)); + ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember)); /* Now get the members themselves. 
*/ LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -1415,7 +1259,7 @@ retry: for (i = 0; i < length; i++, offset++) { TransactionId *xactptr; - uint32 *flagsptr; + uint64 *flagsptr; int flagsoff; int bshift; int memberoff; @@ -1441,7 +1285,7 @@ retry: flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); - flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; @@ -2228,47 +2072,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup) { MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - MultiXactId curMulti; - bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); - /* - * We pretend that a wrap will happen halfway through the multixact ID - * space, but that's not really true, because multixacts wrap differently - * from transaction IDs. Note that, separately from any concern about - * multixact IDs wrapping, we must ensure that multixact members do not - * wrap. Limits for that are set in SetOffsetVacuumLimit, not here. - */ - multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1); - if (multiWrapLimit < FirstMultiXactId) - multiWrapLimit += FirstMultiXactId; - - /* - * We'll refuse to continue assigning MultiXactIds once we get within 3M - * multi of data loss. See SetTransactionIdLimit. - */ - multiStopLimit = multiWrapLimit - 3000000; - if (multiStopLimit < FirstMultiXactId) - multiStopLimit -= FirstMultiXactId; - - /* - * We'll start complaining loudly when we get within 40M multis of data - * loss. This is kind of arbitrary, but if you let your gas gauge get - * down to 2% of full, would you be looking for the next gas station? 
We - * need to be fairly liberal about this number because there are lots of - * scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - multiWarnLimit = multiWrapLimit - 40000000; - if (multiWarnLimit < FirstMultiXactId) - multiWarnLimit -= FirstMultiXactId; - /* * We'll start trying to force autovacuums when oldest_datminmxid gets to * be more than autovacuum_multixact_freeze_max_age mxids old. @@ -2278,25 +2084,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, * its value. See SetTransactionIdLimit. */ multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age; - if (multiVacLimit < FirstMultiXactId) - multiVacLimit += FirstMultiXactId; /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; MultiXactState->oldestMultiXactDB = oldest_datoid; MultiXactState->multiVacLimit = multiVacLimit; - MultiXactState->multiWarnLimit = multiWarnLimit; - MultiXactState->multiStopLimit = multiStopLimit; - MultiXactState->multiWrapLimit = multiWrapLimit; - curMulti = MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u", - multiWrapLimit, oldest_datoid))); - /* * Computing the actual limits is only possible once the data directory is * in a consistent state. There's no need to compute the limits while @@ -2308,59 +2103,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, return; Assert(!InRecovery); - - /* Set limits for offset vacuum. 
*/ - needs_offset_vacuum = SetOffsetVacuumLimit(is_startup); - - /* - * If past the autovacuum force point, immediately signal an autovac - * request. The reason for this is that autovac only processes one - * database per invocation. Once it's finished cleaning up the oldest - * database, it'll call here, and we'll signal the postmaster to start - * another iteration immediately if there are still any old databases. - */ - if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - needs_offset_vacuum) && IsUnderPostmaster) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (MultiXactIdPrecedes(multiWarnLimit, curMulti)) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. 
- */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datname, - multiWrapLimit - curMulti), - errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datoid, - multiWrapLimit - curMulti), - errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } /* @@ -2458,7 +2200,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) { int flagsoff; int flagsbit; - uint32 difference; + uint64 difference; /* * Only zero when at first entry of a page. @@ -2479,23 +2221,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) LWLockRelease(MultiXactMemberSLRULock); } - /* - * Compute the number of items till end of current page. Careful: if - * addition of unsigned ints wraps around, we're at the last page of - * the last segment; since that page holds a different number of items - * than other pages, we need to do it differently. - */ - if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset) - { - /* - * This is the last page of the last segment; we can compute the - * number of items left to allocate in it without modulo - * arithmetic. 
- */ - difference = MaxMultiXactOffset - offset + 1; - } - else - difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; + difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; /* * Advance to next page, taking care to properly handle the wraparound @@ -2560,184 +2286,6 @@ GetOldestMultiXactId(void) return oldestMXact; } -/* - * Determine how aggressively we need to vacuum in order to prevent member - * wraparound. - * - * To do so determine what's the oldest member offset and install the limit - * info in MultiXactState, where it can be used to prevent overrun of old data - * in the members SLRU area. - * - * The return value is true if emergency autovacuum is required and false - * otherwise. - */ -static bool -SetOffsetVacuumLimit(bool is_startup) -{ - MultiXactId oldestMultiXactId; - MultiXactId nextMXact; - MultiXactOffset oldestOffset = 0; /* placate compiler */ - MultiXactOffset prevOldestOffset; - MultiXactOffset nextOffset; - bool oldestOffsetKnown = false; - bool prevOldestOffsetKnown; - MultiXactOffset offsetStopLimit = 0; - MultiXactOffset prevOffsetStopLimit; - - /* - * NB: Have to prevent concurrent truncation, we might otherwise try to - * lookup an oldestMulti that's concurrently getting truncated away. - */ - LWLockAcquire(MultiXactTruncationLock, LW_SHARED); - - /* Read relevant fields from shared memory. */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMXact = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; - prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown; - prevOldestOffset = MultiXactState->oldestOffset; - prevOffsetStopLimit = MultiXactState->offsetStopLimit; - Assert(MultiXactState->finishedStartup); - LWLockRelease(MultiXactGenLock); - - /* - * Determine the offset of the oldest multixact. 
Normally, we can read - * the offset from the multixact itself, but there's an important special - * case: if there are no multixacts in existence at all, oldestMXact - * obviously can't point to one. It will instead point to the multixact - * ID that will be assigned the next time one is needed. - */ - if (oldestMultiXactId == nextMXact) - { - /* - * When the next multixact gets created, it will be stored at the next - * offset. - */ - oldestOffset = nextOffset; - oldestOffsetKnown = true; - } - else - { - /* - * Figure out where the oldest existing multixact's offsets are - * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, - * the supposedly-earliest multixact might not really exist. We are - * careful not to fail in that case. - */ - oldestOffsetKnown = - find_multixact_start(oldestMultiXactId, &oldestOffset); - - if (oldestOffsetKnown) - ereport(DEBUG1, - (errmsg_internal("oldest MultiXactId member is at offset %llu", - (unsigned long long) oldestOffset))); - else - ereport(LOG, - (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %llu does not exist on disk", - (unsigned long long) oldestMultiXactId))); - } - - LWLockRelease(MultiXactTruncationLock); - - /* - * If we can, compute limits (and install them MultiXactState) to prevent - * overrun of old data in the members SLRU area. We can only do so if the - * oldest offset is known though. 
- */ - if (oldestOffsetKnown) - { - /* move back to start of the corresponding segment */ - offsetStopLimit = oldestOffset - (oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); - - /* always leave one segment before the wraparound point */ - offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); - - if (!prevOldestOffsetKnown && !is_startup) - ereport(LOG, - (errmsg("MultiXact member wraparound protections are now enabled"))); - - ereport(DEBUG1, - (errmsg_internal("MultiXact member stop limit is now %llu based on MultiXact %llu", - (unsigned long long) offsetStopLimit, - (unsigned long long) oldestMultiXactId))); - } - else if (prevOldestOffsetKnown) - { - /* - * If we failed to get the oldest offset this time, but we have a - * value from a previous pass through this function, use the old - * values rather than automatically forcing an emergency autovacuum - * cycle again. - */ - oldestOffset = prevOldestOffset; - oldestOffsetKnown = true; - offsetStopLimit = prevOffsetStopLimit; - } - - /* Install the computed values */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - MultiXactState->oldestOffset = oldestOffset; - MultiXactState->oldestOffsetKnown = oldestOffsetKnown; - MultiXactState->offsetStopLimit = offsetStopLimit; - LWLockRelease(MultiXactGenLock); - - /* - * Do we need an emergency autovacuum? If we're not sure, assume yes. - */ - return !oldestOffsetKnown || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); -} - -/* - * Return whether adding "distance" to "start" would move past "boundary". - * - * We use this to determine whether the addition is "wrapping around" the - * boundary point, hence the name. The reason we don't want to use the regular - * 2^31-modulo arithmetic here is that we want to be able to use the whole of - * the 2^32-1 space here, allowing for more multixacts than would fit - * otherwise. 
- */ -static bool -MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, - uint32 distance) -{ - MultiXactOffset finish; - - /* - * Note that offset number 0 is not used (see GetMultiXactIdMembers), so - * if the addition wraps around the UINT_MAX boundary, skip that value. - */ - finish = start + distance; - if (finish < start) - finish++; - - /*----------------------------------------------------------------------- - * When the boundary is numerically greater than the starting point, any - * value numerically between the two is not wrapped: - * - * <----S----B----> - * [---) = F wrapped past B (and UINT_MAX) - * [---) = F not wrapped - * [----] = F wrapped past B - * - * When the boundary is numerically less than the starting point (i.e. the - * UINT_MAX wraparound occurs somewhere in between) then all values in - * between are wrapped: - * - * <----B----S----> - * [---) = F not wrapped past B (but wrapped past UINT_MAX) - * [---) = F wrapped past B (and UINT_MAX) - * [----] = F not wrapped - *----------------------------------------------------------------------- - */ - if (start < boundary) - return finish >= boundary || finish < start; - else - return finish >= boundary && finish < start; -} - /* * Find the starting offset of the given MultiXactId. * @@ -2781,93 +2329,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result) return true; } -/* - * Determine how many multixacts, and how many multixact members, currently - * exist. Return false if unable to determine. 
- */ -static bool -ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) -{ - MultiXactOffset nextOffset; - MultiXactOffset oldestOffset; - MultiXactId oldestMultiXactId; - MultiXactId nextMultiXactId; - bool oldestOffsetKnown; - - LWLockAcquire(MultiXactGenLock, LW_SHARED); - nextOffset = MultiXactState->nextOffset; - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMultiXactId = MultiXactState->nextMXact; - oldestOffset = MultiXactState->oldestOffset; - oldestOffsetKnown = MultiXactState->oldestOffsetKnown; - LWLockRelease(MultiXactGenLock); - - if (!oldestOffsetKnown) - return false; - - *members = nextOffset - oldestOffset; - *multixacts = nextMultiXactId - oldestMultiXactId; - return true; -} - -/* - * Multixact members can be removed once the multixacts that refer to them - * are older than every datminmxid. autovacuum_multixact_freeze_max_age and - * vacuum_multixact_freeze_table_age work together to make sure we never have - * too many multixacts; we hope that, at least under normal circumstances, - * this will also be sufficient to keep us from using too many offsets. - * However, if the average multixact has many members, we might exhaust the - * members space while still using few enough members that these limits fail - * to trigger relminmxid advancement by VACUUM. At that point, we'd have no - * choice but to start failing multixact-creating operations with an error. - * - * To prevent that, if more than a threshold portion of the members space is - * used, we effectively reduce autovacuum_multixact_freeze_max_age and - * to a value just less than the number of multixacts in use. We hope that - * this will quickly trigger autovacuuming on the table or tables with the - * oldest relminmxid, thus allowing datminmxid values to advance and removing - * some members. - * - * As the fraction of the member space currently in use grows, we become - * more aggressive in clamping this value. 
That not only causes autovacuum - * to ramp up, but also makes any manual vacuums the user issues more - * aggressive. This happens because vacuum_get_cutoffs() will clamp the - * freeze table and the minimum freeze age cutoffs based on the effective - * autovacuum_multixact_freeze_max_age this function returns. In the worst - * case, we'll claim the freeze_max_age to zero, and every vacuum of any - * table will freeze every multixact. - */ -int -MultiXactMemberFreezeThreshold(void) -{ - MultiXactOffset members; - uint32 multixacts; - uint32 victim_multixacts; - double fraction; - - /* If we can't determine member space utilization, assume the worst. */ - if (!ReadMultiXactCounts(&multixacts, &members)) - return 0; - - /* If member space utilization is low, no special action is required. */ - if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) - return autovacuum_multixact_freeze_max_age; - - /* - * Compute a target for relminmxid advancement. The number of multixacts - * we try to eliminate from the system is based on how far we are past - * MULTIXACT_MEMBER_SAFE_THRESHOLD. - */ - fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / - (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); - victim_multixacts = multixacts * fraction; - - /* fraction could be > 1.0, but lowest possible freeze age is zero */ - if (victim_multixacts > multixacts) - return 0; - return multixacts - victim_multixacts; -} - typedef struct mxtruncinfo { int64 earliestExistingPage; @@ -2894,35 +2355,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data /* * Delete members segments [oldest, newOldest) - * - * The members SLRU can, in contrast to the offsets one, be filled to almost - * the full range at once. This means SimpleLruTruncate() can't trivially be - * used - instead the to-be-deleted range is computed using the offsets - * SLRU. C.f. TruncateMultiXact(). 
*/ static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset) { - const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset); - int startsegment = MXOffsetToMemberSegment(oldestOffset); - int endsegment = MXOffsetToMemberSegment(newOldestOffset); - int segment = startsegment; - - /* - * Delete all the segments but the last one. The last segment can still - * contain, possibly partially, valid data. - */ - while (segment != endsegment) - { - elog(DEBUG2, "truncating multixact members segment %x", segment); - SlruDeleteSegment(MultiXactMemberCtl, segment); - - /* move to next segment, handling wraparound correctly */ - if (segment == maxsegment) - segment = 0; - else - segment += 1; - } + SimpleLruTruncate(MultiXactMemberCtl, + MXOffsetToMemberPage(newOldestOffset)); } /* @@ -3147,7 +2585,7 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2) /* * Decide whether a MultiXactMember page number is "older" for truncation - * purposes. There is no "invalid offset number" so use the numbers verbatim. + * purposes. There is no "invalid offset number" so use the numbers verbatim. 
*/ static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2) @@ -3172,7 +2610,7 @@ MultiXactMemberPagePrecedes(int64 page1, int64 page2) bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff < 0); } @@ -3186,7 +2624,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff <= 0); } @@ -3198,7 +2636,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) { - int32 diff = (int32) (offset1 - offset2); + int64 diff = (int64) (offset1 - offset2); return (diff < 0); } diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index ce1730740a..89669fbfb4 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1453,7 +1453,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) * must not assign. 
*/ lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ - rhs = lhs + (1U << 31); + rhs = lhs + (1ULL << 63); Assert(TransactionIdPrecedes(lhs, rhs)); Assert(TransactionIdPrecedes(rhs, lhs)); Assert(!TransactionIdPrecedes(lhs - 1, rhs)); @@ -1469,13 +1469,14 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) - || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + || (1ULL << 63) % per_page != 0); /* See CommitTsPagePrecedes() */ Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) - || (1U << 31) % per_page != 0); + || (1ULL << 63) % per_page != 0); Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + /* * GetNewTransactionId() has assigned the last XID it can safely use, and * that XID is in the *LAST* page of the second segment. 
We must not @@ -1485,7 +1486,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - @@ -1501,7 +1502,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index f45836192b..39260bf64b 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -222,11 +222,14 @@ void BootStrapSUBTRANS(void) { int slotno; + int64 pageno; + + pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid)); LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE); /* Create and zero the first page of the subtrans log */ - slotno = ZeroSUBTRANSPage(0); + slotno = ZeroSUBTRANSPage(pageno); /* Make sure it's written out */ SimpleLruWritePage(SubTransCtl, slotno); @@ -279,9 +282,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID) { (void) ZeroSUBTRANSPage(startPage); startPage++; - /* must account for wraparound */ - if (startPage > TransactionIdToPage(MaxTransactionId)) - startPage = 0; } (void) ZeroSUBTRANSPage(startPage); @@ -358,6 +358,7 @@ TruncateSUBTRANS(TransactionId oldestXact) * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. 
*/ + TransactionIdRetreat(oldestXact); cutoffPage = TransactionIdToPage(oldestXact); diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index beb7d54f4d..de3b30cec8 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -281,14 +281,14 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) { /* * If either ID is a permanent XID then we can just do unsigned - * comparison. If both are normal, do a modulo-2^32 comparison. + * comparison. If both are normal, do a modulo-2^64 comparison. */ - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 < id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff < 0); } @@ -298,12 +298,12 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 <= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff <= 0); } @@ -313,12 +313,12 @@ TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) bool TransactionIdFollows(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 > id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff > 0); } @@ -328,12 +328,12 @@ TransactionIdFollows(TransactionId id1, TransactionId id2) bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 >= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff >= 0); } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 6007fd50f0..205d577584 100644 --- a/src/backend/access/transam/twophase.c +++ 
b/src/backend/access/transam/twophase.c @@ -477,8 +477,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->lxid = xid; proc->backendId = InvalidBackendId; } - proc->xid = xid; - Assert(proc->xmin == InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, xid); + Assert(pg_atomic_read_u64(&proc->xmin) == InvalidTransactionId); proc->delayChkptFlags = 0; proc->statusFlags = 0; proc->pid = 0; @@ -793,7 +793,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS) * Form tuple with appropriate data. */ - values[0] = TransactionIdGetDatum(proc->xid); + values[0] = TransactionIdGetDatum(pg_atomic_read_u64(&proc->xid)); values[1] = CStringGetTextDatum(gxact->gid); values[2] = TimestampTzGetDatum(gxact->prepared_at); values[3] = ObjectIdGetDatum(gxact->owner); @@ -943,46 +943,8 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) /* State file support */ /************************************************************************/ -/* - * Compute the FullTransactionId for the given TransactionId. - * - * The wrap logic is safe here because the span of active xids cannot exceed one - * epoch at any given time. - */ -static inline FullTransactionId -AdjustToFullTransactionId(TransactionId xid) -{ - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 epoch; - - Assert(TransactionIdIsValid(xid)); - - LWLockAcquire(XidGenLock, LW_SHARED); - nextFullXid = ShmemVariableCache->nextXid; - LWLockRelease(XidGenLock); - - nextXid = XidFromFullTransactionId(nextFullXid); - epoch = EpochFromFullTransactionId(nextFullXid); - if (unlikely(xid > nextXid)) - { - /* Wraparound occurred, must be from a prev epoch. 
*/ - Assert(epoch > 0); - epoch--; - } - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - -static inline int -TwoPhaseFilePath(char *path, TransactionId xid) -{ - FullTransactionId fxid = AdjustToFullTransactionId(xid); - - return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", - EpochFromFullTransactionId(fxid), - XidFromFullTransactionId(fxid)); -} +#define TwoPhaseFilePath(path, xid) \ + snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%016llX", (unsigned long long) xid) /* * 2PC state file format: @@ -1925,11 +1887,9 @@ restoreTwoPhaseData(void) strspn(clde->d_name, "0123456789ABCDEF") == 16) { TransactionId xid; - FullTransactionId fxid; char *buf; - fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); - xid = XidFromFullTransactionId(fxid); + xid = (TransactionId) strtou64(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); @@ -2261,7 +2221,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { - /* Read and validate file */ buf = ReadTwoPhaseFile(xid, false); } else diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 4be1055c1a..b8eefe7d3f 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -66,9 +66,9 @@ GetNewTransactionId(bool isSubXact) if (IsBootstrapProcessingMode()) { Assert(!isSubXact); - MyProc->xid = BootstrapTransactionId; - ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId; - return FullTransactionIdFromEpochAndXid(0, BootstrapTransactionId); + pg_atomic_write_u64(&MyProc->xid, BootstrapTransactionId); + pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], BootstrapTransactionId); + return FullTransactionIdFromXid(BootstrapTransactionId); } /* safety check, we should never get this far in a HS standby */ @@ -80,19 +80,6 @@ GetNewTransactionId(bool isSubXact) full_xid = ShmemVariableCache->nextXid; xid = XidFromFullTransactionId(full_xid); - /*---------- - * Check to see if 
it's safe to assign another XID. This protects against - * catastrophic data loss due to XID wraparound. The basic rules are: - * - * If we're past xidVacLimit, start trying to force autovacuum cycles. - * If we're past xidWarnLimit, start issuing warnings. - * If we're past xidStopLimit, refuse to execute transactions, unless - * we are running in single-user mode (which gives an escape hatch - * to the DBA who somehow got past the earlier defenses). - * - * Note that this coding also appears in GetNewMultiXactId. - *---------- - */ if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit)) { /* @@ -102,11 +89,6 @@ GetNewTransactionId(bool isSubXact) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ - TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit; - TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit; - TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit; - Oid oldest_datoid = ShmemVariableCache->oldestXidDB; - LWLockRelease(XidGenLock); /* @@ -117,48 +99,6 @@ GetNewTransactionId(bool isSubXact) if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, xidStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new XIDs to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new XIDs to avoid wraparound data loss in database with 
OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %llu transactions", - oldest_datname, - (unsigned long long) xidWrapLimit - xid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %llu transactions", - oldest_datoid, - (unsigned long long) xidWrapLimit - xid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); full_xid = ShmemVariableCache->nextXid; @@ -228,8 +168,8 @@ GetNewTransactionId(bool isSubXact) Assert(!MyProc->subxidStatus.overflowed); /* LWLockRelease acts as barrier */ - MyProc->xid = xid; - ProcGlobal->xids[MyProc->pgxactoff] = xid; + pg_atomic_write_u64(&MyProc->xid, xid); + pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], xid); } else { @@ -270,7 +210,7 @@ ReadNextFullTransactionId(void) } /* - * Advance nextXid to the value after a given xid. The epoch is inferred. + * Advance nextXid to the value after a given xid. * This must only be called during recovery or from two-phase start-up code. 
*/ void @@ -278,7 +218,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) { FullTransactionId newNextFullXid; TransactionId next_xid; - uint32 epoch; /* * It is safe to read nextXid without a lock, because this is only called @@ -292,19 +231,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) if (!TransactionIdFollowsOrEquals(xid, next_xid)) return; - /* - * Compute the FullTransactionId that comes after the given xid. To do - * this, we preserve the existing epoch, but detect when we've wrapped - * into a new epoch. This is necessary because WAL records and 2PC state - * currently contain 32 bit xids. The wrap logic is safe in those cases - * because the span of active xids cannot exceed one epoch at any given - * point in the WAL stream. - */ + /* Compute the FullTransactionId that comes after the given xid. */ TransactionIdAdvance(xid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - if (unlikely(xid < next_xid)) - ++epoch; - newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid); + newNextFullXid = FullTransactionIdFromXid(xid); /* * We still need to take a lock to modify the value when there are @@ -345,61 +274,14 @@ void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; - TransactionId xidWarnLimit; - TransactionId xidStopLimit; - TransactionId xidWrapLimit; TransactionId curXid; Assert(TransactionIdIsNormal(oldest_datfrozenxid)); - /* - * The place where we actually get into deep trouble is halfway around - * from the oldest potentially-existing XID. (This calculation is - * probably off by one or two counts, because the special XIDs reduce the - * size of the loop a little bit. But we throw in plenty of slop below, - * so it doesn't matter.) 
- */ - xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1); - if (xidWrapLimit < FirstNormalTransactionId) - xidWrapLimit += FirstNormalTransactionId; - - /* - * We'll refuse to continue assigning XIDs in interactive mode once we get - * within 3M transactions of data loss. This leaves lots of room for the - * DBA to fool around fixing things in a standalone backend, while not - * being significant compared to total XID space. (VACUUM requires an XID - * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA - * might do by reflex, assigns an XID. Hence, we had better be sure - * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two - * completely-idle segments. In the event of edge-case bugs involving - * page or segment arithmetic, idle segments render the bugs unreachable - * outside of single-user mode. - */ - xidStopLimit = xidWrapLimit - 3000000; - if (xidStopLimit < FirstNormalTransactionId) - xidStopLimit -= FirstNormalTransactionId; - - /* - * We'll start complaining loudly when we get within 40M transactions of - * data loss. This is kind of arbitrary, but if you let your gas gauge - * get down to 2% of full, would you be looking for the next gas station? - * We need to be fairly liberal about this number because there are lots - * of scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - xidWarnLimit = xidWrapLimit - 40000000; - if (xidWarnLimit < FirstNormalTransactionId) - xidWarnLimit -= FirstNormalTransactionId; - /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. * - * Note: guc.c ensures that autovacuum_freeze_max_age is in a sane range, - * so that xidVacLimit will be well before xidWarnLimit. 
- * * Note: autovacuum_freeze_max_age is a PGC_POSTMASTER parameter so that * we don't have to worry about dealing with on-the-fly changes in its * value. It doesn't look practical to update shared state from a GUC @@ -416,18 +298,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->oldestXid = oldest_datfrozenxid; ShmemVariableCache->xidVacLimit = xidVacLimit; - ShmemVariableCache->xidWarnLimit = xidWarnLimit; - ShmemVariableCache->xidStopLimit = xidStopLimit; - ShmemVariableCache->xidWrapLimit = xidWrapLimit; ShmemVariableCache->oldestXidDB = oldest_datoid; curXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); LWLockRelease(XidGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("transaction ID wrap limit is %llu, limited by database with OID %u", - (unsigned long long) xidWrapLimit, oldest_datoid))); - /* * If past the autovacuum force point, immediately signal an autovac * request. The reason for this is that autovac only processes one @@ -438,41 +312,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. 
- */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %llu transactions", - oldest_datname, - (unsigned long long) xidWrapLimit - curXid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %llu transactions", - oldest_datoid, - (unsigned long long) xidWrapLimit - curXid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 8d7427115b..483afac8b7 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5714,6 +5714,17 @@ XactLogCommitRecord(TimestampTz commit_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS; @@ -5733,16 +5744,6 @@ XactLogCommitRecord(TimestampTz commit_time, xl_invals.nmsgs = nmsgs; } - if (TransactionIdIsValid(twophase_xid)) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; - Assert(twophase_gid != NULL); - - if (XLogLogicalInfoActive()) - xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; - } - /* dump transaction origin information */ if (replorigin_session_origin != InvalidRepOriginId) 
{ @@ -5863,6 +5864,17 @@ XactLogAbortRecord(TimestampTz abort_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS; @@ -5879,7 +5891,8 @@ XactLogAbortRecord(TimestampTz abort_time, if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); Assert(twophase_gid != NULL); if (XLogLogicalInfoActive()) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2d603d8dee..6613563aff 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4805,8 +4805,7 @@ BootStrapXLOG(void) checkPoint.ThisTimeLineID = BootstrapTimeLineID; checkPoint.PrevTimeLineID = BootstrapTimeLineID; checkPoint.fullPageWrites = fullPageWrites; - checkPoint.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + checkPoint.nextXid = FullTransactionIdFromXid(FirstNormalTransactionId); checkPoint.nextOid = FirstGenbkiObjectId; checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; @@ -6945,7 +6944,7 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -7976,7 +7975,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* 
Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -8037,7 +8036,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index e4aaa551a0..e84261684c 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -274,6 +274,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) BufferGetTag(buffer, ®buf->rlocator, ®buf->forkno, ®buf->block); regbuf->page = BufferGetPage(buffer); regbuf->flags = flags; + if (IsBufferConverted(buffer)) + { + regbuf->flags |= REGBUF_CONVERTED; + MarkBufferConverted(buffer, false); + } regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head; regbuf->rdata_len = 0; @@ -607,6 +612,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, needs_backup = true; else if (regbuf->flags & REGBUF_NO_IMAGE) needs_backup = false; + else if (regbuf->flags & REGBUF_CONVERTED) + needs_backup = true; else if (!doPageWrites) needs_backup = false; else diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index e0baa86bd3..5cbd428e52 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -2134,37 +2134,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) return true; } - -#ifndef FRONTEND - -/* - * Extract the FullTransactionId from a WAL record. 
- */ -FullTransactionId -XLogRecGetFullXid(XLogReaderState *record) -{ - TransactionId xid, - next_xid; - uint32 epoch; - - /* - * This function is only safe during replay, because it depends on the - * replay state. See AdvanceNextFullTransactionIdPastXid() for more. - */ - Assert(AmStartupProcess() || !IsUnderPostmaster); - - xid = XLogRecGetXid(record); - next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - - /* - * If xid is numerically greater than next_xid, it has to be from the last - * epoch. - */ - if (unlikely(xid > next_xid)) - --epoch; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - -#endif diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index e01dca9b7c..16de61f5ed 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -120,7 +120,7 @@ static const struct typinfo TypInfo[] = { F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid, F_TIDIN, F_TIDOUT}, - {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_XID, TYPSTORAGE_PLAIN, InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_CIDIN, F_CIDOUT}, diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 7224d96695..3313d309ee 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -159,8 +159,8 @@ static const FormData_pg_attribute a2 = { .attnum = MinTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, @@ -187,8 +187,8 @@ static const FormData_pg_attribute a4 = { .attnum = MaxTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = 
true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index da969bd2f9..ac6fd7cc2a 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -146,7 +146,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached, TransactionId xmin; Snapshot snap; - xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data); + xmin = HeapTupleGetXmin(inheritsTuple); snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 264f25a8f9..ff60d303e1 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -186,7 +186,7 @@ typedef struct AsyncQueueEntry } AsyncQueueEntry; /* Currently, no field of AsyncQueueEntry requires more than int alignment */ -#define QUEUEALIGN(len) INTALIGN(len) +#define QUEUEALIGN(len) TYPEALIGN(8, len) #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index ae38f83024..246220a1de 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -130,7 +130,8 @@ static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath); static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, - List *rlocatorlist, Snapshot snapshot); + List *rlocatorlist, Snapshot snapshot, + bool is_toast); static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, char *srcpath); @@ -308,9 +309,10 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) } /* Append relevant pg_class tuples for current page to rlocatorlist. 
*/ + /* No toast is expected in sys tables */ rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid, srcpath, rlocatorlist, - snapshot); + snapshot, false); UnlockReleaseBuffer(buf); } @@ -328,7 +330,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) static List * ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, List *rlocatorlist, - Snapshot snapshot) + Snapshot snapshot, bool is_toast) { BlockNumber blkno = BufferGetBlockNumber(buf); OffsetNumber offnum; @@ -358,6 +360,7 @@ ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationRelationId; + HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast); /* Skip tuples that are not visible to this snapshot. */ if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index e56205abd8..d0aac5d080 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1760,7 +1760,7 @@ DefineIndex(Oid tableId, set_indexsafe_procflags(); /* We should now definitely not be advertising any xmin. */ - Assert(MyProc->xmin == InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); /* * The index is now valid in the sense that it contains all currently @@ -4433,8 +4433,8 @@ set_indexsafe_procflags(void) * This should only be called before installing xid or xmin in MyProc; * otherwise, concurrent processes could see an Xmin that moves backwards. 
*/ - Assert(MyProc->xid == InvalidTransactionId && - MyProc->xmin == InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xid) == InvalidTransactionId && + pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_IN_SAFE_IC; diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index da2ace79cc..47e7b879ea 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -48,6 +48,25 @@ #include "utils/syscache.h" #include "utils/varlena.h" +static inline void +SeqTupleSetXmin(HeapTuple htup, TransactionId xid) +{ + htup->t_xmin = xid; + htup->t_data->t_choice.t_heap.t_xmin = xid; +} + +static inline void +SeqTupleSetXmax(HeapTuple htup, TransactionId xid) +{ + htup->t_xmax = xid; + htup->t_data->t_choice.t_heap.t_xmax = xid; +} + +static inline TransactionId +SeqTupleHeaderGetRawXmax(HeapTupleHeader htup) +{ + return htup->t_choice.t_heap.t_xmax; +} /* * We don't want to log each fetching of a value from a sequence, @@ -383,10 +402,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway. 
*/ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); - HeapTupleHeaderSetXminFrozen(tuple->t_data); + SeqTupleSetXmin(tuple, FrozenTransactionId); + HeapTupleHeaderStoreXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + SeqTupleSetXmax(tuple, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); @@ -1208,6 +1227,7 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) /* Note we currently only bother to set these two fields of *seqdatatuple */ seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); seqdatatuple->t_len = ItemIdGetLength(lp); + HeapTupleCopyHeaderXids(seqdatatuple); /* * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on @@ -1218,9 +1238,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + if (SeqTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + SeqTupleSetXmax(seqdatatuple, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 087ac3e3b4..3931c075ee 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -66,12 +66,12 @@ /* * GUC parameters */ -int vacuum_freeze_min_age; -int vacuum_freeze_table_age; -int vacuum_multixact_freeze_min_age; -int vacuum_multixact_freeze_table_age; -int vacuum_failsafe_age; -int vacuum_multixact_failsafe_age; +int64 vacuum_freeze_min_age; +int64 vacuum_freeze_table_age; +int64 
vacuum_multixact_freeze_min_age; +int64 vacuum_multixact_freeze_table_age; +int64 vacuum_failsafe_age; +int64 vacuum_multixact_failsafe_age; /* * Variables for cost-based vacuum delay. The defaults differ between @@ -1075,7 +1075,7 @@ bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs) { - int freeze_min_age, + int64 freeze_min_age, multixact_freeze_min_age, freeze_table_age, multixact_freeze_table_age, @@ -1125,7 +1125,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Almost ready to set freeze output parameters; check if OldestXmin or @@ -1484,6 +1484,9 @@ vac_update_relstats(Relation relation, futurexid = false; if (frozenxid_updated) *frozenxid_updated = false; + + Assert(TransactionIdPrecedesOrEquals(frozenxid, ReadNextTransactionId())); + if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid) { bool update = false; @@ -1507,6 +1510,9 @@ vac_update_relstats(Relation relation, futuremxid = false; if (minmulti_updated) *minmulti_updated = false; + + Assert(MultiXactIdPrecedesOrEquals(minmulti, ReadNextMultiXactId())); + if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti) { bool update = false; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 24c2b60c62..56d09d28c3 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3196,6 +3196,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = tuphdr; /* diff --git 
a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 16704c0c2f..5d299e0083 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1047,6 +1047,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = tuple; result = heap_getattr(&tmptup, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index b16fbe9e22..2981dc4934 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3783,6 +3783,7 @@ ExecModifyTable(PlanState *pstate) HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ + HeapTupleCopyHeaderXids(&oldtupdata); oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? 
InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 0e46c59d25..88846e4fd2 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1154,6 +1154,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(mtuple, tuple); } else { diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 72c7963578..e32335ea3c 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1027,14 +1027,14 @@ _read${n}(void) elsif ($t eq 'uint32' || $t eq 'bits32' || $t eq 'BlockNumber' - || $t eq 'Index' - || $t eq 'SubTransactionId') + || $t eq 'Index') { print $off "\tWRITE_UINT_FIELD($f);\n"; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; } elsif ($t eq 'uint64' - || $t eq 'AclMode') + || $t eq 'AclMode' + || $t eq 'SubTransactionId') { print $off "\tWRITE_UINT64_FIELD($f);\n"; print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7159c775fb..825d23a18e 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -243,7 +243,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * src/backend/access/heap/README.HOT for discussion. 
*/ if (index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple), TransactionXmin)) { root->glob->transientPlan = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 3e037248d6..a508d5d694 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -123,8 +123,8 @@ int autovacuum_vac_ins_thresh; double autovacuum_vac_ins_scale; int autovacuum_anl_thresh; double autovacuum_anl_scale; -int autovacuum_freeze_max_age; -int autovacuum_multixact_freeze_max_age; +int64 autovacuum_freeze_max_age; +int64 autovacuum_multixact_freeze_max_age; double autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; @@ -159,10 +159,10 @@ static TransactionId recentXid; static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ -static int default_freeze_min_age; -static int default_freeze_table_age; -static int default_multixact_freeze_min_age; -static int default_multixact_freeze_table_age; +static int64 default_freeze_min_age; +static int64 default_freeze_table_age; +static int64 default_multixact_freeze_min_age; +static int64 default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; @@ -338,15 +338,15 @@ static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age); + int64 effective_multixact_freeze_max_age); static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, 
PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1143,6 +1143,7 @@ do_start_worker(void) ListCell *cell; TransactionId xidForceLimit; MultiXactId multiForceLimit; + int64 multiMembersThreshold; bool for_xid_wrap; bool for_multi_wrap; avw_dbase *avdb; @@ -1179,17 +1180,18 @@ do_start_worker(void) * particular tables, but not loosened.) */ recentXid = ReadNextTransactionId(); - xidForceLimit = recentXid - autovacuum_freeze_max_age; - /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ - /* this can cause the limit to go backwards by 3, but that's OK */ - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + xidForceLimit = recentXid - autovacuum_freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + multiMembersThreshold = autovacuum_multixact_freeze_max_age; + if (recentMulti > FirstMultiXactId + multiMembersThreshold) + multiForceLimit = recentMulti - multiMembersThreshold; + else + multiForceLimit = FirstMultiXactId; /* * Choose a database to connect to. 
We pick the database that was least @@ -2010,7 +2012,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; - int effective_multixact_freeze_max_age; + int64 effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; @@ -2033,7 +2035,7 @@ do_autovacuum(void) * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Find the pg_database entry and select the default freeze ages. We use @@ -2811,7 +2813,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age) + int64 effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2850,10 +2852,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, /* OK, it needs something done */ if (doanalyze || dovacuum) { - int freeze_min_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_table_age; int log_min_duration; /* @@ -2951,7 +2953,7 @@ static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound) @@ -3013,7 +3015,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -3042,8 +3044,8 @@ 
relation_needs_vacanalyze(Oid relid, anltuples; /* freeze parameters */ - int freeze_max_age; - int multixact_freeze_max_age; + int64 freeze_max_age; + int64 multixact_freeze_max_age; TransactionId xidForceLimit; MultiXactId multiForceLimit; @@ -3093,17 +3095,19 @@ relation_needs_vacanalyze(Oid relid, av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ - xidForceLimit = recentXid - freeze_max_age; - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + freeze_max_age) + xidForceLimit = recentXid - freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) && TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit)); if (!force_vacuum) { - multiForceLimit = recentMulti - multixact_freeze_max_age; - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + if (recentMulti > FirstMultiXactId + multixact_freeze_max_age) + multiForceLimit = recentMulti - multixact_freeze_max_age; + else + multiForceLimit = FirstMultiXactId; force_vacuum = MultiXactIdIsValid(classForm->relminmxid) && MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit); } diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 1237118e84..ffc4ead751 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -902,8 +902,14 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileLocator target_locator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_insert *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if (isinit) + rec_data += sizeof(TransactionId) * 2; + + xlrec = (xl_heap_insert *) rec_data; /* * Ignore 
insert records without new tuples (this does happen when @@ -959,8 +965,13 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferChange *change; char *data; RelFileLocator target_locator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_update *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if (isinit) + rec_data += sizeof(TransactionId) * 2; + xlrec = (xl_heap_update *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL); @@ -1120,8 +1131,13 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) char *tupledata; Size tuplelen; RelFileLocator rlocator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if (isinit) + rec_data += sizeof(TransactionId) * 2; + xlrec = (xl_heap_multi_insert *) rec_data; /* * Ignore insert records without new tuples. This happens when a @@ -1178,6 +1194,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We can only figure this out after reassembling the transactions. 
*/ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple->tuple); tuple->tuple.t_len = datalen + SizeofHeapTupleHeader; @@ -1269,6 +1286,7 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple) /* we can only figure this out after reassembling the transactions */ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple->tuple); /* data is not stored aligned, copy to aligned storage */ memcpy((char *) &xlhdr, diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index aa471dccdf..21cd2a5202 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -64,7 +64,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn) /* fixed fields */ pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); } /* @@ -78,7 +78,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data) if (begin_data->final_lsn == InvalidXLogRecPtr) elog(ERROR, "final_lsn not set in begin message"); begin_data->committime = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); } @@ -132,7 +132,7 @@ logicalrep_write_begin_prepare(StringInfo out, ReorderBufferTXN *txn) pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -152,7 +152,7 @@ logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_da if (begin_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn not set in begin prepare message"); begin_data->prepare_time = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(begin_data->gid, pq_getmsgstring(in), 
sizeof(begin_data->gid)); @@ -185,7 +185,7 @@ logicalrep_write_prepare_common(StringInfo out, LogicalRepMsgType type, pq_sendint64(out, prepare_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -224,7 +224,7 @@ logicalrep_read_prepare_common(StringInfo in, char *msgtype, if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in %s message", msgtype); prepare_data->prepare_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); if (prepare_data->xid == InvalidTransactionId) elog(ERROR, "invalid two-phase transaction ID in %s message", msgtype); @@ -265,7 +265,7 @@ logicalrep_write_commit_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, commit_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -291,7 +291,7 @@ logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData * if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in commit prepared message"); prepare_data->commit_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(prepare_data->gid, pq_getmsgstring(in), sizeof(prepare_data->gid)); @@ -323,7 +323,7 @@ logicalrep_write_rollback_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, txn->end_lsn); pq_sendint64(out, prepare_time); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -351,7 +351,7 @@ logicalrep_read_rollback_prepared(StringInfo in, elog(ERROR, "rollback_end_lsn is not set in rollback 
prepared message"); rollback_data->prepare_time = pq_getmsgint64(in); rollback_data->rollback_time = pq_getmsgint64(in); - rollback_data->xid = pq_getmsgint(in, 4); + rollback_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(rollback_data->gid, pq_getmsgstring(in), sizeof(rollback_data->gid)); @@ -418,7 +418,7 @@ logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -467,7 +467,7 @@ logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -542,7 +542,7 @@ logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -596,7 +596,7 @@ logicalrep_write_truncate(StringInfo out, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); pq_sendint32(out, nrelids); @@ -654,7 +654,7 @@ logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); pq_sendint8(out, flags); pq_sendint64(out, lsn); @@ -676,7 +676,7 @@ logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + 
pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -732,7 +732,7 @@ logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid) /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid)); if (!HeapTupleIsValid(tup)) @@ -1074,7 +1074,7 @@ logicalrep_write_stream_start(StringInfo out, Assert(TransactionIdIsValid(xid)); /* transaction ID (we're starting to stream, so must be valid) */ - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* 1 if this is the first streaming segment for this xid */ pq_sendbyte(out, first_segment ? 1 : 0); @@ -1090,7 +1090,7 @@ logicalrep_read_stream_start(StringInfo in, bool *first_segment) Assert(first_segment); - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); *first_segment = (pq_getmsgbyte(in) == 1); return xid; @@ -1119,7 +1119,7 @@ logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn, Assert(TransactionIdIsValid(txn->xid)); /* transaction ID */ - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send the flags field (unused for now) */ pq_sendbyte(out, flags); @@ -1139,7 +1139,7 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data) TransactionId xid; uint8 flags; - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); /* read flags (unused for now) */ flags = pq_getmsgbyte(in); @@ -1172,8 +1172,8 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid, Assert(TransactionIdIsValid(xid) && TransactionIdIsValid(subxid)); /* transaction ID */ - pq_sendint32(out, xid); - pq_sendint32(out, subxid); + pq_sendint64(out, xid); + pq_sendint64(out, subxid); if (write_abort_info) { @@ -1195,8 +1195,8 @@ logicalrep_read_stream_abort(StringInfo in, { Assert(abort_data); - abort_data->xid = pq_getmsgint(in, 4); - abort_data->subxid = pq_getmsgint(in, 4); + 
abort_data->xid = pq_getmsgint64(in); + abort_data->subxid = pq_getmsgint64(in); if (read_abort_info) { diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 4c4b1c4868..f355e26257 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -5151,8 +5151,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; - uint32 f_hi, - f_lo; + uint32 f_lsn_hi, + f_lsn_lo, + f_mapped_xid_hi, + f_mapped_xid_lo, + f_create_xid_hi, + f_create_xid_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -5164,11 +5168,14 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &f_dboid, &f_relid, &f_hi, &f_lo, - &f_mapped_xid, &f_create_xid) != 6) + &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo, + &f_mapped_xid_hi, &f_mapped_xid_lo, + &f_create_xid_hi, &f_create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - f_lsn = ((uint64) f_hi) << 32 | f_lo; + f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo; + f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo; + f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo; /* mapping for another database */ if (f_dboid != dboid) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 5c630116ec..95871f5ad9 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -591,7 +591,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder) elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore"); /* so we don't overwrite the existing value */ - if (TransactionIdIsValid(MyProc->xmin)) + if (TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) 
elog(ERROR, "cannot build an initial slot snapshot when MyProc->xmin already is valid"); snap = SnapBuildBuildSnapshot(builder); @@ -613,7 +613,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder) elog(ERROR, "cannot build an initial slot snapshot as oldest safe xid %llu follows snapshot's xmin %llu", (unsigned long long) safeXid, (unsigned long long) snap->xmin); - MyProc->xmin = snap->xmin; + pg_atomic_write_u64(&MyProc->xmin, snap->xmin); /* allocate in transaction context */ newxip = (TransactionId *) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 899ea5502d..37a8cc944f 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -601,7 +601,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s) * We should have received XID of the subxact as the first part of the * message, so extract it. */ - current_xid = pq_getmsgint(s, 4); + current_xid = pq_getmsgint64(s); if (!TransactionIdIsValid(current_xid)) ereport(ERROR, diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 5507e2e1f6..2fc25a298d 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1167,10 +1167,6 @@ static void XLogWalRcvSendHSFeedback(bool immed) { TimestampTz now; - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 xmin_epoch, - catalog_xmin_epoch; TransactionId xmin, catalog_xmin; @@ -1222,31 +1218,15 @@ XLogWalRcvSendHSFeedback(bool immed) catalog_xmin = InvalidTransactionId; } - /* - * Get epoch and adjust if nextXid and oldestXmin are different sides of - * the epoch boundary. 
- */ - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - xmin_epoch = EpochFromFullTransactionId(nextFullXid); - catalog_xmin_epoch = xmin_epoch; - if (nextXid < xmin) - xmin_epoch--; - if (nextXid < catalog_xmin) - catalog_xmin_epoch--; - - elog(DEBUG2, "sending hot standby feedback xmin %llu epoch %u catalog_xmin %llu catalog_xmin_epoch %u", - (unsigned long long) xmin, xmin_epoch, - (unsigned long long) catalog_xmin, catalog_xmin_epoch); + elog(DEBUG2, "sending hot standby feedback xmin %llu catalog_xmin %llu", + (unsigned long long) xmin, (unsigned long long) catalog_xmin); /* Construct the message and send it. */ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); - pq_sendint32(&reply_message, xmin); - pq_sendint32(&reply_message, xmin_epoch); - pq_sendint32(&reply_message, catalog_xmin); - pq_sendint32(&reply_message, catalog_xmin_epoch); + pq_sendint64(&reply_message, xmin); + pq_sendint64(&reply_message, catalog_xmin); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) primary_has_standby_xmin = true; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index bdfa5e8fa3..719dc89365 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -255,7 +255,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); -static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p); @@ -293,7 +292,7 @@ InitWalSender(void) */ if (MyDatabaseId == InvalidOid) { - Assert(MyProc->xmin 
== InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS; ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; @@ -2174,7 +2173,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac ReplicationSlot *slot = MyReplicationSlot; SpinLockAcquire(&slot->mutex); - MyProc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId); /* * For physical replication we don't need the interlock provided by xmin @@ -2206,44 +2205,6 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac } } -/* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. - * - * Epoch of nextXid should be same as standby, or if the counter has - * wrapped, then one greater than standby. - * - * This check doesn't care about whether clog exists for these xids - * at all. - */ -static bool -TransactionIdInRecentPast(TransactionId xid, uint32 epoch) -{ - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 nextEpoch; - - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - nextEpoch = EpochFromFullTransactionId(nextFullXid); - - if (xid <= nextXid) - { - if (epoch != nextEpoch) - return false; - } - else - { - if (epoch + 1 != nextEpoch) - return false; - } - - if (!TransactionIdPrecedesOrEquals(xid, nextXid)) - return false; /* epoch OK, but it's wrapped around */ - - return true; -} - /* * Hot Standby feedback */ @@ -2251,9 +2212,7 @@ static void ProcessStandbyHSFeedbackMessage(void) { TransactionId feedbackXmin; - uint32 feedbackEpoch; TransactionId feedbackCatalogXmin; - uint32 feedbackCatalogEpoch; TimestampTz replyTime; /* @@ -2262,10 +2221,8 @@ ProcessStandbyHSFeedbackMessage(void) * of this message. 
*/ replyTime = pq_getmsgint64(&reply_message); - feedbackXmin = pq_getmsgint(&reply_message, 4); - feedbackEpoch = pq_getmsgint(&reply_message, 4); - feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); - feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); + feedbackXmin = pq_getmsgint64(&reply_message); + feedbackCatalogXmin = pq_getmsgint64(&reply_message); if (message_level_is_interesting(DEBUG2)) { @@ -2274,11 +2231,9 @@ ProcessStandbyHSFeedbackMessage(void) /* Copy because timestamptz_to_str returns a static buffer */ replyTimeStr = pstrdup(timestamptz_to_str(replyTime)); - elog(DEBUG2, "hot standby feedback xmin %llu epoch %u, catalog_xmin %llu epoch %u reply_time %s", + elog(DEBUG2, "hot standby feedback xmin %llu, catalog_xmin %llu reply_time %s", (unsigned long long) feedbackXmin, - feedbackEpoch, (unsigned long long) feedbackCatalogXmin, - feedbackCatalogEpoch, replyTimeStr); pfree(replyTimeStr); @@ -2303,24 +2258,12 @@ ProcessStandbyHSFeedbackMessage(void) if (!TransactionIdIsNormal(feedbackXmin) && !TransactionIdIsNormal(feedbackCatalogXmin)) { - MyProc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId); if (MyReplicationSlot != NULL) PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin); return; } - /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. Ignore if not. 
- */ - if (TransactionIdIsNormal(feedbackXmin) && - !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch)) - return; - - if (TransactionIdIsNormal(feedbackCatalogXmin) && - !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch)) - return; - /* * Set the WalSender's xmin equal to the standby's requested xmin, so that * the xmin will be taken into account by GetSnapshotData() / @@ -2358,9 +2301,9 @@ ProcessStandbyHSFeedbackMessage(void) { if (TransactionIdIsNormal(feedbackCatalogXmin) && TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin)) - MyProc->xmin = feedbackCatalogXmin; + pg_atomic_write_u64(&MyProc->xmin, feedbackCatalogXmin); else - MyProc->xmin = feedbackXmin; + pg_atomic_write_u64(&MyProc->xmin, feedbackXmin); } } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 7f014a0cbb..16e46a331a 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -2456,6 +2456,7 @@ statext_expressions_load(Oid stxoid, bool inh, int idx) ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = td; + HeapTupleCopyHeaderXids(&tmptup); tup = heap_copytuple(&tmptup); diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile index fd7c40dcb0..ffcc0fc290 100644 --- a/src/backend/storage/buffer/Makefile +++ b/src/backend/storage/buffer/Makefile @@ -17,6 +17,7 @@ OBJS = \ buf_table.o \ bufmgr.o \ freelist.o \ - localbuf.o + localbuf.o \ + heap_convert.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index f7c67d504c..3487610497 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -472,7 +472,8 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) ) -static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, +static Buffer ReadBuffer_common(Relation reln, + SMgrRelation 
smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit); @@ -800,7 +801,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * miss. */ pgstat_count_buffer_read(reln); - buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence, + buf = ReadBuffer_common(reln, + RelationGetSmgr(reln), reln->rd_rel->relpersistence, forkNum, blockNum, mode, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); @@ -827,7 +829,7 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, SMgrRelation smgr = smgropen(rlocator, InvalidBackendId); - return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT : + return ReadBuffer_common(NULL, smgr, permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED, forkNum, blockNum, mode, strategy, &hit); } @@ -1002,7 +1004,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, bool hit; Assert(extended_by == 0); - buffer = ReadBuffer_common(bmr.smgr, bmr.relpersistence, + buffer = ReadBuffer_common(bmr.rel, bmr.smgr, bmr.relpersistence, fork, extend_to - 1, mode, strategy, &hit); } @@ -1016,7 +1018,8 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, * *hit is set to true if the request was satisfied from shared buffer cache. 
*/ static Buffer -ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, +ReadBuffer_common(Relation reln, + SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) { @@ -1170,6 +1173,30 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, blockNum, relpath(smgr->smgr_rlocator, forkNum)))); } + + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION && + !PageIsNew((Page) bufBlock)) + { + Buffer buf = BufferDescriptorGetBuffer(bufHdr); + + /* + * All the forks but MAIN_FORKNUM should be converted to the + * actual page layout version in pg_upgrade. + */ + if (forkNum != MAIN_FORKNUM) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid fork type (%d) in block %u of relation %s", + forkNum, blockNum, + relpath(smgr->smgr_rlocator, forkNum)))); + + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + /* Check for no concurrent changes */ + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION) + convert_page(reln, bufBlock, buf, blockNum); + + LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); + } } /* @@ -4767,6 +4794,64 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) } } +/* + * Mark buffer as converted - ie its format is changed without logical changes. + * + * It will override `full_page_write` GUC setting in XLogRecordAssemble. 
+ */
+void
+MarkBufferConverted(Buffer buffer, bool converted)
+{
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+	bool		has_mark;
+
+	if (!BufferIsValid(buffer))
+		elog(ERROR, "bad buffer ID: %d", buffer);
+
+	Assert(!BufferIsLocal(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+	Assert(GetPrivateRefCount(buffer) > 0);
+	if (converted)
+	{
+		/* here, either share or exclusive lock is OK */
+		Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
+	}
+
+	buf_state = pg_atomic_read_u32(&bufHdr->state);
+	has_mark = (buf_state & BM_CONVERTED) != 0;
+	if (converted == has_mark)
+		return;
+
+	buf_state = LockBufHdr(bufHdr);
+	buf_state &= ~BM_CONVERTED;
+	if (converted)
+		buf_state |= BM_CONVERTED;
+	UnlockBufHdr(bufHdr, buf_state);
+}
+
+bool
+IsBufferConverted(Buffer buffer)
+{
+
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+
+	if (!BufferIsValid(buffer))
+		elog(ERROR, "bad buffer ID: %d", buffer);
+
+	Assert(!BufferIsLocal(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+	Assert(GetPrivateRefCount(buffer) > 0);
+
+	buf_state = pg_atomic_read_u32(&bufHdr->state);
+	return (buf_state & BM_CONVERTED) != 0;
+}
+
 /*
  * Release buffer content locks for shared buffers.
  *
@@ -4801,6 +4886,47 @@ UnlockBuffers(void)
 	}
 }
 
+/*
+ * Is the shared buffer locked?
+ */
+bool
+IsBufferLocked(Buffer buffer)
+{
+	BufferDesc *buf;
+
+	if (buffer == InvalidBuffer)
+		return true;
+
+	Assert(BufferIsPinned(buffer));
+	if (BufferIsLocal(buffer))
+		return true;			/* local buffers need no lock */
+
+	buf = GetBufferDescriptor(buffer - 1);
+
+	return LWLockHeldByMe(BufferDescriptorGetContentLock(buf));
+}
+
+/*
+ * Is the shared buffer locked in exclusive mode?
+ */
+bool
+IsBufferLockedExclusive(Buffer buffer)
+{
+	BufferDesc *buf;
+
+	if (buffer == InvalidBuffer)
+		return true;
+
+	Assert(BufferIsPinned(buffer));
+	if (BufferIsLocal(buffer))
+		return true;			/* local buffers need no lock */
+
+	buf = GetBufferDescriptor(buffer - 1);
+
+	return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(buf),
+								LW_EXCLUSIVE);
+}
+
 /*
  * Acquire or release the content_lock for the buffer.
  */
diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c
new file mode 100644
index 0000000000..2609f11072
--- /dev/null
+++ b/src/backend/storage/buffer/heap_convert.c
@@ -0,0 +1,549 @@
+/*-------------------------------------------------------------------------
+ *
+ * heap_convert.c
+ *	  Heap page converter from 32bit to 64bit xid format
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ *	  src/backend/storage/buffer/heap_convert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "catalog/catalog.h"
+#include "storage/bufmgr.h"
+#include "storage/checksum.h"
+
+static void repack_heap_tuples(Relation rel, Page page, Buffer buf,
+							   BlockNumber blkno, bool double_xmax);
+
+/*
+ * itemoffcompare
+ *		Sorting support for repack_heap_tuples()
+ */
+int
+itemoffcompare(const void *item1, const void *item2)
+{
+	/* Sort in decreasing itemoff order */
+	return ((ItemIdCompactData *) item2)->itemoff -
+		((ItemIdCompactData *) item1)->itemoff;
+}
+
+/*
+ * Lazy page conversion from 32-bit to 64-bit XID at first read.
+ */ +void +convert_page(Relation rel, Page page, Buffer buf, BlockNumber blkno) +{ + static unsigned logcnt = 0; + bool logit; + PageHeader hdr = (PageHeader) page; + GenericXLogState *state = NULL; + uint16 checksum; + bool try_double_xmax; + + /* Not during XLog replaying */ + Assert(rel != NULL); + + /* Verify checksum */ + if (hdr->pd_checksum) + { + checksum = pg_checksum_page((char *) page, blkno); + if (checksum != hdr->pd_checksum) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("page verification failed, calculated checksum %u but expected %u", + checksum, hdr->pd_checksum))); + } + + /* + * We occasionally force logging of page conversion, so never-changed + * pages are converted in the end. FORCE_LOG_EVERY is chosen arbitrarily + * to log neither too much nor too little. + */ +#define FORCE_LOG_EVERY 128 + logit = !RecoveryInProgress() && XLogIsNeeded() && RelationNeedsWAL(rel); + logit = logit && (++logcnt % FORCE_LOG_EVERY) == 0; + if (logit) + { + state = GenericXLogStart(rel); + page = GenericXLogRegisterBuffer(state, buf, + GENERIC_XLOG_FULL_IMAGE); + hdr = (PageHeader) page; + } +#ifdef USE_ASSERT_CHECKING + else + { + /* Not already converted */ + Assert(PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION); + /* Page in 32-bit xid format should not have PageSpecial. 
*/ + Assert(PageGetSpecialSize(page) == 0); + } +#endif + + switch (rel->rd_rel->relkind) + { + case 't': + try_double_xmax = hdr->pd_upper - hdr->pd_lower < + MAXALIGN(sizeof(ToastPageSpecialData)); + repack_heap_tuples(rel, page, buf, blkno, try_double_xmax); + break; + case 'r': + case 'p': + case 'm': + try_double_xmax = hdr->pd_upper - hdr->pd_lower < + MAXALIGN(sizeof(HeapPageSpecialData)); + repack_heap_tuples(rel, page, buf, blkno, try_double_xmax); + break; + case 'i': + /* no need to convert index */ + case 'S': + /* no real need to convert sequences */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conversion for relation \"%s\" cannot be done", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + } + + hdr->pd_checksum = pg_checksum_page((char *) page, blkno); + + PageSetPageSizeAndVersion(page, PageGetPageSize(page), + PG_PAGE_LAYOUT_VERSION); + + if (logit) + { + /* + * Finish logging buffer conversion and mark buffer as dirty. + */ + Assert(state != NULL); + MarkBufferDirty(buf); + GenericXLogFinish(state); + } + else + { + /* + * Otherwise, it will be logged with full-page-write record on first + * actual change. + */ + MarkBufferConverted(buf, true); + } +} + +/* + * Convert xmin and xmax in a tuple. + * This also considers special cases: "double xmax" page format and multixact + * in xmax. 
+ */ +static void +convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base, + MultiXactId multi_base, bool double_xmax) +{ + /* Convert xmin */ + if (double_xmax) + { + /* Prepare tuple for "double xmax" page format */ + tuple->t_infomask |= HEAP_XMIN_FROZEN; + tuple->t_choice.t_heap.t_xmin = 0; + } + else + { + TransactionId xmin = tuple->t_choice.t_heap.t_xmin; + + if (TransactionIdIsNormal(xmin)) + { + if (HeapTupleHeaderXminFrozen(tuple)) + tuple->t_choice.t_heap.t_xmin = FrozenTransactionId; + else if (HeapTupleHeaderXminInvalid(tuple)) + tuple->t_choice.t_heap.t_xmin = InvalidTransactionId; + else + { + Assert(xmin >= xid_base + FirstNormalTransactionId); + /* Subtract xid_base from normal xmin */ + tuple->t_choice.t_heap.t_xmin = xmin - xid_base; + } + } + } + + /* If tuple has multixact flag, handle mxid wraparound */ + if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) && + !(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + MultiXactId mxid = tuple->t_choice.t_heap.t_xmax; + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + if (double_xmax) + { + /* Save converted mxid into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, mxid); + } + else + { + /* + * Save converted mxid offset relative to (minmxid - 1), which + * will be page's mxid base. 
+ */ + Assert(mxid - multi_base + FirstMultiXactId <= PG_UINT32_MAX); + tuple->t_choice.t_heap.t_xmax = + (uint32) (mxid - multi_base + FirstMultiXactId); + } + } + /* Convert xmax */ + else if (!(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xmax = tuple->t_choice.t_heap.t_xmax; + + if (double_xmax) + { + /* Save converted xmax into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, xmax); + } + else if (TransactionIdIsNormal(xmax)) + { + /* Subtract xid_base from normal xmax */ + Assert(xmax >= xid_base + FirstNormalTransactionId); + tuple->t_choice.t_heap.t_xmax = xmax - xid_base; + } + } + else + { + if (double_xmax) + HeapTupleHeaderSetDoubleXmax(tuple, InvalidTransactionId); + else + tuple->t_choice.t_heap.t_xmax = InvalidTransactionId; + } +} + +/* + * Correct page xmin/xmax based on tuple xmin/xmax values. + */ +static void +compute_xid_min_max(HeapTuple tuple, MultiXactId multi_base, + TransactionId *xid_min, TransactionId *xid_max, + MultiXactId *multi_min, MultiXactId *multi_max) +{ + /* xmin */ + if (!HeapTupleHeaderXminInvalid(tuple->t_data) && + !HeapTupleHeaderXminFrozen(tuple->t_data)) + { + TransactionId xid = HeapTupleGetRawXmin(tuple); + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } + + /* xmax */ + if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xid; + + if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + { + MultiXactId mxid = HeapTupleGetRawXmax(tuple); + + Assert(MultiXactIdIsValid(mxid)); + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + *multi_max = Max(*multi_max, mxid); + *multi_min = Min(*multi_min, mxid); + + /* + * Also take into account hidden update xid, which can be + * extracted by the vacuum. 
+ */ + if (tuple->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY) + xid = InvalidTransactionId; + else + xid = HeapTupleGetUpdateXid(tuple); + } + else + { + xid = HeapTupleGetRawXmax(tuple); + } + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } +} + +/* + * Returns true if both: + * - xid_max: an uppper boundary of xmin's and xmax'es of all tuples on a page + * - xid_min: a lower boundary of xmin's and xmax'es of all tuples on a page + * can be expressed by 32-bit number relative to page's xid_base/multi_base + * or invalid. + * + * True value effectively means that these tuples can be directly put on one + * page in 64-xid format. + */ +static inline bool +xids_fit_page(TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max) +{ + bool xid_max_fits = false; + bool multi_max_fits = false; + + if (xid_max == InvalidTransactionId) + xid_max_fits = true; + + if (xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId) + xid_max_fits = true; + + if (multi_max == InvalidMultiXactId) + multi_max_fits = true; + + if (multi_max - multi_min <= MaxShortTransactionId - FirstMultiXactId) + multi_max_fits = true; + + return xid_max_fits && multi_max_fits; +} + +/* + * Set "base" for page in 64-bit XID format. + * + * This should not be called for double xmax pages. They do not have place for + * page special. 
+ */ +static inline void +heap_page_set_base(Page page, + TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max, + TransactionId *xid_base, MultiXactId *multi_base, + bool is_toast) +{ + PageHeader hdr = (PageHeader) page; + + if (xid_max != InvalidTransactionId) + *xid_base = xid_min - FirstNormalTransactionId; + else + *xid_base = InvalidTransactionId; + + if (multi_max != InvalidMultiXactId) + *multi_base = multi_min - FirstMultiXactId; + else + *multi_base = InvalidMultiXactId; + + if (is_toast) + { + ToastPageSpecial special; + + hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData)); + special = ToastPageGetSpecial(page); + special->pd_xid_base = *xid_base; + } + else + { + HeapPageSpecial special; + + hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = *xid_base; + special->pd_multi_base = *multi_base; + } +} + +/* + * repack_heap_tuples + * Convert heap page format reusing space of dead tuples + */ +static void +repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno, + bool try_double_xmax) +{ + ItemIdCompactData items[MaxHeapTuplesPerPage]; + ItemIdCompact itemPtr = items; + int nitems = 0, + maxoff = PageGetMaxOffsetNumber(page), + idx, + occupied_space = 0; + Offset upper; + bool double_xmax, + special_fits, + toast; + PageHeader hdr = (PageHeader) page, + new_hdr; + PGAlignedBlock zerobuf = {0}; + Page new_page; + MultiXactId multi_base = rel->rd_rel->relminmxid, + multi_min = MaxMultiXactId, + multi_max = InvalidMultiXactId; + TransactionId xid_base = rel->rd_rel->relfrozenxid, + xid_min = MaxTransactionId, + xid_max = InvalidTransactionId; + + toast = IsToastRelation(rel); + + if (TransactionIdIsNormal(hdr->pd_prune_xid)) + xid_min = xid_max = hdr->pd_prune_xid; + + for (idx = 0; idx < maxoff; idx++) + { + HeapTupleData tuple; + ItemId lp; + + lp = PageGetItemId(page, idx + 1); + + /* Skip redirects and 
items without storage */ + if (!ItemIdHasStorage(lp)) + continue; + + /* Build in-memory tuple representation */ + tuple.t_tableOid = 1; /* doesn't matter in this case */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); + HeapTupleCopyHeaderXids(&tuple); + tuple.t_len = ItemIdGetLength(lp); + ItemPointerSet(&(tuple.t_self), blkno, ItemIdGetOffset(lp)); + + /* + * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or + * HEAPTUPLE_RECENTLY_DEAD. And since this is the first time we read + * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See + * HeapTupleSatisfiesVacuum() for details + */ + if (try_double_xmax && + HeapTupleSatisfiesVacuum(&tuple, + (TransactionId) 1 << 32, buf) == HEAPTUPLE_DEAD) + { + ItemIdSetDead(lp); + } + + if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp)) + { + itemPtr->offsetindex = idx; + itemPtr->itemoff = ItemIdGetOffset(lp); + if (unlikely(itemPtr->itemoff < hdr->pd_upper || + itemPtr->itemoff >= hdr->pd_special)) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item pointer: %u", + itemPtr->itemoff))); + } + + itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp)); + occupied_space += itemPtr->alignedlen; + nitems++; + itemPtr++; + if (try_double_xmax) + { + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleHeaderStoreXminFrozen(tuple.t_data); + } + + compute_xid_min_max(&tuple, multi_base, + &xid_min, &xid_max, + &multi_min, &multi_max); + } + } + + new_page = (Page) zerobuf.data; + MemSet(new_page, 0, BLCKSZ); + /* Write new header */ + new_hdr = (PageHeader) new_page; + *new_hdr = *hdr; + new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData); + + if (toast) + special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >= + sizeof(ToastPageSpecialData); + else + special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >= + sizeof(HeapPageSpecialData); + + double_xmax = !special_fits || + !xids_fit_page(xid_min, xid_max, multi_min, multi_max); + + if 
(!double_xmax) + { + Assert(xid_max == InvalidTransactionId || xid_max >= xid_min); + Assert(multi_max == InvalidMultiXactId || multi_max >= multi_min); + + heap_page_set_base(new_page, + xid_min, xid_max, + multi_min, multi_max, + &xid_base, &multi_base, + toast); + + HeapPageSetPruneXid(new_page, new_hdr->pd_prune_xid, toast); + } + else + { + /* No space for special area, switch to "double xmax" format */ + elog(DEBUG2, "convert heap page %u of relation \"%s\" to double xmax format", + blkno, RelationGetRelationName(rel)); + + if (try_double_xmax) + { + xid_base = InvalidTransactionId; + multi_base = InvalidMultiXactId; + } + else + { + repack_heap_tuples(rel, page, buf, blkno, true); + return; + } + } + + /* Copy ItemIds with an offset */ + memcpy((char *) new_page + SizeOfPageHeaderData, + (char *) page + SizeOfPageHeaderData, + hdr->pd_lower - SizeOfPageHeaderData); + + /* Move live tuples */ + upper = new_hdr->pd_special; + for (idx = 0; idx < nitems; idx++) + { + HeapTupleHeader tuple; + ItemId lp; + + itemPtr = &items[idx]; + lp = PageGetItemId(new_page, itemPtr->offsetindex + 1); + upper -= itemPtr->alignedlen; + occupied_space -= itemPtr->alignedlen; + + memcpy((char *) new_page + upper, + (char *) page + itemPtr->itemoff, + itemPtr->alignedlen); + + tuple = (HeapTupleHeader) (((char *) new_page) + upper); + + convert_heap_tuple_xids(tuple, xid_base, multi_base, double_xmax); + + lp->lp_off = upper; + } + + Assert(occupied_space == 0); + + new_hdr->pd_upper = upper; + if (new_hdr->pd_lower > new_hdr->pd_upper) + elog(ERROR, "cannot convert block %u of relation \"%s\"", + blkno, RelationGetRelationName(rel)); + + memcpy(page, new_page, BLCKSZ); +} diff --git a/src/backend/storage/buffer/meson.build b/src/backend/storage/buffer/meson.build index ea2f9c045a..e1ca4a23d4 100644 --- a/src/backend/storage/buffer/meson.build +++ b/src/backend/storage/buffer/meson.build @@ -5,5 +5,6 @@ backend_sources += files( 'buf_table.c', 'bufmgr.c', 'freelist.c', + 
'heap_convert.c', 'localbuf.c', ) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 55d719816f..5967955150 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -66,7 +66,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" -#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) +#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var)))) /* Our shared memory area */ typedef struct ProcArrayStruct @@ -366,8 +366,6 @@ static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); static void MaintainLatestCompletedXid(TransactionId latestXid); static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); -static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, - TransactionId xid); static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons); /* @@ -525,7 +523,8 @@ ProcArrayAdd(PGPROC *proc) arrayP->pgprocnos[index] = proc->pgprocno; proc->pgxactoff = index; - ProcGlobal->xids[index] = proc->xid; + pg_atomic_write_u64(&ProcGlobal->xids[index], + pg_atomic_read_u64(&proc->xid)); ProcGlobal->subxidStates[index] = proc->subxidStatus; ProcGlobal->statusFlags[index] = proc->statusFlags; @@ -585,7 +584,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) if (TransactionIdIsValid(latestXid)) { - Assert(TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[myoff]))); /* Advance global latestCompletedXid while holding the lock */ MaintainLatestCompletedXid(latestXid); @@ -593,17 +592,17 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) /* Same with xactCompletionCount */ ShmemVariableCache->xactCompletionCount++; - ProcGlobal->xids[myoff] = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[myoff], InvalidTransactionId); ProcGlobal->subxidStates[myoff].overflowed = false; ProcGlobal->subxidStates[myoff].count = 0; } else { /* 
Shouldn't be trying to remove a live transaction here */ - Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff])))); } - Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff])))); Assert(ProcGlobal->subxidStates[myoff].count == 0); Assert(ProcGlobal->subxidStates[myoff].overflowed == false); @@ -649,7 +648,6 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) LWLockRelease(ProcArrayLock); } - /* * ProcArrayEndTransaction -- mark a transaction as no longer running * @@ -674,7 +672,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) * else is taking a snapshot. See discussion in * src/backend/access/transam/README. */ - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); /* * If we can immediately acquire ProcArrayLock, we clear our own XID @@ -696,12 +694,12 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) * anyone else's calculation of a snapshot. We might change their * estimate of global xmin, but that's OK. */ - Assert(!TransactionIdIsValid(proc->xid)); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); Assert(proc->subxidStatus.count == 0); Assert(!proc->subxidStatus.overflowed); proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -737,13 +735,14 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) * processes' PGPROC entries. 
*/ Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE)); - Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff])); - Assert(ProcGlobal->xids[pgxactoff] == proc->xid); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]))); + Assert(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]) == + pg_atomic_read_u64(&proc->xid)); - ProcGlobal->xids[pgxactoff] = InvalidTransactionId; - proc->xid = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, InvalidTransactionId); proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -796,7 +795,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid) uint32 wakeidx; /* We should definitely have an XID to clear. */ - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); /* Add ourselves to the list of processes needing a group XID clear. 
*/ proc->procArrayGroupMember = true; @@ -925,11 +924,11 @@ ProcArrayClearTransaction(PGPROC *proc) pgxactoff = proc->pgxactoff; - ProcGlobal->xids[pgxactoff] = InvalidTransactionId; - proc->xid = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, InvalidTransactionId); proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); proc->recoveryConflictPending = false; Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK)); @@ -973,8 +972,7 @@ MaintainLatestCompletedXid(TransactionId latestXid) if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(cur_latest, latestXid); + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(IsBootstrapProcessingMode() || @@ -988,7 +986,6 @@ static void MaintainLatestCompletedXidRecovery(TransactionId latestXid) { FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid; - FullTransactionId rel; Assert(AmStartupProcess() || !IsUnderPostmaster); Assert(LWLockHeldByMe(ProcArrayLock)); @@ -998,14 +995,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid) * latestCompletedXid to be initialized in recovery. But in recovery it's * safe to access nextXid without a lock for the startup process. 
*/ - rel = ShmemVariableCache->nextXid; Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid)); if (!FullTransactionIdIsValid(cur_latest) || TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(rel, latestXid); + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); @@ -1383,7 +1378,7 @@ bool TransactionIdIsInProgress(TransactionId xid) { static TransactionId *xids = NULL; - static TransactionId *other_xids; + static pg_atomic_uint64 *other_xids; XidCacheStatus *other_subxidstates; int nxids = 0; ProcArrayStruct *arrayP = procArray; @@ -1479,7 +1474,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + pxid = pg_atomic_read_u64(&(other_xids[pgxactoff])); if (!TransactionIdIsValid(pxid)) continue; @@ -1511,7 +1506,7 @@ TransactionIdIsInProgress(TransactionId xid) for (j = pxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]); + TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]); if (TransactionIdEquals(cxid, xid)) { @@ -1596,7 +1591,7 @@ TransactionIdIsInProgress(TransactionId xid) topxid = SubTransGetTopmostTransaction(xid); Assert(TransactionIdIsValid(topxid)); if (!TransactionIdEquals(topxid, xid) && - pg_lfind32(topxid, xids, nxids)) + pg_lfind64(topxid, xids, nxids)) return true; cachedXidIsNotInProgress = xid; @@ -1616,7 +1611,7 @@ TransactionIdIsActive(TransactionId xid) { bool result = false; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; int i; /* @@ -1635,7 +1630,7 @@ TransactionIdIsActive(TransactionId xid) TransactionId pxid; /* Fetch xid just once - see 
GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[i]); + pxid = pg_atomic_read_u64(&(other_xids[i])); if (!TransactionIdIsValid(pxid)) continue; @@ -1718,7 +1713,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) ProcArrayStruct *arrayP = procArray; TransactionId kaxmin; bool in_recovery = RecoveryInProgress(); - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; /* inferred after ProcArrayLock is released */ h->catalog_oldest_nonremovable = InvalidTransactionId; @@ -1734,7 +1729,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * additions. */ { - TransactionId initial; + TransactionId initial, + xid; initial = XidFromFullTransactionId(h->latest_completed); Assert(TransactionIdIsValid(initial)); @@ -1756,8 +1752,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * definition, can't be any newer changes in the temp table than * latestCompletedXid. */ - if (TransactionIdIsValid(MyProc->xid)) - h->temp_oldest_nonremovable = MyProc->xid; + xid = pg_atomic_read_u64(&MyProc->xid); + if (TransactionIdIsValid(xid)) + h->temp_oldest_nonremovable = xid; else h->temp_oldest_nonremovable = initial; } @@ -1779,8 +1776,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionId xmin; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); - xmin = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&(other_xids[index])); + xmin = pg_atomic_read_u64(&proc->xmin); /* * Consider both the transaction's Xmin, and its Xid. @@ -2106,8 +2103,8 @@ GetSnapshotDataReuse(Snapshot snapshot) * requirement that concurrent GetSnapshotData() calls yield the same * xmin. 
*/ - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = snapshot->xmin; + if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = snapshot->xmin); RecentXmin = snapshot->xmin; Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin)); @@ -2158,7 +2155,7 @@ Snapshot GetSnapshotData(Snapshot snapshot) { ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; TransactionId xmin; TransactionId xmax; int count = 0; @@ -2221,8 +2218,8 @@ GetSnapshotData(Snapshot snapshot) latest_completed = ShmemVariableCache->latestCompletedXid; mypgxactoff = MyProc->pgxactoff; - myxid = other_xids[mypgxactoff]; - Assert(myxid == MyProc->xid); + myxid = pg_atomic_read_u64(&other_xids[mypgxactoff]); + Assert(myxid == pg_atomic_read_u64(&MyProc->xid)); oldestxid = ShmemVariableCache->oldestXid; curXactCompletionCount = ShmemVariableCache->xactCompletionCount; @@ -2256,7 +2253,7 @@ GetSnapshotData(Snapshot snapshot) for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + TransactionId xid = pg_atomic_read_u64(&(other_xids[pgxactoff])); uint8 statusFlags; Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff); @@ -2393,8 +2390,8 @@ GetSnapshotData(Snapshot snapshot) replication_slot_xmin = procArray->replication_slot_xmin; replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = xmin; + if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); LWLockRelease(ProcArrayLock); @@ -2406,12 +2403,7 @@ GetSnapshotData(Snapshot snapshot) FullTransactionId def_vis_fxid_data; FullTransactionId oldestfxid; - /* - * Converting oldestXid 
is only safe when xid horizon cannot advance, - * i.e. holding locks. While we don't hold the lock anymore, all the - * necessary data has been gathered with lock held. - */ - oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); + oldestfxid = FullTransactionIdFromXid(oldestxid); /* Check whether there's a replication slot requiring an older xmin. */ def_vis_xid_data = @@ -2430,8 +2422,8 @@ GetSnapshotData(Snapshot snapshot) def_vis_xid = TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid); - def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid); - def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data); + def_vis_fxid = FullTransactionIdFromXid(def_vis_xid); + def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data); /* * Check if we can increase upper bound. As a previous @@ -2450,7 +2442,7 @@ GetSnapshotData(Snapshot snapshot) /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ if (TransactionIdIsNormal(myxid)) GlobalVisTempRels.definitely_needed = - FullXidRelativeTo(latest_completed, myxid); + FullTransactionIdFromXid(myxid); else { GlobalVisTempRels.definitely_needed = latest_completed; @@ -2557,7 +2549,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, /* * Likewise, let's just make real sure its xmin does cover us. */ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&proc->xmin); if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -2568,7 +2560,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, * GetSnapshotData first, we'll be overwriting a valid xmin here, so * we don't check that.) */ - MyProc->xmin = TransactionXmin = xmin; + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); result = true; break; @@ -2612,7 +2604,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * can't go backwards. Also, make sure it's running in the same database, * so that the per-database xmin cannot go backwards. 
*/ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&proc->xmin); if (proc->databaseId == MyDatabaseId && TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, xmin)) @@ -2621,7 +2613,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * Install xmin and propagate the statusFlags that affect how the * value is interpreted by vacuum. */ - MyProc->xmin = TransactionXmin = xmin; + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) | (proc->statusFlags & PROC_XMIN_FLAGS); ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; @@ -2672,7 +2664,7 @@ GetRunningTransactionData(void) static RunningTransactionsData CurrentRunningXactsData; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData; TransactionId latestCompletedXid; TransactionId oldestRunningXid; @@ -2731,7 +2723,7 @@ GetRunningTransactionData(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); /* * We don't need to store transactions that don't have a TransactionId @@ -2844,7 +2836,7 @@ TransactionId GetOldestActiveTransactionId(void) { ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; TransactionId oldestRunningXid; int index; @@ -2870,7 +2862,7 @@ GetOldestActiveTransactionId(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); if (!TransactionIdIsNormal(xid)) continue; @@ -2958,7 +2950,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) */ if (!recovery_in_progress) { - TransactionId *other_xids = 
ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; /* * Spin over procArray collecting min(ProcGlobal->xids[i]) @@ -2968,7 +2960,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); if (!TransactionIdIsNormal(xid)) continue; @@ -3163,7 +3155,7 @@ BackendXidGetPid(TransactionId xid) { int result = 0; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; int index; if (xid == InvalidTransactionId) /* never match invalid xid */ @@ -3173,7 +3165,7 @@ BackendXidGetPid(TransactionId xid) for (index = 0; index < arrayP->numProcs; index++) { - if (other_xids[index] == xid) + if (pg_atomic_read_u64(&other_xids[index]) == xid) { int pgprocno = arrayP->pgprocnos[index]; PGPROC *proc = &allProcs[pgprocno]; @@ -3257,7 +3249,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = pg_atomic_read_u64(&proc->xmin); if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -3357,7 +3349,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once - can't change on us, but good coding */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = pg_atomic_read_u64(&proc->xmin); /* * We ignore an invalid pxmin because this means that backend has @@ -3484,7 +3476,7 @@ MinimumActiveBackends(int min) continue; /* do not count deleted entries */ if (proc == MyProc) continue; /* do not count myself */ - if (proc->xid == InvalidTransactionId) + if (pg_atomic_read_u64(&proc->xid) == InvalidTransactionId) continue; /* do not count if no XID assigned */ 
if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -4071,17 +4063,13 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) { GlobalVisSharedRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->shared_oldest_nonremovable); + FullTransactionIdFromXid(horizons->shared_oldest_nonremovable); GlobalVisCatalogRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->catalog_oldest_nonremovable); + FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable); GlobalVisDataRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->data_oldest_nonremovable); + FullTransactionIdFromXid(horizons->data_oldest_nonremovable); GlobalVisTempRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->temp_oldest_nonremovable); + FullTransactionIdFromXid(horizons->temp_oldest_nonremovable); /* * In longer running transactions it's possible that transactions we @@ -4170,15 +4158,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid) { FullTransactionId fxid; - /* - * Convert 32 bit argument to FullTransactionId. We can do so safely - * because we know the xid has to, at the very least, be between - * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a - * lock to determine either, we can just compare with - * state->definitely_needed, which was based on those value at the time - * the current snapshot was built. - */ - fxid = FullXidRelativeTo(state->definitely_needed, xid); + fxid = FullTransactionIdFromXid(xid); return GlobalVisTestIsRemovableFullXid(state, fxid); } @@ -4241,32 +4221,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) return GlobalVisTestIsRemovableXid(state, xid); } -/* - * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it - * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). - * - * Be very careful about when to use this function. 
It can only safely be used - * when there is a guarantee that xid is within MaxTransactionId / 2 xids of - * rel. That e.g. can be guaranteed if the caller assures a snapshot is - * held by the backend and xid is from a table (where vacuum/freezing ensures - * the xid has to be within that range), or if xid is from the procarray and - * prevents xid wraparound that way. - */ -static inline FullTransactionId -FullXidRelativeTo(FullTransactionId rel, TransactionId xid) -{ - TransactionId rel_xid = XidFromFullTransactionId(rel); - - Assert(TransactionIdIsValid(xid)); - Assert(TransactionIdIsValid(rel_xid)); - - /* not guaranteed to find issues, but likely to catch mistakes */ - AssertTransactionIdInAllowableRange(xid); - - return FullTransactionIdFromU64(U64FromFullTransactionId(rel) - + (int32) (xid - rel_xid)); -} - /* ---------------------------------------------- * KnownAssignedTransactionIds sub-module diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 3d97c75bf1..4170653cc6 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -429,8 +429,8 @@ BackendIdGetTransactionIds(int backendID, TransactionId *xid, if (proc != NULL) { - *xid = proc->xid; - *xmin = proc->xmin; + *xid = pg_atomic_read_u64(&proc->xid); + *xmin = pg_atomic_read_u64(&proc->xmin); *nsubxid = proc->subxidStatus.count; *overflowed = proc->subxidStatus.overflowed; } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 9352428e99..010ab63c2f 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -522,8 +522,8 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHor FullTransactionId nextXid = ReadNextFullTransactionId(); uint64 diff; - diff = U64FromFullTransactionId(nextXid) - - U64FromFullTransactionId(snapshotConflictHorizon); + diff = XidFromFullTransactionId(nextXid) - + 
XidFromFullTransactionId(snapshotConflictHorizon); if (diff < MaxTransactionId / 2) { TransactionId truncated; diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index b447ddf11b..27dc00ac6a 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -1240,10 +1240,16 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1); break; case LOCKTAG_TRANSACTION: - appendStringInfo(buf, - _("transaction %u"), - tag->locktag_field1); - break; + { + TransactionId xid; + + xid = (TransactionId) tag->locktag_field2 << 32; + xid += tag->locktag_field1; + + appendStringInfo(buf, _("transaction %llu"), + (unsigned long long) xid); + break; + } case LOCKTAG_VIRTUALTRANSACTION: appendStringInfo(buf, _("virtual transaction %d/%u"), diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index b8c57b3e16..3e2a55e25e 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -3981,7 +3981,7 @@ GetRunningTransactionLocks(int *nlocks) { PGPROC *proc = proclock->tag.myProc; LOCK *lock = proclock->tag.myLock; - TransactionId xid = proc->xid; + TransactionId xid = pg_atomic_read_u64(&proc->xid); /* * Don't record locks for transactions if we know they have @@ -4601,7 +4601,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) * so we won't save an XID of a different VXID. It doesn't matter whether * we save this before or after setting up the primary lock table entry. 
*/ - xid = proc->xid; + xid = pg_atomic_read_u64(&proc->xid); /* Done with proc->fpLockBits */ LWLockRelease(&proc->fpInfoLock); diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 8bfff14a86..1fd326736a 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -334,9 +334,9 @@ static SlruCtlData SerialSlruCtlData; #define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \ (SerialSlruCtl->shared->page_buffer[slotno] + \ - ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) + ((((uint64) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) -#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE) +#define SerialPage(xid) ((int64) (((uint64) (xid)) / SERIAL_ENTRIESPERPAGE)) typedef struct SerialControlData { @@ -1042,31 +1042,6 @@ CheckPointPredicate(void) /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. - * - * XXX: It's possible that the SLRU is not needed again until XID - * wrap-around has happened, so that the segment containing headPage - * that we leave behind will appear to be new again. In that case it - * won't be removed until XID horizon advances enough to make it - * current again. - * - * XXX: This should happen in vac_truncate_clog(), not in checkpoints. - * Consider this scenario, starting from a system with no in-progress - * transactions and VACUUM FREEZE having maximized oldestXact: - * - Start a SERIALIZABLE transaction. - * - Start, finish, and summarize a SERIALIZABLE transaction, creating - * one SLRU page. - * - Consume XIDs to reach xidStopLimit. - * - Finish all transactions. Due to the long-running SERIALIZABLE - * transaction, earlier checkpoints did not touch headPage. The - * next checkpoint will change it, but that checkpoint happens after - * the end of the scenario. - * - VACUUM to advance XID limits. - * - Consume ~2M XIDs, crossing the former xidWrapLimit. 
- * - Start, finish, and summarize a SERIALIZABLE transaction. - * SerialAdd() declines to create the targetPage, because headPage - * is not regarded as in the past relative to that targetPage. The - * transaction instigating the summarize fails in - * SimpleLruReadPage(). */ truncateCutoffPage = serialControl->headPage; serialControl->headPage = -1; @@ -3935,7 +3910,7 @@ XidIsConcurrent(TransactionId xid) if (TransactionIdFollowsOrEquals(xid, snap->xmax)) return true; - return pg_lfind32(xid, snap->xip, snap->xcnt); + return pg_lfind64(xid, snap->xip, snap->xcnt); } bool diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index b6451d9d08..cdfdc47ddb 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -203,7 +203,7 @@ InitProcGlobal(void) * how hotly they are accessed. */ ProcGlobal->xids = - (TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids)); + (pg_atomic_uint64 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids)); MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids)); ProcGlobal->subxidStates = (XidCacheStatus *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->subxidStates)); MemSet(ProcGlobal->subxidStates, 0, TotalProcs * sizeof(*ProcGlobal->subxidStates)); @@ -216,6 +216,8 @@ InitProcGlobal(void) /* Common initialization for all PGPROCs, regardless of type. */ + pg_atomic_init_u64(&ProcGlobal->xids[i], 0); + /* * Set up per-PGPROC semaphore, latch, and fpInfoLock. 
Prepared xact * dummy PGPROCs don't need these though - they're never associated @@ -377,8 +379,8 @@ InitProcess(void) MyProc->lxid = InvalidLocalTransactionId; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId); + pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId); MyProc->pid = MyProcPid; /* backendId, databaseId and roleId will be filled in later */ MyProc->backendId = InvalidBackendId; @@ -574,8 +576,8 @@ InitAuxiliaryProcess(void) MyProc->lxid = InvalidLocalTransactionId; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId); + pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId); MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 9a302ddc30..572c6d6b63 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -21,11 +21,31 @@ #include "storage/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" /* GUC variable */ bool ignore_checksum_failure = false; +/* + * HeapPageSpecialData used when pd_special == BLCKSZ. This is a special format + * used when a page with 32-bit xids doesn't fit HeapPageSpecialData. Then + * all xmin's are frozen (can do this for all live tuples after pg_upgrade), + * while 64-bit xmax is stored in both t_heap.t_xmin and t_heap.t_xmax. + * This is the so-called "double xmax" format.
+ */ +static HeapPageSpecialData heapDoubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId, + .pd_multi_base = MaxTransactionId +}; +HeapPageSpecial heapDoubleXmaxSpecial = &heapDoubleXmaxSpecialData; + +static ToastPageSpecialData toastDoubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId +}; +ToastPageSpecial toastDoubleXmaxSpecial = &toastDoubleXmaxSpecialData; /* ---------------------------------------------------------------- * Page support functions @@ -432,15 +452,144 @@ PageRestoreTempPage(Page tempPage, Page oldPage) } /* - * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + * Get minimum and maximum values of xid and multixact on "double xmax" page. */ -typedef struct itemIdCompactData +static void +heap_page_double_xmax_get_min_max(Page page, + TransactionId *xid_min, + TransactionId *xid_max, + MultiXactId *multi_min, + MultiXactId *multi_max) { - uint16 offsetindex; /* linp array index */ - int16 itemoff; /* page offset of item data */ - uint16 alignedlen; /* MAXALIGN(item data len) */ -} itemIdCompactData; -typedef itemIdCompactData *itemIdCompact; + bool xid_found = false, + multi_found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + TransactionId xmax; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + xmax = HeapTupleHeaderGetDoubleXmax(htup); + + if (!TransactionIdIsNormal(xmax)) + continue; + + if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!xid_found) + { + *xid_min = *xid_max = xmax; + xid_found = true; + } + else + { + *xid_min = Min(*xid_min, xmax); + *xid_max = Max(*xid_max, xmax); + } + } + else + { + if (!multi_found) + { + *multi_min = *multi_max = xmax; + multi_found = true; + } + else + { + *multi_min = Min(*multi_min, 
xmax); + *multi_max = Max(*multi_max, xmax); + } + } + } +} + +/* + * Add special area to heap page, so convert from "double xmax" to normal + * format. + */ +static void +heap_page_add_special_area(ItemIdCompact itemidbase, int nitems, Page page, + TransactionId xid_base, MultiXactId multi_base, + bool is_toast) +{ + char newPage[BLCKSZ]; + PageHeader phdr = (PageHeader) page; + PageHeader new_phdr = (PageHeader) newPage; + Offset upper; + int i; + + memcpy(newPage, page, phdr->pd_lower); + + /* Add special area */ + if (is_toast) + { + ToastPageSpecial special; + + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(ToastPageSpecialData); + special = (ToastPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData); + special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + + /* sort itemIdSortData array into decreasing itemoff order */ + qsort((char *) itemidbase, nitems, sizeof(ItemIdCompactData), + itemoffcompare); + + upper = new_phdr->pd_special; + for (i = 0; i < nitems; i++) + { + ItemIdCompact itemidptr = &itemidbase[i]; + ItemId lp; + HeapTupleHeader old_htup; + HeapTupleHeader new_htup; + TransactionId xmax; + + lp = PageGetItemId(page, itemidptr->offsetindex + 1); + old_htup = (HeapTupleHeader) PageGetItem(page, lp); + upper -= itemidptr->alignedlen; + memcpy((Pointer) newPage + upper, + (Pointer) page + itemidptr->itemoff, + itemidptr->alignedlen); + lp = PageGetItemId(newPage, itemidptr->offsetindex + 1); + lp->lp_off = upper; + new_htup = (HeapTupleHeader) PageGetItem(newPage, lp); + + /* Convert xmax value */ + new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId; + xmax = HeapTupleHeaderGetDoubleXmax(old_htup); + if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI)) + 
new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax); + else + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax); + } + + new_phdr->pd_upper = upper; + + memcpy(page, newPage, PageGetPageSize(newPage)); + elog(DEBUG2, "convert heap page from double xmax to normal format"); +} /* * After removing or marking some line pointers unused, move the tuples to @@ -471,21 +620,47 @@ typedef itemIdCompactData *itemIdCompact; * Callers must ensure that nitems is > 0 */ static void -compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted) +compactify_tuples(ItemIdCompact itemidbase, int nitems, Page page, + bool presorted, bool addspecial, bool is_toast) { PageHeader phdr = (PageHeader) page; Offset upper; Offset copy_tail; Offset copy_head; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; int i; /* Code within will not work correctly if nitems == 0 */ Assert(nitems > 0); - if (presorted) + /* Add special area to the heap page if possible */ + if (addspecial) { + TransactionId xid_min = FirstNormalTransactionId, + xid_max = FirstNormalTransactionId; + MultiXactId multi_min = FirstNormalTransactionId, + multi_max = FirstNormalTransactionId; + Assert(phdr->pd_special == PageGetPageSize(page)); + + heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max, + &multi_min, &multi_max); + + if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) && + multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId)) + { + Assert(xid_min >= FirstNormalTransactionId); + Assert(multi_min >= FirstNormalTransactionId); + heap_page_add_special_area(itemidbase, nitems, page, + xid_min - FirstNormalTransactionId, + multi_min - FirstNormalTransactionId, + is_toast); + return; + } + } + + if (presorted) + { #ifdef USE_ASSERT_CHECKING { /* @@ -696,14 +871,14 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte * the line 
pointer array following array truncation. */ void -PageRepairFragmentation(Page page) +PageRepairFragmentation(Page page, bool is_toast) { Offset pd_lower = ((PageHeader) page)->pd_lower; Offset pd_upper = ((PageHeader) page)->pd_upper; Offset pd_special = ((PageHeader) page)->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxHeapTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompactData itemidbase[MaxHeapTuplesPerPage]; + ItemIdCompact itemidptr; ItemId lp; int nline, nstorage, @@ -777,11 +952,30 @@ PageRepairFragmentation(Page page) nstorage = itemidptr - itemidbase; if (nstorage == 0) { + if (pd_special == PageGetPageSize(page)) + { + if (is_toast) + { + pd_special = PageGetPageSize(page) - sizeof(ToastPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + ToastPageGetSpecial(page)->pd_xid_base = 0; + } + else + { + pd_special = PageGetPageSize(page) - sizeof(HeapPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + HeapPageGetSpecial(page)->pd_xid_base = 0; + HeapPageGetSpecial(page)->pd_multi_base = 0; + } + } + /* Page is completely empty, so just reset it quickly */ ((PageHeader) page)->pd_upper = pd_special; } else { + bool addspecial = false; + /* Need to compact the page the hard way */ if (totallen > (Size) (pd_special - pd_lower)) ereport(ERROR, @@ -789,7 +983,25 @@ PageRepairFragmentation(Page page) errmsg("corrupted item lengths: total %u, available space %u", (unsigned int) totallen, pd_special - pd_lower))); - compactify_tuples(itemidbase, nstorage, page, presorted); + /* + * Try to add special area to the heap page if it has enough free + * space. + */ + if (pd_special == PageGetPageSize(page)) + { + Size special_size, + actual_size; + + special_size = is_toast ?
sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + actual_size = (Size) (pd_special - pd_lower) - totallen; + + if (actual_size >= special_size) + addspecial = true; + } + + compactify_tuples(itemidbase, nstorage, page, presorted, addspecial, + is_toast); } if (finalusedlp != nline) @@ -992,6 +1204,9 @@ PageGetHeapFreeSpace(Page page) { Size space; + if (HeapPageIsDoubleXmax(page)) + return 0; + space = PageGetFreeSpace(page); if (space > 0) { @@ -1165,9 +1380,9 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) Offset pd_upper = phdr->pd_upper; Offset pd_special = phdr->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxIndexTuplesPerPage]; + ItemIdCompactData itemidbase[MaxIndexTuplesPerPage]; ItemIdData newitemids[MaxIndexTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; ItemId lp; int nline, nused; @@ -1275,7 +1490,12 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) /* and compactify the tuple data */ if (nused > 0) - compactify_tuples(itemidbase, nused, page, presorted); + { + bool is_toast; + + is_toast = BLCKSZ - pd_special == sizeof(ToastPageSpecialData); + compactify_tuples(itemidbase, nused, page, presorted, false, is_toast); + } else phdr->pd_upper = pd_special; } diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index fdfdf7d0d2..5886c60200 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup) * Usually, a row would get hinted as committed when it's read or loaded * into syscache; but just in case not, let's check the xmin directly. 
*/ - xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); + xmin = HeapTupleGetXmin(enumval_tup); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index aa37c401e5..7b5e522465 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -3310,6 +3310,7 @@ populate_record(TupleDesc tupdesc, tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = defaultval; /* Break down the tuple into fields */ @@ -3756,6 +3757,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj) tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = tuphead; tuplestore_puttuple(state->tuple_store, &tuple); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 974aa4fb43..a1e9406009 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -79,7 +79,7 @@ VXIDGetDatum(BackendId bid, LocalTransactionId lxid) * The representation is "/", decimal and unsigned decimal * respectively. Note that elog.c also knows how to format a vxid. 
*/ - char vxidstr[32]; + char vxidstr[64]; snprintf(vxidstr, sizeof(vxidstr), "%d/%llu", bid, (unsigned long long) lxid); @@ -293,7 +293,9 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_TRANSACTION: values[6] = - TransactionIdGetDatum(instance->locktag.locktag_field1); + TransactionIdGetDatum( + (TransactionId) instance->locktag.locktag_field1 | + ((TransactionId) instance->locktag.locktag_field2 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; @@ -305,7 +307,8 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, - instance->locktag.locktag_field2); + (TransactionId) instance->locktag.locktag_field2 | + ((TransactionId) instance->locktag.locktag_field3 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 0cea320c00..77c54f5d9f 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/xact.h" #include "access/xlog.h" #include "access/xlogprefetcher.h" #include "catalog/catalog.h" diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index eb8fe95933..37abb9061f 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -354,6 +354,7 @@ record_out(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; /* @@ -712,6 +713,7 @@ record_send(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; /* @@ -862,10 +864,12 @@ record_cmp(FunctionCallInfo fcinfo) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); 
ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple1); tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple2); tuple2.t_data = record2; /* @@ -1107,10 +1111,12 @@ record_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1369,10 +1375,12 @@ record_image_cmp(FunctionCallInfo fcinfo) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1615,10 +1623,12 @@ record_image_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1818,6 +1828,7 @@ hash_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroXids(&tuple); /* * We arrange to look up the needed hashing info just once per series of @@ -1939,6 +1950,7 @@ hash_record_extended(PG_FUNCTION_ARGS) 
ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroXids(&tuple); /* * We arrange to look up the needed hashing info just once per series of diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index 8ac1679c38..8e0830f4cc 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -33,7 +33,7 @@ xidin(PG_FUNCTION_ARGS) char *str = PG_GETARG_CSTRING(0); TransactionId result; - result = uint32in_subr(str, NULL, "xid", fcinfo->context); + result = uint64in_subr(str, NULL, "xid", fcinfo->context); PG_RETURN_TRANSACTIONID(result); } @@ -41,9 +41,9 @@ Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); - char *result = (char *) palloc(16); + char *result = (char *) palloc(32); - snprintf(result, 16, "%lu", (unsigned long) transactionId); + snprintf(result, 32, "%llu", (unsigned long long) transactionId); PG_RETURN_CSTRING(result); } @@ -54,8 +54,13 @@ Datum xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint32 lo, + hi; + + lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); + hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); - PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); + PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32)); } /* @@ -66,9 +71,15 @@ xidsend(PG_FUNCTION_ARGS) { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; + uint32 lo, + hi; + + lo = (uint32) (arg1 & 0xFFFFFFFF); + hi = (uint32) (arg1 >> 32); pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); + pq_sendint(&buf, lo, sizeof(lo)); + pq_sendint(&buf, hi, sizeof(hi)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -107,9 +118,9 @@ xid_age(PG_FUNCTION_ARGS) /* Permanent XIDs are always infinitely old */ if (!TransactionIdIsNormal(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - 
xid)); } /* @@ -122,9 +133,9 @@ mxid_age(PG_FUNCTION_ARGS) MultiXactId now = ReadNextMultiXactId(); if (!MultiXactIdIsValid(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* @@ -188,7 +199,7 @@ xid8in(PG_FUNCTION_ARGS) uint64 result; result = uint64in_subr(str, NULL, "xid8", fcinfo->context); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(result)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(result)); } Datum @@ -197,7 +208,7 @@ xid8out(PG_FUNCTION_ARGS) FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); char *result = (char *) palloc(21); - snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid)); + snprintf(result, 21, UINT64_FORMAT, XidFromFullTransactionId(fxid)); PG_RETURN_CSTRING(result); } @@ -208,7 +219,7 @@ xid8recv(PG_FUNCTION_ARGS) uint64 value; value = (uint64) pq_getmsgint64(buf); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(value)); } Datum @@ -218,7 +229,7 @@ xid8send(PG_FUNCTION_ARGS) StringInfoData buf; pq_begintypsend(&buf); - pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1)); + pq_sendint64(&buf, (uint64) XidFromFullTransactionId(arg1)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 06ae940df6..becd220981 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -88,8 +88,7 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP, * It is an ERROR if the xid is in the future. Otherwise, returns true if * the transaction is still new enough that we can determine whether it * committed and false otherwise. If *extracted_xid is not NULL, it is set - * to the low 32 bits of the transaction ID (i.e. the actual XID, without the - * epoch). + * to the actual transaction ID. 
* * The caller must hold XactTruncationLock since it's dealing with arbitrary * XIDs, and must continue to hold it until it's done with any clog lookups @@ -98,15 +97,10 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP, static bool TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) { - uint32 xid_epoch = EpochFromFullTransactionId(fxid); TransactionId xid = XidFromFullTransactionId(fxid); - uint32 now_epoch; - TransactionId now_epoch_next_xid; FullTransactionId now_fullxid; now_fullxid = ReadNextFullTransactionId(); - now_epoch_next_xid = XidFromFullTransactionId(now_fullxid); - now_epoch = EpochFromFullTransactionId(now_fullxid); if (extracted_xid != NULL) *extracted_xid = xid; @@ -123,7 +117,7 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("transaction ID %llu is in the future", - (unsigned long long) U64FromFullTransactionId(fxid)))); + (unsigned long long) XidFromFullTransactionId(fxid)))); /* * ShmemVariableCache->oldestClogXid is protected by XactTruncationLock, @@ -135,48 +129,15 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) Assert(LWLockHeldByMe(XactTruncationLock)); /* - * If the transaction ID has wrapped around, it's definitely too old to - * determine the commit status. Otherwise, we can compare it to - * ShmemVariableCache->oldestClogXid to determine whether the relevant - * CLOG entry is guaranteed to still exist. + * We compare xid to ShmemVariableCache->oldestClogXid to determine + * whether the relevant CLOG entry is guaranteed to still exist. 
*/ - if (xid_epoch + 1 < now_epoch - || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid) - || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) + if (TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) return false; return true; } -/* - * Convert a TransactionId obtained from a snapshot held by the caller to a - * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that - * we can compute the high order bits. It must have been obtained by the - * caller with ReadNextFullTransactionId() after the snapshot was created. - */ -static FullTransactionId -widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid) -{ - TransactionId next_xid = XidFromFullTransactionId(next_fxid); - uint32 epoch = EpochFromFullTransactionId(next_fxid); - - /* Special transaction ID. */ - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - /* - * The 64 bit result must be <= next_fxid, since next_fxid hadn't been - * issued yet when the snapshot was created. Every TransactionId in the - * snapshot must therefore be from the same epoch as next_fxid, or the - * epoch before. We know this because next_fxid is never allow to get - * more than one epoch ahead of the TransactionIds in any snapshot. 
- */ - if (xid > next_xid) - epoch--; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - /* * txid comparator for qsort/bsearch */ @@ -303,12 +264,12 @@ parse_snapshot(const char *str, Node *escontext) char *endp; StringInfo buf; - xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmin = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; - xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmax = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; @@ -326,7 +287,7 @@ parse_snapshot(const char *str, Node *escontext) while (*str != '\0') { /* read next value */ - val = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + val = FullTransactionIdFromXid(strtou64(str, &endp, 10)); str = endp; /* require the input to be in order */ @@ -404,7 +365,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS) uint32 nxip, i; Snapshot cur; - FullTransactionId next_fxid = ReadNextFullTransactionId(); cur = GetActiveSnapshot(); if (cur == NULL) @@ -415,11 +375,11 @@ pg_current_snapshot(PG_FUNCTION_ARGS) snap = palloc(PG_SNAPSHOT_SIZE(nxip)); /* fill */ - snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid); - snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid); + snap->xmin = FullTransactionIdFromXid(cur->xmin); + snap->xmax = FullTransactionIdFromXid(cur->xmax); snap->nxip = nxip; for (i = 0; i < nxip; i++) - snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid); + snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]); /* * We want them guaranteed to be in ascending order. 
This also removes @@ -467,16 +427,16 @@ pg_snapshot_out(PG_FUNCTION_ARGS) initStringInfo(&str); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmin)); + XidFromFullTransactionId(snap->xmin)); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmax)); + XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) { if (i > 0) appendStringInfoChar(&str, ','); appendStringInfo(&str, UINT64_FORMAT, - U64FromFullTransactionId(snap->xip[i])); + XidFromFullTransactionId(snap->xip[i])); } PG_RETURN_CSTRING(str.data); @@ -505,8 +465,8 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP) goto bad_format; - xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); - xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + xmin = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); + xmax = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (!FullTransactionIdIsValid(xmin) || !FullTransactionIdIsValid(xmax) || FullTransactionIdPrecedes(xmax, xmin)) @@ -519,7 +479,7 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) for (i = 0; i < nxip; i++) { FullTransactionId cur = - FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (FullTransactionIdPrecedes(cur, last) || FullTransactionIdPrecedes(cur, xmin) || @@ -564,10 +524,10 @@ pg_snapshot_send(PG_FUNCTION_ARGS) pq_begintypsend(&buf); pq_sendint32(&buf, snap->nxip); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin)); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmin)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i])); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xip[i])); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -655,8 +615,7 @@ 
pg_snapshot_xip(PG_FUNCTION_ARGS) * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * - * The passed epoch-qualified xid is treated as a normal xid, not a - * multixact id. + * The passed xid is treated as a normal xid, not a multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 2e2e4d9f1f..674e9ecd8e 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -1908,6 +1908,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, memcpy((char *) ct->tuple.t_data, (const char *) dtp->t_data, dtp->t_len); + HeapTupleCopyXids(&ct->tuple, dtp); MemoryContextSwitchTo(oldcxt); if (dtp != ntp) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index b3faccbefe..c4c1b0dbbd 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2341,8 +2341,7 @@ RelationReloadIndexInfo(Relation relation) relation->rd_index->indisreplident = index->indisreplident; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ - HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleSetXmin(relation->rd_indextuple, HeapTupleGetXmin(tuple)); ReleaseSysCache(tuple); } diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 9dfdf890c5..c3122b317c 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -526,7 +526,7 @@ lookup_C_func(HeapTuple procedureTuple) NULL); if (entry == NULL) return NULL; /* no such entry */ - if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) && + if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) && ItemPointerEquals(&entry->fn_tid, 
&procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ @@ -562,7 +562,7 @@ record_C_func(HeapTuple procedureTuple, HASH_ENTER, &found); /* OID is already filled in */ - entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data); + entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index fd0bc11a00..0b4d2e875d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2556,65 +2556,6 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, - { - {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a table row."), - NULL - }, - &vacuum_freeze_min_age, - 50000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), - NULL - }, - &vacuum_multixact_freeze_min_age, - 5000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_multixact_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - { - 
{"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_multixact_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - /* * See also CheckRequiredParameterValues() if this parameter changes */ @@ -3239,28 +3180,6 @@ struct config_int ConfigureNamesInt[] = 50, 0, INT_MAX, NULL, NULL, NULL }, - { - /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), - NULL - }, - &autovacuum_freeze_max_age, - - /* see vacuum_failsafe_age if you change the upper-limit value. */ - 200000000, 100000, 2000000000, - NULL, NULL, NULL - }, - { - /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), - NULL - }, - &autovacuum_multixact_freeze_max_age, - 400000000, 10000, 2000000000, - NULL, NULL, NULL - }, { /* see max_connections */ {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM, @@ -3528,7 +3447,6 @@ struct config_int ConfigureNamesInt[] = SCRAM_SHA_256_DEFAULT_ITERATIONS, 1, INT_MAX, NULL, NULL, NULL }, - /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -3538,6 +3456,87 @@ struct config_int ConfigureNamesInt[] = struct config_int64 ConfigureNamesInt64[] = { + { + {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a table row."), + NULL + }, + &vacuum_freeze_min_age, + INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should scan whole 
table to freeze tuples."), + NULL + }, + &vacuum_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), + NULL + }, + &vacuum_multixact_freeze_min_age, + INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_multixact_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + { + {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_multixact_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + { + /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), + NULL + }, + &autovacuum_freeze_max_age, + + /* see vacuum_failsafe_age if you change the upper-limit value. 
*/ + INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + { + /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), + NULL + }, + &autovacuum_multixact_freeze_max_age, + INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -3545,6 +3544,7 @@ struct config_int64 ConfigureNamesInt64[] = }; + struct config_real ConfigureNamesReal[] = { { diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c index 94c8a16ac1..6e2ce1e325 100644 --- a/src/backend/utils/misc/help_config.c +++ b/src/backend/utils/misc/help_config.c @@ -33,6 +33,7 @@ typedef union struct config_bool _bool; struct config_real real; struct config_int integer; + struct config_int64 integer8; struct config_string string; struct config_enum _enum; } mixedStruct; @@ -106,7 +107,12 @@ printMixedStruct(mixedStruct *structToPrint) structToPrint->integer.min, structToPrint->integer.max); break; - + case PGC_INT64: + printf("INT64\t%lld\t%lld\t%lld\t", + (long long) structToPrint->integer8.reset_val, + (long long) structToPrint->integer8.min, + (long long) structToPrint->integer8.max); + break; case PGC_REAL: printf("REAL\t%g\t%g\t%g\t", structToPrint->real.reset_val, diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 7cbe700295..b0eb7cf0cf 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -118,7 +118,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) nulls[5] = false; values[6] = CStringGetTextDatum(psprintf("%llu", - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid))); + (unsigned long long) 
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid))); nulls[6] = false; values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index cf9f283cfe..f97bcdf264 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -653,7 +653,7 @@ #autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table # size before insert vacuum #autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze -#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum +#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum # (change requires restart) #autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age # before forced vacuum diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c index 2cd508e513..97a607ab6c 100644 --- a/src/backend/utils/sort/tuplesortvariants.c +++ b/src/backend/utils/sort/tuplesortvariants.c @@ -1217,11 +1217,16 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) { TuplesortPublic *base = TuplesortstateGetPublic(state); HeapTuple tuple = (HeapTuple) stup->tuple; - unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + unsigned int tuplen = tuple->t_len + + sizeof(ItemPointerData) + + 2 * sizeof(TransactionId) + /* tuple xmin, xmax */ + sizeof(int); /* We need to store t_self, but not other fields of HeapTupleData */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeWrite(tape, &tuple->t_xmax, sizeof(TransactionId)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? 
*/ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); @@ -1233,7 +1238,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, { TuplesortPublic *base = TuplesortstateGetPublic(state); TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; - unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + unsigned int t_len = tuplen - + sizeof(ItemPointerData) - + 2 * sizeof(TransactionId) - /* tuple xmin, xmax */ + sizeof(int); HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state, t_len + HEAPTUPLESIZE); @@ -1241,6 +1249,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); tuple->t_len = t_len; LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeReadExact(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeReadExact(tape, &tuple->t_xmax, sizeof(TransactionId)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 0e94bc93f7..f760a78072 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid); */ CommandId -HeapTupleHeaderGetCmin(HeapTupleHeader tup) +HeapTupleGetCmin(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); - Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup))); + Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); @@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(HeapTupleHeader tup) } CommandId -HeapTupleHeaderGetCmax(HeapTupleHeader tup) +HeapTupleGetCmax(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); 
Assert(!(tup->t_infomask & HEAP_MOVED)); @@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) * things too much. */ Assert(CritSectionCount > 0 || - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup))); + TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); @@ -150,9 +152,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) * changes the tuple in shared buffers. */ void -HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, - CommandId *cmax, - bool *iscombo) +HeapTupleAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo) { /* * If we're marking a tuple deleted that was inserted by (any @@ -160,10 +160,10 @@ HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ - if (!HeapTupleHeaderXminCommitted(tup) && - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup))) + if (!HeapTupleHeaderXminCommitted(tup->t_data) && + TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup))) { - CommandId cmin = HeapTupleHeaderGetCmin(tup); + CommandId cmin = HeapTupleGetCmin(tup); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 6a4327f917..8091b24341 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -927,15 +927,15 @@ SnapshotResetXmin(void) if (pairingheap_is_empty(&RegisteredSnapshots)) { - MyProc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId); return; } minSnapshot = pairingheap_container(SnapshotData, ph_node, pairingheap_first(&RegisteredSnapshots)); - if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin)) - MyProc->xmin = minSnapshot->xmin; + if (TransactionIdPrecedes(pg_atomic_read_u64(&MyProc->xmin), minSnapshot->xmin)) + pg_atomic_write_u64(&MyProc->xmin, 
minSnapshot->xmin); } /* @@ -1088,7 +1088,7 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin) if (resetXmin) SnapshotResetXmin(); - Assert(resetXmin || MyProc->xmin == 0); + Assert(resetXmin || pg_atomic_read_u64(&MyProc->xmin) == 0); } @@ -1153,8 +1153,9 @@ ExportSnapshot(Snapshot snapshot) * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. */ - snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", - MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1); + snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d", + MyProc->backendId, (uint32) (MyProc->lxid >> 32), + (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the @@ -1330,7 +1331,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%u", &val) != 1) + if (sscanf(ptr, "%" INT64_MODIFIER "u", &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1355,7 +1356,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2) + if (sscanf(ptr, "%d/%" INT64_MODIFIER "u", &vxid->backendId, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1896,7 +1897,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + if (pg_lfind64(xid, snapshot->subxip, 
snapshot->subxcnt)) return true; /* not there, fall through to search xip[] */ @@ -1918,7 +1919,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) + if (pg_lfind64(xid, snapshot->xip, snapshot->xcnt)) return true; } else @@ -1952,7 +1953,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt)) return true; } diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index add07f7fca..a6ad931c45 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -8,6 +8,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +use Data::Dumper; # This regression test demonstrates that the pg_amcheck binary correctly # identifies specific kinds of corruption within pages. To test this, we need @@ -85,6 +86,65 @@ use Test::More; use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL'; use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size +use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ'; +use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16; +use constant HEAPPAGE_SIZE => 8192; + +# Some #define constants from access/htup_details.h for use while corrupting. 
+use constant HEAP_HASNULL => 0x0001; +use constant HEAP_XMAX_LOCK_ONLY => 0x0080; +use constant HEAP_XMIN_COMMITTED => 0x0100; +use constant HEAP_XMIN_INVALID => 0x0200; +use constant HEAP_XMAX_COMMITTED => 0x0400; +use constant HEAP_XMAX_INVALID => 0x0800; +use constant HEAP_NATTS_MASK => 0x07FF; +use constant HEAP_XMAX_IS_MULTI => 0x1000; +use constant HEAP_KEYS_UPDATED => 0x2000; +use constant HEAP_HOT_UPDATED => 0x4000; +use constant HEAP_ONLY_TUPLE => 0x8000; +use constant HEAP_UPDATED => 0x2000; + +use constant FIRST_NORMAL_TRANSACTION_ID => 3; + +# Read page special data +sub read_special_data +{ + my ($fh, $offset) = @_; + my ($buffer, %special); + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("sysread failed: $!"); + + @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer); + %special = ( + pd_xid_base => shift, + pd_multi_base => shift); + return \%special; +} + +# Write page special data +sub write_special_data +{ + my ($fh, $offset, $special) = @_; + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + my $buffer = pack( + HEAPPAGE_SPECIAL_PACK_CODE, + $special->{pd_xid_base}, $special->{pd_multi_base}); + + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("syswrite failed: $!"); + return; +} # Read a tuple of our table from a heap page. 
# @@ -96,8 +156,9 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size # sub read_tuple { - my ($fh, $offset) = @_; + my ($fh, $offset, $raw) = @_; my ($buffer, %tup); + sysseek($fh, $offset, 0) or BAIL_OUT("sysseek failed: $!"); defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH)) @@ -133,6 +194,18 @@ sub read_tuple c_va_toastrelid => shift); # Stitch together the text for column 'b' $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7)); + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + $tup{t_xmin} += $special->{pd_xid_base}; + my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup{t_xmax} += !$is_multi ? + $special->{pd_xid_base} : + $special->{pd_multi_base}; + } + return \%tup; } @@ -148,7 +221,39 @@ sub read_tuple # sub write_tuple { - my ($fh, $offset, $tup) = @_; + my ($fh, $offset, $tup, $raw) = @_; + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + if ($tup->{t_xmin} >= 3) + { + my $xmin = $tup->{t_xmin} - $special->{pd_xid_base}; + die "tuple x_min $tup->{t_xmin} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmin < 3; + $tup->{t_xmin} = $xmin; + } + + if ($tup->{t_xmax} >= 3) + { + if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0) + { + my $xmax = $tup->{t_xmax} - $special->{pd_xid_base}; + die "tuple x_max $tup->{t_xmax} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + else + { + my $xmax = $tup->{t_xmax} - $special->{pd_multi_base}; + die "tuple multi x_max $tup->{t_xmax} is too small for pd_multi_base $special->{pd_multi_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + } + } + my $buffer = pack( HEAPTUPLE_PACK_CODE, $tup->{t_xmin}, $tup->{t_xmax}, @@ -171,6 +276,42 @@ sub write_tuple return; } +# move pd_xid_base and pd_multi_base to more suitable position for tests. 
+sub fixup_page +{ + my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_; + my $offset = $page * HEAPPAGE_SIZE; + my $special = read_special_data($fh, $offset); + + die "xid_base $xid_base should be lesser than existed $special->{pd_xid_base}" + if ($xid_base > $special->{pd_xid_base}); + die "multi_base $multi_base should be lesser than existed $special->{pd_multi_base}" + if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0); + return if ($xid_base == $special->{pd_xid_base} && + $multi_base == $special->{pd_multi_base}); + + my $xid_delta = $special->{pd_xid_base} - $xid_base; + my $multi_delta = $special->{pd_multi_base} - $multi_base; + + for my $off (@$lp_off) + { + # change only tuples on this page. + next if ($off < $offset && $off > $offset + HEAPPAGE_SIZE); + next if ($off == -1); + + my $tup = read_tuple($fh, $off, 1); + $tup->{t_xmin} += $xid_delta; + my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta; + write_tuple($fh, $off, $tup, 1); + } + + $special->{pd_xid_base} = $xid_base; + $special->{pd_multi_base} = $multi_base; + + write_special_data($fh, $offset, $special); +} + # Set umask so test directories and files are created with default permissions umask(0077); @@ -320,6 +461,8 @@ my $relfrozenxid = $node->safe_psql('postgres', q(select relfrozenxid from pg_class where relname = 'test')); my $datfrozenxid = $node->safe_psql('postgres', q(select datfrozenxid from pg_database where datname = 'postgres')); +my $datminmxid = $node->safe_psql('postgres', + q(select datminmxid from pg_database where datname = 'postgres')); # Sanity check that our 'test' table has a relfrozenxid newer than the # datfrozenxid for the database, and that the datfrozenxid is greater than the @@ -377,6 +520,11 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) # Determine endianness of current platform from the 1-byte varlena header $ENDIANNESS = $tup->{b_header} == 0x11 ? 
"little" : "big"; } + +# Set 64bit xid bases a bit in the past therefore we can set xmin/xmax a bit +# in the past +fixup_page($file, 0, $datfrozenxid - 100, $datminmxid, \@lp_off); + close($file) or BAIL_OUT("close failed: $!"); $node->start; @@ -394,20 +542,6 @@ $node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ], $node->stop; -# Some #define constants from access/htup_details.h for use while corrupting. -use constant HEAP_HASNULL => 0x0001; -use constant HEAP_XMAX_LOCK_ONLY => 0x0080; -use constant HEAP_XMIN_COMMITTED => 0x0100; -use constant HEAP_XMIN_INVALID => 0x0200; -use constant HEAP_XMAX_COMMITTED => 0x0400; -use constant HEAP_XMAX_INVALID => 0x0800; -use constant HEAP_NATTS_MASK => 0x07FF; -use constant HEAP_XMAX_IS_MULTI => 0x1000; -use constant HEAP_KEYS_UPDATED => 0x2000; -use constant HEAP_HOT_UPDATED => 0x4000; -use constant HEAP_ONLY_TUPLE => 0x8000; -use constant HEAP_UPDATED => 0x2000; - # Helper function to generate a regular expression matching the header we # expect verify_heapam() to return given which fields we expect to be non-null. sub header @@ -442,6 +576,8 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) # Read tuple, if there is one. my $tup = $offset == -1 ? undef : read_tuple($file, $offset); + # Read page special, if there is one. + my $special = $offset == -1 ? undef : read_special_data($file, $offset); if ($offnum == 1) { @@ -458,7 +594,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) elsif ($offnum == 2) { # Corruptly set xmin < datfrozenxid - my $xmin = 3; + my $xmin = $datfrozenxid - 12; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; @@ -468,25 +604,24 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) } elsif ($offnum == 3) { - # Corruptly set xmin < datfrozenxid, further back, noting circularity - # of xid comparison. - my $xmin = 4026531839; + # Corruptly set xmin > next transaction id. 
+ my $xmin = $relfrozenxid + 4026531839; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin ${xmin} precedes oldest valid transaction ID \d+/; + qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 4) { - # Corruptly set xmax < relminmxid; - my $xmax = 4026531839; + # Corruptly set xmax > relminmxid; + my $xmax = $relfrozenxid + 4026531839; $tup->{t_xmax} = $xmax; $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; push @expected, - qr/${$header}xmax ${xmax} precedes oldest valid transaction ID \d+/; + qr/${$header}xmax ${xmax} equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 5) { @@ -602,7 +737,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) $tup->{t_xmax} = 4000000000; push @expected, - qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; + qr/${header}multitransaction ID 4000000000 equals or exceeds next valid multitransaction ID 1/; } elsif ($offnum == 16) # Last offnum must equal ROWCOUNT { diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 76837bc72c..6178b6f345 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -247,7 +247,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? 
_("on") : _("off")); printf(_("Latest checkpoint's NextXID: %llu\n"), - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid)); + (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %llu\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e2273ff873..41f7cb16ab 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -41,6 +41,7 @@ #include "access/attnum.h" #include "access/sysattr.h" #include "access/transam.h" +#include "c.h" #include "catalog/pg_aggregate_d.h" #include "catalog/pg_am_d.h" #include "catalog/pg_attribute_d.h" @@ -2985,7 +2986,7 @@ dumpDatabase(Archive *fout) *datistemplate, *datconnlimit, *tablespace; - uint32 frozenxid, + uint64 frozenxid, minmxid; char *qdatname; @@ -3055,8 +3056,8 @@ dumpDatabase(Archive *fout) icurules = PQgetvalue(res, 0, i_daticurules); else icurules = NULL; - frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); - minmxid = atooid(PQgetvalue(res, 0, i_minmxid)); + frozenxid = strtou64(PQgetvalue(res, 0, i_frozenxid), NULL, 0); + minmxid = strtou64(PQgetvalue(res, 0, i_minmxid), NULL, 0); dbdacl.acl = PQgetvalue(res, 0, i_datacl); dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault); datistemplate = PQgetvalue(res, 0, i_datistemplate); @@ -3352,10 +3353,16 @@ dumpDatabase(Archive *fout) RelFileNumber relfilenumber; appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '%llu', relminmxid = '%llu'\n" "WHERE oid = %u;\n", - atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)), - atooid(PQgetvalue(lo_res, i, ii_relminmxid)), + (unsigned long long) strtou64(PQgetvalue(lo_res, + i, + ii_relfrozenxid), + NULL, 0), + (unsigned long long) strtou64(PQgetvalue(lo_res, + i, + ii_relminmxid), + NULL, 0), atooid(PQgetvalue(lo_res, i, 
ii_oid))); oid = atooid(PQgetvalue(lo_res, i, ii_oid)); @@ -6665,11 +6672,11 @@ getTables(Archive *fout, int *numTables) tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident)); tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0); tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0); - tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid)); - tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid)); + tblinfo[i].frozenxid = strtou64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0); + tblinfo[i].toast_frozenxid = strtou64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0); tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid)); - tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid)); - tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid)); + tblinfo[i].minmxid = strtou64(PQgetvalue(res, i, i_relminmxid), NULL, 0); + tblinfo[i].toast_minmxid = strtou64(PQgetvalue(res, i, i_toastminmxid), NULL, 0); tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions)); if (PQgetisnull(res, i, i_checkoption)) tblinfo[i].checkoption = NULL; diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 2fe3cbed9a..8a5d2eb1fa 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -299,11 +299,11 @@ typedef struct _tableInfo bool rowsec; /* is row security enabled? */ bool forcerowsec; /* is row security forced? */ bool hasoids; /* does it have OIDs? 
*/ - uint32 frozenxid; /* table's relfrozenxid */ - uint32 minmxid; /* table's relminmxid */ + uint64 frozenxid; /* table's relfrozenxid */ + uint64 minmxid; /* table's relminmxid */ Oid toast_oid; /* toast table's OID, or 0 if none */ - uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */ - uint32 toast_minmxid; /* toast table's relminmxid */ + uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */ + uint64 toast_minmxid; /* toast table's relminmxid */ int ncheck; /* # of CHECK expressions */ Oid reltype; /* OID of table's composite type, if any */ Oid reloftype; /* underlying type for typed table */ diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 6f0af6471e..1603977542 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */ static XLogSegNo newXlogSegNo; /* new XLOG segment # */ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; -static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_oldest_xid = 0; static TransactionId set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; @@ -95,7 +94,6 @@ main(int argc, char *argv[]) static struct option long_options[] = { {"commit-timestamp-ids", required_argument, NULL, 'c'}, {"pgdata", required_argument, NULL, 'D'}, - {"epoch", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"next-wal-file", required_argument, NULL, 'l'}, {"multixact-ids", required_argument, NULL, 'm'}, @@ -137,7 +135,7 @@ main(int argc, char *argv[]) } - while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1) + while ((c = getopt_long(argc, argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1) { switch (c) { @@ -153,24 +151,9 @@ main(int argc, char *argv[]) noupdate = true; break; - case 'e': - errno = 0; - set_xid_epoch = strtoul(optarg, &endptr, 0); - if (endptr == 
optarg || *endptr != '\0' || errno != 0) - { - /*------ - translator: the second %s is a command line argument (-e, etc) */ - pg_log_error("invalid argument for option %s", "-e"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit(1); - } - if (set_xid_epoch == -1) - pg_fatal("transaction ID epoch (-e) must not be -1"); - break; - case 'u': errno = 0; - set_oldest_xid = strtoul(optarg, &endptr, 0); + set_oldest_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-u"); @@ -184,7 +167,7 @@ main(int argc, char *argv[]) case 'x': errno = 0; - set_xid = strtoul(optarg, &endptr, 0); + set_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-x"); @@ -198,14 +181,14 @@ main(int argc, char *argv[]) case 'c': errno = 0; - set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0); + set_oldest_commit_ts_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } - set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0); + set_newest_commit_ts_xid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); @@ -241,7 +224,7 @@ main(int argc, char *argv[]) case 'm': errno = 0; - set_mxid = strtoul(optarg, &endptr, 0); + set_mxid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-m"); @@ -249,7 +232,7 @@ main(int argc, char *argv[]) exit(1); } - set_oldestmxid = strtoul(endptr + 1, &endptr2, 0); + set_oldestmxid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid 
argument for option %s", "-m"); @@ -269,7 +252,7 @@ main(int argc, char *argv[]) case 'O': errno = 0; - set_mxoff = strtoul(optarg, &endptr, 0); + set_mxoff = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-O"); @@ -415,11 +398,6 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) */ - if (set_xid_epoch != -1) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(set_xid_epoch, - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - if (set_oldest_xid != 0) { ControlFile.checkPointCopy.oldestXid = set_oldest_xid; @@ -427,9 +405,7 @@ main(int argc, char *argv[]) } if (set_xid != 0) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - set_xid); + ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid); if (set_oldest_commit_ts_xid != 0) ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid; @@ -664,7 +640,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.PrevTimeLineID = 1; ControlFile.checkPointCopy.fullPageWrites = false; ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + FullTransactionIdFromXid(FirstNormalTransactionId); ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; ControlFile.checkPointCopy.nextMultiOffset = 0; @@ -715,6 +691,8 @@ GuessControlValues(void) * * NB: this display should be just those fields that will not be * reset by RewriteControlFile(). + * + * Special macros help to make translatable strings. 
*/ static void PrintControlValues(bool guessed) @@ -734,8 +712,7 @@ PrintControlValues(bool guessed) ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off")); - printf(_("Latest checkpoint's NextXID: %u:%llu\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), + printf(_("Latest checkpoint's NextXID: %llu\n"), (unsigned long long) XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); @@ -833,12 +810,6 @@ PrintNewControlValues(void) ControlFile.checkPointCopy.oldestXidDB); } - if (set_xid_epoch != -1) - { - printf(_("NextXID epoch: %u\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - } - if (set_oldest_commit_ts_xid != 0) { printf(_("oldestCommitTsXid: %llu\n"), diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl index 18d0882cb1..471503c6b3 100644 --- a/src/bin/pg_resetwal/t/001_basic.pl +++ b/src/bin/pg_resetwal/t/001_basic.pl @@ -95,15 +95,6 @@ command_fails_like( [ 'pg_resetwal', '-c', '10,1', $node->data_dir ], qr/greater than/, 'fails with -c value 1 part 2'); -# -e -command_fails_like( - [ 'pg_resetwal', '-e', 'foo', $node->data_dir ], - qr/error: invalid argument for option -e/, - 'fails with incorrect -e option'); -command_fails_like( - [ 'pg_resetwal', '-e', '-1', $node->data_dir ], - qr/must not be -1/, - 'fails with -e value -1'); # -l command_fails_like( [ 'pg_resetwal', '-l', 'foo', $node->data_dir ], @@ -181,7 +172,6 @@ my $blcksz = $1; my @cmd = ('pg_resetwal', '-D', $node->data_dir); # some not-so-critical hardcoded values -push @cmd, '-e', 1; push @cmd, '-l', '00000001000000320000004B'; push @cmd, '-o', 100_000; push @cmd, '--wal-segsize', 1; @@ -205,8 +195,10 @@ push @cmd, '-c', sprintf("%d,%d", hex($files[0]) == 0 ? 
3 : hex($files[0]), hex($files[-1])); +my $A = 2; +my $B = 1; @files = get_slru_files('pg_multixact/offsets'); -$mult = 32 * $blcksz / 4; +$mult = $A * $blcksz / $B; # -m argument is "new,old" push @cmd, '-m', sprintf("%d,%d", @@ -214,11 +206,11 @@ push @cmd, '-m', hex($files[0]) == 0 ? 1 : hex($files[0] * $mult)); @files = get_slru_files('pg_multixact/members'); -$mult = 32 * int($blcksz / 20) * 4; +$mult = $A * int($blcksz / 20) * $B; push @cmd, '-O', (hex($files[-1]) + 1) * $mult; @files = get_slru_files('pg_xact'); -$mult = 32 * $blcksz * 4; +$mult = $A * $blcksz * $B; push @cmd, '-u', (hex($files[0]) == 0 ? 3 : hex($files[0]) * $mult), '-x', ((hex($files[-1]) + 1) * $mult); diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index bde91e2beb..6de46f8c2d 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -23,6 +23,7 @@ OBJS = \ parallel.o \ pg_upgrade.o \ relfilenumber.o \ + segresize.o \ server.o \ tablespace.o \ util.o \ diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index fa52aa2c22..3d34681699 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -35,6 +35,8 @@ static void check_for_new_tablespace_dir(void); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); static void check_new_cluster_logical_replication_slots(void); static void check_old_cluster_for_valid_slots(bool live_check); +static void check_for_32bit_xid_usage(ClusterInfo *cluster); +static bool is_xid_wraparound(ClusterInfo *cluster); /* @@ -84,7 +86,7 @@ output_check_banner(bool live_check) void -check_and_dump_old_cluster(bool live_check) +check_and_dump_old_cluster(bool live_check, bool *is_wraparound) { /* -- OLD -- */ @@ -197,6 +199,17 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903) old_9_3_check_for_line_data_type_usage(&old_cluster); + /* Prepare for 64bit xid */ + if (old_cluster.controldata.cat_ver < 
XID_FORMATCHANGE_CAT_VER) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -204,6 +217,8 @@ check_and_dump_old_cluster(bool live_check) if (!user_opts.check) generate_old_dump(); + *is_wraparound = is_xid_wraparound(&old_cluster); + if (!live_check) stop_postmaster(false); } @@ -274,6 +289,17 @@ issue_warnings_and_set_wal_level(void) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) old_9_6_invalidate_hash_indexes(&new_cluster, false); + /* Raindex for 64bit xid */ + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + report_extension_updates(&new_cluster); stop_postmaster(false); @@ -1613,3 +1639,124 @@ check_old_cluster_for_valid_slots(bool live_check) check_ok(); } + +/* + * check_for_32bit_xid_usage() + * + * Postgres Pro Enterprise changes xid storage format to 64-bit. Check if + * xid type is used in tables. 
+ */ +static void +check_for_32bit_xid_usage(ClusterInfo *cluster) +{ + int dbnum; + FILE *script = NULL; + bool found = false; + char output_path[MAXPGPATH]; + + prep_status("Checking for incompatible \"xid\" data type"); + + snprintf(output_path, sizeof(output_path), "tables_using_xid.txt"); + + for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + PGresult *res; + bool db_used = false; + int ntups; + int rowno; + int i_nspname, + i_relname, + i_attname; + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + + /* + * While several relkinds don't store any data, e.g. views, they can + * be used to define data types of other columns, so we check all + * relkinds. + */ + res = executeQueryOrDie(conn, + "SELECT n.nspname, c.relname, a.attname " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_namespace n, " + " pg_catalog.pg_attribute a " + "WHERE c.oid = a.attrelid AND " + " a.attnum >= 1 AND " + " a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND " + " c.relnamespace = n.oid AND " + /* exclude possible orphaned temp tables */ + " n.nspname !~ '^pg_temp_' AND " + " n.nspname NOT IN ('pg_catalog', 'information_schema')"); + + ntups = PQntuples(res); + i_nspname = PQfnumber(res, "nspname"); + i_relname = PQfnumber(res, "relname"); + i_attname = PQfnumber(res, "attname"); + for (rowno = 0; rowno < ntups; rowno++) + { + found = true; + if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %s\n", + output_path, strerror(errno)); + if (!db_used) + { + fprintf(script, "Database: %s\n", active_db->db_name); + db_used = true; + } + fprintf(script, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } + + PQclear(res); + + PQfinish(conn); + } + + if (script) + fclose(script); + + if (found) + { + pg_log(PG_REPORT, "fatal"); + pg_fatal("Your installation contains the \"xid\" 
data type in user tables.\n" + "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n" + "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n" + "You can remove the problem tables and restart the upgrade.\n" + "A list of the problem columns is in the file:\n" + " %s", output_path); + } + else + check_ok(); +} + +/* + * is_xid_wraparound() + * + * Return true if 32-xid cluster had wraparound. + */ +static bool +is_xid_wraparound(ClusterInfo *cluster) +{ + PGconn *conn; + PGresult *res; + bool is_wraparound; + + conn = connectToServer(cluster, "template1"); + + /* + * txid_current is extended with an "epoch" counter, so to check + * wraparound in old 32-xid cluster we cut epoch by casting to int4. + */ + res = executeQueryOrDie(conn, + "SELECT 1 " + "FROM pg_catalog.pg_database, txid_current() tx " + "WHERE (tx %% 4294967295)::bigint <= datfrozenxid::text::bigint " + "LIMIT 1"); + is_wraparound = PQntuples(res) ? true : false; + PQclear(res); + PQfinish(conn); + + return is_wraparound; +} diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index 8ef6204137..eb6b8a1027 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -288,6 +288,8 @@ get_control_data(ClusterInfo *cluster, bool live_check) xid.value = strtou64(p, NULL, 10); /* + * Try to read 32-bit XID format 'epoch:xid'. + * * Delimiter changed from '/' to ':' in 9.6. 
We don't test for * the catalog version of the change because the catalog version * is pulled from pg_controldata too, and it isn't worth adding an @@ -303,8 +305,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) if (p == NULL) { /* FullTransactionId representation */ - cluster->controldata.chkpnt_nxtxid = XidFromFullTransactionId(xid); - cluster->controldata.chkpnt_nxtepoch = EpochFromFullTransactionId(xid); + cluster->controldata.chkpnt_nxtxid = xid.value; } else { @@ -313,8 +314,8 @@ get_control_data(ClusterInfo *cluster, bool live_check) /* Epoch:Xid representation */ p++; /* remove '/' or ':' char */ - cluster->controldata.chkpnt_nxtxid = str2uint(p); - cluster->controldata.chkpnt_nxtepoch = (TransactionId) XidFromFullTransactionId(xid); + cluster->controldata.chkpnt_nxtxid = (XidFromFullTransactionId(xid)) << 32 | + (TransactionId) str2uint(p); } got_xid = true; @@ -338,7 +339,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtmulti = str2uint(p); + cluster->controldata.chkpnt_nxtmulti = strtou64(p, NULL, 10); got_multi = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL) @@ -349,7 +350,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstxid = str2uint(p); + cluster->controldata.chkpnt_oldstxid = strtou64(p, NULL, 10); got_oldestxid = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL) @@ -360,7 +361,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstMulti = str2uint(p); + cluster->controldata.chkpnt_oldstMulti = strtou64(p, NULL, 10); got_oldestmulti = true; } else if ((p = strstr(bufin, "Latest checkpoint's 
NextMultiOffset:")) != NULL) @@ -371,7 +372,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtmxoff = str2uint(p); + cluster->controldata.chkpnt_nxtmxoff = strtou64(p, NULL, 10); got_mxoff = true; } else if ((p = strstr(bufin, "First log segment after reset:")) != NULL) diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index d173602882..ae00f4674f 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -174,7 +174,8 @@ linkFile(const char *src, const char *dst, */ void rewriteVisibilityMap(const char *fromfile, const char *tofile, - const char *schemaName, const char *relName) + const char *schemaName, const char *relName, + bool update_version) { int src_fd; int dst_fd; @@ -290,6 +291,11 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, if (old_lastpart && empty) break; + if (update_version) + PageSetPageSizeAndVersion((Page) new_vmbuf.data, + PageGetPageSize((Page) new_vmbuf.data), + PG_PAGE_LAYOUT_VERSION); + /* Set new checksum for visibility map page, if enabled */ if (new_cluster.controldata.data_checksum_version != 0) ((PageHeader) new_vmbuf.data)->pd_checksum = @@ -316,6 +322,97 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, close(src_fd); } +/* + * updateSegmentVersion() + * + * Transform a segment file, copying from src to dst. + * schemaName/relName are relation's SQL name (used for error messages only). + * + * Read segment pages one by one and set version to PG_PAGE_LAYOUT_VERSION. + * + * Although FSM and MV formats does not change while switch to 64-bit XIDs, we + * must upgrade pages version in order to avoid lazy conversion on first read. 
+ */ +void +updateSegmentPagesVersion(const char *fromfile, const char *tofile, + const char *schemaName, const char *relName) +{ + int src_fd; + int dst_fd; + struct stat statbuf; + ssize_t src_filesize; + ssize_t totalBytesRead; + ssize_t bytesRead; + BlockNumber blkno; + PGAlignedBlock buf; + + if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + + if (fstat(src_fd, &statbuf) != 0) + pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + + if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + pg_file_create_mode)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s", + schemaName, relName, tofile, strerror(errno)); + + /* Save old file size */ + src_filesize = statbuf.st_size; + totalBytesRead = 0; + blkno = 0; + + while (totalBytesRead < src_filesize) + { + errno = 0; + if ((bytesRead = read(src_fd, buf.data, BLCKSZ)) != BLCKSZ) + { + if (bytesRead < 0) + pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + else + pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"", + schemaName, relName, fromfile); + } + + totalBytesRead += BLCKSZ; + PageSetPageSizeAndVersion((Page) buf.data, + PageGetPageSize((Page) buf.data), + PG_PAGE_LAYOUT_VERSION); + + /* Set new checksum for page, if enabled */ + if (new_cluster.controldata.data_checksum_version != 0) + ((PageHeader) buf.data)->pd_checksum = + pg_checksum_page(buf.data, blkno); + + /* + * We dealing here only with FSM and VM pages. 
+ */ + if (((PageHeader) buf.data)->pd_lower != SizeOfPageHeaderData || + ((PageHeader) buf.data)->pd_upper != BLCKSZ) + pg_fatal("error while copying relation \"%s.%s\": unknown page format found in file \"%s\"", + schemaName, relName, fromfile); + + errno = 0; + if (write(dst_fd, buf.data, BLCKSZ) != BLCKSZ) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s", + schemaName, relName, tofile, strerror(errno)); + } + + blkno++; + } + + /* Clean up */ + close(dst_fd); + close(src_fd); +} + void check_file_clone(void) { diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build index 3e8a08e062..12243fd4f3 100644 --- a/src/bin/pg_upgrade/meson.build +++ b/src/bin/pg_upgrade/meson.build @@ -12,6 +12,7 @@ pg_upgrade_sources = files( 'parallel.c', 'pg_upgrade.c', 'relfilenumber.c', + 'segresize.c', 'server.c', 'tablespace.c', 'util.c', diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index c4ab30c773..f36e769f0a 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -44,6 +44,9 @@ #include #endif +#include "access/multixact.h" +#include "access/transam.h" +#include "access/xlog_internal.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -54,7 +57,7 @@ static void set_locale_and_encoding(void); static void prepare_new_cluster(void); static void prepare_new_globals(void); -static void create_new_objects(void); +static void create_new_objects(bool is_wraparound); static void copy_xact_xlog_xid(void); static void set_frozenxids(bool minmxid_only); static void make_outputdirs(char *pgdata); @@ -82,6 +85,7 @@ main(int argc, char **argv) { char *deletion_script_file_name = NULL; bool live_check = false; + bool is_wraparound = false; /* * pg_upgrade doesn't currently use common/logging.c, but initialize it @@ -127,7 
+131,7 @@ main(int argc, char **argv) check_cluster_compatibility(live_check); - check_and_dump_old_cluster(live_check); + check_and_dump_old_cluster(live_check, &is_wraparound); /* -- NEW -- */ @@ -160,7 +164,7 @@ main(int argc, char **argv) prepare_new_globals(); - create_new_objects(); + create_new_objects(is_wraparound); stop_postmaster(false); @@ -510,7 +514,7 @@ prepare_new_globals(void) static void -create_new_objects(void) +create_new_objects(bool is_wraparound) { int dbnum; @@ -602,11 +606,23 @@ create_new_objects(void) check_ok(); /* - * We don't have minmxids for databases or relations in pre-9.3 clusters, - * so set those after we have restored the schema. + * Refix datfrozenxid and datminmxid */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902) set_frozenxids(true); + else if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + { + /* + * During upgrade from 32-bit to 64-bit xids save relfrozenxids if + * there was no wraparound in old cluster. Otherwise, reset them to + * FirstNormalTransactionId value. + */ + if (is_wraparound) + set_frozenxids(false); + else + set_frozenxids(true); + } /* update new_cluster info now that we have objects in the databases */ get_db_rel_and_slot_infos(&new_cluster, false); @@ -660,14 +676,37 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) static void copy_xact_xlog_xid(void) { - /* - * Copy old commit logs to new data dir. pg_clog has been renamed to - * pg_xact in post-10 clusters. - */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact"); + TransactionId next_xid; + +#define GetClogDirName(cluster) \ + GET_MAJOR_VERSION(cluster.major_version) <= 906 ? 
"pg_clog" : "pg_xact" + + /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */ + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + next_xid = ((TransactionId) 1 << 32); + else + next_xid = old_cluster.controldata.chkpnt_nxtxid; + + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + { + /* Convert commit logs and copy to the new data dir */ + prep_status("Transforming commit log segments"); + convert_xact(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)), + psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster))); + check_ok(); + } + else + { + /* + * Copy old commit logs to new data dir. pg_clog has been renamed to + * pg_xact in post-10 clusters. + */ + prep_status("Copying commit log segments"); + copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster)); + check_ok(); + } prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, @@ -681,19 +720,20 @@ copy_xact_xlog_xid(void) prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -x %llu \"%s\"", - new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + new_cluster.bindir, (unsigned long long) next_xid, new_cluster.pgdata); +#ifdef NOT_USED exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); +#endif /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -c %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + (unsigned long long) next_xid, + (unsigned long long) 
next_xid, new_cluster.pgdata); check_ok(); @@ -706,8 +746,48 @@ copy_xact_xlog_xid(void) if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER && new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) { - copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); - copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + uint64 oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff; + + if (old_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + { + copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); + copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + } + else + { + MultiXactOffset oldest_mxoff; + + remove_new_subdir("pg_multixact/offsets", false); + oldest_mxoff = convert_multixact_offsets("pg_multixact/offsets", "pg_multixact/offsets"); + + remove_new_subdir("pg_multixact/members", false); + convert_multixact_members("pg_multixact/members", "pg_multixact/members", oldest_mxoff); + + /* + * Handle wraparound if we're upgrading from 32 bit postgres. + * Invalid 0 mxids/offsets are skipped, so 1 becomes 2^32. 
+ */ + if (oldest_mxoff) + { + if (next_mxid < oldest_mxid) + next_mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + + if (next_mxoff < oldest_mxoff) + next_mxoff += ((MultiXactOffset) 1 << 32) - 1; + + /* Offsets and members were rewritten, oldest_mxoff = 1 */ + next_mxoff -= oldest_mxoff - 1; + oldest_mxoff = 1; + + /* + * Save converted next_mxid for possible usage in + * set_frozenxids() + */ + old_cluster.controldata.chkpnt_nxtmulti = next_mxid; + } + } prep_status("Setting next multixact ID and offset for new cluster"); @@ -718,9 +798,9 @@ copy_xact_xlog_xid(void) exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -O %llu -m %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti, - (unsigned long long) old_cluster.controldata.chkpnt_oldstMulti, + (unsigned long long) next_mxoff, + (unsigned long long) next_mxid, + (unsigned long long) oldest_mxid, new_cluster.pgdata); check_ok(); } @@ -794,6 +874,8 @@ set_frozenxids(bool minmxid_only) int ntups; int i_datname; int i_datallowconn; + TransactionId frozen_xid; + MultiXactId minmxid; if (!minmxid_only) prep_status("Setting frozenxid and minmxid counters in new cluster"); @@ -802,18 +884,26 @@ set_frozenxids(bool minmxid_only) conn_template1 = connectToServer(&new_cluster, "template1"); + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + frozen_xid = FirstNormalTransactionId; + else + frozen_xid = old_cluster.controldata.chkpnt_nxtxid; + + minmxid = old_cluster.controldata.chkpnt_nxtmulti; + if (!minmxid_only) /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datfrozenxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_database.datminmxid */ 
PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datminmxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); /* get database names */ dbres = executeQueryOrDie(conn_template1, @@ -853,7 +943,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, @@ -864,7 +954,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); PQfinish(conn); /* Reset datallowconn flag */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index a710f325de..ad09f67710 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -115,6 +115,11 @@ extern char *output_files[]; */ #define MULTIXACT_FORMATCHANGE_CAT_VER 201301231 +/* + * xid format changed from 32-bit to 64-bit. 
+ */ +#define XID_FORMATCHANGE_CAT_VER 999999999 + /* * large object chunk size added to pg_controldata, * commit 5f93c37805e7485488480916b4585e098d3cc883 @@ -225,13 +230,13 @@ typedef struct uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; - uint32 chkpnt_nxtxid; - uint32 chkpnt_nxtepoch; + uint64 chkpnt_nxtxid; + uint32 chkpnt_nxtepoch; /* for 32bit xids only */ uint32 chkpnt_nxtoid; - uint32 chkpnt_nxtmulti; - uint32 chkpnt_nxtmxoff; - uint32 chkpnt_oldstMulti; - uint32 chkpnt_oldstxid; + uint64 chkpnt_nxtmulti; + uint64 chkpnt_nxtmxoff; + uint64 chkpnt_oldstMulti; + uint64 chkpnt_oldstxid; uint32 align; uint32 blocksz; uint32 largesz; @@ -361,7 +366,7 @@ extern OSInfo os_info; /* check.c */ void output_check_banner(bool live_check); -void check_and_dump_old_cluster(bool live_check); +void check_and_dump_old_cluster(bool live_check, bool *is_wraparound); void check_new_cluster(void); void report_clusters_compatible(void); void issue_warnings_and_set_wal_level(void); @@ -402,7 +407,10 @@ void copyFile(const char *src, const char *dst, void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName); void rewriteVisibilityMap(const char *fromfile, const char *tofile, - const char *schemaName, const char *relName); + const char *schemaName, const char *relName, + bool update_version); +void updateSegmentPagesVersion(const char *fromfile, const char *tofile, + const char *schemaName, const char *relName); void check_file_clone(void); void check_hard_link(void); @@ -485,6 +493,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void report_extension_updates(ClusterInfo *cluster); +void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode); + /* parallel.c */ void parallel_exec_prog(const char 
*log_file, const char *opt_log_file, const char *fmt,...) pg_attribute_printf(3, 4); @@ -492,3 +504,9 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr char *old_pgdata, char *new_pgdata, char *old_tablespace); bool reap_child(bool wait_for_child); + +/* segresize.c */ +void convert_xact(const char *olddir, const char *newdir); +MultiXactOffset convert_multixact_offsets(const char *olddir, const char *newdir); +void convert_multixact_members(const char *olddir, const char *newdir, + MultiXactOffset oldest_mxoff); diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 34bc9c1504..a0e10047ed 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -16,7 +16,8 @@ #include "pg_upgrade.h" static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); -static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit); +static void transfer_relfile(FileNameMap *map, const char *type_suffix, + bool vm_must_add_frozenbit, bool update_version); /* @@ -136,6 +137,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) { int mapnum; bool vm_must_add_frozenbit = false; + bool update_version = false; /* * Do we need to rewrite visibilitymap? @@ -144,19 +146,28 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER) vm_must_add_frozenbit = true; + /* + * Need to update FSM and VM pages version to avoid lazy conversion. 
+ */ + if (old_cluster.controldata.cat_ver < new_cluster.controldata.cat_ver) + update_version = true; + for (mapnum = 0; mapnum < size; mapnum++) { if (old_tablespace == NULL || strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) { /* transfer primary file */ - transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit); + transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit, + update_version); /* * Copy/link any fsm and vm files, if they exist */ - transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); - transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit); + transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit, + update_version); + transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit, + update_version); } } } @@ -170,7 +181,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) * mode. */ static void -transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit) +transfer_relfile(FileNameMap *map, const char *type_suffix, + bool vm_must_add_frozenbit, bool update_version) { char old_file[MAXPGPATH]; char new_file[MAXPGPATH]; @@ -235,7 +247,17 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro /* Need to rewrite visibility map format */ pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"", old_file, new_file); - rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname); + rewriteVisibilityMap(old_file, new_file, map->nspname, + map->relname, update_version); + } + else if ((update_version && strcmp(type_suffix, "_vm") == 0) || + (update_version && strcmp(type_suffix, "_fsm") == 0)) + { + /* Need to update pages version */ + pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"", + old_file, new_file); + updateSegmentPagesVersion(old_file, new_file, map->nspname, + map->relname); } else switch (user_opts.transfer_mode) diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c new file mode 100644 index 
0000000000..1b14637c81 --- /dev/null +++ b/src/bin/pg_upgrade/segresize.c @@ -0,0 +1,586 @@ +/*------------------------------------------------------------------------- + * + * segresize.c + * SLRU segment resize utility from 32bit to 64bit xid format + * + * Copyright (c) 2015-2022, Postgres Professional + * + * IDENTIFICATION + * src/bin/pg_upgrade/segresize.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "pg_upgrade.h" +#include "access/multixact.h" +#include "access/transam.h" + +#define SLRU_PAGES_PER_SEGMENT_OLD 32 +#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */ + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) + +typedef uint32 MultiXactId32; +typedef uint32 MultiXactOffset32; +typedef uint32 TransactionId32; + +#define MaxTransactionId32 ((TransactionId32) 0xFFFFFFFF) +#define MaxMultiXactId32 ((MultiXactId32) 0xFFFFFFFF) +#define MaxMultiXactOffset32 ((MultiXactOffset32) 0xFFFFFFFF) + +#define MULTIXACT_OFFSETS_PER_PAGE_OLD (BLCKSZ / sizeof(MultiXactOffset32)) +#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset)) + +#define MXACT_MEMBER_FLAGS_PER_BYTE 1 + +/* 64xid */ +#define MULTIXACT_FLAGBYTES_PER_GROUP 8 +#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ + (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) +/* size in bytes of a complete group */ +#define MULTIXACT_MEMBERGROUP_SIZE \ + (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP) +#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE) + +/* 32xid */ +#define MULTIXACT_FLAGBYTES_PER_GROUP_OLD 4 +#define MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD \ + (MULTIXACT_FLAGBYTES_PER_GROUP_OLD * MXACT_MEMBER_FLAGS_PER_BYTE) +/* size in bytes of a complete group */ +#define MULTIXACT_MEMBERGROUP_SIZE_OLD \ + (sizeof(TransactionId32) * 
MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD + MULTIXACT_FLAGBYTES_PER_GROUP_OLD) +#define MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE_OLD) +#define MULTIXACT_MEMBERS_PER_PAGE_OLD \ + (MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD) + +typedef struct SLRUSegmentState +{ + const char *dir; + FILE *file; + int64 segno; + int64 pageno; + bool is_empty_segment; +} SLRUSegmentState; + +static char * +slru_filename_old(const char *path, int64 segno) +{ + Assert(segno <= PG_INT32_MAX); + return psprintf("%s/%04X", path, (int) segno); +} + +static char * +slru_filename_new(const char *path, int64 segno) +{ + return psprintf("%s/%012llX", path, (long long) segno); +} + +static inline FILE * +open_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno), + char *mode, char *fatal_msg) +{ + char *filename = filename_fn(state->dir, state->segno); + FILE *fd = fopen(filename, mode); + + if (!fd) + pg_fatal(fatal_msg, filename); + + pfree(filename); + + return fd; +} + +static void +close_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno)) +{ + if (state->file != NULL) + { + if (fclose(state->file) != 0) + pg_fatal("could not close file \"%s\": %m", + filename_fn(state->dir, state->segno)); + state->file = NULL; + } +} + +static inline int +read_file(SLRUSegmentState *state, void *buf) +{ + size_t n = fread(buf, sizeof(char), BLCKSZ, state->file); + + if (n != 0) + return n; + + if (ferror(state->file)) + pg_fatal("could not read file \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + if (!feof(state->file)) + pg_fatal("unknown file read state \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + close_file(state, slru_filename_old); + + return 0; +} + +static int +read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty) +{ + int n; + + /* Open next segment file, if needed */ + if (!state->file) + { + state->file = 
open_file(state, slru_filename_old, "rb", + "could not open source file \"%s\": %m"); + + /* Set position to the needed page */ + if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET)) + close_file(state, slru_filename_old); + + /* + * Skip segment conversion if segment file doesn't exist. + * First segment file should exist in any case. + */ + if (state->segno != 0) + state->is_empty_segment = true; + } + + if (state->file) + { + /* Segment file does exist, read page from it */ + state->is_empty_segment = false; + + /* Try to read BLCKSZ bytes */ + n = read_file(state, buf); + *is_empty = (n == 0); + + /* Zeroing buf tail if needed */ + if (n) + memset((char *) buf + n, 0, BLCKSZ - n); + } + else + { + n = state->is_empty_segment ? + BLCKSZ : /* Skip empty block at the end of segment */ + 0; /* We reached the last segment */ + *is_empty = true; + + if (n) + memset((char *) buf, 0, BLCKSZ); + } + + state->pageno++; + + if (state->pageno >= SLRU_PAGES_PER_SEGMENT_OLD) + { + /* Start new segment */ + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_old); + } + + return n; +} + +static void +write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty) +{ + /* + * Create a new segment file if we haven't done so yet. Creation is postponed + * until the first non-empty page is found. This helps avoid creating + * completely empty segments. 
+ */ + if (!state->file && !is_empty) + { + state->file = open_file(state, slru_filename_new, "wb", + "could not open target file \"%s\": %m"); + + /* Write zeroes to the previously skipped prefix */ + if (state->pageno > 0) + { + char zerobuf[BLCKSZ] = {0}; + + for (int64 i = 0; i < state->pageno; i++) + { + if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + } + + } + + /* Write page to the new segment (if it was created) */ + if (state->file) + { + if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + + state->pageno++; + + /* + * Did we reach the maximum page number? Then close segment file and + * create a new one on the next iteration + */ + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_new); + } +} + +/* + * Convert pg_xact segments. 
+ */ +void +convert_xact(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid; + TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid; + TransactionId xid; + int64 pageno; + char buf[BLCKSZ] = {0}; + + oldseg.dir = old_subdir; + newseg.dir = new_subdir; + + pageno = oldest_xid / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + if (next_xid < oldest_xid) + next_xid += (TransactionId) 1 << 32; /* wraparound */ + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionId32 / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD) + { + pageno = (MaxTransactionId32 + 1) / CLOG_XACTS_PER_PAGE; + + Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(!oldseg.pageno); + Assert(!oldseg.file); + oldseg.segno = 0; + + Assert(newseg.segno == pageno / SLRU_PAGES_PER_SEGMENT); + Assert(!newseg.pageno); + Assert(!newseg.file); + newseg.segno = 0; + } + + read_old_segment_page(&oldseg, buf, &is_empty); + write_new_segment_page(&newseg, buf, is_empty); + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); +} + +static inline SLRUSegmentState +create_slru_segment_state(MultiXactId mxid, + int offsets_per_page, + int pages_per_segment, + char *dir) +{ + SLRUSegmentState seg = {0}; + int64 n; + + n = mxid / offsets_per_page; + seg.pageno = n % pages_per_segment; + seg.segno = n / pages_per_segment; + seg.dir = dir; + + return seg; +} + +/* + * 
Convert pg_multixact/offsets segments and return oldest mxid offset. + */ +MultiXactOffset +convert_multixact_offsets(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg, + newseg; + MultiXactOffset32 oldbuf[MULTIXACT_OFFSETS_PER_PAGE_OLD] = {0}; + MultiXactOffset newbuf[MULTIXACT_OFFSETS_PER_PAGE] = {0}; + MultiXactOffset32 oldest_mxoff = 0; + MultiXactId oldest_mxid, + next_mxid, + mxid; + uint64 old_entry, + new_entry; + bool oldest_mxoff_known = false; + + StaticAssertStmt((sizeof(oldbuf) == BLCKSZ && sizeof(newbuf) == BLCKSZ), + "buf should be BLCKSZ"); + + oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + + oldseg = create_slru_segment_state(oldest_mxid, + MULTIXACT_OFFSETS_PER_PAGE_OLD, + SLRU_PAGES_PER_SEGMENT_OLD, + psprintf("%s/%s", old_cluster.pgdata, + old_subdir)); + + newseg = create_slru_segment_state(oldest_mxid, + MULTIXACT_OFFSETS_PER_PAGE, + SLRU_PAGES_PER_SEGMENT, + psprintf("%s/%s", new_cluster.pgdata, + new_subdir)); + + old_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE_OLD; + new_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE; + + next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + if (next_mxid < oldest_mxid) + next_mxid += (MultiXactId) 1 << 32; /* wraparound */ + + prep_status("Converting old %s to new format", old_subdir); + + /* Copy mxid offsets reading only needed segment pages */ + for (mxid = oldest_mxid; mxid < next_mxid; old_entry = 0) + { + int oldlen; + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxMultiXactId32 / MULTIXACT_OFFSETS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD) /* 0xFFFF */ + oldseg.segno = 0; + + oldlen = read_old_segment_page(&oldseg, oldbuf, &is_empty); + + if (oldlen == 0 || is_empty) + pg_fatal("cannot read page %lld from segment: %s\n", + (long long) oldseg.pageno, + slru_filename_old(oldseg.dir, oldseg.segno)); + + /* Save oldest mxid offset */ + if (!oldest_mxoff_known) + { + oldest_mxoff = oldbuf[old_entry]; + oldest_mxoff_known 
= true; + } + + /* Skip wrapped-around invalid MultiXactIds */ + if (mxid == (MultiXactId) 1 << 32) + { + Assert(oldseg.segno == 0); + Assert(oldseg.pageno == 1); + Assert(old_entry == 0); + mxid += FirstMultiXactId; + old_entry = FirstMultiXactId; + } + + /* Copy entries to the new page */ + for (; mxid < next_mxid && old_entry < MULTIXACT_OFFSETS_PER_PAGE_OLD; + mxid++, old_entry++) + { + MultiXactOffset mxoff = oldbuf[old_entry]; + + /* Handle possible offset wraparound (1 becomes 2^32) */ + if (mxoff < oldest_mxoff) + mxoff += ((MultiXactOffset) 1 << 32) - 1; + + /* Subtract oldest_mxoff, so new offsets will start from 1 */ + newbuf[new_entry++] = mxoff - oldest_mxoff + 1; + + if (new_entry >= MULTIXACT_OFFSETS_PER_PAGE) + { + /* Write new page */ + write_new_segment_page(&newseg, newbuf, false); + new_entry = 0; + } + } + } + + /* Write the last incomplete page */ + if (new_entry > 0 || oldest_mxid == next_mxid) + { + memset(&newbuf[new_entry], 0, + sizeof(newbuf[0]) * (MULTIXACT_OFFSETS_PER_PAGE - new_entry)); + write_new_segment_page(&newseg, newbuf, false); + } + + /* Use next_mxoff as oldest_mxoff, if oldest_mxid == next_mxid */ + if (!oldest_mxoff_known) + { + Assert(oldest_mxid == next_mxid); + oldest_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff; + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + check_ok(); + + return oldest_mxoff; +} + +/* + * Convert pg_multixact/members segments, offsets will start from 1. 
+ */ +void +convert_multixact_members(const char *old_subdir, const char *new_subdir, + MultiXactOffset oldest_mxoff) +{ + MultiXactOffset next_mxoff, + mxoff; + SLRUSegmentState oldseg, + newseg; + char oldbuf[BLCKSZ] = {0}, + newbuf[BLCKSZ] = {0}; + int newgroup, + newmember; + char *newflag = newbuf; + TransactionId *newxid; + int oldidx, + newidx; + + prep_status("Converting old %s to new format", old_subdir); + + next_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff; + if (next_mxoff < oldest_mxoff) + next_mxoff += (MultiXactOffset) 1 << 32; + + newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP); + + /* Initialize old starting position */ + oldidx = oldest_mxoff % MULTIXACT_MEMBERS_PER_PAGE_OLD; + oldseg = create_slru_segment_state(oldest_mxoff, + MULTIXACT_MEMBERS_PER_PAGE_OLD, + SLRU_PAGES_PER_SEGMENT_OLD, + psprintf("%s/%s", old_cluster.pgdata, + old_subdir)); + + /* Initialize empty new segment */ + newseg = create_slru_segment_state(0, 1, 1, + psprintf("%s/%s", new_cluster.pgdata, + new_subdir)); + + /* Initialize new starting position (skip invalid zero offset) */ + newgroup = 0; + newidx = 1; + newmember = 1; + newflag++; + newxid++; + + /* Iterate through the original directory */ + for (mxoff = oldest_mxoff; mxoff < next_mxoff; oldidx = 0) + { + bool old_is_empty; + int oldlen; + int ngroups; + int oldgroup; + int oldmember; + + oldlen = read_old_segment_page(&oldseg, oldbuf, &old_is_empty); + + if (oldlen == 0 || old_is_empty) + pg_fatal("cannot read page %lld from segment: %s\n", + (long long) oldseg.pageno, + slru_filename_old(oldseg.dir, oldseg.segno)); + + ngroups = oldlen / MULTIXACT_MEMBERGROUP_SIZE_OLD; + + /* Iterate through old member groups */ + for (oldgroup = oldidx / MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD, + oldmember = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD; + oldgroup < ngroups && mxoff < next_mxoff; + oldgroup++, oldmember = 0) + { + char *oldflag = (char *) 
oldbuf + oldgroup * MULTIXACT_MEMBERGROUP_SIZE_OLD; + TransactionId32 *oldxid = (TransactionId32 *) (oldflag + MULTIXACT_FLAGBYTES_PER_GROUP_OLD); + + oldxid += oldmember; + oldflag += oldmember; + + /* Iterate through old members */ + for (int i = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD; + i < MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD && mxoff < next_mxoff; + i++) + { + /* Copy member's xid and flags to the new page */ + *newflag++ = *oldflag++; + *newxid++ = (TransactionId) * oldxid++; + + newidx++; + oldidx++; + mxoff++; + + if (++newmember >= MULTIXACT_MEMBERS_PER_MEMBERGROUP) + { + /* Start next member group */ + newmember = 0; + + if (++newgroup >= MULTIXACT_MEMBERGROUPS_PER_PAGE) + { + /* Write current page and start new */ + newgroup = 0; + newidx = 0; + write_new_segment_page(&newseg, newbuf, false); + memset(newbuf, 0, BLCKSZ); + } + + newflag = (char *) newbuf + newgroup * MULTIXACT_MEMBERGROUP_SIZE; + newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP); + } + + /* Handle offset wraparound */ + if (mxoff > MaxMultiXactOffset32) + { + Assert(mxoff == (MultiXactOffset) 1 << 32); + Assert(oldseg.segno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldseg.pageno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD % SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldmember == MaxMultiXactOffset32 % MULTIXACT_MEMBERS_PER_PAGE_OLD); + + /* Switch to segment 0000 */ + close_file(&oldseg, slru_filename_old); + oldseg.segno = 0; + oldseg.pageno = 0; + + oldidx = 1; /* skip invalid zero mxid offset */ + } + } + } + } + + /* Write last page, unless it is empty */ + if (newflag > (char *) newbuf || oldest_mxoff == next_mxoff) + write_new_segment_page(&newseg, newbuf, false); + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + check_ok(); +} diff 
--git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 997963082b..7a64c6d302 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -260,6 +260,14 @@ if (defined($ENV{oldinstall})) } } +$oldnode->safe_psql('regression', + "CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4)); + INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000);"); +my $relfrozenxid = $oldnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); +my $relminmxid = $oldnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + # Take a dump before performing the upgrade as a base comparison. Note # that we need to use pg_dumpall from the new node here. my @dump_command = ( @@ -399,6 +407,16 @@ ok( !-d $newnode->data_dir . "/pg_upgrade_output.d", $newnode->start; +my $relfrozenxid_new = $newnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); + +is($relfrozenxid_new, $relfrozenxid, 'old and new relfrozenxid match after pg_upgrade'); + +my $relminmxid_new = $newnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + +is($relminmxid_new, $relminmxid, 'old and new relminmxid match after pg_upgrade'); + # Check if there are any logs coming from pg_upgrade, that would only be # retained on failure. my $log_path = $newnode->data_dir . 
"/pg_upgrade_output.d"; diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 403a6d7cfa..2bf9d5dac4 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,6 +9,7 @@ #include "postgres_fe.h" +#include "access/transam.h" #include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" @@ -242,19 +243,21 @@ old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) } /* - * old_9_6_invalidate_hash_indexes() - * 9.6 -> 10 - * Hash index binary format has changed from 9.6->10.0 + * invalidate_indexes() + * Invalidates all indexes satisfying given predicate. */ -void -old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +static void +invalidate_indexes(ClusterInfo *cluster, bool check_mode, + const char *name, const char *pred) { int dbnum; FILE *script = NULL; bool found = false; - char *output_path = "reindex_hash.sql"; + char output_path[MAXPGPATH]; + + snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name); - prep_status("Checking for hash indexes"); + prep_status("Checking for %s indexes", name); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) { @@ -267,9 +270,16 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; PGconn *conn = connectToServer(cluster, active_db->db_name); - /* find hash indexes */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname " + + /* + * Find indexes satisfying predicate. + * + * System indexes (with oids < FirstNormalObjectId) are excluded from + * the search as they are recreated in the new cluster during initdb. 
+ */ + res = executeQueryOrDie( + conn, + "SELECT n.nspname, c.relname, i.indexrelid " "FROM pg_catalog.pg_class c, " " pg_catalog.pg_index i, " " pg_catalog.pg_am a, " @@ -277,8 +287,11 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'" - ); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s " + "ORDER BY i.indexrelid ASC", + FirstNormalObjectId, + pred); ntups = PQntuples(res); i_nspname = PQfnumber(res, "nspname"); @@ -311,8 +324,14 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) if (!check_mode && db_used) { - /* mark hash indexes as invalid */ - PQclear(executeQueryOrDie(conn, + /* + * Mark indexes satisfying predicate as invalid. + * + * System indexes (with oids < FirstNormalObjectId) are excluded + * from the search (see above). + */ + PQclear(executeQueryOrDie( + conn, "UPDATE pg_catalog.pg_index i " "SET indisvalid = false " "FROM pg_catalog.pg_class c, " @@ -321,7 +340,10 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'")); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s", + FirstNormalObjectId, + pred)); } PQfinish(conn); @@ -335,24 +357,37 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) report_status(PG_WARNING, "warning"); if (check_mode) pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. After upgrading, you will be given\n" - "REINDEX instructions."); + "REINDEX instructions.", + name); else pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. 
These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. The file\n" " %s\n" "when executed by psql by the database superuser will recreate all invalid\n" "indexes; until then, none of these indexes will be used.", + name, output_path); } else check_ok(); } +/* + * old_9_6_invalidate_hash_indexes() + * 9.6 -> 10 + * Hash index binary format has changed from 9.6->10.0 + */ +void +old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'"); +} + /* * old_11_check_for_sql_identifier_data_type_usage() * 11 -> 12 @@ -459,3 +494,36 @@ report_extension_updates(ClusterInfo *cluster) else check_ok(); } + +/* + * invalidate_spgist_indexes() + * 32bit -> 64bit + * SP-GIST contains xids. + */ +void +invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'"); +} + +/* + * invalidate_gin_indexes() + * 32bit -> 64bit + * Gin indexes contain xids in deleted pages. 
+ */ +void +invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'"); +} + +/* + * invalidate_external_indexes() + * Generate script to REINDEX non standard external indexes (like RUM etc) + */ +void +invalidate_external_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "external", + "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')"); +} diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 319c44c2b2..dea4dac02b 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -1050,7 +1050,7 @@ main(int argc, char **argv) config.filter_by_fpw = true; break; case 'x': - if (sscanf(optarg, "%u", &config.filter_by_xid) != 1) + if (sscanf(optarg, "%" INT64_MODIFIER "u", &config.filter_by_xid) != 1) { pg_log_error("invalid transaction ID specification: \"%s\"", optarg); diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index 029a0d0521..8e6c7b5b09 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -47,7 +47,8 @@ BRIN CommitTs ReplicationOrigin Generic -LogicalMessage$/, +LogicalMessage +Heap3$/, 'rmgr list'); diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h index c59790ec5a..4b655baf73 100644 --- a/src/include/access/ginblock.h +++ b/src/include/access/ginblock.h @@ -133,8 +133,15 @@ typedef struct GinMetaPageData * We should reclaim deleted page only once every transaction started before * its deletion is over. */ -#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageGetDeleteXid(page) ( \ + (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? 
\ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \ + InvalidTransactionId ) +#define GinPageSetDeleteXid(page, xid) \ + do { \ + ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \ + } while (false) extern bool GinPageIsRecyclable(Page page); /* diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 0235716c06..2996cdd486 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -223,7 +223,7 @@ GistPageGetDeleteXid(Page page) return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; } else - return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + return FullTransactionIdFromXid(FirstNormalTransactionId); } /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index a2d7a0ea72..d7e3ebbc81 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -76,6 +76,8 @@ typedef struct HeapScanDescData int rs_cindex; /* current tuple's index in vistuples */ int rs_ntuples; /* number of visible tuples on page */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + TransactionId rs_xmin[MaxHeapTuplesPerPage]; /* their xmins */ + TransactionId rs_xmax[MaxHeapTuplesPerPage]; /* their xmaxs */ } HeapScanDescData; typedef struct HeapScanDescData *HeapScanDesc; @@ -272,6 +274,8 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); +extern void rewrite_page_prepare_for_xid(Page page, HeapTuple tup, + bool is_toast); extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); @@ -291,21 +295,21 @@ extern TM_Result heap_lock_tuple(Relation relation, 
HeapTuple tuple, Buffer *buffer, struct TM_FailureData *tmfd); extern void heap_inplace_update(Relation relation, HeapTuple tuple); -extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_prepare_freeze_tuple(HeapTuple tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen); extern void heap_freeze_execute_prepared(Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples); -extern bool heap_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff); -extern bool heap_tuple_should_freeze(HeapTupleHeader tuple, +extern bool heap_tuple_should_freeze(HeapTuple htup, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid); -extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); +extern bool heap_tuple_needs_eventual_freeze(HeapTuple htup); extern void simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, ItemPointer tid); @@ -321,12 +325,16 @@ extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern void heap_page_prune(Relation relation, Buffer buffer, struct GlobalVisState *vistest, PruneResult *presult, - OffsetNumber *off_loc); + OffsetNumber *off_loc, + bool repairFragmentation); extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused); -extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); + OffsetNumber *nowunused, int nunused, + bool repairFragmentation, + bool is_toast); +extern void heap_get_root_tuples(Relation relation, Buffer buffer, Page page, + OffsetNumber *root_offsets); /* in heap/vacuumlazy.c */ struct VacuumParams; @@ -344,7 
+352,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer TransactionId *dead_after); extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); -extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool HeapTupleIsOnlyLocked(HeapTuple htup); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index a038450787..ba911404e7 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -59,6 +59,8 @@ #define XLOG_HEAP2_LOCK_UPDATED 0x60 #define XLOG_HEAP2_NEW_CID 0x70 +#define XLOG_HEAP3_BASE_SHIFT 0x00 + /* * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available. */ @@ -98,6 +100,7 @@ #define XLH_DELETE_CONTAINS_OLD_KEY (1<<2) #define XLH_DELETE_IS_SUPER (1<<3) #define XLH_DELETE_IS_PARTITION_MOVE (1<<4) +#define XLH_DELETE_PAGE_ON_TOAST_RELATION (1<<5) /* convenience macro for checking whether any form of old tuple was logged */ #define XLH_DELETE_CONTAINS_OLD \ @@ -240,6 +243,9 @@ typedef struct xl_heap_update * * Acquires a full cleanup lock. 
*/ +#define XLH_PRUNE_ON_TOAST_RELATION 0x01 +#define XLH_PRUNE_REPAIR_FRAGMENTATION 0x02 + typedef struct xl_heap_prune { TransactionId snapshotConflictHorizon; @@ -247,10 +253,11 @@ typedef struct xl_heap_prune uint16 ndead; bool isCatalogRel; /* to handle recovery conflict during logical * decoding on standby */ + uint8 flags; /* OFFSET NUMBERS are in the block reference 0 */ } xl_heap_prune; -#define SizeOfHeapPrune (offsetof(xl_heap_prune, isCatalogRel) + sizeof(bool)) +#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8)) /* * The vacuum page record is similar to the prune record, but can only mark @@ -342,19 +349,22 @@ typedef struct xl_heap_freeze_plan * Each such page offset number array corresponds to a single freeze plan * (REDO routine freezes corresponding heap tuples using freeze plan). */ +#define XLH_FREEZE_PAGE_ON_TOAST_RELATION 0x01 + typedef struct xl_heap_freeze_page { TransactionId snapshotConflictHorizon; uint16 nplans; bool isCatalogRel; /* to handle recovery conflict during logical * decoding on standby */ + uint8 flags; /* * In payload of blk 0 : FREEZE PLANS and OFFSET NUMBER ARRAY */ } xl_heap_freeze_page; -#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, isCatalogRel) + sizeof(bool)) +#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, flags) + sizeof(uint8)) /* * This is what we need to know about setting a visibility map bit @@ -401,7 +411,19 @@ typedef struct xl_heap_rewrite_mapping XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; -extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, +#define XLH_BASE_SHIFT_ON_TOAST_RELATION 0x01 + +/* shift the base of xids on heap page */ +typedef struct xl_heap_base_shift +{ + int64 delta; /* delta value to shift the base */ + bool multi; /* true to shift multixact base */ + uint8 flags; +} xl_heap_base_shift; + +#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, flags) + sizeof(uint8)) + +extern 
void HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple, TransactionId *snapshotConflictHorizon); extern void heap_redo(XLogReaderState *record); @@ -411,6 +433,9 @@ extern void heap_mask(char *pagedata, BlockNumber blkno); extern void heap2_redo(XLogReaderState *record); extern void heap2_desc(StringInfo buf, XLogReaderState *record); extern const char *heap2_identify(uint8 info); +extern void heap3_redo(XLogReaderState *record); +extern void heap3_desc(StringInfo buf, XLogReaderState *record); +extern const char *heap3_identify(uint8 info); extern void heap_xlog_logical_rewrite(XLogReaderState *r); extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer, diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h index 5c0a796f66..f9c781abbe 100644 --- a/src/include/access/heaptoast.h +++ b/src/include/access/heaptoast.h @@ -20,10 +20,19 @@ /* * Find the maximum size of a tuple if there are to be N tuples per page. */ +#if MAXIMUM_ALIGNOF == 8 #define MaximumBytesPerTuple(tuplesPerPage) \ MAXALIGN_DOWN((BLCKSZ - \ - MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \ / (tuplesPerPage)) +#elif MAXIMUM_ALIGNOF == 4 +#define MaximumBytesPerTuple(tuplesPerPage) \ + MAXALIGN_DOWN((BLCKSZ - \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(ToastPageSpecialData))) \ + / (tuplesPerPage)) +#else +#error "unknown arch bitness" +#endif /* * These symbols control toaster activation. If a tuple is larger than diff --git a/src/include/access/htup.h b/src/include/access/htup.h index a8f7ff5dfe..063d9ac4c4 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -54,6 +54,12 @@ typedef MinimalTupleData *MinimalTuple; * this can't be told apart from case #1 by inspection; code setting up * or destroying this representation has to know what it's doing. 
 * + * t_xmin and t_xmax are TransactionId values stored in heap tuple header. + * Normally they are calculated from ShortTransactionId-sized on-disk tuple + * xmin/xmax representation: + * t_data->t_choice.t_heap.t_xmin/t_data->t_choice.t_heap.t_xmax + * and pd_xid_base and pd_multi_base common values for all tuples on a page. + * * t_len should always be valid, except in the pointer-to-nothing case. * t_self and t_tableOid should be valid if the HeapTupleData points to * a disk buffer, or if it represents a copy of a tuple on disk. They @@ -61,10 +67,12 @@ typedef MinimalTupleData *MinimalTuple; */ typedef struct HeapTupleData { + TransactionId t_xmin; /* calculated tuple xmin */ + TransactionId t_xmax; /* calculated tuple xmax */ uint32 t_len; /* length of *t_data */ ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ -#define FIELDNO_HEAPTUPLEDATA_DATA 3 +#define FIELDNO_HEAPTUPLEDATA_DATA 5 HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; @@ -78,12 +86,11 @@ typedef HeapTupleData *HeapTuple; #define HeapTupleIsValid(tuple) PointerIsValid(tuple) /* HeapTupleHeader functions implemented in utils/time/combocid.c */ -extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); -extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); -extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, - CommandId *cmax, bool *iscombo); +extern CommandId HeapTupleGetCmin(HeapTuple tup); +extern CommandId HeapTupleGetCmax(HeapTuple tup); +extern void HeapTupleAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ -extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple); +extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple); #endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 6fd87dc108..a99bb70906 100644 --- a/src/include/access/htup_details.h +++ 
b/src/include/access/htup_details.h @@ -19,6 +19,7 @@ #include "access/tupdesc.h" #include "access/tupmacs.h" #include "storage/bufpage.h" +#include "storage/bufmgr.h" #include "varatt.h" /* @@ -121,13 +122,13 @@ typedef struct HeapTupleFields { - TransactionId t_xmin; /* inserting xact ID */ - TransactionId t_xmax; /* deleting or locking xact ID */ + ShortTransactionId t_xmin; /* inserting xact ID */ + ShortTransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting command ID, or both */ - TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; @@ -223,7 +224,7 @@ struct HeapTupleHeaderData * HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is * not a multi and the EXCL_LOCK bit is set. * - * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible + * See also HeapTupleIsOnlyLocked, which also checks for a possible * aborted updater transaction. * * Beware of multiple evaluations of the argument. @@ -299,29 +300,81 @@ struct HeapTupleHeaderData */ /* - * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid + * HeapTupleGetRawXmin returns the "raw" xmin field, which is the xid * originally used to insert the tuple. However, the tuple might actually - * be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin + * be frozen (via HeapTupleHeaderStoreXminFrozen) in which case the tuple's xmin * is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ -#define HeapTupleHeaderGetRawXmin(tup) \ +#define HeapTupleGetRawXmin(tup) \ ( \ - (tup)->t_choice.t_heap.t_xmin \ + ((tup)->t_xmin) \ ) -#define HeapTupleHeaderGetXmin(tup) \ +#define HeapTupleGetXmin(tup) \ ( \ - HeapTupleHeaderXminFrozen(tup) ? 
\ - FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \ + HeapTupleHeaderXminFrozen((tup)->t_data) ? \ + FrozenTransactionId : HeapTupleGetRawXmin(tup) \ ) -#define HeapTupleHeaderSetXmin(tup, xid) \ +#define HeapTupleSetXmin(tup, xid) \ ( \ - (tup)->t_choice.t_heap.t_xmin = (xid) \ + ((tup)->t_xmin = (xid)) \ ) +/* + * Functions for accessing "double xmax". On pg_upgraded instances, it might + * happen that we can't fit new special area to the page. But we still + * might need to write xmax of tuples for updates and deletes. The trick is + * that we actually don't need xmin field. After pg_upgrade (which implies + * restart) no insertions went to this page yet (otherwise special area could + * fit). So, if tuple is visible (otherwise it would be deleted), then it's + * visible for everybody. Thus, t_xmin isn't needed. Therefore, we can use + * both t_xmin and t_xmax to store 64-bit xmax. + * + * See heap_convert.c for details. + */ +static inline TransactionId +HeapTupleHeaderGetDoubleXmax(HeapTupleHeader htup) +{ + TransactionId xmax; + + xmax = htup->t_choice.t_heap.t_xmin; + xmax <<= 32; + xmax += htup->t_choice.t_heap.t_xmax; + + return xmax; +} + +static inline void +HeapTupleHeaderSetDoubleXmax(HeapTupleHeader htup, TransactionId xid) +{ + htup->t_choice.t_heap.t_xmax = xid & 0xFFFFFFFF; + htup->t_choice.t_heap.t_xmin = (xid >> 32) & 0xFFFFFFFF; +} + +static inline void +HeapTupleHeaderStoreXmin(Page page, HeapTuple htup, bool is_toast) +{ + TransactionId base; + + Assert(!HeapPageIsDoubleXmax(page)); + + base = is_toast ? 
ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial((page))->pd_xid_base; + htup->t_data->t_choice.t_heap.t_xmin = + NormalTransactionIdToShort(base, htup->t_xmin); +} + +static inline void +HeapTupleAndHeaderSetXmin(Page page, HeapTuple tup, TransactionId xid, + bool is_toast) +{ + HeapTupleSetXmin(tup, xid); + HeapTupleHeaderStoreXmin(page, tup, is_toast); +} + #define HeapTupleHeaderXminCommitted(tup) \ ( \ ((tup)->t_infomask & HEAP_XMIN_COMMITTED) != 0 \ @@ -338,6 +391,12 @@ struct HeapTupleHeaderData ((tup)->t_infomask & (HEAP_XMIN_FROZEN)) == HEAP_XMIN_FROZEN \ ) +#define HeapTupleHeaderStoreXminFrozen(tup) \ +( \ + AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ + ((tup)->t_infomask |= HEAP_XMIN_FROZEN) \ +) + #define HeapTupleHeaderSetXminCommitted(tup) \ ( \ AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ @@ -363,30 +422,80 @@ struct HeapTupleHeaderData * to resolve the MultiXactId if necessary. This might involve multixact I/O, * so it should only be used if absolutely necessary. */ -#define HeapTupleHeaderGetUpdateXid(tup) \ +#define HeapTupleGetUpdateXidAny(tup) \ ( \ - (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \ - ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \ - !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ + (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \ + !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ HeapTupleGetUpdateXid(tup) \ : \ - HeapTupleHeaderGetRawXmax(tup) \ + HeapTupleGetRawXmax(tup) \ ) -#define HeapTupleHeaderGetRawXmax(tup) \ -( \ - (tup)->t_choice.t_heap.t_xmax \ -) +static inline TransactionId +HeapTupleHeaderGetRawXmax(Page page, HeapTupleHeader htup) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return HeapTupleHeaderGetDoubleXmax(htup); -#define HeapTupleHeaderSetXmax(tup, xid) \ + base = (htup->t_infomask & HEAP_XMAX_IS_MULTI) ? 
+ HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + return ShortTransactionIdToNormal(base, + htup->t_choice.t_heap.t_xmax); +} + +#define HeapTupleGetRawXmax(tup) \ ( \ - (tup)->t_choice.t_heap.t_xmax = (xid) \ + ((tup)->t_xmax) \ ) +#define HeapTupleSetXmax(tup, xid) \ +do { \ + (tup)->t_xmax = (xid); \ +} while (0) + +/* + * Set xid as xmax for HeapTupleHeader. + */ +static inline void +HeapTupleHeaderStoreXmax(Page page, HeapTuple htup, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + { + HeapTupleHeaderSetDoubleXmax(htup->t_data, htup->t_xmax); + return; + } + + if (is_toast) + base = ToastPageGetSpecial(page)->pd_xid_base; + else + base = (htup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0 ? + HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + htup->t_data->t_choice.t_heap.t_xmax = + NormalTransactionIdToShort(base, htup->t_xmax); +} + +/* + * Set xid as xmax for HeadTuple and HeapTupleHeader. + */ +static inline void +HeapTupleAndHeaderSetXmax(Page page, HeapTuple htup, TransactionId xid, + bool is_toast) +{ + HeapTupleSetXmax(htup, xid); + HeapTupleHeaderStoreXmax(page, htup, is_toast); +} + /* * HeapTupleHeaderGetRawCommandId will give you what's in the header whether - * it is useful or not. Most code should use HeapTupleHeaderGetCmin or - * HeapTupleHeaderGetCmax instead, but note that those Assert that you can + * it is useful or not. Most code should use HeapTupleGetCmin or + * HeapTupleGetCmax instead, but note that those Assert that you can * get a legitimate result, ie you are in the originating transaction! 
*/ #define HeapTupleHeaderGetRawCommandId(tup) \ @@ -402,7 +511,7 @@ do { \ (tup)->t_infomask &= ~HEAP_COMBOCID; \ } while (0) -/* SetCmax must be used after HeapTupleHeaderAdjustCmax; see combocid.c */ +/* SetCmax must be used after HeapTupleAdjustCmax; see combocid.c */ #define HeapTupleHeaderSetCmax(tup, cid, iscombo) \ do { \ Assert(!((tup)->t_infomask & HEAP_MOVED)); \ @@ -559,8 +668,16 @@ StaticAssertDecl(MaxOffsetNumber < SpecTokenOffsetNumber, * an otherwise-empty page can indeed hold a tuple of this size. Because * ItemIds and tuples have different alignment requirements, don't assume that * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. + * + * On shift to 64-bit XIDs MaxHeapTupleSize decreased by sizeof(HeapPageSpecialData). + * Extant tuples with length over new MaxHeapTupleSize are inherited on DoubleXmax + * pages. They could be read, but can not be updated unless their length decreases + * to fit MaxHeapTupleSize. Vacuum full will also copy these double xmax pages + * without change. */ -#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) + +#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) +#define MaxHeapTupleSize_32 (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) #define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader) /* @@ -694,6 +811,112 @@ struct MinimalTupleData #define HeapTupleClearHeapOnly(tuple) \ HeapTupleHeaderClearHeapOnly((tuple)->t_data) +/* + * Copy base values for xid and multixacts from one heap tuple to heap tuple. + * Should be called on tuple copy or making desc tuple on the base on src tuple + * saving visibility information. + */ +static inline void +HeapTupleCopyXids(HeapTuple dest, HeapTuple src) +{ + dest->t_xmin = src->t_xmin; + dest->t_xmax = src->t_xmax; +} + +/* + * Set base values for tuple xids/multixacts to zero. 
 Used when visibility + * information is negligible or will be set later. + */ +static inline void +HeapTupleSetZeroXids(HeapTuple htup) +{ + htup->t_xmin = 0; + htup->t_xmax = 0; +} + +/* + * Copy HeapTupleHeader xmin/xmax into HeapTuple in a raw way, without + * applying the page's base values. + */ +static inline void +HeapTupleCopyHeaderXids(HeapTuple htup) +{ + htup->t_xmin = htup->t_data->t_choice.t_heap.t_xmin; + htup->t_xmax = htup->t_data->t_choice.t_heap.t_xmax; +} + +static inline void +HeapTupleCopyXminFromPage(HeapTuple tup, Page page, bool is_toast) +{ + TransactionId base; + ShortTransactionId xmin; /* short xmin from tuple header */ + + if (HeapTupleHeaderXminFrozen(tup->t_data)) + { + tup->t_xmin = FrozenTransactionId; + return; + } + + xmin = tup->t_data->t_choice.t_heap.t_xmin; + + if (!TransactionIdIsNormal(xmin)) + base = 0; + else if (is_toast) + base = ToastPageGetSpecial(page)->pd_xid_base; + else + base = HeapPageGetSpecial(page)->pd_xid_base; + + tup->t_xmin = ShortTransactionIdToNormal(base, xmin); +} + +static inline void +HeapTupleCopyXmaxFromPage(HeapTuple tup, Page page, bool is_toast) +{ + TransactionId base; + ShortTransactionId xmax; /* short xmax from tuple header */ + + xmax = tup->t_data->t_choice.t_heap.t_xmax; + + if (!TransactionIdIsNormal(xmax)) + base = 0; + else if (is_toast) + /* + * Toast page is not expected to have multixacts in chunks and + * has shorter special. + */ + base = ToastPageGetSpecial(page)->pd_xid_base; + else if (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + base = HeapPageGetSpecial(page)->pd_multi_base; + else + base = HeapPageGetSpecial(page)->pd_xid_base; + + tup->t_xmax = ShortTransactionIdToNormal(base, xmax); +} + +/* + * Copy base values for xid and multixacts from page to heap tuple. Should be + * called each time tuple is read from page. Otherwise, it would be impossible + * to correctly read tuple xmin and xmax. 
+ */ +static inline void +HeapTupleCopyXidsFromPage(Buffer buffer, HeapTuple tup, Page page, + bool is_toast) +{ + Assert(IsBufferLocked(buffer)); + + if (HeapPageIsDoubleXmax(page)) + { + /* + * On double xmax pages, xmax is extracted from tuple header. + */ + tup->t_xmin = FrozenTransactionId; + tup->t_xmax = HeapTupleHeaderGetDoubleXmax(tup->t_data); + return; + } + + HeapTupleCopyXminFromPage(tup, page, is_toast); + HeapTupleCopyXmaxFromPage(tup, page, is_toast); +} + /* prototypes for functions in common/heaptuple.c */ extern Size heap_compute_data_size(TupleDesc tupleDesc, const Datum *values, const bool *isnull); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 0be1355892..b84eb12710 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -18,16 +18,16 @@ /* * The first two MultiXactId values are reserved to store the truncation Xid - * and epoch of the first segment, so we start assigning multixact values from + * and base of the first segment, so we start assigning multixact values from * 2. 
*/ -#define InvalidMultiXactId ((MultiXactId) 0) -#define FirstMultiXactId ((MultiXactId) 1) -#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) +#define InvalidMultiXactId UINT64CONST(0) +#define FirstMultiXactId UINT64CONST(1) +#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) -#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) +#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF) /* Number of SLRU buffers to use for multixact */ #define NUM_MULTIXACTOFFSET_BUFFERS 8 @@ -147,7 +147,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti, extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); -extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 7bfbf3086c..d579aabee1 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -62,8 +62,10 @@ typedef uint16 BTCycleId; typedef struct BTPageOpaqueData { BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ + /* ... or next transaction ID (lower part) */ BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ uint32 btpo_level; /* tree level --- zero for leaf pages */ + /* ... 
or next transaction ID (lower part) */ uint16 btpo_flags; /* flag bits, see below */ BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ } BTPageOpaqueData; @@ -92,6 +94,14 @@ typedef BTPageOpaqueData *BTPageOpaque; */ #define MAX_BT_CYCLE_ID 0xFF7F +/* Macros for access xact */ +#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) opaque)->btpo_prev << 32) | \ + (uint64) ((BTPageOpaque) opaque)->btpo_level) +#define BTP_SET_XACT(opaque, xact) \ +do { \ + ((BTPageOpaque) opaque)->btpo_prev = (uint32) (xact >> 32); \ + ((BTPageOpaque) opaque)->btpo_level = (uint32) xact; \ +} while (0) /* * The Meta page is always the first page in the btree index. diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index 1ad1352036..58d460fc03 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -110,7 +110,7 @@ typedef struct relopt_int64 int64 default_val; int64 min; int64 max; -} relopt_int64; +} relopt_int64; typedef struct relopt_real { diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index 1125457053..c0f0d456da 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData * 6) xid of the xact performing the mapping * --- */ -#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" -extern void CheckPointLogicalRewriteHeap(void); +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x" +extern void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 463bcb67c5..4bafc98190 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, 
"Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode) +PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask, NULL) diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 091e2202c9..982b716b44 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -21,15 +21,7 @@ /* * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere * else in Postgres. The segment size can be chosen somewhat arbitrarily; - * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG - * or 64K transactions for SUBTRANS. - * - * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, - * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where - * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at - * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need - * take no explicit notice of that fact in slru.c, except when comparing - * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + * we make it 32 pages by default. */ #define SLRU_PAGES_PER_SEGMENT 32 diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 87227d8bd2..5caf4760e8 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -134,7 +134,7 @@ typedef enum TU_UpdateIndexes * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the * tuple); otherwise cmax is zero. (We make this restriction because - * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * HeapTupleGetCmax doesn't work for tuples outdated in other * transactions.) 
*/ typedef struct TM_FailureData diff --git a/src/include/access/transam.h b/src/include/access/transam.h index f5af6d3055..af9cb645a5 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -17,6 +17,10 @@ #include "access/xlogdefs.h" +#ifndef FRONTEND +#include "utils/elog.h" +#endif + /* ---------------- * Special transaction ID values * @@ -28,11 +32,12 @@ * Note: if you need to change it, you must change pg_class.h as well. * ---------------- */ -#define InvalidTransactionId ((TransactionId) 0) -#define BootstrapTransactionId ((TransactionId) 1) -#define FrozenTransactionId ((TransactionId) 2) -#define FirstNormalTransactionId ((TransactionId) 3) -#define MaxTransactionId ((TransactionId) 0xFFFFFFFF) +#define InvalidTransactionId UINT64CONST(0) +#define BootstrapTransactionId UINT64CONST(1) +#define FrozenTransactionId UINT64CONST(2) +#define FirstNormalTransactionId UINT64CONST(3) +#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF) +#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF) /* ---------------- * transaction ID manipulation macros @@ -44,8 +49,40 @@ #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) -#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) -#define XidFromFullTransactionId(x) ((uint32) (x).value) +/* + * Convert short xid from/to full xid. Assertion should fail if we full xid + * doesn't fit to xid base. 
+ */ +static inline TransactionId +ShortTransactionIdToNormal(TransactionId base, ShortTransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (TransactionId) xid; + +#ifndef FRONTEND + /* xid + base should not overflow TransactionId */ + Assert(xid + base >= base); +#endif + + return (TransactionId) (xid + base); +} + +static inline ShortTransactionId +NormalTransactionIdToShort(TransactionId base, TransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (ShortTransactionId) (xid); + +#ifndef FRONTEND + /* xid should fit ShortTransactionId */ + Assert(xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId); +#endif + + return (ShortTransactionId) (xid - base); +} + +#define XidFromFullTransactionId(x) ((x).value) #define U64FromFullTransactionId(x) ((x).value) #define FullTransactionIdEquals(a, b) ((a).value == (b).value) #define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) @@ -53,8 +90,8 @@ #define FullTransactionIdFollows(a, b) ((a).value > (b).value) #define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value) #define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) -#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) -#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId) +#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId) +#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId) #define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId) /* @@ -68,21 +105,11 @@ typedef struct FullTransactionId } FullTransactionId; static inline FullTransactionId -FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid) +FullTransactionIdFromXid(TransactionId xid) { FullTransactionId result; - result.value = ((uint64) epoch) << 32 | xid; - - return result; -} - -static inline 
FullTransactionId -FullTransactionIdFromU64(uint64 value) -{ - FullTransactionId result; - - result.value = value; + result.value = xid; return result; } @@ -91,8 +118,7 @@ FullTransactionIdFromU64(uint64 value) #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ - if ((dest) < FirstNormalTransactionId) \ - (dest) = FirstNormalTransactionId; \ + Assert(TransactionIdIsNormal(dest)); \ } while(0) /* @@ -140,18 +166,19 @@ FullTransactionIdAdvance(FullTransactionId *dest) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ + Assert(TransactionIdIsNormal(dest)); \ (dest)--; \ - } while ((dest) < FirstNormalTransactionId) + } while(0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) < 0) + (int64) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) > 0) + (int64) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. @@ -201,10 +228,6 @@ FullTransactionIdAdvance(FullTransactionId *dest) * OID and XID assignment state. For largely historical reasons, there is * just one struct with different fields that are protected by different * LWLocks. - * - * Note: xidWrapLimit and oldestXidDB are not "active" values, but are - * used just to generate useful messages when xidWarnLimit or xidStopLimit - * are exceeded. 
*/ typedef struct VariableCacheData { @@ -221,9 +244,6 @@ typedef struct VariableCacheData TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ - TransactionId xidWarnLimit; /* start complaining here */ - TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ - TransactionId xidWrapLimit; /* where the world ends */ Oid oldestXidDB; /* database with minimum datfrozenxid */ /* @@ -276,10 +296,6 @@ extern bool TransactionIdDidAbort(TransactionId transactionId); extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); -extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); -extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); extern TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids); extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); @@ -319,7 +335,7 @@ ReadNextTransactionId(void) /* return transaction ID backed up by amount, handling wraparound correctly */ static inline TransactionId -TransactionIdRetreatedBy(TransactionId xid, uint32 amount) +TransactionIdRetreatedBy(TransactionId xid, uint64 amount) { xid -= amount; diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 3414446597..83c1adcd6c 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -131,10 +131,11 @@ fetch_att(const void *T, bool attbyval, int attlen) ((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \ (((attalign) == TYPALIGN_CHAR) ? 
(uintptr_t) (cur_offset) : \ (((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \ + (((attalign) == TYPALIGN_XID) ? MAXALIGN(cur_offset) : \ ( \ AssertMacro((attalign) == TYPALIGN_SHORT), \ SHORTALIGN(cur_offset) \ - ))) \ + )))) \ ) /* diff --git a/src/include/access/xact.h b/src/include/access/xact.h index cb90f227ce..f002edd469 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -249,7 +249,7 @@ typedef struct xl_xact_xinfo * Commit records can be large, so copying large portions isn't * attractive. */ - uint32 xinfo; + uint64 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo @@ -302,7 +302,12 @@ typedef struct xl_xact_invals typedef struct xl_xact_twophase { - TransactionId xid; + /* + * TransactionId is split into 32-bit parts because xl_xact_twophase is + * only int-aligned. + */ + uint32 xid_lo; + uint32 xid_hi; } xl_xact_twophase; typedef struct xl_xact_origin @@ -321,7 +326,7 @@ typedef struct xl_xact_commit /* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; @@ -337,7 +342,7 @@ typedef struct xl_xact_abort /* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* No invalidation messages needed. */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
 */ } xl_xact_abort; diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cace867497..87fad76eba 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -38,6 +38,7 @@ #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image * is taken */ #define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */ +#define REGBUF_CONVERTED 0x40 /* buffer had format conversion */ /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 0813722715..07f3af146c 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -427,10 +427,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state, #define XLogRecHasBlockData(decoder, block_id) \ ((decoder)->record->blocks[block_id].has_data) -#ifndef FRONTEND -extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); -#endif - extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index ec9a3c802a..4e58d9be61 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -41,18 +41,17 @@ typedef struct XLogRecord { uint32 xl_tot_len; /* total len of entire record */ + pg_crc32c xl_crc; /* CRC for this record */ TransactionId xl_xid; /* xact id */ XLogRecPtr xl_prev; /* ptr to previous record in log */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ - /* 2 bytes of padding here, initialize to zero */ - pg_crc32c xl_crc; /* CRC for this record */ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ } XLogRecord; -#define SizeOfXLogRecord 
(offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) +#define SizeOfXLogRecord (offsetof(XLogRecord, xl_rmid) + sizeof(RmgrId)) /* * The high 4 bits in xl_info may be used freely by rmgr. The diff --git a/src/include/c.h b/src/include/c.h index 82f8e9d4c7..e2420738f2 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -638,19 +638,29 @@ typedef double float8; typedef Oid regproc; typedef regproc RegProcedure; -typedef uint32 TransactionId; +typedef uint64 TransactionId; -typedef uint32 LocalTransactionId; +extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); +extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); -typedef uint32 SubTransactionId; +typedef uint32 ShortTransactionId; +typedef uint64 LocalTransactionId; +typedef uint64 SubTransactionId; -#define InvalidSubTransactionId ((SubTransactionId) 0) -#define TopSubTransactionId ((SubTransactionId) 1) +#define InvalidSubTransactionId ((SubTransactionId) 0) +#define TopSubTransactionId ((SubTransactionId) 1) /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; -typedef uint32 MultiXactOffset; +typedef uint64 MultiXactOffset; + +#define MAX_START_XID UINT64CONST(0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */ +#define StartTransactionIdIsValid(xid) ((xid) <= MAX_START_XID) +#define StartMultiXactIdIsValid(mxid) ((mxid) <= MAX_START_XID) +#define StartMultiXactOffsetIsValid(mxoff) ((mxoff) <= MAX_START_XID) typedef uint32 CommandId; @@ -824,7 +834,6 @@ typedef NameData *Name; /* we don't currently need wider versions of the other ALIGN macros */ #define MAXALIGN64(LEN) TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN)) - /* ---------------------------------------------------------------- * Section 6: assertions * ---------------------------------------------------------------- diff --git 
a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a06a8f0b23..9f5f3637f0 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,7 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202312071 +/* XXX: should be changed to actual version on commit */ +#define CATALOG_VERSION_NO 999999999 #endif diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 4c70da41de..b64d64e194 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -401,9 +401,9 @@ amprocrighttype => 'bytea', amprocnum => '2', amproc => 'hashvarlenaextended' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint8extended' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index b2cdea66c4..f9feefb013 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -183,16 +183,16 @@ oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)', oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '353', descr => 'equal', - oprname => '=', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel', + oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
{ oid => '3315', descr => 'not equal', oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool', oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '3316', descr => 'not equal', - oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel', + oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '5068', descr => 'equal', oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 77e8b13764..f0b02b5599 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -2397,10 +2397,10 @@ { oid => '1181', descr => 'age of a transaction ID, in transactions before current transaction', proname => 'age', provolatile => 's', proparallel => 'r', - prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' }, + prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' }, { oid => '3939', descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction', - proname => 'mxid_age', provolatile => 's', prorettype => 'int4', + proname => 'mxid_age', provolatile => 's', prorettype => 'int8', proargtypes => 'xid', prosrc => 'mxid_age' }, { oid => '1188', @@ -2751,11 +2751,11 @@ prosrc => 'bpcharlen' }, { oid => '1319', - proname => 'xideqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xideq' }, + proname => 'xideqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xideq' }, { oid => '3309', - proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xidneq' }, + proname => 
'xidneqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xidneq' }, { oid => '1326', proname => 'interval_div', prorettype => 'interval', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index f6110a850d..25eb0079cd 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -95,9 +95,9 @@ typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv', typsend => 'tidsend', typalign => 's' }, { oid => '28', array_type_oid => '1011', descr => 'transaction id', - typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U', + typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U', typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv', - typsend => 'xidsend', typalign => 'i' }, + typsend => 'xidsend', typalign => 'x' }, { oid => '29', array_type_oid => '1012', descr => 'command identifier type, sequence in transaction id', typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U', diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 508ba7b0f7..e9bb5c3b26 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -300,6 +300,11 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, TypeNameNspIndexId, pg_typ #define TYPALIGN_SHORT 's' /* short alignment (typically 2 bytes) */ #define TYPALIGN_INT 'i' /* int alignment (typically 4 bytes) */ #define TYPALIGN_DOUBLE 'd' /* double alignment (often 8 bytes) */ +/* + * We need to use alignment suitable for 8-byte XID values. + * On systems like AIX double alignment (4 bytes) is not enough.
+ */ +#define TYPALIGN_XID 'x' #define TYPSTORAGE_PLAIN 'p' /* type not prepared for toasting */ #define TYPSTORAGE_EXTERNAL 'e' /* toastable, don't try to compress */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 4af02940c5..121dec1079 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -215,12 +215,12 @@ typedef enum VacOptValue */ typedef struct VacuumParams { - bits32 options; /* bitmask of VACOPT_* */ - int freeze_min_age; /* min freeze age, -1 to use default */ - int freeze_table_age; /* age at which to scan whole table */ - int multixact_freeze_min_age; /* min multixact freeze age, -1 to + bits32 options; /* bitmask of VacuumOption */ + int64 freeze_min_age; /* min freeze age, -1 to use default */ + int64 freeze_table_age; /* age at which to scan whole table */ + int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ - int multixact_freeze_table_age; /* multixact age at which to scan + int64 multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at @@ -293,12 +293,12 @@ typedef struct VacDeadItems /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ -extern PGDLLIMPORT int vacuum_freeze_min_age; -extern PGDLLIMPORT int vacuum_freeze_table_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; -extern PGDLLIMPORT int vacuum_failsafe_age; -extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT int64 vacuum_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_failsafe_age; +extern PGDLLIMPORT int64 vacuum_multixact_failsafe_age; /* * 
Maximum value for default_statistics_target and per-column statistics diff --git a/src/include/fmgr.h b/src/include/fmgr.h index edf61e53f3..3469edd160 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n)) #define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n)) #define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n)) +#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) /* use this if you want the raw, possibly-toasted input datum: */ #define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n)) /* use this if you want the input datum de-toasted: */ @@ -367,6 +368,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_RETURN_FLOAT8(x) return Float8GetDatum(x) #define PG_RETURN_INT64(x) return Int64GetDatum(x) #define PG_RETURN_UINT64(x) return UInt64GetDatum(x) +#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) /* RETURN macros for other pass-by-ref types will typically look like this: */ #define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x) #define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x) diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 529a382d28..a687303dae 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -46,6 +46,7 @@ typedef union ListCell { void *ptr_value; int int_value; + int64 int64_value; Oid oid_value; TransactionId xid_value; } ListCell; @@ -171,6 +172,7 @@ list_length(const List *l) */ #define lfirst(lc) ((lc)->ptr_value) #define lfirst_int(lc) ((lc)->int_value) +#define lfirst_int64(lc) ((lc)->int64_value) #define lfirst_oid(lc) ((lc)->oid_value) #define lfirst_xid(lc) ((lc)->xid_value) #define lfirst_node(type,lc) castNode(type, lfirst(lc)) @@ -197,6 +199,7 @@ list_length(const List *l) #define llast(l) lfirst(list_last_cell(l)) #define 
llast_int(l) lfirst_int(list_last_cell(l)) +#define llast_int64(l) lfirst_int64(list_last_cell(l)) #define llast_oid(l) lfirst_oid(list_last_cell(l)) #define llast_xid(l) lfirst_xid(list_last_cell(l)) #define llast_node(type,l) castNode(type, llast(l)) @@ -559,6 +562,7 @@ extern List *list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2, extern pg_nodiscard List *lappend(List *list, void *datum); extern pg_nodiscard List *lappend_int(List *list, int datum); +extern pg_nodiscard List *lappend_int64(List *list, int64 datum); extern pg_nodiscard List *lappend_oid(List *list, Oid datum); extern pg_nodiscard List *lappend_xid(List *list, TransactionId datum); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 5f16918243..4651e61aca 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -771,6 +771,9 @@ # endif #endif +/* Postgres Pro use 64bit xids */ +#undef XID_IS_64BIT + /* Size of a WAL file block. This need have no particular relation to BLCKSZ. XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O, XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h index 59aa8245ed..f9e0ebaddc 100644 --- a/src/include/port/pg_lfind.h +++ b/src/include/port/pg_lfind.h @@ -81,35 +81,21 @@ pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem) } /* - * pg_lfind32 + * pg_lfind64 * * Return true if there is an element in 'base' that equals 'key', otherwise * return false. */ static inline bool -pg_lfind32(uint32 key, uint32 *base, uint32 nelem) +pg_lfind64(uint64 key, uint64 *base, uint32 nelem) { - uint32 i = 0; - -#ifndef USE_NO_SIMD - - /* - * For better instruction-level parallelism, each loop iteration operates - * on a block of four registers. Testing for SSE2 has showed this is ~40% - * faster than using a block of two registers. 
- */ - const Vector32 keys = vector32_broadcast(key); /* load copies of key */ - const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); - const uint32 nelem_per_iteration = 4 * nelem_per_vector; - - /* round down to multiple of elements per iteration */ - const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); - + uint32 i, + iterations; #if defined(USE_ASSERT_CHECKING) bool assert_result = false; /* pre-compute the result for assert checking */ - for (i = 0; i < nelem; i++) + for (i = 0; i < nelem; ++i) { if (key == base[i]) { @@ -119,62 +105,127 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem) } #endif - for (i = 0; i < tail_idx; i += nelem_per_iteration) +#define UNROLL_FACTOR 8 + StaticAssertStmt((UNROLL_FACTOR & (UNROLL_FACTOR - 1)) == 0, + "Loop unroll factor must be power of 2"); + iterations = nelem & ~(UNROLL_FACTOR - 1); + for (i = 0; i < iterations; i += UNROLL_FACTOR) { - Vector32 vals1, - vals2, - vals3, - vals4, - result1, - result2, - result3, - result4, - tmp1, - tmp2, - result; - - /* load the next block into 4 registers */ - vector32_load(&vals1, &base[i]); - vector32_load(&vals2, &base[i + nelem_per_vector]); - vector32_load(&vals3, &base[i + nelem_per_vector * 2]); - vector32_load(&vals4, &base[i + nelem_per_vector * 3]); - - /* compare each value to the key */ - result1 = vector32_eq(keys, vals1); - result2 = vector32_eq(keys, vals2); - result3 = vector32_eq(keys, vals3); - result4 = vector32_eq(keys, vals4); - - /* combine the results into a single variable */ - tmp1 = vector32_or(result1, result2); - tmp2 = vector32_or(result3, result4); - result = vector32_or(tmp1, tmp2); - - /* see if there was a match */ - if (vector32_is_highbit_set(result)) + if (base[0] == key || base[1] == key || base[2] == key || + base[3] == key || base[4] == key || base[5] == key || + base[6] == key || base[7] == key) { +#if defined(USE_ASSERT_CHECKING) Assert(assert_result == true); +#endif return true; } + base += UNROLL_FACTOR; } -#endif /* ! 
USE_NO_SIMD */ /* Process the remaining elements one at a time. */ - for (; i < nelem; i++) + iterations = nelem & (UNROLL_FACTOR - 1); + for (i = 0; i < iterations; ++i) { - if (key == base[i]) + if (key == *base++) { -#ifndef USE_NO_SIMD +#if defined(USE_ASSERT_CHECKING) Assert(assert_result == true); #endif return true; } } -#ifndef USE_NO_SIMD +#if defined(USE_ASSERT_CHECKING) Assert(assert_result == false); #endif return false; +// uint32 i = 0; +// +//#ifndef USE_NO_SIMD +// +// /* +// * For better instruction-level parallelism, each loop iteration operates +// * on a block of four registers. Testing for SSE2 has showed this is ~40% +// * faster than using a block of two registers. +// */ +// const Vector32 keys = vector32_broadcast(key); /* load copies of key */ +// const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); +// const uint32 nelem_per_iteration = 4 * nelem_per_vector; +// +// /* round down to multiple of elements per iteration */ +// const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); +// +//#if defined(USE_ASSERT_CHECKING) +// bool assert_result = false; +// +// /* pre-compute the result for assert checking */ +// for (i = 0; i < nelem; i++) +// { +// if (key == base[i]) +// { +// assert_result = true; +// break; +// } +// } +//#endif +// +// for (i = 0; i < tail_idx; i += nelem_per_iteration) +// { +// Vector32 vals1, +// vals2, +// vals3, +// vals4, +// result1, +// result2, +// result3, +// result4, +// tmp1, +// tmp2, +// result; +// +// /* load the next block into 4 registers */ +// vector32_load(&vals1, &base[i]); +// vector32_load(&vals2, &base[i + nelem_per_vector]); +// vector32_load(&vals3, &base[i + nelem_per_vector * 2]); +// vector32_load(&vals4, &base[i + nelem_per_vector * 3]); +// +// /* compare each value to the key */ +// result1 = vector32_eq(keys, vals1); +// result2 = vector32_eq(keys, vals2); +// result3 = vector32_eq(keys, vals3); +// result4 = vector32_eq(keys, vals4); +// +// /* combine the results 
into a single variable */ +// tmp1 = vector32_or(result1, result2); +// tmp2 = vector32_or(result3, result4); +// result = vector32_or(tmp1, tmp2); +// +// /* see if there was a match */ +// if (vector32_is_highbit_set(result)) +// { +// Assert(assert_result == true); +// return true; +// } +// } +//#endif /* ! USE_NO_SIMD */ +// +// /* Process the remaining elements one at a time. */ +// for (; i < nelem; i++) +// { +// if (key == base[i]) +// { +//#ifndef USE_NO_SIMD +// Assert(assert_result == true); +//#endif +// return true; +// } +// } +// +//#ifndef USE_NO_SIMD +// Assert(assert_result == false); +//#endif +// return false; } #endif /* PG_LFIND_H */ diff --git a/src/include/postgres.h b/src/include/postgres.h index 8a028ff789..5baa6b278c 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -80,6 +80,9 @@ typedef struct NullableDatum #define SIZEOF_DATUM SIZEOF_VOID_P +static uint64 DatumGetUInt64(Datum X); +static Datum UInt64GetDatum(uint64 X); + /* * DatumGetBool * Returns boolean value of a datum. 
@@ -261,7 +264,7 @@ ObjectIdGetDatum(Oid X) static inline TransactionId DatumGetTransactionId(Datum X) { - return (TransactionId) X; + return DatumGetUInt64(X); } /* @@ -271,7 +274,7 @@ DatumGetTransactionId(Datum X) static inline Datum TransactionIdGetDatum(TransactionId X) { - return (Datum) X; + return UInt64GetDatum(X); } /* @@ -281,7 +284,7 @@ TransactionIdGetDatum(TransactionId X) static inline Datum MultiXactIdGetDatum(MultiXactId X) { - return (Datum) X; + return UInt64GetDatum(X); } /* diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index c9ef31ae66..55d0615746 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -37,8 +37,8 @@ extern PGDLLIMPORT int autovacuum_vac_ins_thresh; extern PGDLLIMPORT double autovacuum_vac_ins_scale; extern PGDLLIMPORT int autovacuum_anl_thresh; extern PGDLLIMPORT double autovacuum_anl_scale; -extern PGDLLIMPORT int autovacuum_freeze_max_age; -extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_multixact_freeze_max_age; extern PGDLLIMPORT double autovacuum_vac_cost_delay; extern PGDLLIMPORT int autovacuum_vac_cost_limit; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 2c4fd92e39..c34040bd2c 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -42,10 +42,10 @@ */ #define BUF_REFCOUNT_ONE 1 #define BUF_REFCOUNT_MASK ((1U << 18) - 1) -#define BUF_USAGECOUNT_MASK 0x003C0000U +#define BUF_USAGECOUNT_MASK 0x001C0000U #define BUF_USAGECOUNT_ONE (1U << 18) #define BUF_USAGECOUNT_SHIFT 18 -#define BUF_FLAG_MASK 0xFFC00000U +#define BUF_FLAG_MASK 0xFFE00000U /* Get refcount and usagecount from buffer state */ #define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK) @@ -57,6 +57,7 @@ * Note: BM_TAG_VALID essentially means that there is a buffer hashtable * entry 
associated with the buffer's tag. */ +#define BM_CONVERTED (1U << 21) /* buffer was converted to 64-bit xid */ #define BM_LOCKED (1U << 22) /* buffer header is locked */ #define BM_DIRTY (1U << 23) /* data needs writing */ #define BM_VALID (1U << 24) /* data is valid */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 41e26d3e20..101e43d799 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -239,8 +239,12 @@ extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum); extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std); +extern void MarkBufferConverted(Buffer buffer, bool converted); +extern bool IsBufferConverted(Buffer buffer); extern void UnlockBuffers(void); +extern bool IsBufferLocked(Buffer buffer); +extern bool IsBufferLockedExclusive(Buffer buffer); extern void LockBuffer(Buffer buffer, int mode); extern bool ConditionalLockBuffer(Buffer buffer); extern void LockBufferForCleanup(Buffer buffer); @@ -266,6 +270,8 @@ extern int GetAccessStrategyBufferCount(BufferAccessStrategy strategy); extern void FreeAccessStrategy(BufferAccessStrategy strategy); +/* old tuple format support */ +extern void convert_page(Relation rel, Page orig_page, Buffer buf, BlockNumber blkno); /* inline functions */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 424ecba028..7b8ddde76a 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -14,10 +14,13 @@ #ifndef BUFPAGE_H #define BUFPAGE_H +#include "access/transam.h" #include "access/xlogdefs.h" #include "storage/block.h" #include "storage/item.h" #include "storage/off.h" +#include "postgres.h" +#include "utils/rel.h" /* * A postgres disk page is an abstraction layered on top of a postgres @@ -163,12 +166,41 @@ typedef struct PageHeaderData LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version; - TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ + ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if + * none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; + +/* + * HeapPageSpecialData -- data that stored at the end of each heap page. + * + * pd_xid_base - base value for transaction IDs on page + * pd_multi_base - base value for multixact IDs on page + * + * pd_xid_base and pd_multi_base are base values for calculation of transaction + * identifiers from t_xmin and t_xmax in each heap tuple header on the page. + */ +typedef struct HeapPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ + TransactionId pd_multi_base; /* base value for multixact IDs on page */ +} HeapPageSpecialData; + +typedef HeapPageSpecialData *HeapPageSpecial; + +typedef struct ToastPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ +} ToastPageSpecialData; + +typedef ToastPageSpecialData *ToastPageSpecial; + +extern PGDLLIMPORT HeapPageSpecial heapDoubleXmaxSpecial; +extern PGDLLIMPORT ToastPageSpecial toastDoubleXmaxSpecial; + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. @@ -200,7 +232,7 @@ typedef PageHeaderData *PageHeader; * As of Release 9.3, the checksum version must also be considered when * handling pages. */ -#define PG_PAGE_LAYOUT_VERSION 4 +#define PG_PAGE_LAYOUT_VERSION 5 #define PG_DATA_CHECKSUM_VERSION 1 /* ---------------------------------------------------------------- @@ -440,18 +472,177 @@ PageClearAllVisible(Page page) } /* - * These two require "access/transam.h", so left as macros. + * Check if page is in "double xmax" format. 
*/ -#define PageSetPrunable(page, xid) \ -do { \ - Assert(TransactionIdIsNormal(xid)); \ - if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ - TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ - ((PageHeader) (page))->pd_prune_xid = (xid); \ -} while (0) -#define PageClearPrunable(page) \ - (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) +static inline bool +HeapPageIsDoubleXmax(Page page) +{ + return ((PageHeader) (page))->pd_special == BLCKSZ; +} +/* + * Get pointer to HeapPageSpecialData. + * + * Can be used for non-consistent reads from non-locked pages. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax" + * format). + */ +static inline HeapPageSpecial +HeapPageGetSpecialNoAssert(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return heapDoubleXmaxSpecial; + + return (HeapPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Get pointer to ToastPageSpecialData. + * + * Can be used for non-consistent reads from non-locked pages. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax" + * format). + */ +static inline ToastPageSpecial +ToastPageGetSpecialNoAssert(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return toastDoubleXmaxSpecial; + + return (ToastPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Wrapper for HeapPageGetSpecialNoAssert for general use. + */ +static inline HeapPageSpecial +HeapPageGetSpecial(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return heapDoubleXmaxSpecial; + + Assert(((PageHeader) page)->pd_special == + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))); + + return (HeapPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Wrapper for ToastPageGetSpecialNoAssert for general use. 
+ */ +static inline ToastPageSpecial +ToastPageGetSpecial(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return toastDoubleXmaxSpecial; + + Assert(((PageHeader) page)->pd_special == + BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData))); + + return (ToastPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Set pd_prune_xid. + */ +static inline void +HeapPageSetPruneXid(Page page, TransactionId xid, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return; + + if (!TransactionIdIsNormal(xid)) + { + ((PageHeader) (page))->pd_prune_xid = xid; + return; + } + + base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + ((PageHeader) (page))->pd_prune_xid = NormalTransactionIdToShort(base, xid); + Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId); +} + +static inline void +ToastPageSetPruneXid(Page page, TransactionId xid) +{ + if (HeapPageIsDoubleXmax(page)) + return; + + if (!TransactionIdIsNormal(xid)) + { + ((PageHeader) (page))->pd_prune_xid = xid; + return; + } + + ((PageHeader) (page))->pd_prune_xid = + NormalTransactionIdToShort(ToastPageGetSpecial(page)->pd_xid_base, (xid)); + + Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId); +} + +/* + * Get pd_prune_xid from locked page. + */ +static inline TransactionId +HeapPageGetPruneXid(Page page, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + base = is_toast ? 
ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + return ShortTransactionIdToNormal(base, + ((PageHeader) (page))->pd_prune_xid); +} + +static inline void +PageSetPrunable(Page page, TransactionId xid, bool is_toast) +{ + TransactionId prune_xid; + + Assert(TransactionIdIsNormal(xid)); + + if (HeapPageIsDoubleXmax(page)) + return; + + prune_xid = HeapPageGetPruneXid(page, is_toast); + if ((!TransactionIdIsValid(prune_xid) || + TransactionIdPrecedes(xid, prune_xid))) + { + HeapPageSetPruneXid(page, xid, is_toast); + } +} + +/* + * Get pd_prune_xid from non-locked page. May return invalid value, but doesn't + * cause assert failures. + */ +static inline TransactionId +HeapPageGetPruneXidNoAssert(Page page, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + base = is_toast ? ToastPageGetSpecialNoAssert(page)->pd_xid_base : + HeapPageGetSpecialNoAssert(page)->pd_xid_base; + return ShortTransactionIdToNormal(base, + ((PageHeader) (page))->pd_prune_xid); +} /* ---------------------------------------------------------------- * extern declarations @@ -485,6 +676,21 @@ do { \ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); +/* + * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + */ +typedef struct ItemIdCompactData +{ + uint16 offsetindex; /* linp array index */ + int16 itemoff; /* page offset of item data */ + uint16 alignedlen; /* MAXALIGN(item data len) */ +} ItemIdCompactData; + +typedef ItemIdCompactData *ItemIdCompact; +typedef RelationData *Relation; + +extern int itemoffcompare(const void *item1, const void *item2); + extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, @@ -493,7 +699,7 @@ extern
Page PageGetTempPage(Page page); extern Page PageGetTempPageCopy(Page page); extern Page PageGetTempPageCopySpecial(Page page); extern void PageRestoreTempPage(Page tempPage, Page oldPage); -extern void PageRepairFragmentation(Page page); +extern void PageRepairFragmentation(Page page, bool is_toast); extern void PageTruncateLinePointerArray(Page page); extern Size PageGetFreeSpace(Page page); extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups); diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h index e5cfb8c3cc..e24e60981b 100644 --- a/src/include/storage/itemid.h +++ b/src/include/storage/itemid.h @@ -78,6 +78,8 @@ typedef uint16 ItemLength; #define ItemIdGetRedirect(itemId) \ ((itemId)->lp_off) +#define ItemIdGetTupleEnd(itemId) \ + (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId))) /* * ItemIdIsValid * True iff item identifier is valid. diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 590c026b5b..0e58f4ccad 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -224,8 +224,8 @@ typedef struct LOCKTAG /* ID info for a transaction is its TransactionId */ #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ @@ -234,8 +234,8 @@ typedef struct LOCKTAG /* ID info for a virtual transaction is its VirtualTransactionId */ #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).backendId, \ - (locktag).locktag_field2 = (vxid).localTransactionId, \ - (locktag).locktag_field3 = 0, \ + (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \ + (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \ 
(locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) @@ -245,9 +245,9 @@ typedef struct LOCKTAG * its speculative insert counter. */ #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = (token), \ - (locktag).locktag_field3 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ + (locktag).locktag_field3 = (token), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4b25961249..371b7ffefa 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -17,6 +17,7 @@ #include "access/clog.h" #include "access/xlogdefs.h" #include "lib/ilist.h" +#include "port/atomics.h" #include "storage/latch.h" #include "storage/lock.h" #include "storage/pg_sema.h" @@ -176,12 +177,12 @@ struct PGPROC Latch procLatch; /* generic latch for process */ - TransactionId xid; /* id of top-level transaction currently being + pg_atomic_uint64 xid; /* id of top-level transaction currently being * executed by this proc, if running and XID * is assigned; else InvalidTransactionId. * mirrored in ProcGlobal->xids[pgxactoff] */ - TransactionId xmin; /* minimal running XID as it was when we were + pg_atomic_uint64 xmin; /* minimal running XID as it was when we were * starting our xact, excluding LAZY VACUUM: * vacuum must not remove tuples deleted by * xid >= xmin ! 
*/ @@ -368,7 +369,7 @@ typedef struct PROC_HDR PGPROC *allProcs; /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */ - TransactionId *xids; + pg_atomic_uint64 *xids; /* * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h index 2b496ee634..3f49819468 100644 --- a/src/include/utils/combocid.h +++ b/src/include/utils/combocid.h @@ -15,7 +15,7 @@ #define COMBOCID_H /* - * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes + * HeapTupleGetCmin and HeapTupleGetCmax function prototypes * are in access/htup.h, because that's where the macro definitions that * those functions replaced used to be. */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 0ad613c4b8..1d78a56161 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -311,12 +311,12 @@ typedef struct AutoVacOpts int vacuum_ins_threshold; int analyze_threshold; int vacuum_cost_limit; - int freeze_min_age; - int freeze_max_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_max_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_max_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_max_age; + int64 multixact_freeze_table_age; int log_min_duration; float8 vacuum_cost_delay; float8 vacuum_scale_factor; diff --git a/src/include/utils/xid8.h b/src/include/utils/xid8.h index 2f5e14baad..6401904ad2 100644 --- a/src/include/utils/xid8.h +++ b/src/include/utils/xid8.h @@ -17,13 +17,13 @@ static inline FullTransactionId DatumGetFullTransactionId(Datum X) { - return FullTransactionIdFromU64(DatumGetUInt64(X)); + return FullTransactionIdFromXid(DatumGetUInt64(X)); } static inline Datum FullTransactionIdGetDatum(FullTransactionId X) { - return UInt64GetDatum(U64FromFullTransactionId(X)); + return UInt64GetDatum(XidFromFullTransactionId(X)); } #define 
PG_GETARG_FULLTRANSACTIONID(X) DatumGetFullTransactionId(PG_GETARG_DATUM(X)) diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index d68ad7be34..650d045b07 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -2680,7 +2680,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup) * This is needed because CREATE OR REPLACE FUNCTION can modify the * function's pg_proc entry without changing its OID. ************************************************************/ - uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) @@ -2804,7 +2804,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger) MemoryContextSetIdentifier(proc_cxt, prodesc->proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index a341cde2c1..7686841265 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -171,7 +171,7 @@ recheck: if (function) { /* We have a compiled function, but is it still valid? 
*/ - if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (function->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else @@ -344,7 +344,7 @@ do_compile(FunctionCallInfo fcinfo, function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_xmin = HeapTupleGetRawXmin(procTup); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index f8c7f48747..2ae9a58484 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -7394,6 +7394,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup) tmptup->t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup->t_self)); tmptup->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tmptup); tmptup->t_data = td; /* Extract rowtype info and find a tupdesc */ @@ -7568,6 +7569,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = td; /* Extract rowtype info */ diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c index 79b6ef6a44..f89d909104 100644 --- a/src/pl/plpython/plpy_procedure.c +++ b/src/pl/plpython/plpy_procedure.c @@ -178,7 +178,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) proc->proname = pstrdup(NameStr(procStruct->proname)); MemoryContextSetIdentifier(cxt, proc->proname); proc->pyname = pstrdup(procName); - proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + proc->fn_xmin = HeapTupleGetRawXmin(procTup); proc->fn_tid = 
procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; @@ -417,7 +417,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup) return false; /* If the pg_proc tuple has changed, it's not valid */ - if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c index 6187e15781..485331a134 100644 --- a/src/pl/tcl/pltcl.c +++ b/src/pl/tcl/pltcl.c @@ -1428,7 +1428,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, * function's pg_proc entry without changing its OID. ************************************************************/ if (prodesc != NULL && - prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ @@ -1494,7 +1494,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/test/Makefile b/src/test/Makefile index dbd3192874..8e0f39289e 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,8 @@ subdir = src/test top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global -SUBDIRS = perl regress isolation modules authentication recovery subscription +SUBDIRS = perl regress isolation modules authentication recovery subscription \ + xid-64 ifeq ($(with_icu),yes) SUBDIRS += icu diff --git a/src/test/meson.build b/src/test/meson.build index 5f3c9c2ba2..e9f504c000 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -7,6 +7,7 @@ subdir('authentication') subdir('recovery') subdir('subscription') subdir('modules') +subdir('xid-64') if ssl.found() subdir('ssl') diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c index e2e8b7389f..46553cbe89 100644 --- a/src/test/modules/test_lfind/test_lfind.c +++ b/src/test/modules/test_lfind/test_lfind.c @@ -120,29 +120,29 @@ Datum test_lfind32(PG_FUNCTION_ARGS) { #define TEST_ARRAY_SIZE 135 - uint32 test_array[TEST_ARRAY_SIZE] = {0}; + uint64 test_array[TEST_ARRAY_SIZE] = {0}; test_array[8] = 1; test_array[64] = 2; test_array[TEST_ARRAY_SIZE - 1] = 3; - if (pg_lfind32(1, test_array, 4)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(1, test_array, 4)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(1, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if (pg_lfind32(2, test_array, 32)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(2, test_array, 32)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(2, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if (pg_lfind32(3, test_array, 96)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE)) - 
elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(3, test_array, 96)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(3, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() found nonexistent element"); + if (pg_lfind64(4, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() found nonexistent element"); PG_RETURN_VOID(); } diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index e34dfb9243..bcf4ffac55 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -119,6 +119,10 @@ sub adjust_database_contents 'drop table public.gtest_normal_child2'); } + # Can't upgrade xid type + _add_st($result, 'regression', + 'alter table public.tab_core_types drop column xid'); + # stuff not supported from release 14 if ($old_version < 14) { diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index e882ce2077..1f2d710cf1 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1); # Bump the transaction ID epoch. This is useful to stress the portability # of recovery_target_xid parsing. -system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir); +system_or_bail('pg_resetwal', $node_primary->data_dir); # Start it $node_primary->start; diff --git a/src/test/recovery/t/039_end_of_wal.pl b/src/test/recovery/t/039_end_of_wal.pl index d2bf062bb2..d4aca99995 100644 --- a/src/test/recovery/t/039_end_of_wal.pl +++ b/src/test/recovery/t/039_end_of_wal.pl @@ -21,7 +21,7 @@ use integer; # causes / operator to use integer math my $BIG_ENDIAN = pack("L", 0x12345678) eq pack("N", 0x12345678); # Header size of record header. 
-my $RECORD_HEADER_SIZE = 24; +my $RECORD_HEADER_SIZE = 26; # Fields retrieved from code headers. my @scan_result = scan_server_header('access/xlog_internal.h', @@ -131,17 +131,21 @@ sub build_record_header # This needs to follow the structure XLogRecord: # I for xl_tot_len - # I for xl_xid + # I for xl_crc + # II for xl_xid # II for xl_prev # C for xl_info # C for xl_rmid - # BB for two bytes of padding - # I for xl_crc - return pack("IIIICCBBI", - $xl_tot_len, $xl_xid, + # BBBBBB for two bytes of padding + return pack("IIIIIICCBBBBBB", + $xl_tot_len, + $xl_crc, + $BIG_ENDIAN ? 0 : $xl_xid, + $BIG_ENDIAN ? $xl_xid : 0, $BIG_ENDIAN ? 0 : $xl_prev, $BIG_ENDIAN ? $xl_prev : 0, - $xl_info, $xl_rmid, 0, 0, $xl_crc); + $xl_info, $xl_rmid, + 0, 0, 0, 0, 0, 0); } # Build a fake WAL page header, based on the data given by the caller @@ -265,7 +269,7 @@ $node->stop('immediate'); my $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 0", $log_size + "invalid record length at .*: expected at least 26, got 0", $log_size ), "xl_tot_len zero"); @@ -277,7 +281,7 @@ write_wal($node, $TLI, $end_lsn, build_record_header(23)); $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 23", + "invalid record length at .*: expected at least 26, got 23", $log_size), "xl_tot_len short"); @@ -290,7 +294,7 @@ write_wal($node, $TLI, $end_lsn, build_record_header(1)); $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 1", $log_size + "invalid record length at .*: expected at least 26, got 1", $log_size ), "xl_tot_len short at end-of-page"); diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out index 44b54dc37f..313482b866 100644 --- a/src/test/regress/expected/indirect_toast.out +++ 
b/src/test/regress/expected/indirect_toast.out @@ -161,6 +161,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; ("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 (5 rows) +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; +insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len); DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index dd4354fc7d..d52545b443 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; -- -- check indirection (field/array assignment), cf bug #14265 @@ -980,3 +980,17 @@ insert into returningwrtest values (2, 'foo') returning returningwrtest; (1 row) drop table returningwrtest; +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values 
(repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 7610b011d6..c34bef3fb5 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND ORDER BY 1, 2; proargtypes | proargtypes -----------------------------+-------------------------- - integer | xid + bigint | xid timestamp without time zone | timestamp with time zone bit | bit varying txid_snapshot | pg_snapshot @@ -736,7 +736,7 @@ int8(oid) tideq(tid,tid) timestamptz_cmp(timestamp with time zone,timestamp with time zone) interval_cmp(interval,interval) -xideqint4(xid,integer) +xideqint8(xid,bigint) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) @@ -850,7 +850,7 @@ pg_lsn_gt(pg_lsn,pg_lsn) pg_lsn_ne(pg_lsn,pg_lsn) pg_lsn_cmp(pg_lsn,pg_lsn) xidneq(xid,xid) -xidneqint4(xid,integer) +xidneqint8(xid,bigint) sha224(bytea) sha256(bytea) sha384(bytea) diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452b..804d9914e8 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -2,9 +2,22 @@ -- SELECT_VIEWS -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", 
thepath::text COLLATE "C", cname COLLATE "C"; name | thepath | cname ------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------- + 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland + 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland + 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette + 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley + 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland + 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette + 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley + 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley + 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland + 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland + 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland + 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland + 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland @@ -22,10 +35,10 @@ SELECT * FROM street; Arroyo Las Positas | [(-121.7973,37.997),(-121.7957,37.005)] | Oakland Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland - Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley - Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] 
| Oakland + Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley + Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley B St | [(-122.1749,37.451),(-122.1743,37.443)] | Oakland Bancroft Ave | [(-122.15714,37.4242),(-122.156,37.409)] | Oakland @@ -37,9 +50,9 @@ SELECT * FROM street; Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | Berkeley + Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland - Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland @@ -60,9 +73,9 @@ SELECT * FROM street; Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland + Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland - Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley Coliseum Way | [(-122.2001,37.47),(-122.1978,37.516)] | Oakland Coliseum Way | [(-122.2113,37.626),(-122.2085,37.592),(-122.2063,37.568)] | Berkeley Coolidge Ave | [(-122.2007,37.058),(-122.1992,37.06)] | Lafayette @@ -77,9 +90,9 @@ SELECT * FROM street; Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley D St | 
[(-122.1811,37.505),(-122.1805,37.497)] | Oakland + Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland - Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette @@ -89,9 +102,9 @@ SELECT * FROM street; Driscoll Road | [(-121.9482,37.403),(-121.948451,37.39995)] | Oakland E St | [(-122.1832,37.505),(-122.1826,37.498),(-122.182,37.49)] | Oakland Eden Ave | [(-122.1143,37.505),(-122.1142,37.491)] | Oakland + Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland - Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley Edgewater Dr | [(-122.201,37.379),(-122.2042,37.41)] | Lafayette Enos Way | [(-121.7677,37.896),(-121.7673,37.91)] | Oakland Euclid Ave | [(-122.2671,37.009),(-122.2666,37.987)] | Berkeley @@ -106,8 +119,8 @@ SELECT * FROM street; Harris Road | [(-122.0659,37.372),(-122.0675,37.363)] | Oakland Heartwood Dr | [(-122.2006,37.341),(-122.1992,37.338)] | Lafayette Hegenberger Exwy | [(-122.1946,37.52),(-122.1947,37.497)] | Oakland - Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Berkeley + Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland Hesperian Blvd | [(-122.1132,37.6),(-122.1123,37.586)] | Berkeley @@ -117,10 +130,10 @@ SELECT * FROM street; 
I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland - I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley - I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland + I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | 
[(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley + I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland @@ -136,8 +149,8 @@ SELECT * FROM street; I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley - I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley + I- 580 Ramp | 
[(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland @@ -158,16 +171,16 @@ SELECT * FROM street; I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland - I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley + I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland + I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley I- 880 | 
[(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland - I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley I- 880 | [(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland I- 880 | [(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley - I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | 
[(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley + I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley I- 880 | [(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland @@ -175,12 +188,12 @@ SELECT * FROM street; I- 880 Ramp | [(-122.0041,37.313),(-122.0038,37.308),(-122.0039,37.284),(-122.0013,37.287),(-121.9995,37.289)] | Oakland I- 880 Ramp | [(-122.0236,37.488),(-122.0231,37.458),(-122.0227,37.458),(-122.0223,37.452),(-122.0205,37.447)] | Oakland I- 880 Ramp | [(-122.0238,37.491),(-122.0215,37.483),(-122.0211,37.477),(-122.0205,37.447)] | Oakland + I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley I- 880 Ramp | 
[(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland - I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley + I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland - I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland I- 880 Ramp | [(-122.085,37.34),(-122.0866,37.316),(-122.0819,37.296)] | Oakland @@ -212,26 +225,26 @@ SELECT * FROM street; Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland - Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley + Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley Mattos Dr | [(-122.0005,37.502),(-122.000898,37.49683)] | Oakland - Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley - McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland + Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley + McClure Ave | 
[(-122.1431,37.001),(-122.1436,37.998)] | Oakland Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley Miramar Ave | [(-122.1009,37.025),(-122.099089,37.03209)] | Oakland Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland - Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Berkeley + Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland Moores Ave | [(-122.0087,37.301),(-122.0094,37.292)] | Oakland National Ave | [(-122.1192,37.5),(-122.1281,37.489)] | Oakland Navajo Ct | [(-121.8779,37.901),(-121.8783,37.9)] | Oakland @@ -242,49 +255,49 @@ SELECT * FROM street; Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland - Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley + Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland Portsmouth Ave | [(-122.1064,37.315),(-122.1064,37.308)] | Oakland Proctor Ave | [(-122.2267,37.406),(-122.2251,37.386)] | Lafayette + Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland - Railroad Ave | 
[(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley + Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland - Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley - Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley + Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley Saddle Brook Dr | [(-122.1478,37.909),(-122.1454,37.904),(-122.1451,37.888)] | Berkeley Saginaw Ct | [(-121.8803,37.898),(-121.8806,37.901)] | Oakland San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley + Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland - Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley - Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Berkeley + Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland Skyline Dr | [(-122.0277,37.5),(-122.0284,37.498)] | Oakland Skywest Dr | [(-122.1161,37.62),(-122.1123,37.586)] | Berkeley Southern Pacific Railroad | [(-122.3002,37.674),(-122.2999,37.661)] | Berkeley Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland Sp Railroad 
| [(-121.9565,37.898),(-121.9562,37.9)] | Oakland + Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland - Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley Sp Railroad | [(-122.0914,37.601),(-122.087,37.56),(-122.086408,37.5551)] | Berkeley - Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Berkeley + Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland Sp Railroad | [(-122.1947,37.497),(-122.193328,37.4848)] | Oakland Stanton Ave | [(-122.100392,37.0697),(-122.099513,37.06052)] | Oakland State Hwy 123 | [(-122.3004,37.986),(-122.2998,37.969),(-122.2995,37.962),(-122.2992,37.952),(-122.299,37.942),(-122.2987,37.935),(-122.2984,37.924),(-122.2982,37.92),(-122.2976,37.904),(-122.297,37.88),(-122.2966,37.869),(-122.2959,37.848),(-122.2961,37.843)] | Berkeley @@ -316,28 +329,15 @@ SELECT * FROM street; Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley + Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland - Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland - Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wisconsin St | 
[(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley + Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley - 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland - 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland - 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette - 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley - 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland - 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette - 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley - 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley - 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland - 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland - 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland - 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland - 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette (333 rows) SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out index 95ba66e95e..2ea4434f51 100644 --- a/src/test/regress/expected/txid.out +++ b/src/test/regress/expected/txid.out @@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3'; (1 row) SELECT txid_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT txid_snapshot '1:9223372036854775808:3'; - ^ + txid_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test txid_current_if_assigned BEGIN; SELECT txid_current_if_assigned() IS NULL; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 88d8f6c32d..0c4b994343 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -19,7 
+19,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); oid | typname -----+--------- @@ -32,7 +32,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); oid | typname -----+--------- (0 rows) diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out index 835077e9d5..0154990d1a 100644 --- a/src/test/regress/expected/xid.out +++ b/src/test/regress/expected/xid.out @@ -8,9 +8,9 @@ select '010'::xid, '42'::xid8, '0xffffffffffffffff'::xid8, '-1'::xid8; - xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 ------+-----+------------+------------+------+------+----------------------+---------------------- - 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615 + xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 +-----+-----+------------+----------------------+------+------+----------------------+---------------------- + 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615 (1 row) -- garbage values @@ -43,10 +43,10 @@ SELECT pg_input_is_valid('asdf', 'xid'); f (1 row) -SELECT * FROM pg_input_error_info('0xffffffffff', 'xid'); - message | detail | hint | sql_error_code ----------------------------------------------------+--------+------+---------------- - value "0xffffffffff" is out of range for type xid | | | 22003 +SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid'); + message | detail | hint | sql_error_code 
+-------------------------------------------------------------+--------+------+---------------- + value "0xffffffffffffffffffff" is out of range for type xid | | | 22003 (1 row) SELECT pg_input_is_valid('42', 'xid8'); @@ -441,9 +441,11 @@ SELECT pg_snapshot '1:9223372036854775807:3'; (1 row) SELECT pg_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT pg_snapshot '1:9223372036854775808:3'; - ^ + pg_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test pg_current_xact_id_if_assigned BEGIN; SELECT pg_current_xact_id_if_assigned() IS NULL; diff --git a/src/test/regress/expected/xid64.out b/src/test/regress/expected/xid64.out new file mode 100644 index 0000000000..c30c5b5739 --- /dev/null +++ b/src/test/regress/expected/xid64.out @@ -0,0 +1,92 @@ +--- +--- Unit test for xid64 functions +--- +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; +--- +--- Check page consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_1('test_xid64_table'); +INFO: test 1: page is converted to xid64 format + xid64_test_1 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET 
(autovacuum_enabled = false); +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_2('test_xid64_table'); + xid64_test_2 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE UNLOGGED TABLE test_xid64_table(i int); +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_2('test_xid64_table'); + xid64_test_2 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +CREATE UNLOGGED TABLE test_xid64_table(i text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_double_xmax('test_xid64_table'); +INFO: test double xmax: page 0 is converted into double xmax format +INFO: test double xmax: end + xid64_test_double_xmax +------------------------ + +(1 row) + +DROP TABLE test_xid64_table; +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f0987ff537..bfbf85a255 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes 
regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid xid64 mvcc # ---------- # Load huge amounts of data diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index bcbc6d910f..9bb8b541bb 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -23,6 +23,7 @@ #include "access/htup_details.h" #include "access/transam.h" #include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" @@ -578,6 +579,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); @@ -1262,3 +1264,293 @@ get_columns_length(PG_FUNCTION_ARGS) PG_RETURN_INT32(column_offset); } + +#include "access/hio.h" +#include "access/relation.h" +#include "storage/bufmgr.h" +#include "utils/rel.h" + +static void +CheckNewPage(char *msg, Page page) +{ + uint16 size; + + if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION) + elog(ERROR, "%s: page version is %d, expected %d ", + msg, PageGetPageLayoutVersion(page), PG_PAGE_LAYOUT_VERSION); + + size = PageGetSpecialSize(page); + if (size == MAXALIGN(sizeof(HeapPageSpecialData))) + elog(INFO, "%s: page is converted to xid64 format", msg); + else if (HeapPageIsDoubleXmax(page)) + elog(INFO, "%s: page is converted into double xmax format", msg); + else + elog(ERROR, "%s: converted page has pageSpecial size %u, expected %llu", + msg, size, + (unsigned long long) MAXALIGN(sizeof(HeapPageSpecialData))); +} + +/* + * Get page from relation. + * Make this page look like in 32-bit xid format. + * Convert it to 64-bit xid format. + * Run basic checks. 
+ */ +PG_FUNCTION_INFO_V1(xid64_test_1); +Datum +xid64_test_1(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + Buffer buf; + Page page; + PageHeader hdr; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + buf = ReadBuffer(rel, 0); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HeapPageSpecialData))) + elog(ERROR, "page expected in new format"); + + if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION) + elog(ERROR, "unknown page version (%u)", + PageGetPageLayoutVersion(page)); + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + convert_page(rel, page, buf, 0); + CheckNewPage("test 1", page); + + UnlockReleaseBuffer(buf); + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} + +typedef struct TupleCheckValues +{ + TransactionId xmin; + TransactionId xmax; +} TupleCheckValues; + +typedef struct RelCheckValues +{ + TupleCheckValues *tcv; + Size ntuples; +} RelCheckValues; + +static RelCheckValues +FillRelCheckValues(Relation rel, Buffer buffer, Page page) +{ + RelCheckValues set; + Size n; + +#define DEFAULT_SET_SIZE 64 + n = DEFAULT_SET_SIZE; + set.ntuples = 0; + set.tcv = palloc(sizeof(set.tcv[0]) * n); + + { + OffsetNumber maxoff, + offnum; + HeapTupleHeader tuphdr; + ItemId itemid; + HeapTupleData tuple; + TransactionId xmin, + xmax; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = tuphdr; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + if (HeapPageGetSpecial(page) == heapDoubleXmaxSpecial) + { + xmin = tuphdr->t_choice.t_heap.t_xmin; + xmax = tuphdr->t_choice.t_heap.t_xmax; + } + else + { + 
HeapTupleCopyXidsFromPage(buffer, &tuple, page, + IsToastRelation(rel)); + + xmin = HeapTupleGetRawXmin(&tuple); + xmax = HeapTupleGetRawXmax(&tuple); + } + + if (set.ntuples == n) + { + n *= 2; + set.tcv = repalloc(set.tcv, sizeof(set.tcv[0]) * n); + } + + set.tcv[set.ntuples].xmin = xmin; + set.tcv[set.ntuples].xmax = xmax; + set.ntuples++; + } + } + + return set; +} + +/* + * Test xmin/xmax invariant when converting page from 32bit xid to 64xid. + * + * Scenario: + * - enforce all relation pages to 32bit xid format, discarding pd_xid_base and + * pd_multi_base + * - store all xmin/xmax in array + * - convert all the pages from relation into 64xid format + * - store all new xmin/xmax in array + * - compare old and new xmin/xmax + * + * NOTE: inital xid value does not affect test as pd_xid_base/pd_multi_base + * discarded. + */ +PG_FUNCTION_INFO_V1(xid64_test_2); +Datum +xid64_test_2(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + RelCheckValues before, + after; + BlockNumber pageno, + npages; + Size i; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + + /* get page */ + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + /* make page look like 32-bit xid page */ + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + before = FillRelCheckValues(rel, buf, page); + convert_page(rel, page, buf, pageno); + after = FillRelCheckValues(rel, buf, page); + + /* check */ + if (before.ntuples != after.ntuples) + elog(ERROR, "numer of tuples must be equal"); + + for (i = 0; i != before.ntuples; ++i) + { + if (before.tcv[i].xmin != after.tcv[i].xmin && after.tcv[i].xmin) + elog(ERROR, "old and new xmin does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmin, 
+ (unsigned long long) after.tcv[i].xmin); + + if (before.tcv[i].xmax != after.tcv[i].xmax) + elog(ERROR, "old and new xmax does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmax, + (unsigned long long) after.tcv[i].xmax); + } + + Assert(npages != 0); + pfree(before.tcv); + pfree(after.tcv); + + UnlockReleaseBuffer(buf); + } + + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(xid64_test_double_xmax); +Datum +xid64_test_double_xmax(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + BlockNumber pageno, + npages; + bool found; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + found = false; + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + ItemId itemid; + OffsetNumber offnum; + HeapTupleHeader tuphdr; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (pageno == 0) + { + itemid = PageGetItemId(page, FirstOffsetNumber); + itemid->lp_len += 16; /* Move to overlap special */ + } + + for (offnum = FirstOffsetNumber; + offnum <= PageGetMaxOffsetNumber(page); + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuphdr->t_infomask |= HEAP_XMIN_COMMITTED; + } + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + convert_page(rel, page, buf, pageno); + + if (HeapPageIsDoubleXmax(page)) + { + found = true; + elog(INFO, "test double xmax: page %u is converted into double xmax format", + pageno); + } + + UnlockReleaseBuffer(buf); + } + + if (!found) + elog(ERROR, "test double xmax: failed, no double xmax"); + + Assert(npages != 0); + elog(INFO, "test double xmax: end"); + + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} diff --git 
a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql index 3e2f6c0237..ea087b5128 100644 --- a/src/test/regress/sql/indirect_toast.sql +++ b/src/test/regress/sql/indirect_toast.sql @@ -76,7 +76,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; VACUUM FREEZE indtoasttest; SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; + +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; + +insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len); + DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index bdcffd0314..7ada0801eb 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; @@ -597,3 +597,18 @@ alter table returningwrtest2 drop c; alter table returningwrtest attach partition returningwrtest2 for values in (2); insert into returningwrtest values (2, 'foo') returning returningwrtest; drop table returningwrtest; + +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values 
(repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f13699..a94bd7259c 100644 --- a/src/test/regress/sql/select_views.sql +++ b/src/test/regress/sql/select_views.sql @@ -3,7 +3,7 @@ -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C", cname COLLATE "C"; SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index e88d6cbe49..bbc2213b54 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -22,7 +22,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); -- Look for "pass by value" types that can't be passed by value. @@ -33,7 +33,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); -- Look for "toastable" types that aren't varlena. 
diff --git a/src/test/regress/sql/xid.sql b/src/test/regress/sql/xid.sql index 9f716b3653..9b94cb9a4a 100644 --- a/src/test/regress/sql/xid.sql +++ b/src/test/regress/sql/xid.sql @@ -19,7 +19,7 @@ select 'asdf'::xid8; -- Also try it with non-error-throwing API SELECT pg_input_is_valid('42', 'xid'); SELECT pg_input_is_valid('asdf', 'xid'); -SELECT * FROM pg_input_error_info('0xffffffffff', 'xid'); +SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid'); SELECT pg_input_is_valid('42', 'xid8'); SELECT pg_input_is_valid('asdf', 'xid8'); SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid8'); diff --git a/src/test/regress/sql/xid64.sql b/src/test/regress/sql/xid64.sql new file mode 100644 index 0000000000..caa97a0ed9 --- /dev/null +++ b/src/test/regress/sql/xid64.sql @@ -0,0 +1,84 @@ +--- +--- Unit test for xid64 functions +--- + +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; + +--- +--- Check page consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_1('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); + +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES 
(random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; + +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_2('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE UNLOGGED TABLE test_xid64_table(i int); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_2('test_xid64_table'); +DROP TABLE test_xid64_table; + +CREATE UNLOGGED TABLE test_xid64_table(i text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_double_xmax('test_xid64_table'); +DROP TABLE test_xid64_table; + +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile new file mode 100644 index 0000000000..3b1e50dfc0 --- /dev/null +++ b/src/test/xid-64/Makefile @@ -0,0 +1,22 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/xid-64 +# +# Copyright (c) 2018, Postgres Professional +# +# src/test/xid-64/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/xid-64 +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/xid-64/README b/src/test/xid-64/README new file mode 100644 index 0000000000..01c0a1a1f7 --- /dev/null +++ b/src/test/xid-64/README @@ -0,0 +1,16 @@ +src/test/xid-64/README + +Regression tests for 64-bit XIDs +============================================= + +This directory contains a test suite for 64-bit xids. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes. + +NOTE: This requires the --enable-tap-tests argument to configure. diff --git a/src/test/xid-64/meson.build b/src/test/xid-64/meson.build new file mode 100644 index 0000000000..63a780b69e --- /dev/null +++ b/src/test/xid-64/meson.build @@ -0,0 +1,16 @@ +tests += { + 'name': 'xid-64', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_test_large_xids.pl', + 't/002_test_gucs.pl', + 't/003_test_integrity.pl', + 't/004_test_relminmxid.pl', + 't/005_stream_subxact.pl', + 't/006_zeropage.pl', + 't/007_first_multi.pl', + ], + }, +} diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl new file mode 100644 index 0000000000..9341389233 --- /dev/null +++ b/src/test/xid-64/t/002_test_gucs.pl @@ -0,0 +1,79 @@ +# Tests for guc boundary values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . 
"\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +sub set_guc +{ + my ($node, $guc, $val) = @_; + print("SET $guc = $val\n"); + $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val"); + $node->restart(); +} + +sub test_pgbench +{ + my ($node) = @_; + $node->command_ok( + [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ], + 'pgbench finished without errors'); +} + +my @guc_vals = ( + [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ], + [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ], + [ "vacuum_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_freeze_table_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ] +); + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init; +# Disable logging of all statements to avoid log bloat during pgbench +$node->append_conf('postgresql.conf', "log_statement = none"); +$node->start; + +# Fill the test database with the pgbench data +$node->command_ok( + [ qw(pgbench --initialize --scale=10) ], + 'pgbench finished without errors'); + +# Test all GUCs with minimum, maximum and random value inbetween +# (run pgbench for every configuration setting) +foreach my $gi (0 .. 
$#guc_vals) { + print($guc_vals[$gi][0]); print("\n"); + my $guc = $guc_vals[$gi][0]; + my $minval = $guc_vals[$gi][1]; + my $maxval = $guc_vals[$gi][2]; + set_guc($node, $guc, $minval); + test_pgbench($node); + set_guc($node, $guc, $maxval); + test_pgbench($node); + set_guc($node, $guc, $minval + int(rand($maxval - $minval))); + test_pgbench($node); +} + +done_testing(); diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl new file mode 100644 index 0000000000..5b0789688e --- /dev/null +++ b/src/test/xid-64/t/003_test_integrity.pl @@ -0,0 +1,58 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Compare; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; +use bigint; + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(); +$node->start; + +# Create a database and fill it with the pgbench data +$node->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +$node->command_ok( + [ qw(pgbench --initialize --scale=2 pgbench_db) ], + 'pgbench finished without errors'); +# Dump the database (cluster the main table to put data in a determined order) +$node->safe_psql('pgbench_db', qq( + CREATE INDEX pa_aid_idx ON pgbench_accounts (aid); + CLUSTER pgbench_accounts USING pa_aid_idx)); +$node->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench.sql", "pgbench_db" ], + 'pgdump finished without errors'); +$node->stop('fast'); + +# Initialize second node +my $node2 = PostgreSQL::Test::Cluster->new('master2'); +$node2->init; +# Disable logging of all statements to avoid log bloat during restore 
+$node2->append_conf('postgresql.conf', "log_statement = none"); +$node2->start; + +# Create a database and restore the previous dump +$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()'); +print("# Initial txid_current: $txid0\n"); +$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]); + +# Dump the database and compare the dumped content with the previous one +$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts'); +$node2->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench2.sql", "pgbench_db" ], + 'pgdump finished without errors'); +ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected"); + +done_testing(); diff --git a/src/test/xid-64/t/004_test_relminmxid.pl b/src/test/xid-64/t/004_test_relminmxid.pl new file mode 100644 index 0000000000..e924f9cd9a --- /dev/null +++ b/src/test/xid-64/t/004_test_relminmxid.pl @@ -0,0 +1,90 @@ +# Check that VACUUM correctly detects a broken (too large) relminmxid +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +my ($node, $rmm, $vacout); +$node = PostgreSQL::Test::Cluster->new('master'); +$node->init; +$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2'); +$node->start; + +sub relminmxid +{ + my $rmm = $node->safe_psql("postgres", qq( + SELECT relminmxid + FROM pg_class + WHERE relname = 'foo';)); + return $rmm + 0; +} + +sub vacuum +{ + my ($rc, $stdout, $stderr) = $node->psql("postgres", "VACUUM foo;"); + return $stdout.$stderr; +} + +sub gen_multixact +{ + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY SHARE; + PREPARE TRANSACTION 'fooshare'; + )); + + my $xmax = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($xmax + 0, 0, "xmax not empty"); + + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY 
SHARE; + COMMIT; + COMMIT PREPARED 'fooshare'; + )); + + my $mxact = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($mxact + 0, 0, "mxact not empty"); + cmp_ok($xmax, '>', $mxact, "xmax is greater than mxact"); +} + +# Create the test table and insert an initial row +$node->safe_psql("postgres", "CREATE TABLE foo (a int); INSERT INTO foo VALUES (1);"); + +is(relminmxid(), 1, "relminmxid is default"); + +vacuum(); +is(relminmxid(), 1, "relminmxid is still default"); + +gen_multixact(); +is(relminmxid(), 1, "relminmxid is still still default"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error"); + +# Now intentionally break relminmxid +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = ((1::int8<<62) + 1)::text::xid + WHERE relname = 'foo' +)); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (intentionally)"); + +gen_multixact(); +like(vacuum(), qr/multixact.*before relminmxid/, "got relminmxid error"); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (still)"); + +# Fix relminmxid by setting to default +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = '1' + WHERE relname = 'foo' +)); +is(relminmxid(), 1, "relminmxid is default again"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error again"); + +done_testing(); diff --git a/src/test/xid-64/t/005_stream_subxact.pl b/src/test/xid-64/t/005_stream_subxact.pl new file mode 100644 index 0000000000..6765f6061c --- /dev/null +++ b/src/test/xid-64/t/005_stream_subxact.pl @@ -0,0 +1,100 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test xids streaming of large transaction containing large subtransactions +# near 32-bit boundary. +# +# Mostly it is a copy of 016_stream_subxact.pl, but with publisher xid inited +# just before 32-bit boundary, so if xids are replicated as 32-bit values, +# subscriber will get 0 xid value. 
+use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Create publisher node +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->append_conf('postgresql.conf', + 'logical_decoding_work_mem = 64kB'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b varchar)"); +$node_publisher->safe_psql('postgres', + "INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')"); + +# Setup structure on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b text, c timestamptz DEFAULT now(), d bigint DEFAULT 999)" +); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR TABLE test_tab"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" +); + +$node_publisher->wait_for_catchup($appname); + +# Also wait for initial table sync to finish +my $synced_query = + "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +my $result = + $node_subscriber->safe_psql('postgres', + "SELECT count(*), count(c), count(d = 999) FROM test_tab"); +is($result, qq(2|2|2), 'check initial data was copied to subscriber'); + +# Insert, update and delete enough rows to exceed 64kB limit. 
+$node_publisher->safe_psql( + 'postgres', q{ +BEGIN; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series( 3, 500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s1; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(501, 1000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s2; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1001, 1500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s3; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1501, 2000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s4; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(2001, 2500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +COMMIT; +}); + +$node_publisher->wait_for_catchup($appname); + +$result = + $node_subscriber->safe_psql('postgres', + "SELECT count(*), count(c), count(d = 999) FROM test_tab"); +is($result, qq(1667|1667|1667), + 'check data was copied to subscriber in streaming mode and extra columns contain local defaults' +); + +$node_subscriber->stop; +$node_publisher->stop; + +done_testing(); diff --git a/src/test/xid-64/t/006_zeropage.pl b/src/test/xid-64/t/006_zeropage.pl new file mode 100644 index 0000000000..4b87c90edc --- /dev/null +++ b/src/test/xid-64/t/006_zeropage.pl @@ -0,0 +1,33 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Check WAL for ZEROPAGE record. + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . 
"\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + return $stdout; +} + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->start; +my $pgdata = $node->data_dir; +my $xlogfilename0 = $node->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn())"); +#$node->command_like( +# [ 'pg_waldump', '-S', "$pgdata/pg_wal/$xlogfilename0" ], +# qr/ZEROPAGE/, +# 'pg_waldump prints start timestamp'); +my $wd_output = command_output( + [ 'pg_waldump', "$pgdata/pg_wal/$xlogfilename0" ]); +ok($wd_output =~ qr/ZEROPAGE page 0/, "ZEROPAGE found"); + +done_testing(); diff --git a/src/test/xid-64/t/007_first_multi.pl b/src/test/xid-64/t/007_first_multi.pl new file mode 100644 index 0000000000..eca2c39af9 --- /dev/null +++ b/src/test/xid-64/t/007_first_multi.pl @@ -0,0 +1,83 @@ +# Test for pages with first tuple has xmax multi +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +sub test_multixact +{ + my ($primary, $standby, $test_name) = @_; + + $primary->safe_psql('postgres', q{ + CREATE TABLE t (id INT, data TEXT, CONSTRAINT t_id_pk PRIMARY KEY(id)); + INSERT INTO t SELECT 1, repeat('a', 1000); + }); + + my %psql = ( + '1' => $primary->background_psql('postgres'), + '2' => $primary->background_psql('postgres'), + ); + + # Lock tuples + $psql{1}->query_safe(q( + BEGIN; + SELECT * FROM t FOR KEY SHARE; + )); + + $psql{2}->query_safe(q( + BEGIN; + SELECT * FROM t FOR KEY SHARE; + )); + + # Repeat update until we get a new page with one tuple + my $res; + my $guard = 0; + + do { + $res = $primary->safe_psql('postgres', q{ + UPDATE t SET data = repeat('a', 1000) RETURNING ctid; + }); + # Fail if we already write around 64k and still have no new page. 
+ fail("creating second page") if (++$guard == 64); + } until ($res eq "(1,1)"); + + $psql{1}->quit; + $psql{2}->quit; + $primary->wait_for_catchup($standby); + + # Check results + my $query = q{ + SELECT xmax, ctid, id, data = repeat('a', 1000) as data FROM t; + }; + my $res_primary = $primary->safe_psql('postgres', $query); + my $res_standby = $standby->safe_psql('postgres', $query); + + is($res_primary, $res_standby, "rows are the same in test $test_name"); +} + +# We should run test for full_page_writes on and off. +foreach ('true', 'false') { + # Create primary + my $primary = PostgreSQL::Test::Cluster->new("master_$_"); + $primary->init(allows_streaming => 1); + $primary->append_conf('postgresql.conf', "full_page_writes = $_"); + $primary->start; + + # Take backup + my $backup_name = "my_backup_$_"; + $primary->backup($backup_name); + + # Create standby from backup + my $standby = PostgreSQL::Test::Cluster->new("standby_$_"); + $standby->init_from_backup($primary, $backup_name, has_streaming => 1); + $standby->start; + + # Check + test_multixact($primary, $standby, "with FPW $_"); + + $standby->stop(); + $primary->stop(); +} + +done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 38a86575e1..194646ef2d 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3438,8 +3438,8 @@ intset_leaf_node intset_node intvKEY io_stat_col -itemIdCompact -itemIdCompactData +ItemIdCompact +ItemIdCompactData iterator jmp_buf join_search_hook_type -- 2.34.1