From 0fd3de673e4df9ad46333a42742c16ec009ffd6c Mon Sep 17 00:00:00 2001 From: Yura Sokolov Date: Wed, 29 Dec 2021 18:54:31 +0300 Subject: [PATCH v6 2/2] Add 64bit xids - change TransactionId to 64bit - disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remains 32bit -- now 32bit xid is named ShortTransactionId - heap page format is changed to contain xid and multixact base value, tuple's xmin and xmax are offsets from. -- xid_base and multi_base are stored as a page special data. PageHeader remains unmodified. - in-memory tuple (HeapTuple) were enriched with copy of xid_base and multi_base from a page. -- todo: replace xid_base/multi_base with precalculated 64bit xmin/xmax. Authors: - Alexander Korotkov - Teodor Sigaev - Nikita Glukhov - Konstantin Knizhnik - Maxim Orlov - Pavel Borisov - Yura Sokolov --- contrib/amcheck/verify_heapam.c | 91 +- contrib/amcheck/verify_nbtree.c | 2 +- contrib/hstore/hstore_io.c | 2 + contrib/pageinspect/Makefile | 3 +- contrib/pageinspect/brinfuncs.c | 10 +- contrib/pageinspect/btreefuncs.c | 25 +- contrib/pageinspect/expected/hash_1.out | 166 +++ .../pageinspect/expected/oldextversions.out | 10 +- contrib/pageinspect/expected/page.out | 28 +- contrib/pageinspect/fsmfuncs.c | 2 +- contrib/pageinspect/heapfuncs.c | 9 +- .../pageinspect/pageinspect--1.10--1.11.sql | 145 +++ contrib/pageinspect/pageinspect--1.5.sql | 2 + contrib/pageinspect/pageinspect.control | 2 +- contrib/pageinspect/rawpage.c | 26 +- contrib/pg_surgery/heap_surgery.c | 10 +- .../pg_visibility/expected/pg_visibility.out | 17 + contrib/pg_visibility/pg_visibility.c | 3 +- contrib/pg_visibility/sql/pg_visibility.sql | 18 + contrib/pgrowlocks/pgrowlocks.c | 8 +- contrib/pgstattuple/pgstatapprox.c | 1 + contrib/pgstattuple/pgstatindex.c | 2 +- .../postgres_fdw/expected/postgres_fdw.out | 55 +- contrib/postgres_fdw/postgres_fdw.c | 9 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +- contrib/test_decoding/test_decoding.c | 24 +- 
src/backend/access/common/heaptuple.c | 10 +- src/backend/access/common/reloptions.c | 118 +- src/backend/access/hash/hashvalidate.c | 5 +- src/backend/access/heap/heapam.c | 1059 ++++++++++++++--- src/backend/access/heap/heapam_handler.c | 30 +- src/backend/access/heap/heapam_visibility.c | 173 +-- src/backend/access/heap/heaptoast.c | 3 + src/backend/access/heap/hio.c | 16 +- src/backend/access/heap/pruneheap.c | 48 +- src/backend/access/heap/rewriteheap.c | 56 +- src/backend/access/heap/vacuumlazy.c | 121 +- src/backend/access/nbtree/nbtpage.c | 2 + src/backend/access/nbtree/nbtsplitloc.c | 13 +- src/backend/access/nbtree/nbtxlog.c | 2 + src/backend/access/rmgrdesc/clogdesc.c | 8 +- src/backend/access/rmgrdesc/committsdesc.c | 8 +- src/backend/access/rmgrdesc/gistdesc.c | 8 +- src/backend/access/rmgrdesc/hashdesc.c | 2 +- src/backend/access/rmgrdesc/heapdesc.c | 50 +- src/backend/access/rmgrdesc/mxactdesc.c | 12 +- src/backend/access/rmgrdesc/nbtdesc.c | 8 +- src/backend/access/rmgrdesc/spgdesc.c | 2 +- src/backend/access/rmgrdesc/standbydesc.c | 6 +- src/backend/access/rmgrdesc/xactdesc.c | 16 +- src/backend/access/rmgrdesc/xlogdesc.c | 9 +- src/backend/access/transam/clog.c | 92 +- src/backend/access/transam/commit_ts.c | 70 +- src/backend/access/transam/multixact.c | 885 +++----------- src/backend/access/transam/slru.c | 125 +- src/backend/access/transam/subtrans.c | 51 +- src/backend/access/transam/transam.c | 68 +- src/backend/access/transam/twophase.c | 31 +- src/backend/access/transam/varsup.c | 153 +-- src/backend/access/transam/xact.c | 50 +- src/backend/access/transam/xlog.c | 40 +- src/backend/access/transam/xlogreader.c | 34 - src/backend/bootstrap/bootstrap.c | 29 +- src/backend/catalog/heap.c | 8 +- src/backend/catalog/pg_inherits.c | 2 +- src/backend/commands/async.c | 16 +- src/backend/commands/sequence.c | 44 +- src/backend/commands/vacuum.c | 76 +- src/backend/executor/execExprInterp.c | 1 + src/backend/executor/execUtils.c | 1 + 
src/backend/executor/nodeModifyTable.c | 1 + src/backend/executor/spi.c | 1 + src/backend/nodes/outfuncs.c | 4 +- src/backend/optimizer/util/plancat.c | 2 +- src/backend/postmaster/autovacuum.c | 72 +- src/backend/replication/logical/decode.c | 21 +- src/backend/replication/logical/logical.c | 2 +- src/backend/replication/logical/proto.c | 4 +- .../replication/logical/reorderbuffer.c | 29 +- src/backend/replication/logical/snapbuild.c | 18 +- src/backend/replication/logical/worker.c | 12 +- src/backend/replication/walreceiver.c | 27 +- src/backend/replication/walsender.c | 63 +- src/backend/statistics/extended_stats.c | 1 + src/backend/storage/buffer/Makefile | 3 +- src/backend/storage/buffer/bufmgr.c | 19 +- src/backend/storage/buffer/heap_convert.c | 496 ++++++++ src/backend/storage/ipc/procarray.c | 120 +- src/backend/storage/ipc/standby.c | 10 +- src/backend/storage/lmgr/lmgr.c | 13 +- src/backend/storage/lmgr/predicate.c | 28 +- src/backend/storage/page/bufpage.c | 199 +++- src/backend/utils/adt/enum.c | 2 +- src/backend/utils/adt/jsonfuncs.c | 2 + src/backend/utils/adt/lockfuncs.c | 11 +- src/backend/utils/adt/numutils.c | 22 + src/backend/utils/adt/pgstatfuncs.c | 1 + src/backend/utils/adt/rowtypes.c | 12 + src/backend/utils/adt/xid.c | 27 +- src/backend/utils/adt/xid8funcs.c | 57 +- src/backend/utils/cache/catcache.c | 1 + src/backend/utils/cache/relcache.c | 5 +- src/backend/utils/error/elog.c | 12 +- src/backend/utils/fmgr/fmgr.c | 4 +- src/backend/utils/misc/guc.c | 184 +-- src/backend/utils/misc/help_config.c | 8 +- src/backend/utils/misc/pg_controldata.c | 3 +- src/backend/utils/misc/postgresql.conf.sample | 4 +- src/backend/utils/sort/tuplesort.c | 8 +- src/backend/utils/time/combocid.c | 18 +- src/backend/utils/time/snapmgr.c | 23 +- src/bin/initdb/initdb.c | 67 +- src/bin/pg_amcheck/t/004_verify_heapam.pl | 184 ++- src/bin/pg_controldata/pg_controldata.c | 17 +- src/bin/pg_dump/pg_dump.c | 51 +- src/bin/pg_dump/pg_dump.h | 8 +- 
src/bin/pg_resetwal/pg_resetwal.c | 143 ++- src/bin/pg_upgrade/Makefile | 1 + src/bin/pg_upgrade/check.c | 114 ++ src/bin/pg_upgrade/controldata.c | 28 +- src/bin/pg_upgrade/file.c | 172 ++- src/bin/pg_upgrade/pg_upgrade.c | 132 +- src/bin/pg_upgrade/pg_upgrade.h | 24 +- src/bin/pg_upgrade/segresize.c | 571 +++++++++ src/bin/pg_upgrade/test.sh | 19 +- src/bin/pg_upgrade/util.c | 17 + src/bin/pg_upgrade/version.c | 134 ++- src/bin/pg_verifybackup/t/003_corruption.pl | 2 +- src/bin/pg_waldump/pg_waldump.c | 4 +- src/include/access/clog.h | 2 +- src/include/access/commit_ts.h | 2 +- src/include/access/ginblock.h | 11 +- src/include/access/gist.h | 2 +- src/include/access/heapam.h | 16 +- src/include/access/heapam_xlog.h | 22 +- src/include/access/heaptoast.h | 2 +- src/include/access/htup.h | 16 +- src/include/access/htup_details.h | 142 ++- src/include/access/multixact.h | 14 +- src/include/access/nbtree.h | 10 + src/include/access/rewriteheap.h | 2 +- src/include/access/rmgrlist.h | 1 + src/include/access/slru.h | 28 +- src/include/access/tableam.h | 2 +- src/include/access/transam.h | 76 +- src/include/access/xact.h | 13 +- src/include/access/xlog.h | 8 + src/include/access/xlogreader.h | 4 - src/include/c.h | 44 +- src/include/catalog/pg_amproc.dat | 4 +- src/include/catalog/pg_control.h | 6 + src/include/catalog/pg_operator.dat | 8 +- src/include/catalog/pg_proc.dat | 12 +- src/include/catalog/pg_type.dat | 4 +- src/include/commands/vacuum.h | 28 +- src/include/fmgr.h | 2 + src/include/pg_config.h.in | 3 + src/include/postgres.h | 6 +- src/include/postmaster/autovacuum.h | 4 +- src/include/storage/bufmgr.h | 2 + src/include/storage/bufpage.h | 105 +- src/include/storage/itemid.h | 2 + src/include/storage/lock.h | 14 +- src/include/storage/proc.h | 2 +- src/include/storage/standby.h | 2 +- src/include/storage/sync.h | 2 +- src/include/utils/builtins.h | 1 + src/include/utils/combocid.h | 2 +- src/include/utils/rel.h | 12 +- src/pl/plperl/plperl.c | 4 +- 
src/pl/plpgsql/src/pl_comp.c | 4 +- src/pl/plpgsql/src/pl_exec.c | 2 + src/pl/plpython/plpy_procedure.c | 4 +- src/pl/tcl/pltcl.c | 4 +- src/test/Makefile | 3 +- src/test/perl/PostgreSQL/Test/Cluster.pm | 1 + src/test/recovery/t/003_recovery_targets.pl | 2 +- src/test/regress/expected/indirect_toast.out | 8 + src/test/regress/expected/insert.out | 16 +- src/test/regress/expected/opr_sanity.out | 6 +- src/test/regress/expected/select_views.out | 70 +- src/test/regress/expected/txid.out | 8 +- src/test/regress/expected/xid.out | 14 +- src/test/regress/pg_regress.c | 2 +- src/test/regress/regress.c | 1 + src/test/regress/sql/indirect_toast.sql | 11 + src/test/regress/sql/insert.sql | 17 +- src/test/regress/sql/select_views.sql | 2 +- src/test/xid-64/Makefile | 22 + src/test/xid-64/README | 16 + src/test/xid-64/t/001_test_large_xids.pl | 53 + src/test/xid-64/t/002_test_gucs.pl | 78 ++ src/test/xid-64/t/003_test_integrity.pl | 56 + src/tools/msvc/Solution.pm | 1 + 194 files changed, 5635 insertions(+), 2891 deletions(-) create mode 100644 contrib/pageinspect/expected/hash_1.out create mode 100644 contrib/pageinspect/pageinspect--1.10--1.11.sql create mode 100644 src/backend/storage/buffer/heap_convert.c create mode 100644 src/bin/pg_upgrade/segresize.c create mode 100644 src/test/xid-64/Makefile create mode 100644 src/test/xid-64/README create mode 100644 src/test/xid-64/t/001_test_large_xids.pl create mode 100644 src/test/xid-64/t/002_test_gucs.pl create mode 100644 src/test/xid-64/t/003_test_integrity.pl diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index a23d0182fc0..cc58eb864c3 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -126,6 +126,7 @@ typedef struct HeapCheckContext uint16 lp_len; uint16 lp_off; HeapTupleHeader tuphdr; + HeapTupleData tuple; int natts; /* Values for iterating over attributes within the tuple */ @@ -523,6 +524,11 @@ verify_heapam(PG_FUNCTION_ARGS) ctx.tuphdr = 
(HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid); ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr); + ctx.tuple.t_data = ctx.tuphdr; + ctx.tuple.t_len = ItemIdGetLength(ctx.itemid); + ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel); + HeapTupleCopyBaseFromPage(&ctx.tuple, ctx.page); + /* Ok, ready to check this next tuple */ check_tuple(&ctx); } @@ -768,12 +774,13 @@ check_tuple_visibility(HeapCheckContext *ctx) XidCommitStatus xmin_status; XidCommitStatus xvac_status; XidCommitStatus xmax_status; + HeapTuple tuple = &ctx->tuple; HeapTupleHeader tuphdr = ctx->tuphdr; ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */ /* If xmin is normal, it should be within valid range */ - xmin = HeapTupleHeaderGetXmin(tuphdr); + xmin = HeapTupleGetXmin(tuple); switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: @@ -781,23 +788,20 @@ check_tuple_visibility(HeapCheckContext *ctx) break; case XID_IN_FUTURE: report_corruption(ctx, - psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u", + psprintf("xmin " XID_FMT " equals or exceeds next valid transaction ID " XID_FMT, xmin, - EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("xmin %u precedes oldest valid transaction ID %u:%u", + psprintf("xmin " XID_FMT " precedes oldest valid transaction ID " XID_FMT, xmin, - EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("xmin %u precedes relation freeze threshold %u:%u", + psprintf("xmin " XID_FMT " precedes relation freeze threshold " XID_FMT, xmin, - EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; } @@ -822,23 +826,20 @@ check_tuple_visibility(HeapCheckContext *ctx) return false; case XID_IN_FUTURE: report_corruption(ctx, - psprintf("old-style 
VACUUM FULL transaction ID %u for moved off tuple equals or exceeds next valid transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved off tuple equals or exceeds next valid transaction ID " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes relation freeze threshold %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved off tuple precedes relation freeze threshold " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes oldest valid transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved off tuple precedes oldest valid transaction ID " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: @@ -849,12 +850,12 @@ check_tuple_visibility(HeapCheckContext *ctx) { case XID_IS_CURRENT_XID: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple matches our current transaction ID", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved off tuple matches our current transaction ID", xvac)); return false; case XID_IN_PROGRESS: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple appears to be in progress", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved off tuple appears to be in progress", xvac)); return false; @@ -891,23 +892,20 @@ check_tuple_visibility(HeapCheckContext *ctx) return false; case XID_IN_FUTURE: report_corruption(ctx, - psprintf("old-style VACUUM FULL 
transaction ID %u for moved in tuple equals or exceeds next valid transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved in tuple equals or exceeds next valid transaction ID " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes relation freeze threshold %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved in tuple precedes relation freeze threshold " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes oldest valid transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved in tuple precedes oldest valid transaction ID " XID_FMT, xvac, - EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: @@ -918,12 +916,12 @@ check_tuple_visibility(HeapCheckContext *ctx) { case XID_IS_CURRENT_XID: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple matches our current transaction ID", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved in tuple matches our current transaction ID", xvac)); return false; case XID_IN_PROGRESS: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple appears to be in progress", + psprintf("old-style VACUUM FULL transaction ID " XID_FMT " for moved in tuple appears to be in progress", xvac)); return false; @@ -985,7 +983,7 @@ check_tuple_visibility(HeapCheckContext *ctx) * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of * avoiding possibly-bogus complaints about missing 
TOAST entries. */ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (check_mxid_valid_in_rel(xmax, ctx)) { case XID_INVALID: @@ -994,17 +992,17 @@ check_tuple_visibility(HeapCheckContext *ctx) return true; case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u", + psprintf("multitransaction ID " XID_FMT " precedes relation minimum multitransaction ID threshold " XID_FMT, xmax, ctx->relminmxid)); return true; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u", + psprintf("multitransaction ID " XID_FMT " precedes oldest valid multitransaction ID threshold " XID_FMT, xmax, ctx->oldest_mxact)); return true; case XID_IN_FUTURE: report_corruption(ctx, - psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u", + psprintf("multitransaction ID " XID_FMT " equals or exceeds next valid multitransaction ID " XID_FMT, xmax, ctx->next_mxact)); return true; @@ -1042,7 +1040,7 @@ check_tuple_visibility(HeapCheckContext *ctx) * We already checked above that this multixact is within limits for * this table. Now check the update xid from this multixact. 
*/ - xmax = HeapTupleGetUpdateXid(tuphdr); + xmax = HeapTupleGetUpdateXid(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1052,23 +1050,20 @@ check_tuple_visibility(HeapCheckContext *ctx) return true; case XID_IN_FUTURE: report_corruption(ctx, - psprintf("update xid %u equals or exceeds next valid transaction ID %u:%u", + psprintf("update xid " XID_FMT " equals or exceeds next valid transaction ID " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return true; case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("update xid %u precedes relation freeze threshold %u:%u", + psprintf("update xid " XID_FMT " precedes relation freeze threshold " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return true; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("update xid %u precedes oldest valid transaction ID %u:%u", + psprintf("update xid " XID_FMT " precedes oldest valid transaction ID " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return true; case XID_BOUNDS_OK: @@ -1109,28 +1104,25 @@ check_tuple_visibility(HeapCheckContext *ctx) } /* xmax is an XID, not a MXID. Sanity check it. 
*/ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_IN_FUTURE: report_corruption(ctx, - psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", + psprintf("xmax " XID_FMT " equals or exceeds next valid transaction ID " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("xmax %u precedes relation freeze threshold %u:%u", + psprintf("xmax " XID_FMT " precedes relation freeze threshold " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("xmax %u precedes oldest valid transaction ID %u:%u", + psprintf("xmax " XID_FMT " precedes oldest valid transaction ID " XID_FMT, xmax, - EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: @@ -1615,21 +1607,13 @@ check_tuple(HeapCheckContext *ctx) } /* - * Convert a TransactionId into a FullTransactionId using our cached values of - * the valid transaction ID range. It is the caller's responsibility to have - * already updated the cached values, if necessary. + * With 64xid this function is just wrapper for FullTransactionIdFromXid, + * but we left it in order for its calls remain "vanilla" like. 
*/ static FullTransactionId -FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx) +FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx pg_attribute_unused()) { - uint32 epoch; - - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - epoch = EpochFromFullTransactionId(ctx->next_fxid); - if (xid > ctx->next_xid) - epoch--; - return FullTransactionIdFromEpochAndXid(epoch, xid); + return FullTransactionIdFromXid(xid); } /* @@ -1754,7 +1738,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, * performed the full xid conversion, reconvert. */ update_cached_xid_range(ctx); - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); } if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid)) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index d3b29d3d890..af9a70c7758 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -498,7 +498,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * avoid this. 
*/ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple), snapshot->xmin)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index b3304ff8445..f54741330c5 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -859,6 +859,7 @@ hstore_from_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); @@ -1012,6 +1013,7 @@ hstore_populate_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); } /* diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index 5c0736564ab..5ca80c9d766 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -13,7 +13,8 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ +DATA = pageinspect--1.10--1.11.sql \ + pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c index 0e3c2deb66c..d19cef5b129 100644 --- a/contrib/pageinspect/brinfuncs.c +++ b/contrib/pageinspect/brinfuncs.c @@ -45,7 +45,7 @@ Datum brin_page_type(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); - Page page = VARDATA(raw_page); + Page page = get_page_from_raw(raw_page); int raw_page_size; char *type; @@ -101,7 +101,7 @@ verify_brin_page(bytea *raw_page, uint16 type, const char *strtype) errdetail("Expected 
size %d, got %d", BLCKSZ, raw_page_size))); - page = VARDATA(raw_page); + page = get_page_from_raw(raw_page); /* verify the special space says this page is what we want */ if (BrinPageType(page) != type) @@ -393,15 +393,15 @@ brin_revmap_data(PG_FUNCTION_ARGS) MemoryContext mctx; Page page; - /* minimally verify the page we got */ - page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap"); - /* create a function context for cross-call persistence */ fctx = SRF_FIRSTCALL_INIT(); /* switch to memory context appropriate for multiple function calls */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + /* minimally verify the page we got */ + page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap"); + state = palloc(sizeof(*state)); state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids; state->idx = 0; diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 03debe336ba..21463cf65c4 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -105,9 +105,14 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->page_size = PageGetPageSize(page); + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_level = opaque->btpo_level; + /* page type (flags) */ if (P_ISDELETED(opaque)) { + TransactionId safexid; + /* We divide deleted pages into leaf ('d') or internal ('D') */ if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque)) stat->type = 'd'; @@ -122,16 +127,16 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) * called "bpto"). 
*/ if (P_HAS_FULLXID(opaque)) + safexid = XidFromFullTransactionId(BTPageGetDeleteXid(page)); + else { - FullTransactionId safexid = BTPageGetDeleteXid(page); - - elog(DEBUG2, "deleted page from block %u has safexid %u:%u", - blkno, EpochFromFullTransactionId(safexid), - XidFromFullTransactionId(safexid)); + safexid = BTP_GET_XACT(opaque); + stat->btpo_prev = 0; + stat->btpo_level = 0; } - else - elog(DEBUG2, "deleted page from block %u has safexid %u", - blkno, opaque->btpo_level); + + elog(DEBUG2, "deleted page from block %u has safexid " XID_FMT, + blkno, safexid); /* Don't interpret BTDeletedPageData as index tuples */ maxoff = InvalidOffsetNumber; @@ -146,9 +151,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->type = 'i'; /* btpage opaque data */ - stat->btpo_prev = opaque->btpo_prev; stat->btpo_next = opaque->btpo_next; - stat->btpo_level = opaque->btpo_level; stat->btpo_flags = opaque->btpo_flags; stat->btpo_cycleid = opaque->btpo_cycleid; @@ -613,7 +616,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) uargs = palloc(sizeof(struct user_args)); - uargs->page = VARDATA(raw_page); + uargs->page = get_page_from_raw(raw_page); uargs->offset = FirstOffsetNumber; diff --git a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out new file mode 100644 index 00000000000..5e64eb92602 --- /dev/null +++ b/contrib/pageinspect/expected/hash_1.out @@ -0,0 +1,166 @@ +CREATE TABLE test_hash (a int, b text); +INSERT INTO test_hash VALUES (1, 'one'); +CREATE INDEX test_hash_a_idx ON test_hash USING hash (a); +\x +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 ]--+--------- +hash_page_type | metapage + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 
]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5)); +-[ RECORD 1 ]--+------- +hash_page_type | bitmap + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); +ERROR: invalid block number +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); +ERROR: invalid overflow block number 0 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); +ERROR: invalid overflow block number 1 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2); +ERROR: invalid overflow block number 2 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3); +ERROR: invalid overflow block number 3 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); +ERROR: invalid overflow block number 4 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); +ERROR: invalid overflow block number 5 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 
]-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------- +magic | 105121344 +version | 4 +ntuples | 1 +bsize | 8156 +bmsize | 4096 +bmshift | 15 +maxbucket | 3 +highmask | 7 +lowmask | 3 +ovflpoint | 2 +firstfree | 0 +nmaps | 1 +procid | 450 +spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} + +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 1)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 2)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 3)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 4)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash meta page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, 
+hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 0 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 1 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---+----------- +live_items | 1 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 2 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 3 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---------- 
+itemoffset | 1 +ctid | (0,1) +data | 2389907270 + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +DROP TABLE test_hash; diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out index f5c4b61bd79..00323d392d6 100644 --- a/contrib/pageinspect/expected/oldextversions.out +++ b/contrib/pageinspect/expected/oldextversions.out @@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1); -- pagesize in pageinspect >= 1.10. ALTER EXTENSION pageinspect UPDATE TO '1.9'; \df page_header - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ - public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ + public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func (1 row) SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | 
version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) DROP TABLE test1; diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index 4e325ae56dd..804be53c6dc 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -48,7 +48,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; @@ -69,19 +69,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- - 0: 254 + - 1: 254 + - 3: 254 + - 7: 254 + - 15: 254 + - 31: 254 + - 63: 254 + - 127: 254 + - 255: 254 + - 511: 254 + - 1023: 254 + - 2047: 254 + - 4095: 254 + + 0: 253 + + 1: 253 + + 3: 253 + + 7: 253 + + 15: 253 + + 31: 253 + + 63: 253 + + 127: 253 + + 255: 253 + + 511: 253 + + 1023: 253 + + 2047: 253 + + 4095: 253 + fp_next_slot: 0 + (1 row) diff --git a/contrib/pageinspect/fsmfuncs.c b/contrib/pageinspect/fsmfuncs.c index 930f1df3390..4fc7be9d756 100644 --- a/contrib/pageinspect/fsmfuncs.c +++ b/contrib/pageinspect/fsmfuncs.c @@ -44,7 +44,7 @@ fsm_page_contents(PG_FUNCTION_ARGS) (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use raw page functions"))); - fsmpage = (FSMPage) PageGetContents(VARDATA(raw_page)); + fsmpage = (FSMPage) PageGetContents(get_page_from_raw(raw_page)); initStringInfo(&sinfo); diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index a05611c0b3a..61c83a93d09 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -163,7 +163,7 @@ heap_page_items(PG_FUNCTION_ARGS) inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; - inter_call_data->page 
= VARDATA(raw_page); + inter_call_data->page = get_page_from_raw(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; @@ -211,6 +211,7 @@ heap_page_items(PG_FUNCTION_ARGS) lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { + HeapTupleData tup; HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; @@ -218,9 +219,11 @@ heap_page_items(PG_FUNCTION_ARGS) /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); + tup.t_data = tuphdr; + HeapTupleCopyBaseFromPage(&tup, page); - values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); - values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); + values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup)); + values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); diff --git a/contrib/pageinspect/pageinspect--1.10--1.11.sql b/contrib/pageinspect/pageinspect--1.10--1.11.sql new file mode 100644 index 00000000000..236f18aa2f8 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.10--1.11.sql @@ -0,0 +1,145 @@ +/* contrib/pageinspect/pageinspect--1.10--1.11.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.11'" to load this file. 
\quit + +-- +-- gist_page_opaque_info() +-- +DROP FUNCTION gist_page_opaque_info(bytea); +CREATE FUNCTION gist_page_opaque_info(IN page bytea, + OUT lsn pg_lsn, + OUT nsn pg_lsn, + OUT rightlink bigint, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'gist_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + + +-- +-- gist_page_items_bytea() +-- +DROP FUNCTION gist_page_items_bytea(bytea); +CREATE FUNCTION gist_page_items_bytea(IN page bytea, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT key_data bytea) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items_bytea' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- gist_page_items() +-- +DROP FUNCTION gist_page_items(bytea, regclass); +CREATE FUNCTION gist_page_items(IN page bytea, + IN index_oid regclass, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT keys text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- get_raw_page() +-- +DROP FUNCTION get_raw_page(text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, int4); +CREATE FUNCTION get_raw_page(text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +DROP FUNCTION get_raw_page(text, text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, text, int4); +CREATE FUNCTION get_raw_page(text, text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- page_checksum() +-- +DROP FUNCTION page_checksum(IN page bytea, IN blkno int8); +DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4); +CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8) +RETURNS smallint +AS 'MODULE_PATHNAME', 'page_checksum_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_metap() +-- +DROP FUNCTION bt_metap(text); +CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int8, + OUT level 
int8, + OUT fastroot int8, + OUT fastlevel int8, + OUT last_cleanup_num_delpages int8, + OUT last_cleanup_num_tuples float8, + OUT allequalimage boolean) +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_stats() +-- +DROP FUNCTION bt_page_stats(text, int8); +DROP FUNCTION IF EXISTS bt_page_stats(text, int4); +CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo_level int8, + OUT btpo_flags int4) +AS 'MODULE_PATHNAME', 'bt_page_stats_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_items() +-- +DROP FUNCTION bt_page_items(text, int8); +DROP FUNCTION IF EXISTS bt_page_items(text, int4); +CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars bool, + OUT data text, + OUT dead boolean, + OUT htid tid, + OUT tids tid[]) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_page_items_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_page_items() +-- +DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass); +CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, + OUT itemoffset int, + OUT blknum int8, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'brin_page_items' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql index 1e40c3c97e2..fdbd2995a22 100644 --- a/contrib/pageinspect/pageinspect--1.5.sql +++ b/contrib/pageinspect/pageinspect--1.5.sql @@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea, OUT special smallint, OUT pagesize smallint, OUT version smallint, + OUT xid_base xid, + OUT multi_base xid, 
OUT prune_xid xid) AS 'MODULE_PATHNAME', 'page_header' LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control index 7cdf37913da..f277413dd8c 100644 --- a/contrib/pageinspect/pageinspect.control +++ b/contrib/pageinspect/pageinspect.control @@ -1,5 +1,5 @@ # pageinspect extension comment = 'inspect the contents of database pages at a low level' -default_version = '1.10' +default_version = '1.11' module_pathname = '$libdir/pageinspect' relocatable = true diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index 4bfa346c24a..e9dbc4174a0 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/relation.h" +#include "commands/sequence.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -252,8 +253,8 @@ page_header(PG_FUNCTION_ARGS) Datum result; HeapTuple tuple; - Datum values[9]; - bool nulls[9]; + Datum values[11]; + bool nulls[11]; PageHeader page; XLogRecPtr lsn; @@ -274,7 +275,7 @@ page_header(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page too small (%d bytes)", raw_page_size))); - page = (PageHeader) VARDATA(raw_page); + page = (PageHeader) get_page_from_raw(raw_page); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) @@ -324,12 +325,29 @@ page_header(PG_FUNCTION_ARGS) } values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); - values[8] = TransactionIdGetDatum(page->pd_prune_xid); /* Build and return the tuple. 
*/ memset(nulls, 0, sizeof(nulls)); + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) + { + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + + values[8] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + values[9] = TransactionIdGetDatum(pageSpecial->pd_multi_base); + values[10] = TransactionIdGetDatum(HeapPageGetPruneXid(page)); + nulls[8] = false; + nulls[9] = false; + nulls[10] = false; + } + else + { + nulls[8] = true; + nulls[9] = true; + nulls[10] = true; + } + tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c index f06385e8d3e..82e0f22ad50 100644 --- a/contrib/pg_surgery/heap_surgery.c +++ b/contrib/pg_surgery/heap_surgery.c @@ -270,11 +270,17 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) else { HeapTupleHeader htup; + HeapTupleData tuple; Assert(heap_force_opt == HEAP_FORCE_FREEZE); htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = htup; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); + /* * Reset all visibility-related fields of the tuple. This * logic should mimic heap_execute_freeze_tuple(), but we @@ -282,8 +288,8 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) * potentially-garbled data is left behind. 
*/ ItemPointerSet(&htup->t_ctid, blkno, curoff); - HeapTupleHeaderSetXmin(htup, FrozenTransactionId); - HeapTupleHeaderSetXmax(htup, InvalidTransactionId); + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleSetXmax(&tuple, InvalidTransactionId); if (htup->t_infomask & HEAP_MOVED) { if (htup->t_infomask & HEAP_MOVED_OFF) diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out index 9de54db2a29..32bfbaa84e0 100644 --- a/contrib/pg_visibility/expected/pg_visibility.out +++ b/contrib/pg_visibility/expected/pg_visibility.out @@ -267,6 +267,22 @@ select * from pg_check_frozen('copyfreeze'); -------- (0 rows) +create table vacuum_test as select 42 i; +vacuum (disable_page_skipping) vacuum_test; +-- pg_check_visible() can report false positive due to autovacuum activity. +-- To workaround this issue, repeat the call. See PGPRO-4776. +do $$ +declare + non_visible_count bigint; + i integer; +begin + for i in 1 .. 10 loop + if i > 1 then perform pg_sleep(1); end if; + select count(*) from pg_check_visible('vacuum_test') into non_visible_count; + if non_visible_count = 0 then exit; end if; + end loop; + if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if; +end $$; -- cleanup drop table test_partitioned; drop view test_view; @@ -277,3 +293,4 @@ drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; drop table copyfreeze; +drop table vacuum_test; diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index a206c0abd8d..cd695167d53 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -653,6 +653,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Initialize a HeapTupleData structure for checks below. 
*/ ItemPointerSet(&(tuple.t_self), blkno, offnum); + HeapTupleCopyBaseFromPage(&tuple, page); tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = relid; @@ -762,7 +763,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer) * be set here. So just check the xmin. */ - xmin = HeapTupleHeaderGetXmin(tup->t_data); + xmin = HeapTupleGetXmin(tup); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql index ff3538f9964..42c4fad36ad 100644 --- a/contrib/pg_visibility/sql/pg_visibility.sql +++ b/contrib/pg_visibility/sql/pg_visibility.sql @@ -170,6 +170,23 @@ commit; select * from pg_visibility_map('copyfreeze'); select * from pg_check_frozen('copyfreeze'); +create table vacuum_test as select 42 i; +vacuum (disable_page_skipping) vacuum_test; +-- pg_check_visible() can report false positive due to autovacuum activity. +-- To workaround this issue, repeat the call. See PGPRO-4776. +do $$ +declare + non_visible_count bigint; + i integer; +begin + for i in 1 .. 
10 loop + if i > 1 then perform pg_sleep(1); end if; + select count(*) from pg_check_visible('vacuum_test') into non_visible_count; + if non_visible_count = 0 then exit; end if; + end loop; + if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if; +end $$; + -- cleanup drop table test_partitioned; drop view test_view; @@ -180,3 +197,4 @@ drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; drop table copyfreeze; +drop table vacuum_test; diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index d8946dc5107..e26451c9d5a 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -156,7 +156,7 @@ pgrowlocks(PG_FUNCTION_ARGS) htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), hscan->rs_cbuf); - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; /* @@ -168,7 +168,7 @@ pgrowlocks(PG_FUNCTION_ARGS) PointerGetDatum(&tuple->t_self)); values[Atnum_xmax] = palloc(NCHARS * sizeof(char)); - snprintf(values[Atnum_xmax], NCHARS, "%u", xmax); + snprintf(values[Atnum_xmax], NCHARS, XID_FMT, xmax); if (infomask & HEAP_XMAX_IS_MULTI) { MultiXactMember *members; @@ -209,7 +209,7 @@ pgrowlocks(PG_FUNCTION_ARGS) strcat(values[Atnum_modes], ","); strcat(values[Atnum_pids], ","); } - snprintf(buf, NCHARS, "%u", members[j].xid); + snprintf(buf, NCHARS, XID_FMT, members[j].xid); strcat(values[Atnum_xids], buf); switch (members[j].status) { @@ -250,7 +250,7 @@ pgrowlocks(PG_FUNCTION_ARGS) values[Atnum_ismulti] = pstrdup("false"); values[Atnum_xids] = palloc(NCHARS * sizeof(char)); - snprintf(values[Atnum_xids], NCHARS, "{%u}", xmax); + snprintf(values[Atnum_xids], NCHARS, "{" XID_FMT "}", xmax); values[Atnum_modes] = palloc(NCHARS); if (infomask & HEAP_XMAX_LOCK_ONLY) diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 
3b836f370e2..029226bef2a 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -153,6 +153,7 @@ statapprox_heap(Relation rel, output_type *stat) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); /* * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 6c4b053dd07..7ed7f1381a2 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -605,7 +605,7 @@ pgstathashindex(PG_FUNCTION_ARGS) metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; - stats.space_per_page = metap->hashm_bsize; + stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData)); _hash_relbuf(rel, metabuf); /* Get the current relation length */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 7720ab9c581..a2662d541bf 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -4447,16 +4447,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------- - Update on public.ft2 - Output: c1, c2, c3, c4, c5, c6, c7, c8 - -> Foreign Update on public.ft2 - Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, 
c3, c4, c5, c6, c7, c8 -(4 rows) +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 + Sort Key: t.c1 + CTE t + -> Update on public.ft2 + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8 + -> Foreign Update on public.ft2 + Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 + -> CTE Scan on t + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 +(10 rows) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+--------------------+------------------------------+--------------------------+----+------------+----- 7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo @@ -4576,16 +4584,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down - QUERY PLAN --------------------------------------------------------------------------------------------- - Delete on public.ft2 - Output: c1, c4 - -> Foreign Delete on public.ft2 - Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 -(4 rows) + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down 
+ QUERY PLAN +---------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c4 + Sort Key: t.c1 + CTE t + -> Delete on public.ft2 + Output: ft2.c1, ft2.c4 + -> Foreign Delete on public.ft2 + Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 + -> CTE Scan on t + Output: t.c1, t.c4 +(10 rows) -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; c1 | c4 ------+------------------------------ 5 | Tue Jan 06 00:00:00 1970 PST @@ -5846,7 +5862,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; 1218 | 818 | ggg_trig_update | | | (--; | ft2 | (1 row) -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+------------------------+------------------------------+--------------------------+----+------------+----- 8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index fa9a099f134..379f65cc430 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4791,8 +4791,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, * Note: no need to care about tableoid here because it will be * initialized in ExecProcessReturning(). 
*/ - HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId); + HeapTupleSetXmin(resultTup, InvalidTransactionId); + HeapTupleSetXmax(resultTup, InvalidTransactionId); HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId); } @@ -7251,6 +7251,7 @@ make_tuple_from_result_row(PGresult *res, */ if (ctid) tuple->t_self = tuple->t_data->t_ctid = *ctid; + HeapTupleSetZeroBase(tuple); /* * Stomp on the xmin, xmax, and cmin fields from the tuple created by @@ -7260,8 +7261,8 @@ make_tuple_from_result_row(PGresult *res, * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. */ - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(tuple, InvalidTransactionId); + HeapTupleSetXmin(tuple, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* Clean up */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index beeac8af1ed..1e7d9a91110 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1243,16 +1243,20 @@ EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) 
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can be pushed down DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; @@ -1359,7 +1363,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *; INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; -- Test errors thrown on remote side during update ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0); diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c index e5cd84e85e4..d06acd21c3d 100644 --- a/contrib/test_decoding/test_decoding.c +++ b/contrib/test_decoding/test_decoding.c @@ -310,7 +310,7 @@ pg_output_begin(LogicalDecodingContext *ctx, TestDecodingData *data, ReorderBuff { OutputPluginPrepareWrite(ctx, last_write); if (data->include_xids) - appendStringInfo(ctx->out, "BEGIN %u", txn->xid); + appendStringInfo(ctx->out, "BEGIN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "BEGIN"); OutputPluginWrite(ctx, last_write); @@ -333,7 +333,7 @@ 
pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "COMMIT %u", txn->xid); + appendStringInfo(ctx->out, "COMMIT " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "COMMIT"); @@ -378,7 +378,7 @@ pg_decode_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, quote_literal_cstr(txn->gid)); if (data->include_xids) - appendStringInfo(ctx->out, ", txid %u", txn->xid); + appendStringInfo(ctx->out, ", txid " XID_FMT, txn->xid); if (data->include_timestamp) appendStringInfo(ctx->out, " (at %s)", @@ -400,7 +400,7 @@ pg_decode_commit_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn quote_literal_cstr(txn->gid)); if (data->include_xids) - appendStringInfo(ctx->out, ", txid %u", txn->xid); + appendStringInfo(ctx->out, ", txid " XID_FMT, txn->xid); if (data->include_timestamp) appendStringInfo(ctx->out, " (at %s)", @@ -424,7 +424,7 @@ pg_decode_rollback_prepared_txn(LogicalDecodingContext *ctx, quote_literal_cstr(txn->gid)); if (data->include_xids) - appendStringInfo(ctx->out, ", txid %u", txn->xid); + appendStringInfo(ctx->out, ", txid " XID_FMT, txn->xid); if (data->include_timestamp) appendStringInfo(ctx->out, " (at %s)", @@ -773,7 +773,7 @@ pg_output_stream_start(LogicalDecodingContext *ctx, TestDecodingData *data, Reor { OutputPluginPrepareWrite(ctx, last_write); if (data->include_xids) - appendStringInfo(ctx->out, "opening a streamed block for transaction TXN %u", txn->xid); + appendStringInfo(ctx->out, "opening a streamed block for transaction TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "opening a streamed block for transaction"); OutputPluginWrite(ctx, last_write); @@ -791,7 +791,7 @@ pg_decode_stream_stop(LogicalDecodingContext *ctx, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "closing a streamed block for transaction TXN %u", txn->xid); + 
appendStringInfo(ctx->out, "closing a streamed block for transaction TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "closing a streamed block for transaction"); OutputPluginWrite(ctx, true); @@ -825,7 +825,7 @@ pg_decode_stream_abort(LogicalDecodingContext *ctx, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "aborting streamed (sub)transaction TXN %u", txn->xid); + appendStringInfo(ctx->out, "aborting streamed (sub)transaction TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "aborting streamed (sub)transaction"); OutputPluginWrite(ctx, true); @@ -845,7 +845,7 @@ pg_decode_stream_prepare(LogicalDecodingContext *ctx, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "preparing streamed transaction TXN %s, txid %u", + appendStringInfo(ctx->out, "preparing streamed transaction TXN %s, txid " XID_FMT, quote_literal_cstr(txn->gid), txn->xid); else appendStringInfo(ctx->out, "preparing streamed transaction %s", @@ -876,7 +876,7 @@ pg_decode_stream_commit(LogicalDecodingContext *ctx, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "committing streamed transaction TXN %u", txn->xid); + appendStringInfo(ctx->out, "committing streamed transaction TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "committing streamed transaction"); @@ -910,7 +910,7 @@ pg_decode_stream_change(LogicalDecodingContext *ctx, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "streaming change for TXN %u", txn->xid); + appendStringInfo(ctx->out, "streaming change for TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "streaming change for transaction"); OutputPluginWrite(ctx, true); @@ -963,7 +963,7 @@ pg_decode_stream_truncate(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, OutputPluginPrepareWrite(ctx, true); if (data->include_xids) - appendStringInfo(ctx->out, "streaming 
truncate for TXN %u", txn->xid); + appendStringInfo(ctx->out, "streaming truncate for TXN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "streaming truncate for transaction"); OutputPluginWrite(ctx, true); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 0b56b0fa5a9..dac5fb6d8bc 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -640,10 +640,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) result = PointerGetDatum(&(tup->t_self)); break; case MinTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup)); break; case MaxTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: @@ -688,6 +688,7 @@ heap_copytuple(HeapTuple tuple) newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); return newTuple; @@ -714,6 +715,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest) dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; + HeapTupleCopyBase(dest, src); dest->t_data = (HeapTupleHeader) palloc(src->t_len); memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len); } @@ -1077,6 +1079,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(tuple); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); @@ -1161,6 +1164,7 @@ 
heap_modify_tuple(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); return newTuple; } @@ -1224,6 +1228,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); return newTuple; } @@ -1464,6 +1469,7 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup) result->t_len = len; ItemPointerSetInvalid(&(result->t_self)); result->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(result); result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2)); diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 3420e47599c..46c44d26f3a 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -251,58 +251,6 @@ static relopt_int intRelOpts[] = }, -1, 1, 10000 }, - { - { - "autovacuum_freeze_min_age", - "Minimum age at which VACUUM should freeze a table row, for autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_multixact_freeze_min_age", - "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_freeze_max_age", - "Age at which to autovacuum a table to prevent transaction ID wraparound", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 100000, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_max_age", - "Multixact age at which to autovacuum a table to prevent multixact wraparound", - RELOPT_KIND_HEAP | 
RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 10000, 2000000000 - }, - { - { - "autovacuum_freeze_table_age", - "Age at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_table_age", - "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, { { "log_autovacuum_min_duration", @@ -379,6 +327,60 @@ static relopt_int intRelOpts[] = static relopt_int64 int64RelOpts[] = { + { + { + "autovacuum_freeze_min_age", + "Minimum age at which VACUUM should freeze a table row, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_multixact_freeze_min_age", + "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_freeze_max_age", + "Age at which to autovacuum a table to prevent transaction ID wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(100000), INT64CONST(2000000000) + }, + { + { + "autovacuum_multixact_freeze_max_age", + "Multixact age at which to autovacuum a table to prevent multixact wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(10000), INT64CONST(2000000000) + }, + { + { + "autovacuum_freeze_table_age", + "Age at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, + { + { + 
"autovacuum_multixact_freeze_table_age", + "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, /* list terminator */ {{NULL}} }; @@ -1912,17 +1914,17 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)}, {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)}, - {"autovacuum_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)}, - {"autovacuum_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)}, - {"autovacuum_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)}, - {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)}, - {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)}, - {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 
1e343df0afc..51d4570ef0b 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -317,11 +317,10 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) * INTERNAL and allowing any such function seems too scary. */ if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) && - (argtype == DATEOID || - argtype == XIDOID || argtype == CIDOID)) + (argtype == DATEOID || argtype == CIDOID)) /* okay, allowed use of hashint4() */ ; else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) && - (argtype == XID8OID)) + (argtype == XID8OID || argtype == XIDOID)) /* okay, allowed use of hashint8() */ ; else if ((funcid == F_TIMESTAMP_HASH || funcid == F_TIMESTAMP_HASH_EXTENDED) && diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0b4a46b31ba..a6f4366891f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -52,6 +52,9 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "commands/vacuum.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" @@ -73,7 +76,7 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, - TransactionId xid, CommandId cid, int options); + CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, @@ -214,6 +217,20 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] = #define TUPLOCK_from_mxstatus(status) \ (MultiXactStatusLock[(status)]) +ShortTransactionId +HeapPageSetPruneXidInternal(Page page, TransactionId xid) +{ + if (TransactionIdIsNormal(xid) && + /* abuse unsigned underflow */ + xid - HeapPageGetSpecial(page)->pd_xid_base - FirstNormalTransactionId > + MaxShortTransactionId) + elog(WARNING, "pd_prune_xid is out of range"); + ((PageHeader) 
(page))->pd_prune_xid = + NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, xid); + + return ((PageHeader) (page))->pd_prune_xid; +} + /* ---------------------------------------------------------------- * heap support routines * ---------------------------------------------------------------- @@ -458,6 +475,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page) loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); loctup.t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&(loctup.t_self), page, lineoff); if (all_visible) @@ -674,6 +692,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); return; } @@ -700,6 +719,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* @@ -999,6 +1019,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); /* check that rs_cindex is in sync */ Assert(scan->rs_cindex < scan->rs_ntuples); @@ -1021,6 +1042,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* @@ -1653,6 +1675,7 @@ heap_fetch(Relation relation, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(tuple, page); /* * check tuple visibility, then release lock @@ -1661,7 +1684,7 @@ heap_fetch(Relation relation, if (valid) PredicateLockTID(relation, 
&(tuple->t_self), snapshot, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleGetXmin(tuple)); HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot); @@ -1733,6 +1756,9 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Assert(TransactionIdIsValid(RecentXmin)); Assert(BufferGetBlockNumber(buffer) == blkno); + heapTuple->t_self = *tid; + HeapTupleCopyBaseFromPage(heapTuple, dp); + /* Scan through possible multiple members of HOT-chain */ for (;;) { @@ -1768,6 +1794,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); heapTuple->t_len = ItemIdGetLength(lp); heapTuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(heapTuple, dp); ItemPointerSet(&heapTuple->t_self, blkno, offnum); /* @@ -1782,7 +1809,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) + HeapTupleGetXmin(heapTuple))) break; /* @@ -1803,7 +1830,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, { ItemPointerSetOffsetNumber(tid, offnum); PredicateLockTID(relation, &heapTuple->t_self, snapshot, - HeapTupleHeaderGetXmin(heapTuple->t_data)); + HeapTupleGetXmin(heapTuple)); if (all_dead) *all_dead = false; return true; @@ -1838,7 +1865,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, blkno); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; - prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + prev_xmax = HeapTupleGetUpdateXidAny(heapTuple); } else break; /* end of chain */ @@ -1925,13 +1952,14 @@ heap_get_latest_tid(TableScanDesc sscan, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tp, 
page); /* * After following a t_ctid link, we might arrive at an unrelated * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp))) { UnlockReleaseBuffer(buffer); break; @@ -1950,7 +1978,7 @@ heap_get_latest_tid(TableScanDesc sscan, * If there's a valid t_ctid link, follow it, else we're done. */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(tp.t_data) || + HeapTupleIsOnlyLocked(&tp) || HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { @@ -1959,7 +1987,7 @@ heap_get_latest_tid(TableScanDesc sscan, } ctid = tp.t_data->t_ctid; - priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&tp); UnlockReleaseBuffer(buffer); } /* end of loop */ } @@ -1984,7 +2012,7 @@ heap_get_latest_tid(TableScanDesc sscan, static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { - Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid)); + Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) @@ -2076,7 +2104,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. */ - heaptup = heap_prepare_insert(relation, tup, xid, cid, options); + heaptup = heap_prepare_insert(relation, tup, cid, options); /* * Find buffer to insert this tuple into. 
If the page is all visible, @@ -2103,6 +2131,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber); + heap_page_prepare_for_xid(relation, buffer, xid, false); + HeapTupleCopyBaseFromPage(heaptup, BufferGetPage(buffer)); + HeapTupleSetXmin(heaptup, xid); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2184,6 +2216,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } XLogBeginInsert(); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) &xlrec, SizeOfHeapInsert); xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; @@ -2238,6 +2274,544 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } } +/* + * Find minimum and maximum short transaction ids which occurs in the page. + */ +static bool +heap_page_xid_min_max(Page page, bool multi, + ShortTransactionId *min, + ShortTransactionId *max) +{ + TransactionId xid_base = HeapPageGetSpecial(page)->pd_xid_base; + bool found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + if (!multi) + { + if (!HeapTupleHeaderXminFrozen(htup) && + TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin)) + { + Assert(htup->t_choice.t_heap.t_xmin >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin <= MaxShortTransactionId); + + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmin; + found = true; + } + else + { + *min = Min(*min, htup->t_choice.t_heap.t_xmin); + *max = Max(*max, htup->t_choice.t_heap.t_xmin); + } + } + + if (htup->t_infomask & 
HEAP_XMAX_IS_MULTI) + { + if (!(htup->t_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_LOCK_ONLY))) + { + TransactionId update_xid = + MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup), + htup->t_infomask); + ShortTransactionId xid = NormalTransactionIdToShort(xid_base, update_xid); + + Assert(xid >= FirstNormalTransactionId); + Assert(xid <= MaxShortTransactionId); + + if (!found) + { + *min = *max = xid; + found = true; + } + else + { + *min = Min(*min, xid); + *max = Max(*max, xid); + } + } + } + else if (!(htup->t_infomask & HEAP_XMAX_INVALID) && + TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + { + Assert(htup->t_choice.t_heap.t_xmax >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax <= MaxShortTransactionId); + + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmax; + found = true; + } + else + { + *min = Min(*min, htup->t_choice.t_heap.t_xmax); + *max = Max(*max, htup->t_choice.t_heap.t_xmax); + } + } + } + else + { + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + Assert(htup->t_choice.t_heap.t_xmax >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax <= MaxShortTransactionId); + + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmax; + found = true; + } + else + { + *min = Min(*min, htup->t_choice.t_heap.t_xmax); + *max = Max(*max, htup->t_choice.t_heap.t_xmax); + } + } + } + } + return found; +} + +/* + * Shift xid base in the page. WAL-logged if buffer is specified. 
+ */ +static void +heap_page_shift_base(Relation relation, Buffer buffer, Page page, bool multi, int64 delta) +{ + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + OffsetNumber offnum, + maxoff; + + START_CRIT_SECTION(); + + /* Iterate over page items */ + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* Apply xid shift to heap tuple */ + if (!multi) + { + if (!HeapTupleHeaderXminFrozen(htup) && + TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin)) + { + Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmin -= delta; + } + + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + !(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + } + else + { + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + } + } + + /* Apply xid shift to base as well */ + if (!multi) + pageSpecial->pd_xid_base += delta; + else + pageSpecial->pd_multi_base += delta; + + if (BufferIsValid(buffer)) + MarkBufferDirty(buffer); + + /* Write WAL record if needed */ + if (relation && RelationNeedsWAL(relation)) + { + XLogRecPtr recptr; + xl_heap_base_shift xlrec; + + xlrec.multi = multi; + xlrec.delta = delta; + + XLogBeginInsert(); + XLogRegisterData((char *) 
&xlrec, SizeOfHeapBaseShift); + + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); +} + +/* + * Freeze xids in the single heap page. Useful when we can't fit new xid even + * with base shift. + */ +static void +freeze_single_heap_page(Relation relation, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + OffsetNumber offnum, + maxoff; + HeapTupleData tuple; + int nfrozen = 0; + xl_heap_freeze_tuple *frozen; + TransactionId OldestXmin, + FreezeXid; + MultiXactId MultiXactCutoff; + GlobalVisState *vistest; + + vacuum_set_xid_limits(relation, 0, 0, 0, 0, + &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, + NULL); + + vistest = GlobalVisTestFor(relation); + + heap_page_prune(relation, buffer, vistest, + InvalidTransactionId, 0, + false, &offnum, false); + + /* + * Now scan the page to collect vacuumable items and check for tuples + * requiring freezing. + */ + maxoff = PageGetMaxOffsetNumber(page); + frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); + + /* + * Note: If you change anything in the loop below, also look at + * heap_page_is_all_visible to see if that needs to be changed. + */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + bool tuple_totally_frozen; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + Assert(ItemIdIsNormal(itemid)); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tuple, page); + + /* + * Each non-removable tuple must be checked to see if it needs + * freezing. Note we already have exclusive buffer lock. 
+ */ + if (heap_prepare_freeze_tuple(&tuple, + relation->rd_rel->relfrozenxid, + relation->rd_rel->relminmxid, + FreezeXid, MultiXactCutoff, + &frozen[nfrozen], &tuple_totally_frozen)) + frozen[nfrozen++].offset = offnum; + } /* scan along page */ + + /* + * If we froze any tuples, mark the buffer dirty, and write a WAL + * record recording the changes. We must log the changes to be + * crash-safe against future truncation of CLOG. + */ + if (nfrozen > 0) + { + int i; + + START_CRIT_SECTION(); + + MarkBufferDirty(buffer); + + /* execute collected freezes */ + for (i = 0; i < nfrozen; i++) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, frozen[i].offset); + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + heap_execute_freeze_tuple_page(page, htup, &frozen[i]); + } + + /* Now WAL-log freezing if necessary */ + if (RelationNeedsWAL(relation)) + { + XLogRecPtr recptr; + + recptr = log_heap_freeze(relation, buffer, FreezeXid, + frozen, nfrozen); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + } + + return; +} + +/* + * Ensure that given xid fits base of given page. + */ +bool +heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi) +{ + Page page = BufferGetPage(buffer); + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + TransactionId base; + bool found; + ShortTransactionId min, + max; + int i; + + /* "Double xmax" page format doesn't require any preparation */ + if (HeapPageIsDoubleXmax(page)) + return false; + + if (!TransactionIdIsNormal(xid)) + return false; + + for (i = 0; i < 2; i++) + { + base = multi ? pageSpecial->pd_multi_base : pageSpecial->pd_xid_base; + + /* Can we already store this xid? */ + if (xid >= base + FirstNormalTransactionId && xid <= base + MaxShortTransactionId) + return false; + + /* Find minimum and maximum xids in the page */ + found = heap_page_xid_min_max(page, multi, &min, &max); + + /* No items on the page? 
*/ + if (!found) + { + int64 delta; + + delta = (int64) (xid - FirstNormalTransactionId) - (int64) base; + + if (xid < base + delta + FirstNormalTransactionId || + xid > base + delta + MaxShortTransactionId) + { + elog(FATAL, "Fatal xid base calculation error: " + "xid = " XID_FMT ", " + "base = " XID_FMT ", " + "min = %u, " + "max = %u, " + "delta = " INT64_FORMAT, + xid, base, min, max, delta + ); + } + + Assert(xid >= base + delta + FirstNormalTransactionId); + Assert(xid <= base + delta + MaxShortTransactionId); + + heap_page_shift_base(relation, buffer, page, multi, delta); + + base = multi ? pageSpecial->pd_multi_base : pageSpecial->pd_xid_base; + Assert(xid >= base + FirstNormalTransactionId); + Assert(xid <= base + MaxShortTransactionId); + return false; + } + + /* Can we just shift base on the page */ + if (xid < base + FirstNormalTransactionId) + { + int64 freeDelta = MaxShortTransactionId - max, + requiredDelta = (base + FirstNormalTransactionId) - xid; + + /* Shouldn't consider setting base less than 0 */ + freeDelta = Min(freeDelta, base); + + if (requiredDelta <= freeDelta) + { + int64 delta = - (freeDelta + requiredDelta) / 2; + + if (xid < base + delta + FirstNormalTransactionId || + xid > base + delta + MaxShortTransactionId) + { + elog(FATAL, "Fatal xid base calculation error: " + "xid = " XID_FMT ", " + "base = " XID_FMT ", " + "min = %u, " + "max = %u, " + "freeDelta = " INT64_FORMAT ", " + "requiredDelta = " INT64_FORMAT ", " + "delta = " INT64_FORMAT, + xid, base, min, max, freeDelta, requiredDelta, delta + ); + } + + Assert(xid >= base + delta + FirstNormalTransactionId); + Assert(xid <= base + delta + MaxShortTransactionId); + + heap_page_shift_base(relation, buffer, page, multi, delta); + + base = multi ? 
pageSpecial->pd_multi_base : pageSpecial->pd_xid_base; + Assert(xid >= base + FirstNormalTransactionId); + Assert(xid <= base + MaxShortTransactionId); + return true; + } + } + else + { + int64 freeDelta = min - FirstNormalTransactionId, + requiredDelta = xid - (base + MaxShortTransactionId); + + Assert(xid > base + MaxShortTransactionId); + + if (requiredDelta <= freeDelta) + { + int64 delta = (freeDelta + requiredDelta) / 2; + + if (xid < base + delta + FirstNormalTransactionId || + xid > base + delta + MaxShortTransactionId) + { + elog(FATAL, "Fatal xid base calculation error: " + "xid = " XID_FMT ", " + "base = " XID_FMT ", " + "min = %u, " + "max = %u, " + "freeDelta = " INT64_FORMAT ", " + "requiredDelta = " INT64_FORMAT ", " + "delta = " INT64_FORMAT, + xid, base, min, max, freeDelta, requiredDelta, delta + ); + } + + Assert(xid >= base + delta + FirstNormalTransactionId); + Assert(xid <= base + delta + MaxShortTransactionId); + + heap_page_shift_base(relation, buffer, page, multi, delta); + + base = multi ? pageSpecial->pd_multi_base : pageSpecial->pd_xid_base; + Assert(xid >= base + FirstNormalTransactionId); + Assert(xid <= base + MaxShortTransactionId); + return true; + } + } + + if (i == 1) + { + break; + } + + /* Have to try freeing the page... */ + freeze_single_heap_page(relation, buffer); + } + + elog(ERROR, "Can't fit xid into page."); + return false; +} + +/* + * Ensure that given xid fits base of given page. + */ +bool +rewrite_page_prepare_for_xid(Page page, TransactionId xid, bool multi) +{ + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + TransactionId base; + bool found; + ShortTransactionId min, + max; + + if (!TransactionIdIsNormal(xid)) + return false; + + if (!multi) + base = pageSpecial->pd_xid_base; + else + base = pageSpecial->pd_multi_base; + + /* Can we already store this xid? 
*/ + if (xid >= base + FirstNormalTransactionId && xid <= base + MaxShortTransactionId) + return false; + + /* Find minimum and maximum xids in the page */ + found = heap_page_xid_min_max(page, multi, &min, &max); + + /* No items on the page? */ + if (!found) + { + if (!multi) + pageSpecial->pd_xid_base = xid - FirstNormalTransactionId; + else + pageSpecial->pd_multi_base = xid - FirstNormalTransactionId; + return false; + } + + /* Can we just shift base on the page */ + if (xid < base + FirstNormalTransactionId) + { + int64 freeDelta = MaxShortTransactionId - max, + requiredDelta = (base + FirstNormalTransactionId) - xid; + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(NULL, InvalidBuffer, + page, multi, - (freeDelta + requiredDelta) / 2); + return true; + } + } + else + { + int64 freeDelta = min - FirstNormalTransactionId, + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(NULL, InvalidBuffer, + page, multi, (freeDelta + requiredDelta) / 2); + return true; + } + } + + elog(ERROR, "Can't fit xid into page."); + return false; +} + + /* * Subroutine for heap_insert(). Prepares a tuple for insertion. This sets the * tuple header fields and toasts the tuple if necessary. Returns a toasted @@ -2245,7 +2819,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * that in any case, the header fields are also set in the original tuple. 
*/ static HeapTuple -heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, +heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { /* @@ -2262,12 +2836,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - HeapTupleHeaderSetXmin(tup->t_data, xid); + HeapTupleSetXmin(tup, InvalidTransactionId); if (options & HEAP_INSERT_FROZEN) HeapTupleHeaderSetXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); - HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ + HeapTupleSetXmax(tup, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* @@ -2330,8 +2904,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL); slots[i]->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slots[i]->tts_tableOid; - heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid, - options); + heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options); } /* @@ -2387,6 +2960,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) all_frozen_set = true; + heap_page_prepare_for_xid(relation, buffer, xid, false); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2394,6 +2969,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. 
*/ + HeapTupleCopyBaseFromPage(heaptuples[ndone], BufferGetPage(buffer)); + HeapTupleSetXmin(heaptuples[ndone], xid); RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); /* @@ -2410,6 +2987,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; + HeapTupleCopyBaseFromPage(heaptup, BufferGetPage(buffer)); + HeapTupleSetXmin(heaptup, xid); RelationPutHeapTuple(relation, buffer, heaptup, false); /* @@ -2545,6 +3124,10 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) xlrec, tupledata - scratch.data); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); @@ -2765,6 +3348,7 @@ heap_delete(Relation relation, ItemPointer tid, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); l1: result = HeapTupleSatisfiesUpdate(&tp, cid, buffer); @@ -2782,7 +3366,7 @@ l1: uint16 infomask; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tp.t_data); + xwait = HeapTupleGetRawXmax(&tp); infomask = tp.t_data->t_infomask; /* @@ -2821,13 +3405,16 @@ l1: NULL); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyBaseFromPage(&tp, page); + /* * If xwait had just locked the tuple then some other xact * could update this tuple before we get to this point. Check * for xmax change, and start over if so. 
*/ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; } @@ -2854,13 +3441,16 @@ l1: XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyBaseFromPage(&tp, page); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; @@ -2874,7 +3464,7 @@ l1: */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tp.t_data)) + HeapTupleIsOnlyLocked(&tp)) result = TM_Ok; else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) result = TM_Updated; @@ -2899,9 +3489,9 @@ l1: Assert(result != TM_Updated || !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)); tmfd->ctid = tp.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&tp); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); + tmfd->cmax = HeapTupleGetCmax(&tp); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -2924,7 +3514,7 @@ l1: CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer)); /* replace cid with a combo CID if necessary */ - HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo); + HeapTupleHeaderAdjustCmax(&tp, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so @@ -2942,11 +3532,15 @@ l1: */ MultiXactIdSetOldestMember(); - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data), + 
compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, buffer, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&tp, page); + START_CRIT_SECTION(); /* @@ -2972,7 +3566,7 @@ l1: tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); - HeapTupleHeaderSetXmax(tp.t_data, new_xmax); + HeapTupleSetXmax(&tp, new_xmax); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; @@ -3166,7 +3760,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; - Page page; + Page page, + newpage; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, @@ -3258,9 +3853,11 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; + HeapTupleCopyBaseFromPage(&oldtup, page); /* the new tuple is ready, except for this: */ newtup->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(newtup, page); /* Determine columns modified by the update. 
*/ modified_attrs = HeapDetermineModifiedColumns(relation, interesting_attrs, @@ -3343,7 +3940,7 @@ l2: */ /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; /* @@ -3394,6 +3991,7 @@ l2: checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); /* * If xwait had just locked the tuple then some other xact @@ -3402,7 +4000,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup), xwait)) goto l2; } @@ -3428,7 +4026,7 @@ l2: * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) - update_xact = HeapTupleGetUpdateXid(oldtup.t_data); + update_xact = HeapTupleGetUpdateXid(&oldtup); else update_xact = InvalidTransactionId; @@ -3476,6 +4074,8 @@ l2: checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. 
@@ -3483,7 +4083,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, - HeapTupleHeaderGetRawXmax(oldtup.t_data))) + HeapTupleGetRawXmax(&oldtup))) goto l2; /* Otherwise check if it committed or aborted */ @@ -3520,9 +4120,9 @@ l2: Assert(result != TM_Updated || !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid)); tmfd->ctid = oldtup.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); + tmfd->cmax = HeapTupleGetCmax(&oldtup); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -3552,6 +4152,7 @@ l2: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); goto l2; } @@ -3561,7 +4162,7 @@ l2: * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. 
*/ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, @@ -3580,7 +4181,7 @@ l2: (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else - xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xmax_new_tuple = HeapTupleGetRawXmax(&oldtup); if (!TransactionIdIsValid(xmax_new_tuple)) { @@ -3613,17 +4214,15 @@ l2: */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); - HeapTupleHeaderSetXmin(newtup->t_data, xid); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; - HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple); /* * Replace cid with a combo CID if necessary. Note that we already put * the plain cid into the new tuple. */ - HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); + HeapTupleHeaderAdjustCmax(&oldtup, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit @@ -3653,7 +4252,7 @@ l2: newtupsize = MAXALIGN(newtup->t_len); - if (need_toast || newtupsize > pagefree) + if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { TransactionId xmax_lock_old_tuple; uint16 infomask_lock_old_tuple, @@ -3678,7 +4277,7 @@ l2: * updating, because the potentially created multixact would otherwise * be wrong. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, @@ -3687,6 +4286,10 @@ l2: Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple)); + heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple, + (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) ? 
true : false); + HeapTupleCopyBaseFromPage(&oldtup, page); + START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ @@ -3695,9 +4298,9 @@ HeapTupleClearHotUpdated(&oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_lock_old_tuple); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ @@ -3780,7 +4383,11 @@ l2: */ for (;;) { - if (newtupsize > pagefree) + /* + * We can't fit the new tuple into a "double xmax" page, since it's impossible to + * set xmin there. + */ + if (newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { /* It doesn't fit, must use RelationGetBufferForTuple. */ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, @@ -3813,6 +4420,9 @@ l2: break; } } + + /* Copy possibly updated xid base to old tuple after relocking */ + HeapTupleCopyBaseFromPage(&oldtup, page); } else { @@ -3870,6 +4480,33 @@ l2: bms_overlap(modified_attrs, id_attrs), &old_key_copied); + newpage = BufferGetPage(newbuf); + + /* + * Prepare pages for the current xid, which is written to the new tuple's + * Xmax and old page's pd_prune_xid. + */ + heap_page_prepare_for_xid(relation, buffer, xid, false); + if (newbuf != buffer) + heap_page_prepare_for_xid(relation, newbuf, xid, false); + + /* Prepare pages for tuple's Xmax */ + heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple, + (infomask_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false); + heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple, + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + + /* Copy possibly updated Xid bases to both tuples. 
*/ + HeapTupleCopyBaseFromPage(&oldtup, page); + HeapTupleCopyBaseFromPage(heaptup, newpage); + + /* + * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already + * shifted. + */ + HeapTupleSetXmin(heaptup, xid); + HeapTupleSetXmax(heaptup, xmax_new_tuple); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -3912,9 +4549,9 @@ l2: oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_old_tuple); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* record address of new tuple in t_ctid of old one */ @@ -3969,6 +4606,18 @@ l2: END_CRIT_SECTION(); + if (newtup != heaptup) + { + /* + * Set new tuple's Xmin/Xmax only after both xid base preparations. + * Old tuple's Xmin/Xmax were already shifted because old tuple is + * on the page. 
+ */ + HeapTupleCopyBase(newtup, heaptup); + HeapTupleSetXmin(newtup, xid); + HeapTupleSetXmax(newtup, xmax_new_tuple); + } + if (newbuf != buffer) LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -4267,6 +4916,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(tuple, page); l3: result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); @@ -4293,7 +4943,7 @@ l3: ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); + xwait = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); @@ -4451,11 +5101,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * Make sure it's still an appropriate lock, else start over. @@ -4464,7 +5116,7 @@ l3: * now need to follow the update chain to lock the new * versions. */ - if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) && + if (!HeapTupleIsOnlyLocked(tuple) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; @@ -4491,6 +5143,7 @@ l3: !HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * Make sure it's still an appropriate lock, else start over. @@ -4519,8 +5172,10 @@ l3: * meantime, start over. 
*/ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4531,10 +5186,11 @@ l3: else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; /* otherwise, we're good */ @@ -4559,8 +5215,10 @@ l3: { /* ... but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); @@ -4581,6 +5239,7 @@ l3: if (require_sleep && (result == TM_Updated || result == TM_Deleted)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } else if (require_sleep) @@ -4606,6 +5265,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } @@ -4632,6 +5292,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } break; @@ -4672,6 +5333,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); 
goto failed; } break; @@ -4698,11 +5360,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * xwait is done, but if xwait had just locked the tuple then some @@ -4710,7 +5374,7 @@ l3: * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4738,7 +5402,7 @@ l3: if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple->t_data)) + HeapTupleIsOnlyLocked(tuple)) result = TM_Ok; else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) result = TM_Updated; @@ -4755,9 +5419,9 @@ failed: Assert(result != TM_Updated || !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)); tmfd->ctid = tuple->t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(tuple); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); + tmfd->cmax = HeapTupleGetCmax(tuple); else tmfd->cmax = InvalidCommandId; goto out_locked; @@ -4777,10 +5441,11 @@ failed: LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto l3; } - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); old_infomask = tuple->t_data->t_infomask; /* @@ -4802,6 +5467,10 @@ failed: GetCurrentTransactionId(), mode, false, &xid, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, *buffer, xid, + (new_infomask & HEAP_XMAX_IS_MULTI) ? 
true : false); + HeapTupleCopyBaseFromPage(tuple, page); + START_CRIT_SECTION(); /* @@ -4820,7 +5489,7 @@ failed: tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); - HeapTupleHeaderSetXmax(tuple->t_data, xid); + HeapTupleSetXmax(tuple, xid); /* * Make sure there is no forward chain link in t_ctid. Note that in the @@ -5136,7 +5805,7 @@ l5: * TransactionIdIsInProgress() should have returned false. We * assume it's no longer locked in this case. */ - elog(WARNING, "LOCK_ONLY found for Xid in progress %u", xmax); + elog(WARNING, "LOCK_ONLY found for Xid in progress " XID_FMT, xmax); old_infomask |= HEAP_XMAX_INVALID; old_infomask &= ~HEAP_XMAX_LOCK_ONLY; goto l5; @@ -5413,12 +6082,18 @@ l4: LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); } + /* + * Copy xid base after buffer relocking, it could have changed + * since heap_fetch(). + */ + HeapTupleCopyBaseFromPage(&mytup, BufferGetPage(buf)); + /* * Check the tuple XMIN against prior XMAX, if any. If we reached the * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), + !TransactionIdEquals(HeapTupleGetXmin(&mytup), priorXmax)) { result = TM_Ok; @@ -5430,7 +6105,7 @@ l4: * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. 
*/ - if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data))) + if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup))) { result = TM_Ok; goto out_locked; @@ -5438,7 +6113,7 @@ l4: old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; - xmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + xmax = HeapTupleGetRawXmax(&mytup); /* * If this tuple version has been updated or locked by some concurrent @@ -5451,7 +6126,7 @@ l4: TransactionId rawxmax; bool needwait; - rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + rawxmax = HeapTupleGetRawXmax(&mytup); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; @@ -5592,14 +6267,18 @@ l4: VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; + heap_page_prepare_for_xid(rel, buf, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&mytup, BufferGetPage(buf)); + START_CRIT_SECTION(); /* ... and set them */ - HeapTupleHeaderSetXmax(mytup.t_data, new_xmax); mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; + HeapTupleSetXmax(&mytup, new_xmax); MarkBufferDirty(buf); @@ -5633,14 +6312,14 @@ next: if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || - HeapTupleHeaderIsOnlyLocked(mytup.t_data)) + HeapTupleIsOnlyLocked(&mytup)) { result = TM_Ok; goto out_locked; } /* tail recursion */ - priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); } @@ -5847,12 +6526,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid) tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); /* * Sanity check that the tuple 
really is a speculatively inserted tuple, * inserted by us. */ - if (tp.t_data->t_choice.t_heap.t_xmin != xid) + if (HeapTupleGetRawXmin(&tp) != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); @@ -5881,6 +6561,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid) prune_xid = relation->rd_rel->relfrozenxid; else prune_xid = TransactionXmin; + Assert(TransactionIdIsValid(prune_xid)); + heap_page_prepare_for_xid(relation, buffer, prune_xid, false); PageSetPrunable(page, prune_xid); /* store transaction information of xact deleting the tuple */ @@ -5891,8 +6573,10 @@ heap_abort_speculative(Relation relation, ItemPointer tid) * Set the tuple header xmin to InvalidTransactionId. This makes the * tuple immediately invisible everyone. (In particular, to any * transactions waiting on the speculative token, woken up later.) + * Don't need to reload xid base from page because InvalidTransactionId + * doesn't require xid base to be valid. 
*/ - HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + HeapTupleSetXmin(&tp, InvalidTransactionId); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; @@ -6111,7 +6795,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, else if (MultiXactIdPrecedes(multi, relminmxid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found multixact %u from before relminmxid %u", + errmsg_internal("found multixact " XID_FMT " from before relminmxid " XID_FMT, multi, relminmxid))); else if (MultiXactIdPrecedes(multi, cutoff_multi)) { @@ -6125,7 +6809,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("multixact %u from before cutoff %u found to be still running", + errmsg_internal("multixact " XID_FMT " from before cutoff " XID_FMT " found to be still running", multi, cutoff_multi))); if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask)) @@ -6144,7 +6828,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdPrecedes(xid, relfrozenxid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found update xid %u from before relfrozenxid %u", + errmsg_internal("found update xid " XID_FMT " from before relfrozenxid " XID_FMT, xid, relfrozenxid))); /* @@ -6156,7 +6840,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdDidCommit(xid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("cannot freeze committed update xid %u", xid))); + errmsg_internal("cannot freeze committed update xid " XID_FMT, xid))); *flags |= FRM_INVALIDATE_XMAX; xid = InvalidTransactionId; /* not strictly necessary */ } @@ -6214,7 +6898,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * to keep. 
*/ nnewmembers = 0; - newmembers = palloc(sizeof(MultiXactMember) * nmembers); + newmembers = palloc0(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; @@ -6232,7 +6916,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdPrecedes(xid, relfrozenxid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found update xid %u from before relfrozenxid %u", + errmsg_internal("found update xid " XID_FMT " from before relfrozenxid " XID_FMT, xid, relfrozenxid))); /* @@ -6282,7 +6966,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, TransactionIdPrecedes(update_xid, cutoff_xid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found update xid %u from before xid cutoff %u", + errmsg_internal("found update xid " XID_FMT " from before xid cutoff " XID_FMT, update_xid, cutoff_xid))); /* @@ -6380,7 +7064,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * recovery. We really need to remove old xids. */ bool -heap_prepare_freeze_tuple(HeapTupleHeader tuple, +heap_prepare_freeze_tuple(HeapTuple htup, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p) @@ -6390,11 +7074,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, bool xmin_frozen; bool freeze_xmax; TransactionId xid; + HeapTupleHeader tuple = htup->t_data; frz->frzflags = 0; frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; - frz->xmax = HeapTupleHeaderGetRawXmax(tuple); + frz->xmax = HeapTupleGetRawXmax(htup); /* * Process xmin. xmin_frozen has two slightly different meanings: in the @@ -6404,7 +7089,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * otherwise). In both cases we're allowed to set totally_frozen, as far * as xmin is concerned. 
*/ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (!TransactionIdIsNormal(xid)) xmin_frozen = true; else @@ -6412,7 +7097,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (TransactionIdPrecedes(xid, relfrozenxid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found xmin %u from before relfrozenxid %u", + errmsg_internal("found xmin " XID_FMT " from before relfrozenxid " XID_FMT, xid, relfrozenxid))); xmin_frozen = TransactionIdPrecedes(xid, cutoff_xid); @@ -6421,7 +7106,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (!TransactionIdDidCommit(xid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen", + errmsg_internal("uncommitted xmin " XID_FMT " from before xid cutoff " XID_FMT " needs to be frozen", xid, cutoff_xid))); frz->t_infomask |= HEAP_XMIN_FROZEN; @@ -6438,7 +7123,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * * Make sure to keep heap_tuple_needs_freeze in sync with this. 
*/ - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { @@ -6493,7 +7178,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (TransactionIdPrecedes(xid, relfrozenxid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found xmax %u from before relfrozenxid %u", + errmsg_internal("found xmax " XID_FMT " from before relfrozenxid " XID_FMT, xid, relfrozenxid))); if (TransactionIdPrecedes(xid, cutoff_xid)) @@ -6508,7 +7193,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionIdDidCommit(xid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("cannot freeze committed xmax %u", + errmsg_internal("cannot freeze committed xmax " XID_FMT, xid))); freeze_xmax = true; } @@ -6516,7 +7201,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, freeze_xmax = false; } else if ((tuple->t_infomask & HEAP_XMAX_INVALID) || - !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + !TransactionIdIsValid(HeapTupleGetRawXmax(htup))) { freeze_xmax = false; xmax_already_frozen = true; @@ -6524,7 +7209,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found xmax %u (infomask 0x%04x) not frozen, not multi, not normal", + errmsg_internal("found xmax " XID_FMT " (infomask 0x%04x) not frozen, not multi, not normal", xid, tuple->t_infomask))); if (freeze_xmax) @@ -6609,18 +7294,35 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * NB: All code in here must be safe to execute during crash recovery! 
*/ void -heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) +heap_execute_freeze_tuple(HeapTuple htup, xl_heap_freeze_tuple *frz) { - HeapTupleHeaderSetXmax(tuple, frz->xmax); + HeapTupleHeader tuple = htup->t_data; + + tuple->t_infomask = frz->t_infomask; + tuple->t_infomask2 = frz->t_infomask2; + + HeapTupleSetXmax(htup, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); if (frz->frzflags & XLH_INVALID_XVAC) HeapTupleHeaderSetXvac(tuple, InvalidTransactionId); +} +void +heap_execute_freeze_tuple_page(Page page, HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) +{ tuple->t_infomask = frz->t_infomask; tuple->t_infomask2 = frz->t_infomask2; + + HeapTupleHeaderSetXmax(page, tuple, frz->xmax); + + if (frz->frzflags & XLH_FREEZE_XVAC) + HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); + + if (frz->frzflags & XLH_INVALID_XVAC) + HeapTupleHeaderSetXvac(tuple, InvalidTransactionId); } /* @@ -6630,7 +7332,7 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) * Useful for callers like CLUSTER that perform their own WAL logging. */ bool -heap_freeze_tuple(HeapTupleHeader tuple, +heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi) { @@ -6794,10 +7496,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask) * checking the hint bits. 
*/ TransactionId -HeapTupleGetUpdateXid(HeapTupleHeader tuple) +HeapTupleGetUpdateXid(HeapTuple tuple) { - return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple), - tuple->t_infomask); + return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple), + tuple->t_data->t_infomask); } /* @@ -7028,12 +7730,16 @@ bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { TransactionId xid; + HeapTupleData htup; + + htup.t_data = tuple; + HeapTupleSetZeroBase(&htup); /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(&htup); if (TransactionIdIsNormal(xid)) return true; @@ -7044,13 +7750,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { MultiXactId multi; - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(&htup); if (MultiXactIdIsValid(multi)) return true; } else { - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(&htup); if (TransactionIdIsNormal(xid)) return true; } @@ -7078,12 +7784,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * on a standby. 
*/ bool -heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, +heap_tuple_needs_freeze(HeapTuple htup, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf) { TransactionId xid; + HeapTupleHeader tuple = htup->t_data; - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; @@ -7097,7 +7804,7 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, { MultiXactId multi; - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); if (!MultiXactIdIsValid(multi)) { /* no xmax set, ignore */ @@ -7132,7 +7839,7 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, } else { - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; @@ -7157,14 +7864,14 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, * with queries. */ void -HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, +HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid) { - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple); - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + TransactionId xmin = HeapTupleGetXmin(tuple); + TransactionId xmax = HeapTupleGetUpdateXidAny(tuple); + TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data); - if (tuple->t_infomask & HEAP_MOVED) + if (tuple->t_data->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; @@ -7176,8 +7883,8 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, * * Look for a committed hint bit, or if no xmin bit is set, check clog. 
*/ - if (HeapTupleHeaderXminCommitted(tuple) || - (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin))) + if (HeapTupleHeaderXminCommitted(tuple->t_data) || + (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *latestRemovedXid)) @@ -7527,7 +8234,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) for (;;) { ItemId lp; - HeapTupleHeader htup; + HeapTupleData htup; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -7564,16 +8271,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) if (!ItemIdIsNormal(lp)) break; - htup = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&htup, page); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax)) break; - HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid); + HeapTupleHeaderAdvanceLatestRemovedXid(&htup, &latestRemovedXid); /* * If the tuple is not HOT-updated, then we are at the end of this @@ -7581,13 +8290,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) * chain (they get their own index entries) -- just move on to * next htid from index AM caller. 
*/ - if (!HeapTupleHeaderIsHotUpdated(htup)) + if (!HeapTupleHeaderIsHotUpdated(htup.t_data)) break; /* Advance to next HOT chain member */ - Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno); - offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno); + offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid); + priorXmax = HeapTupleGetUpdateXidAny(&htup); } /* Enable further/final shrinking of deltids for caller */ @@ -8167,13 +8876,13 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); - xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data); + xlrec.old_xmax = HeapTupleGetRawXmax(oldtup); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); - xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); + xlrec.new_xmax = HeapTupleGetRawXmax(newtup); bufflags = REGBUF_STANDARD; if (init) @@ -8185,6 +8894,10 @@ log_heap_update(Relation reln, Buffer oldbuf, if (oldbuf != newbuf) XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate); /* @@ -8297,8 +9010,8 @@ log_heap_new_cid(Relation relation, HeapTuple tup) { Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID)); Assert(!HeapTupleHeaderXminInvalid(hdr)); - xlrec.cmin = HeapTupleHeaderGetCmin(hdr); - xlrec.cmax = HeapTupleHeaderGetCmax(hdr); + xlrec.cmin = HeapTupleGetCmin(tup); + xlrec.cmax = HeapTupleGetCmax(tup); xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr); } /* No combo CID, so only cmin or cmax can be set by this TX */ @@ -8501,7 +9214,8 @@ 
heap_xlog_prune(XLogReaderState *record) heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, - nowunused, nunused); + nowunused, nunused, + true); /* * Note: we don't worry about updating the page's prunability hints. @@ -8793,7 +9507,7 @@ heap_xlog_freeze_page(XLogReaderState *record) lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */ tuple = (HeapTupleHeader) PageGetItem(page, lp); - heap_execute_freeze_tuple(tuple, xlrec_tp); + heap_execute_freeze_tuple_page(page, tuple, xlrec_tp); } PageSetLSN(page, lsn); @@ -8880,9 +9594,9 @@ heap_xlog_delete(XLogReaderState *record) fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + HeapTupleHeaderSetXmax(page, htup, xlrec->xmax); else - HeapTupleHeaderSetXmin(htup, InvalidTransactionId); + HeapTupleHeaderSetXmin(page, htup, InvalidTransactionId); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ @@ -8907,7 +9621,7 @@ static void heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record); + xl_heap_insert *xlrec; Buffer buffer; Page page; union @@ -8923,6 +9637,17 @@ heap_xlog_insert(XLogReaderState *record) BlockNumber blkno; ItemPointerData target_tid; XLogRedoAction action; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); + + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_insert *) rec_data; XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); ItemPointerSetBlockNumber(&target_tid, blkno); @@ -8947,11 +9672,12 @@ heap_xlog_insert(XLogReaderState *record) * If we inserted the first and only tuple on the page, 
re-initialize the * page from scratch. */ - if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + if (isinit) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; action = BLK_NEEDS_REDO; } else @@ -8983,7 +9709,7 @@ heap_xlog_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; @@ -9043,12 +9769,19 @@ heap_xlog_multi_insert(XLogReaderState *record) int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. 
*/ - xlrec = (xl_heap_multi_insert *) XLogRecGetData(record); + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + xlrec = (xl_heap_multi_insert *) rec_data; XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); @@ -9075,7 +9808,8 @@ heap_xlog_multi_insert(XLogReaderState *record) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; action = BLK_NEEDS_REDO; } else @@ -9126,7 +9860,7 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); @@ -9174,7 +9908,7 @@ static void heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); + xl_heap_update *xlrec; RelFileNode rnode; BlockNumber oldblk; BlockNumber newblk; @@ -9199,6 +9933,17 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Size freespace = 0; XLogRedoAction oldaction; XLogRedoAction newaction; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); + + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_update *) rec_data; /* initialize to keep the compiler quiet */ oldtup.t_data = NULL; @@ -9257,6 +10002,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) oldtup.t_data = htup; 
oldtup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&oldtup, page); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; @@ -9266,7 +10012,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); + HeapTupleHeaderSetXmax(page, htup, xlrec->old_xmax); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9289,11 +10035,12 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) nbuffer = obuffer; newaction = oldaction; } - else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + else if (isinit) { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); - PageInit(page, BufferGetPageSize(nbuffer), 0); + PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; newaction = BLK_NEEDS_REDO; } else @@ -9399,9 +10146,9 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); - HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); + HeapTupleHeaderSetXmax(page, htup, xlrec->new_xmax); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9540,7 +10287,7 @@ heap_xlog_lock(XLogReaderState *record) BufferGetBlockNumber(buffer), offnum); } - HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); + HeapTupleHeaderSetXmax(page, htup, xlrec->locking_xid); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -9600,7 +10347,7 @@ heap_xlog_lock_updated(XLogReaderState *record) 
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + HeapTupleHeaderSetXmax(page, htup, xlrec->xmax); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -9650,6 +10397,31 @@ heap_xlog_inplace(XLogReaderState *record) UnlockReleaseBuffer(buffer); } +static void +heap_xlog_base_shift(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record); + Buffer buffer; + Page page; + BlockNumber blkno; + RelFileNode target_node; + + XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi, xlrec->delta); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + + void heap_redo(XLogReaderState *record) { @@ -9736,6 +10508,21 @@ heap2_redo(XLogReaderState *record) } } +void +heap3_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info & XLOG_HEAP_OPMASK) + { + case XLOG_HEAP3_BASE_SHIFT: + heap_xlog_base_shift(record); + break; + default: + elog(PANIC, "heap3_redo: unknown op code %u", info); + } +} + /* * Mask a heap page before performing consistency checks on it. 
*/ @@ -9748,6 +10535,10 @@ heap_mask(char *pagedata, BlockNumber blkno) mask_page_lsn_and_checksum(page); mask_page_hint_bits(page); + + /* Ignore prune_xid (it's like a hint-bit) */ + HeapPageSetPruneXid(page, InvalidTransactionId); + mask_unused_space(page); for (off = 1; off <= PageGetMaxOffsetNumber(page); off++) @@ -9863,14 +10654,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, case HEAPTUPLE_LIVE: if (visible) return; - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_DELETE_IN_PROGRESS: if (visible) - xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); + xid = HeapTupleGetUpdateXidAny(tuple); else - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); if (TransactionIdPrecedes(xid, TransactionXmin)) { @@ -9880,7 +10671,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, } break; case HEAPTUPLE_INSERT_IN_PROGRESS: - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_DEAD: Assert(!visible); diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 9befe012a9e..8638dbdf35c 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -413,7 +413,7 @@ tuple_lock_retry: * changes in an existing tuple, except to invalid or * frozen, and neither of those can match priorXmax.) 
*/ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -424,7 +424,7 @@ tuple_lock_retry: if (TransactionIdIsValid(SnapshotDirty.xmin)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"", + errmsg_internal("t_xmin " XID_FMT " is uncommitted in tuple (%u,%u) to be updated in table \"%s\"", SnapshotDirty.xmin, ItemPointerGetBlockNumber(&tuple->t_self), ItemPointerGetOffsetNumber(&tuple->t_self), @@ -473,7 +473,7 @@ tuple_lock_retry: * variable instead of doing HeapTupleHeaderGetXmin again. */ if (TransactionIdIsCurrentTransactionId(priorXmax) && - HeapTupleHeaderGetCmin(tuple->t_data) >= cid) + HeapTupleGetCmin(tuple) >= cid) { tmfd->xmax = priorXmax; @@ -481,7 +481,7 @@ tuple_lock_retry: * Cmin is the problematic value, so store that. See * above. */ - tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data); + tmfd->cmax = HeapTupleGetCmin(tuple); ReleaseBuffer(buffer); return TM_SelfModified; } @@ -506,7 +506,7 @@ tuple_lock_retry: /* * As above, if xmin isn't what we're expecting, do nothing. */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { if (BufferIsValid(buffer)) @@ -539,7 +539,7 @@ tuple_lock_retry: /* updated, so look at the updated row */ *tid = tuple->t_data->t_ctid; /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + priorXmax = HeapTupleGetUpdateXidAny(tuple); if (BufferIsValid(buffer)) ReleaseBuffer(buffer); /* loop back to fetch next in chain */ @@ -860,7 +860,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * case we had better copy it. 
*/ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ @@ -872,7 +872,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * Similar situation to INSERT_IN_PROGRESS case. */ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ @@ -1057,6 +1057,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, targtuple->t_tableOid = RelationGetRelid(scan->rs_rd); targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(targtuple, targpage); switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, hscan->rs_cbuf)) @@ -1092,7 +1093,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * numbers we send to the stats collector to make this come * out right.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple))) { sample_it = true; *liverows += 1; @@ -1123,7 +1124,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * but not the post-image. We also get sane results if the * concurrent transaction never commits. */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple))) *deadrows += 1; else { @@ -1465,7 +1466,7 @@ heapam_index_build_range_scan(Relation heapRelation, * before commit there. 
Give a warning if neither case * applies. */ - xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + xwait = HeapTupleGetXmin(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1524,7 +1525,7 @@ heapam_index_build_range_scan(Relation heapRelation, break; } - xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + xwait = HeapTupleGetUpdateXidAny(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -2202,13 +2203,14 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); loctup.t_len = ItemIdGetLength(lp); loctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&loctup.t_self, page, offnum); valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) { hscan->rs_vistuples[ntup++] = offnum; PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot, - HeapTupleHeaderGetXmin(loctup.t_data)); + HeapTupleGetXmin(&loctup)); } HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, buffer, snapshot); @@ -2247,6 +2249,7 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); hscan->rs_ctup.t_len = ItemIdGetLength(lp); hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&hscan->rs_ctup, dp); ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset); pgstat_count_heap_fetch(scan->rs_rd); @@ -2387,6 +2390,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(tuple, page); ItemPointerSet(&(tuple->t_self), blockno, tupoffset); diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index b72b03ea25a..2a17313e50d 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ 
b/src/backend/access/heap/heapam_visibility.c @@ -217,7 +217,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -229,7 +229,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -241,7 +241,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -251,11 +251,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -284,7 +284,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -299,17 +299,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return true; } - if 
(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -327,7 +327,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; } @@ -416,7 +416,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. */ - else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple))) + else if (!TransactionIdIsValid(HeapTupleGetXmin(htup))) return false; } @@ -506,9 +506,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= curcid) + if (HeapTupleGetCmin(htup) >= curcid) return TM_Invisible; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -518,7 +518,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleHeaderGetRawXmax(tuple); + xmax = HeapTupleGetRawXmax(htup); /* * Careful here: even though this tuple was created by our own @@ -549,7 +549,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = 
HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -557,21 +557,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; return TM_Ok; } else { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -579,16 +579,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, return TM_Ok; } - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return TM_Invisible; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -622,17 +622,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return TM_BeingModified; SetHintBits(tuple, buffer, 
HEAP_XMAX_INVALID, InvalidTransactionId); return TM_Ok; } - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); if (!TransactionIdIsValid(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; } @@ -641,13 +641,13 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; if (TransactionIdDidCommit(xmax)) @@ -663,7 +663,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, * what about the other members? */ - if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * There's no member, even just a locker, alive anymore, so we can @@ -680,20 +680,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return TM_BeingModified; - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return TM_BeingModified; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed 
*/ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -711,7 +711,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) return TM_Updated; /* updated by other */ else @@ -794,7 +794,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -806,7 +806,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -818,7 +818,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -828,7 +828,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * Return the speculative token to caller. Caller can worry about @@ -844,13 +844,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Assert(snapshot->speculativeToken != 0); } - snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple); + snapshot->xmin = HeapTupleGetRawXmin(htup); /* XXX shouldn't we fall through to look at xmax? 
*/ return true; /* in insertion by other */ } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -879,7 +879,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -897,21 +897,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) { if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) - snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple); + snapshot->xmax = HeapTupleGetRawXmax(htup); return true; } - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -929,7 +929,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; /* updated by other */ } @@ -1008,9 +1008,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= 
snapshot->curcid) + if (HeapTupleGetCmin(htup) >= snapshot->curcid) return false; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -1023,7 +1023,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1031,13 +1031,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* updating subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) return true; - else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + else if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* updated after scan started */ else return false; /* updated before scan started */ } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1045,16 +1045,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return true; } - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -1067,7 +1067,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { /* xmin is committed, but maybe not according to our snapshot */ if 
(!HeapTupleHeaderXminFrozen(tuple) && - XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; /* treat as still in progress */ } @@ -1086,14 +1086,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ @@ -1108,18 +1108,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1129,12 +1129,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } else { /* xmax is committed, but maybe not according to our snapshot */ - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), 
snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; /* treat as still in progress */ } @@ -1249,21 +1249,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de return HEAPTUPLE_DEAD; } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple)) + HeapTupleIsOnlyLocked(htup)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * It'd be possible to discern between INSERT/DELETE in progress @@ -1275,9 +1275,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* @@ -1319,14 +1319,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * possibly be running; otherwise have to check. 
*/ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && - MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); @@ -1344,7 +1344,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - TransactionId xmax = HeapTupleGetUpdateXid(tuple); + TransactionId xmax = HeapTupleGetUpdateXid(htup); /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); @@ -1367,7 +1367,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de *dead_after = xmax; return HEAPTUPLE_RECENTLY_DEAD; } - else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * Not in Progress, Not Committed, so either Aborted or crashed. @@ -1381,11 +1381,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); else { /* @@ -1407,7 +1407,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * Deleter committed, allow caller to check if it was recent enough that * some open transactions could still see the tuple. 
*/ - *dead_after = HeapTupleHeaderGetRawXmax(tuple); + *dead_after = HeapTupleGetRawXmax(htup); return HEAPTUPLE_RECENTLY_DEAD; } @@ -1503,7 +1503,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) /* Deleter committed, so tuple is dead if the XID is old enough. */ return GlobalVisTestIsRemovableXid(vistest, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } /* @@ -1516,8 +1516,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) * at the top of this file. */ bool -HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) +HeapTupleIsOnlyLocked(HeapTuple htup) { + HeapTupleHeader tuple = htup->t_data; TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ @@ -1528,7 +1529,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return true; /* invalid xmax means no update */ - if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) return true; /* @@ -1539,7 +1540,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. 
*/ - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1587,8 +1588,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { HeapTupleHeader tuple = htup->t_data; - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); + TransactionId xmin = HeapTupleGetXmin(htup); + TransactionId xmax = HeapTupleGetRawXmax(htup); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); @@ -1688,7 +1689,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); } /* check if it's one of our txids, toplevel is also in there */ diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index 55bbe1d5847..5d95f29e79c 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, result_tuple->t_len = new_tuple_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; + HeapTupleCopyBase(result_tuple, newtup); new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; @@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) */ new_tuple->t_self = tup->t_self; new_tuple->t_tableOid = tup->t_tableOid; + HeapTupleCopyBase(new_tuple, tup); new_tuple->t_data->t_choice = tup->t_data->t_choice; new_tuple->t_data->t_ctid = tup->t_data->t_ctid; @@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup, ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = tup; + HeapTupleSetZeroBase(&tmptup); /* * Break down the tuple into fields. 
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index d34edb4190c..800f5a1424c 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -232,6 +232,9 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + /* * Add the page to the FSM without initializing. If we were to * initialize here, the page would potentially get flushed out to disk @@ -243,7 +246,7 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) /* we'll need this info below */ blockNum = BufferGetBlockNumber(buffer); - freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData; + freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData)); UnlockReleaseBuffer(buffer); @@ -514,6 +517,9 @@ loop: /* * Now we can check to see if there's enough free space here. If so, * we're done. + * + * "Double xmax" page is not suitable for any new tuple, since xmin + * can't be set there. 
*/ page = BufferGetPage(buffer); @@ -525,12 +531,13 @@ loop: */ if (PageIsNew(page)) { - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); MarkBufferDirty(buffer); } pageFreeSpace = PageGetHeapFreeSpace(page); - if (targetFreeSpace <= pageFreeSpace) + if (targetFreeSpace <= pageFreeSpace && + !HeapPageIsDoubleXmax(page)) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); @@ -635,7 +642,8 @@ loop: BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; MarkBufferDirty(buffer); /* diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 522a00af6d1..7640e111399 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -115,7 +115,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * determining the appropriate horizon is a waste if there's no prune_xid * (i.e. no updates/deletes left potentially dead tuples around). */ - prune_xid = ((PageHeader) page)->pd_prune_xid; + prune_xid = HeapPageGetPruneXidNoAssert(page); if (!TransactionIdIsValid(prune_xid)) return; @@ -186,7 +186,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) nnewlpdead; ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin, - limited_ts, &nnewlpdead, NULL); + limited_ts, &nnewlpdead, NULL, true); /* * Report the number of tuples reclaimed to pgstats. 
This is @@ -245,7 +245,8 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId old_snap_xmin, TimestampTz old_snap_ts, int *nnewlpdead, - OffsetNumber *off_loc) + OffsetNumber *off_loc, + bool repairFragmentation) { int ndeleted = 0; Page page = BufferGetPage(buffer); @@ -319,13 +320,15 @@ heap_page_prune(Relation relation, Buffer buffer, heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused); + prstate.nowunused, prstate.nunused, + repairFragmentation); /* * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + if (XidFitsPage(page, prstate.new_prune_xid)) + HeapPageSetPruneXid(page, prstate.new_prune_xid); /* * Also clear the "page is full" flag, since there's no point in @@ -387,10 +390,10 @@ heap_page_prune(Relation relation, Buffer buffer, * point in repeating the prune/defrag process until something else * happens to the page. 
*/ - if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid || + if (HeapPageGetPruneXid(page) != prstate.new_prune_xid || PageIsFull(page)) { - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + HeapPageSetPruneXid(page, prstate.new_prune_xid); PageClearFull(page); MarkBufferDirtyHint(buffer, true); } @@ -534,6 +537,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) HeapTupleData tup; tup.t_tableOid = RelationGetRelid(prstate->rel); + HeapTupleCopyBaseFromPage(&tup, dp); rootlp = PageGetItemId(dp, rootoffnum); @@ -572,7 +576,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); - HeapTupleHeaderAdvanceLatestRemovedXid(htup, + HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); ndeleted++; } @@ -640,13 +644,14 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) tup.t_data = htup; tup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&tup, dp); ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax)) break; /* @@ -673,7 +678,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: @@ -683,7 +688,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) * that the page is reconsidered for pruning in future. 
*/ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_LIVE: @@ -712,7 +717,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) if (tupdead) { latestdead = offnum; - HeapTupleHeaderAdvanceLatestRemovedXid(htup, + HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); } else if (!recent_dead) @@ -734,7 +739,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } /* @@ -851,7 +856,8 @@ void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused) + OffsetNumber *nowunused, int nunused, + bool repairFragmentation) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; @@ -971,7 +977,8 @@ heap_page_prune_execute(Buffer buffer, * Finally, repair any fragmentation, and update the page's hint bit about * whether it has free pointers. 
*/ - PageRepairFragmentation(page); + if (repairFragmentation) + PageRepairFragmentation(page); } @@ -1006,6 +1013,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; + HeapTupleData tup; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) @@ -1014,6 +1022,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); /* * Check if this tuple is part of a HOT-chain rooted at some other @@ -1035,7 +1045,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } else { @@ -1074,9 +1084,11 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) break; htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup))) break; /* Remember the root line pointer for this item */ @@ -1090,7 +1102,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup)); nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } } } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 986a776bbd5..bbb204b193c 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -379,6 +379,7 @@ rewrite_heap_tuple(RewriteState state, &old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); + HeapTupleCopyBase(new_tuple, 
old_tuple); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= @@ -388,7 +389,7 @@ rewrite_heap_tuple(RewriteState state, * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ - heap_freeze_tuple(new_tuple->t_data, + heap_freeze_tuple(new_tuple, state->rs_old_rel->rd_rel->relfrozenxid, state->rs_old_rel->rd_rel->relminmxid, state->rs_freeze_xid, @@ -404,7 +405,7 @@ rewrite_heap_tuple(RewriteState state, * If the tuple has been updated, check the old-to-new mapping hash table. */ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && + HeapTupleIsOnlyLocked(old_tuple)) && !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) @@ -412,7 +413,7 @@ rewrite_heap_tuple(RewriteState state, OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); + hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) @@ -485,7 +486,7 @@ rewrite_heap_tuple(RewriteState state, * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. 
*/ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple), state->rs_oldest_xmin)) { /* @@ -494,7 +495,7 @@ rewrite_heap_tuple(RewriteState state, UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(new_tuple); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -582,7 +583,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple) bool found; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(old_tuple); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -618,6 +619,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup) Size len; OffsetNumber newoff; HeapTuple heaptup; + TransactionId xmin, xmax; /* * If the new tuple is too big for storage or contains already toasted @@ -703,10 +705,20 @@ raw_heap_insert(RewriteState state, HeapTuple tup) if (!state->rs_buffer_valid) { /* Initialize a new empty page */ - PageInit(page, BLCKSZ, 0); + PageInit(page, BLCKSZ, sizeof(HeapPageSpecialData)); state->rs_buffer_valid = true; } + rewrite_page_prepare_for_xid(page, HeapTupleGetXmin(heaptup), false); + rewrite_page_prepare_for_xid(page, HeapTupleGetRawXmax(heaptup), + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? 
true : false); + + xmin = HeapTupleGetXmin(heaptup); + xmax = HeapTupleGetRawXmax(heaptup); + HeapTupleCopyBaseFromPage(heaptup, page); + HeapTupleSetXmin(heaptup, xmin); + HeapTupleSetXmax(heaptup, xmax); + /* And now we can insert the tuple into the page */ newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len, InvalidOffsetNumber, false, true); @@ -999,7 +1011,10 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid, "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, dboid, relid, LSN_FORMAT_ARGS(state->rs_begin_lsn), - xid, GetCurrentTransactionId()); + (uint32) (xid >> 32), + (uint32) xid, + (uint32) (GetCurrentTransactionId() >> 32), + (uint32) GetCurrentTransactionId()); dlist_init(&src->mappings); src->num_mappings = 0; @@ -1048,9 +1063,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, if (!state->rs_logical_rewrite) return; - xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + xmin = HeapTupleGetXmin(new_tuple); /* use *GetUpdateXid to correctly deal with multixacts */ - xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data); + xmax = HeapTupleGetUpdateXidAny(new_tuple); /* * Log the mapping iff the tuple has been created recently. 
@@ -1121,7 +1136,10 @@ heap_xlog_logical_rewrite(XLogReaderState *r) "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, xlrec->mapped_db, xlrec->mapped_rel, LSN_FORMAT_ARGS(xlrec->start_lsn), - xlrec->mapped_xid, XLogRecGetXid(r)); + (uint32) (xlrec->mapped_xid >> 32), + (uint32) xlrec->mapped_xid, + (uint32) (XLogRecGetXid(r) >> 32), + (uint32) XLogRecGetXid(r)); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY); @@ -1217,10 +1235,12 @@ CheckPointLogicalRewriteHeap(void) Oid dboid; Oid relid; XLogRecPtr lsn; - TransactionId rewrite_xid; - TransactionId create_xid; - uint32 hi, - lo; + uint32 lsn_hi, + lsn_lo, + rewrite_xid_hi, + rewrite_xid_lo, + create_xid_hi, + create_xid_lo; if (strcmp(mapping_de->d_name, ".") == 0 || strcmp(mapping_de->d_name, "..") == 0) @@ -1235,10 +1255,12 @@ CheckPointLogicalRewriteHeap(void) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) + &dboid, &relid, &lsn_hi, &lsn_lo, + &rewrite_xid_hi, &rewrite_xid_lo, + &create_xid_hi, &create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - lsn = ((uint64) hi) << 32 | lo; + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index cd603e6aa41..2831b34a563 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -255,7 +255,6 @@ static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer *vmbuffer); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel); -static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void lazy_cleanup_all_indexes(LVRelState *vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, @@ -608,11 +607,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, 
vacrel->pinskipped_pages, vacrel->frozenskipped_pages); appendStringInfo(&buf, - _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %u\n"), + _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: "), (long long) vacrel->tuples_deleted, (long long) vacrel->new_rel_tuples, - (long long) vacrel->new_dead_tuples, - OldestXmin); + (long long) vacrel->new_dead_tuples); + appendStringInfo(&buf, XID_FMT "\n", OldestXmin); orig_rel_pages = vacrel->rel_pages + vacrel->pages_removed; if (orig_rel_pages > 0) { @@ -789,13 +788,6 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) vacrel->indstats = (IndexBulkDeleteResult **) palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); - /* - * Do failsafe precheck before calling dead_items_alloc. This ensures - * that parallel VACUUM won't be attempted when relfrozenxid is already - * dangerously old. - */ - lazy_check_wraparound_failsafe(vacrel); - /* * Allocate the space for dead_items. 
Note that this handles parallel * VACUUM initialization as part of allocating shared memory space used @@ -988,7 +980,6 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) */ if (blkno - next_failsafe_block >= FAILSAFE_EVERY_PAGES) { - lazy_check_wraparound_failsafe(vacrel); next_failsafe_block = blkno; } @@ -1149,7 +1140,9 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0) { - Size freespace = BLCKSZ - SizeOfPageHeaderData; + Size freespace = BufferGetPageSize(buf) + - SizeOfPageHeaderData + - sizeof(HeapPageSpecialData); RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); } @@ -1478,8 +1471,9 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) */ initStringInfo(&buf); appendStringInfo(&buf, - _("%lld dead row versions cannot be removed yet, oldest xmin: %u\n"), - (long long) vacrel->new_dead_tuples, vacrel->OldestXmin); + _("%lld dead row versions cannot be removed yet, oldest xmin: \n"), + (long long) vacrel->new_dead_tuples); + appendStringInfo(&buf, XID_FMT "\n", vacrel->OldestXmin); appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ", "Skipped %u pages due to buffer pins, ", vacrel->pinskipped_pages), @@ -1567,7 +1561,7 @@ retry: */ tuples_deleted = heap_page_prune(rel, buf, vistest, InvalidTransactionId, 0, &nnewlpdead, - &vacrel->offnum); + &vacrel->offnum, true); /* * Now scan the page to collect LP_DEAD items and check for tuples @@ -1634,6 +1628,7 @@ retry: tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); /* * DEAD tuples are almost always pruned into LP_DEAD line pointers by @@ -1697,7 +1692,7 @@ retry: * The inserter definitely committed. But is it old enough * that everyone sees it as committed? 
*/ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin)) { prunestate->all_visible = false; @@ -1754,7 +1749,7 @@ retry: */ num_tuples++; prunestate->hastup = true; - if (heap_prepare_freeze_tuple(tuple.t_data, + if (heap_prepare_freeze_tuple(&tuple, vacrel->relfrozenxid, vacrel->relminmxid, vacrel->FreezeLimit, @@ -1802,15 +1797,19 @@ retry: MarkBufferDirty(buf); + tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyBaseFromPage(&tuple, page); + /* execute collected freezes */ for (int i = 0; i < nfrozen; i++) { - HeapTupleHeader htup; - itemid = PageGetItemId(page, frozen[i].offset); - htup = (HeapTupleHeader) PageGetItem(page, itemid); - heap_execute_freeze_tuple(htup, &frozen[i]); + ItemPointerSet(&tuple.t_self, blkno, frozen[i].offset); + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + + heap_execute_freeze_tuple(&tuple, &frozen[i]); } /* Now WAL-log freezing if necessary */ @@ -2045,13 +2044,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); Assert(MultiXactIdIsValid(vacrel->relminmxid)); - /* Precheck for XID wraparound emergencies */ - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- don't even start an index scan */ - return false; - } - /* Report that we are now vacuuming indexes */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); @@ -2066,13 +2058,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples, vacrel); - - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- end current index scan */ - allindexes = false; - break; - } } } else @@ -2080,13 +2065,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) /* Outsource everything to parallel variant */ 
parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, vacrel->old_live_tuples, vacrel->num_index_scans); - - /* - * Do a postcheck to consider applying wraparound failsafe now. Note - * that parallel VACUUM only gets the precheck and this postcheck. - */ - if (lazy_check_wraparound_failsafe(vacrel)) - allindexes = false; } /* @@ -2374,6 +2352,7 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel) offnum = OffsetNumberNext(offnum)) { ItemId itemid; + HeapTupleData htup; /* * Set the offset number so that we can display it along with any @@ -2391,8 +2370,10 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel) continue; tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); + htup.t_data = tupleheader; + HeapTupleCopyBaseFromPage(&htup, page); - if (heap_tuple_needs_freeze(tupleheader, vacrel->FreezeLimit, + if (heap_tuple_needs_freeze(&htup, vacrel->FreezeLimit, vacrel->MultiXactCutoff, buf)) break; } /* scan along page */ @@ -2403,55 +2384,6 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel) return (offnum <= maxoff); } -/* - * Trigger the failsafe to avoid wraparound failure when vacrel table has a - * relfrozenxid and/or relminmxid that is dangerously far in the past. - * Triggering the failsafe makes the ongoing VACUUM bypass any further index - * vacuuming and heap vacuuming. Truncating the heap is also bypassed. - * - * Any remaining work (work that VACUUM cannot just bypass) is typically sped - * up when the failsafe triggers. VACUUM stops applying any cost-based delay - * that it started out with. - * - * Returns true when failsafe has been triggered. 
- */ -static bool -lazy_check_wraparound_failsafe(LVRelState *vacrel) -{ - /* Don't warn more than once per VACUUM */ - if (vacrel->failsafe_active) - return true; - - if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid, - vacrel->relminmxid))) - { - vacrel->failsafe_active = true; - - /* Disable index vacuuming, index cleanup, and heap rel truncation */ - vacrel->do_index_vacuuming = false; - vacrel->do_index_cleanup = false; - vacrel->do_rel_truncate = false; - - ereport(WARNING, - (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans", - get_database_name(MyDatabaseId), - vacrel->relnamespace, - vacrel->relname, - vacrel->num_index_scans), - errdetail("The table's relfrozenxid or relminmxid is too far in the past."), - errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" - "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); - - /* Stop applying cost limits from this point on */ - VacuumCostActive = false; - VacuumCostBalance = 0; - - return true; - } - - return false; -} - /* * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. */ @@ -3077,6 +3009,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyBaseFromPage(&tuple, page); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf)) { @@ -3096,7 +3029,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * The inserter definitely committed. But is it old enough * that everyone sees it as committed? 
*/ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin)) { all_visible = false; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 0aea476b8ce..7f3f9904c83 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -546,6 +546,7 @@ _bt_getroot(Relation rel, int access) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), @@ -649,6 +650,7 @@ _bt_gettrueroot(Relation rel) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index 3485e93ef64..20ee69b7523 100644 --- a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -141,6 +141,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft, perfectpenalty, leaffillfactor; + int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY; FindSplitData state; FindSplitStrat strategy; ItemId itemid; @@ -154,6 +155,7 @@ _bt_findsplitloc(Relation rel, opaque = (BTPageOpaque) PageGetSpecialPointer(origpage); maxoff = PageGetMaxOffsetNumber(origpage); + maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY)); /* Total free space available on a btree page, after fixed overhead */ leftspace = rightspace = @@ -215,6 +217,15 @@ _bt_findsplitloc(Relation rel, itemid = PageGetItemId(origpage, offnum); itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); +#ifdef USE_ASSERT_CHECKING + /* Ending of rightmost tuple on a page can be shifted relative to + * left boundary of 
BTPageOpaqueData due to conversion from EE96, + * which used different BTPageOpaqueData layout. It is only checked + * in the assert below. + */ + if (maxTupleEnd < ItemIdGetTupleEnd(itemid)) + maxTupleEnd = ItemIdGetTupleEnd(itemid); +#endif /* * When item offset number is not newitemoff, neither side of the * split can be newitem. Record a split after the previous data item @@ -249,7 +260,7 @@ _bt_findsplitloc(Relation rel, * (Though only when it's possible that newitem will end up alone on new * right page.) */ - Assert(olddataitemstoleft == olddataitemstotal); + Assert(olddataitemstoleft + ((PageHeader)origpage)->pd_special - maxTupleEnd == olddataitemstotal); if (newitemoff > maxoff) _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 786c08c0cea..66d17f21ea3 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -15,6 +15,8 @@ #include "postgres.h" #include "access/bufmask.h" +#include "access/heapam_xlog.h" +#include "access/htup_details.h" #include "access/nbtree.h" #include "access/nbtxlog.h" #include "access/transam.h" diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c index b12f43a1bba..2e5007535ce 100644 --- a/src/backend/access/rmgrdesc/clogdesc.c +++ b/src/backend/access/rmgrdesc/clogdesc.c @@ -25,17 +25,17 @@ clog_desc(StringInfo buf, XLogReaderState *record) if (info == CLOG_ZEROPAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "page %d", pageno); + memcpy(&pageno, rec, sizeof(int64)); + appendStringInfo(buf, "page " INT64_FORMAT, pageno); } else if (info == CLOG_TRUNCATE) { xl_clog_truncate xlrec; memcpy(&xlrec, rec, sizeof(xl_clog_truncate)); - appendStringInfo(buf, "page %d; oldestXact %u", + appendStringInfo(buf, "page " INT64_FORMAT "; oldestXact " XID_FMT, xlrec.pageno, xlrec.oldestXact); } } diff --git 
a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c index 26bad44b964..22c3f234919 100644 --- a/src/backend/access/rmgrdesc/committsdesc.c +++ b/src/backend/access/rmgrdesc/committsdesc.c @@ -26,16 +26,16 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record) if (info == COMMIT_TS_ZEROPAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + memcpy(&pageno, rec, sizeof(int64)); + appendStringInfo(buf, INT64_FORMAT, pageno); } else if (info == COMMIT_TS_TRUNCATE) { xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec; - appendStringInfo(buf, "pageno %d, oldestXid %u", + appendStringInfo(buf, "pageno " INT64_FORMAT ", oldestXid " XID_FMT, trunc->pageno, trunc->oldestXid); } } diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 8ae31126ebf..f65dab201c8 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -26,17 +26,16 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) static void out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) { - appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid " XID_FMT, xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, - EpochFromFullTransactionId(xlrec->latestRemovedFullXid), XidFromFullTransactionId(xlrec->latestRemovedFullXid)); } static void out_gistxlogDelete(StringInfo buf, gistxlogDelete *xlrec) { - appendStringInfo(buf, "delete: latestRemovedXid %u, nitems: %u", + appendStringInfo(buf, "delete: latestRemovedXid " XID_FMT ", nitems: %u", xlrec->latestRemovedXid, xlrec->ntodelete); } @@ -51,8 +50,7 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec) static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { - appendStringInfo(buf, "deleteXid %u:%u; downlink %u", - 
EpochFromFullTransactionId(xlrec->deleteXid), + appendStringInfo(buf, "deleteXid " XID_FMT "; downlink %u", XidFromFullTransactionId(xlrec->deleteXid), xlrec->downlinkOffset); } diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c index 90ccea08e2c..2f9f081811e 100644 --- a/src/backend/access/rmgrdesc/hashdesc.c +++ b/src/backend/access/rmgrdesc/hashdesc.c @@ -113,7 +113,7 @@ hash_desc(StringInfo buf, XLogReaderState *record) { xl_hash_vacuum_one_page *xlrec = (xl_hash_vacuum_one_page *) rec; - appendStringInfo(buf, "ntuples %d, latestRemovedXid %u", + appendStringInfo(buf, "ntuples %d, latestRemovedXid " XID_FMT, xlrec->ntuples, xlrec->latestRemovedXid); break; diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 5c29fd9eae2..093ee281a4b 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -58,12 +58,12 @@ heap_desc(StringInfo buf, XLogReaderState *record) { xl_heap_update *xlrec = (xl_heap_update *) rec; - appendStringInfo(buf, "off %u xmax %u flags 0x%02X ", + appendStringInfo(buf, "off %u xmax " XID_FMT " flags 0x%02X ", xlrec->old_offnum, xlrec->old_xmax, xlrec->flags); out_infobits(buf, xlrec->old_infobits_set); - appendStringInfo(buf, "; new off %u xmax %u", + appendStringInfo(buf, "; new off %u xmax " XID_FMT, xlrec->new_offnum, xlrec->new_xmax); } @@ -71,12 +71,12 @@ heap_desc(StringInfo buf, XLogReaderState *record) { xl_heap_update *xlrec = (xl_heap_update *) rec; - appendStringInfo(buf, "off %u xmax %u flags 0x%02X ", + appendStringInfo(buf, "off %u xmax " XID_FMT " flags 0x%02X ", xlrec->old_offnum, xlrec->old_xmax, xlrec->flags); out_infobits(buf, xlrec->old_infobits_set); - appendStringInfo(buf, "; new off %u xmax %u", + appendStringInfo(buf, "; new off %u xmax " XID_FMT, xlrec->new_offnum, xlrec->new_xmax); } @@ -103,7 +103,7 @@ heap_desc(StringInfo buf, XLogReaderState *record) { xl_heap_lock *xlrec = (xl_heap_lock *) 
rec; - appendStringInfo(buf, "off %u: xid %u: flags 0x%02X ", + appendStringInfo(buf, "off %u: xid " XID_FMT ": flags 0x%02X ", xlrec->offnum, xlrec->locking_xid, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } @@ -125,7 +125,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_prune *xlrec = (xl_heap_prune *) rec; - appendStringInfo(buf, "latestRemovedXid %u nredirected %u ndead %u", + appendStringInfo(buf, "latestRemovedXid " XID_FMT " nredirected %u ndead %u", xlrec->latestRemovedXid, xlrec->nredirected, xlrec->ndead); @@ -140,14 +140,14 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; - appendStringInfo(buf, "cutoff xid %u ntuples %u", + appendStringInfo(buf, "cutoff xid " XID_FMT " ntuples %u", xlrec->cutoff_xid, xlrec->ntuples); } else if (info == XLOG_HEAP2_VISIBLE) { xl_heap_visible *xlrec = (xl_heap_visible *) rec; - appendStringInfo(buf, "cutoff xid %u flags 0x%02X", + appendStringInfo(buf, "cutoff xid " XID_FMT " flags 0x%02X", xlrec->cutoff_xid, xlrec->flags); } else if (info == XLOG_HEAP2_MULTI_INSERT) @@ -161,7 +161,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec; - appendStringInfo(buf, "off %u: xmax %u: flags 0x%02X ", + appendStringInfo(buf, "off %u: xmax " XID_FMT ": flags 0x%02X ", xlrec->offnum, xlrec->xmax, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } @@ -180,6 +180,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record) } } +void +heap3_desc(StringInfo buf, XLogReaderState *record) +{ + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + info &= XLOG_HEAP_OPMASK; + if (info == XLOG_HEAP3_BASE_SHIFT) + { + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; + + appendStringInfo(buf, "%s delta " INT64_FORMAT " ", + xlrec->multi ? 
"MultiXactId" : "XactId", + xlrec->delta); + } +} + const char * heap_identify(uint8 info) { @@ -263,3 +280,18 @@ heap2_identify(uint8 info) return id; } + +const char * +heap3_identify(uint8 info) +{ + const char *id = NULL; + + switch (info & ~XLR_INFO_MASK) + { + case XLOG_HEAP3_BASE_SHIFT: + id = "BASE_SHIFT"; + break; + } + + return id; +} diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c index 8c37690e659..6f57268c0f8 100644 --- a/src/backend/access/rmgrdesc/mxactdesc.c +++ b/src/backend/access/rmgrdesc/mxactdesc.c @@ -19,7 +19,7 @@ static void out_member(StringInfo buf, MultiXactMember *member) { - appendStringInfo(buf, "%u ", member->xid); + appendStringInfo(buf, XID_FMT " ", member->xid); switch (member->status) { case MultiXactStatusForKeyShare: @@ -55,18 +55,18 @@ multixact_desc(StringInfo buf, XLogReaderState *record) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE || info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + memcpy(&pageno, rec, sizeof(int64)); + appendStringInfo(buf, INT64_FORMAT, pageno); } else if (info == XLOG_MULTIXACT_CREATE_ID) { xl_multixact_create *xlrec = (xl_multixact_create *) rec; int i; - appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid, - xlrec->moff, xlrec->nmembers); + appendStringInfo(buf, XID_FMT " offset " INT64_FORMAT " nmembers %d: ", + xlrec->mid, xlrec->moff, xlrec->nmembers); for (i = 0; i < xlrec->nmembers; i++) out_member(buf, &xlrec->members[i]); } @@ -74,7 +74,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record) { xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec; - appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)", + appendStringInfo(buf, "offsets [" XID_FMT ", " XID_FMT "), members [" INT64_FORMAT ", " INT64_FORMAT ")", xlrec->startTruncOff, xlrec->endTruncOff, xlrec->startTruncMemb, xlrec->endTruncMemb); } diff --git a/src/backend/access/rmgrdesc/nbtdesc.c
b/src/backend/access/rmgrdesc/nbtdesc.c index 710efbd36a1..d8e561a8366 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -63,7 +63,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_delete *xlrec = (xl_btree_delete *) rec; - appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u; nupdated %u", + appendStringInfo(buf, "latestRemovedXid " XID_FMT "; ndeleted %u; nupdated %u", xlrec->latestRemovedXid, xlrec->ndeleted, xlrec->nupdated); break; } @@ -80,9 +80,8 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec; - appendStringInfo(buf, "left %u; right %u; level %u; safexid %u:%u; ", + appendStringInfo(buf, "left %u; right %u; level %u; safexid " XID_FMT "; ", xlrec->leftsib, xlrec->rightsib, xlrec->level, - EpochFromFullTransactionId(xlrec->safexid), XidFromFullTransactionId(xlrec->safexid)); appendStringInfo(buf, "leafleft %u; leafright %u; leaftopparent %u", xlrec->leafleftsib, xlrec->leafrightsib, @@ -100,10 +99,9 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; - appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid " XID_FMT, xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, - EpochFromFullTransactionId(xlrec->latestRemovedFullXid), XidFromFullTransactionId(xlrec->latestRemovedFullXid)); break; } diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c index 0fefe386b8e..5a68e28aa2c 100644 --- a/src/backend/access/rmgrdesc/spgdesc.c +++ b/src/backend/access/rmgrdesc/spgdesc.c @@ -118,7 +118,7 @@ spg_desc(StringInfo buf, XLogReaderState *record) { spgxlogVacuumRedirect *xlrec = (spgxlogVacuumRedirect *) rec; - appendStringInfo(buf, "ntoplaceholder: %u, firstplaceholder: %u, newestredirectxid: %u", + appendStringInfo(buf, "ntoplaceholder: %u, firstplaceholder: %u, 
newestredirectxid: " XID_FMT, xlrec->nToPlaceholder, xlrec->firstPlaceholder, xlrec->newestRedirectXid); diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c index 01ee7ac6d2c..77afee53090 100644 --- a/src/backend/access/rmgrdesc/standbydesc.c +++ b/src/backend/access/rmgrdesc/standbydesc.c @@ -21,7 +21,7 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec) { int i; - appendStringInfo(buf, "nextXid %u latestCompletedXid %u oldestRunningXid %u", + appendStringInfo(buf, "nextXid " XID_FMT " latestCompletedXid " XID_FMT " oldestRunningXid " XID_FMT, xlrec->nextXid, xlrec->latestCompletedXid, xlrec->oldestRunningXid); @@ -29,7 +29,7 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec) { appendStringInfo(buf, "; %d xacts:", xlrec->xcnt); for (i = 0; i < xlrec->xcnt; i++) - appendStringInfo(buf, " %u", xlrec->xids[i]); + appendStringInfo(buf, " " XID_FMT, xlrec->xids[i]); } if (xlrec->subxid_overflow) @@ -48,7 +48,7 @@ standby_desc(StringInfo buf, XLogReaderState *record) int i; for (i = 0; i < xlrec->nlocks; i++) - appendStringInfo(buf, "xid %u db %u rel %u ", + appendStringInfo(buf, "xid " XID_FMT " db %u rel %u ", xlrec->locks[i].xid, xlrec->locks[i].dbOid, xlrec->locks[i].relOid); } diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index fca03a00d98..93cddd62813 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -99,7 +99,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); @@ -183,7 +184,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed) { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - 
parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); @@ -276,7 +278,7 @@ xact_desc_subxacts(StringInfo buf, int nsubxacts, TransactionId *subxacts) { appendStringInfoString(buf, "; subxacts:"); for (i = 0; i < nsubxacts; i++) - appendStringInfo(buf, " %u", subxacts[i]); + appendStringInfo(buf, " " XID_FMT, subxacts[i]); } } @@ -289,7 +291,7 @@ xact_desc_commit(StringInfo buf, uint8 info, xl_xact_commit *xlrec, RepOriginId /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) - appendStringInfo(buf, "%u: ", parsed.twophase_xid); + appendStringInfo(buf, XID_FMT ": ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); @@ -324,7 +326,7 @@ xact_desc_abort(StringInfo buf, uint8 info, xl_xact_abort *xlrec, RepOriginId or /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) - appendStringInfo(buf, "%u: ", parsed.twophase_xid); + appendStringInfo(buf, XID_FMT ": ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); @@ -377,7 +379,7 @@ xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec) appendStringInfoString(buf, "subxacts:"); for (i = 0; i < xlrec->nsubxacts; i++) - appendStringInfo(buf, " %u", xlrec->xsub[i]); + appendStringInfo(buf, " " XID_FMT, xlrec->xsub[i]); } void @@ -416,7 +418,7 @@ xact_desc(StringInfo buf, XLogReaderState *record) * interested in the top-level xid that issued the record and which * xids are being reported here. 
*/ - appendStringInfo(buf, "xtop %u: ", xlrec->xtop); + appendStringInfo(buf, "xtop " XID_FMT ": ", xlrec->xtop); xact_desc_assignment(buf, xlrec); } else if (info == XLOG_XACT_INVALIDATIONS) diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 5bf2346dd91..88e97877971 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -45,15 +45,14 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " - "oldest xid %u in DB %u; oldest multi %u in DB %u; " - "oldest/newest commit timestamp xid: %u/%u; " - "oldest running xid %u; %s", + "tli %u; prev tli %u; fpw %s; xid " XID_FMT "; oid %u; multi " XID_FMT "; offset " INT64_FORMAT "; " + "oldest xid " XID_FMT " in DB %u; oldest multi " XID_FMT " in DB %u; " + "oldest/newest commit timestamp xid: " XID_FMT "/" XID_FMT "; " + "oldest running xid " XID_FMT "; %s", LSN_FORMAT_ARGS(checkpoint->redo), checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? 
"true" : "false", - EpochFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid), checkpoint->nextOid, checkpoint->nextMulti, diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 3ea16a270a8..8d98cd10658 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -89,24 +89,24 @@ static SlruCtlData XactCtlData; #define XactCtl (&XactCtlData) -static int ZeroCLOGPage(int pageno, bool writeXlog); -static bool CLOGPagePrecedes(int page1, int page2); -static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact, +static int ZeroCLOGPage(int64 pageno, bool writeXlog); +static bool CLOGPagePrecedes(int64 page1, int64 page2); +static void WriteZeroPageXlogRec(int64 pageno); +static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno, + XLogRecPtr lsn, int64 pageno, bool all_xact_same_page); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno); static void set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn); static bool TransactionGroupUpdateXidStatus(TransactionId xid, - XidStatus status, XLogRecPtr lsn, int pageno); + XidStatus status, XLogRecPtr lsn, int64 pageno); static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno); + XLogRecPtr lsn, int64 pageno); /* @@ -164,7 +164,7 @@ void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { - int pageno = TransactionIdToPage(xid); /* get page of parent */ + int64 pageno = TransactionIdToPage(xid); /* get page of parent */ int i; Assert(status == 
TRANSACTION_STATUS_COMMITTED || @@ -238,7 +238,7 @@ static void set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { - int pageno = TransactionIdToPage(subxids[0]); + int64 pageno = TransactionIdToPage(subxids[0]); int offset = 0; int i = 0; @@ -247,7 +247,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids, while (i < nsubxids) { int num_on_page = 0; - int nextpageno; + int64 nextpageno; do { @@ -273,7 +273,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids, static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno, + XLogRecPtr lsn, int64 pageno, bool all_xact_same_page) { /* Can't use group update when PGPROC overflows. */ @@ -338,7 +338,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno) + XLogRecPtr lsn, int64 pageno) { int slotno; int i; @@ -412,7 +412,7 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, */ static bool TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, - XLogRecPtr lsn, int pageno) + XLogRecPtr lsn, int64 pageno) { volatile PROC_HDR *procglobal = ProcGlobal; PGPROC *proc = MyProc; @@ -638,7 +638,7 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; @@ -712,6 +712,9 @@ void BootStrapCLOG(void) { int slotno; + int64 pageno; + + pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid)); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -722,6 +725,16 @@ BootStrapCLOG(void) 
SimpleLruWritePage(XactCtl, slotno); Assert(!XactCtl->shared->page_dirty[slotno]); + if (pageno != 0) + { + /* Create and zero the first page of the commit log */ + slotno = ZeroCLOGPage(pageno, false); + + /* Make sure it's written out */ + SimpleLruWritePage(XactCtl, slotno); + Assert(!XactCtl->shared->page_dirty[slotno]); + } + LWLockRelease(XactSLRULock); } @@ -735,7 +748,7 @@ BootStrapCLOG(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroCLOGPage(int pageno, bool writeXlog) +ZeroCLOGPage(int64 pageno, bool writeXlog) { int slotno; @@ -755,7 +768,7 @@ void StartupCLOG(void) { TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -774,7 +787,7 @@ void TrimCLOG(void) { TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -839,7 +852,7 @@ CheckPointCLOG(void) void ExtendCLOG(TransactionId newestXact) { - int pageno; + int64 pageno; /* * No work except at first XID of a page. But beware: just after @@ -878,7 +891,7 @@ ExtendCLOG(TransactionId newestXact) void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We @@ -911,49 +924,26 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) SimpleLruTruncate(XactCtl, cutoffPage); } - /* * Decide whether a CLOG page number is "older" for truncation purposes. * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() - * would get weird about permanent xact IDs. 
So, offset both such that xid1, - * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset - * is relevant to page 0 and to the page preceding page 0. - * - * The page containing oldestXact-2^31 is the important edge case. The - * portion of that page equaling or following oldestXact-2^31 is expendable, - * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is - * the first XID of a page and segment, the entire page and segment is - * expendable, and we could truncate the segment. Recognizing that case would - * require making oldestXact, not just the page containing oldestXact, - * available to this callback. The benefit would be rare and small, so we - * don't optimize that edge case. + * With 64xid this function is just "<", but we left it as a function in order + * for its calls remain "vanilla" like. */ static bool -CLOGPagePrecedes(int page1, int page2) +CLOGPagePrecedes(int64 page1, int64 page2) { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId + 1; - xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId + 1; - - return (TransactionIdPrecedes(xid1, xid2) && - TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1)); + return page1 < page2; } - /* * Write a ZEROPAGE xlog record */ static void -WriteZeroPageXlogRec(int pageno) +WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); } @@ -964,7 +954,7 @@ WriteZeroPageXlogRec(int pageno) * in TruncateCLOG(). 
*/ static void -WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb) +WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb) { XLogRecPtr recptr; xl_clog_truncate xlrec; @@ -992,10 +982,10 @@ clog_redo(XLogReaderState *record) if (info == CLOG_ZEROPAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index cbbe19fea83..73d91ba4edb 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -105,16 +105,16 @@ bool track_commit_timestamp; static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, - RepOriginId nodeid, int pageno); + RepOriginId nodeid, int64 pageno); static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, RepOriginId nodeid, int slotno); static void error_commit_ts_disabled(void); -static int ZeroCommitTsPage(int pageno, bool writeXlog); -static bool CommitTsPagePrecedes(int page1, int page2); +static int ZeroCommitTsPage(int64 pageno, bool writeXlog); +static bool CommitTsPagePrecedes(int64 page1, int64 page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); -static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid); +static void WriteZeroPageXlogRec(int64 pageno); +static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid); /* * TransactionTreeSetCommitTsData @@ -170,7 +170,7 @@ TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, */ for (i = 0, headxid = xid;;) { - int pageno = TransactionIdToCTsPage(headxid); + int64 pageno = TransactionIdToCTsPage(headxid); int j; for (j = i; j < nsubxids; j++) @@ -214,7 +214,7 @@ 
TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, - RepOriginId nodeid, int pageno) + RepOriginId nodeid, int64 pageno) { int slotno; int i; @@ -266,7 +266,7 @@ bool TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, RepOriginId *nodeid) { - int pageno = TransactionIdToCTsPage(xid); + int64 pageno = TransactionIdToCTsPage(xid); int entryno = TransactionIdToCTsEntry(xid); int slotno; CommitTimestampEntry entry; @@ -276,7 +276,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, if (!TransactionIdIsValid(xid)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("cannot retrieve commit timestamp for transaction %u", xid))); + errmsg("cannot retrieve commit timestamp for transaction %" PRIu64, xid))); else if (!TransactionIdIsNormal(xid)) { /* frozen and bootstrap xids are always committed far in the past */ @@ -587,7 +587,7 @@ BootStrapCommitTs(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroCommitTsPage(int pageno, bool writeXlog) +ZeroCommitTsPage(int64 pageno, bool writeXlog) { int slotno; @@ -680,7 +680,7 @@ static void ActivateCommitTs(void) { TransactionId xid; - int pageno; + int64 pageno; /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); @@ -813,7 +813,7 @@ CheckPointCommitTs(void) void ExtendCommitTs(TransactionId newestXact) { - int pageno; + int64 pageno; /* * Nothing to do if module not enabled. Note we do an unlocked read of @@ -851,7 +851,7 @@ ExtendCommitTs(TransactionId newestXact) void TruncateCommitTs(TransactionId oldestXact) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We @@ -916,49 +916,23 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) * Decide whether a commitTS page number is "older" for truncation purposes. 
 * Analogous to CLOGPagePrecedes().
 *
- * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128.  This
- * introduces differences compared to CLOG and the other SLRUs having (1 <<
- * 31) % per_page == 0.  This function never tests exactly
- * TransactionIdPrecedes(x-2^31, x).  When the system reaches xidStopLimit,
- * there are two possible counts of page boundaries between oldestXact and the
- * latest XID assigned, depending on whether oldestXact is within the first
- * 128 entries of its page.  Since this function doesn't know the location of
- * oldestXact within page2, it returns false for one page that actually is
- * expendable.  This is a wider (yet still negligible) version of the
- * truncation opportunity that CLOGPagePrecedes() cannot recognize.
- *
- * For the sake of a worked example, number entries with decimal values such
- * that page1==1 entries range from 1.0 to 1.999.  Let N+0.15 be the number of
- * pages that 2^31 entries will span (N is an integer).  If oldestXact=N+2.1,
- * then the final safe XID assignment leaves newestXact=1.95.  We keep page 2,
- * because entry=2.85 is the border that toggles whether entries precede the
- * last entry of the oldestXact page.  While page 2 is expendable at
- * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9.
+ * With 64-bit xids this function is simply "<", but it is kept as a function
+ * so that its call sites remain unchanged from the "vanilla" code.
*/ static bool -CommitTsPagePrecedes(int page1, int page2) +CommitTsPagePrecedes(int64 page1, int64 page2) { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId + 1; - xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId + 1; - - return (TransactionIdPrecedes(xid1, xid2) && - TransactionIdPrecedes(xid1, xid2 + COMMIT_TS_XACTS_PER_PAGE - 1)); + return page1 < page2; } - /* * Write a ZEROPAGE xlog record */ static void -WriteZeroPageXlogRec(int pageno) +WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); } @@ -966,7 +940,7 @@ WriteZeroPageXlogRec(int pageno) * Write a TRUNCATE xlog record */ static void -WriteTruncateXlogRec(int pageno, TransactionId oldestXid) +WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid) { xl_commit_ts_truncate xlrec; @@ -991,10 +965,10 @@ commit_ts_redo(XLogReaderState *record) if (info == COMMIT_TS_ZEROPAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index e6c70ed0bc2..2ef7d5a9de8 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -112,15 +112,15 @@ ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) #define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) -#define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* * The situation for members is a bit more 
complex: we store one byte of * additional flag bits for each TransactionId. To do this without getting - * into alignment issues, we store four bytes of flags, and then the - * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and - * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups - * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and + * into alignment issues, we store eight bytes of flags, and then the + * corresponding 8 Xids. Each such 9-word (72-byte) set we call a "group", and + * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 113 groups + * per page. This wastes 56 bytes per page, but that's OK -- simplicity (and * performance) trumps space efficiency here. * * Note that the "offset" macros work with byte offset, not array indexes, so @@ -132,7 +132,7 @@ #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* how many full bytes of flags are there in a group? */ -#define MULTIXACT_FLAGBYTES_PER_GROUP 4 +#define MULTIXACT_FLAGBYTES_PER_GROUP 8 #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) /* size in bytes of a complete group */ @@ -142,22 +142,9 @@ #define MULTIXACT_MEMBERS_PER_PAGE \ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP) -/* - * Because the number of items per page is not a divisor of the last item - * number (member 0xFFFFFFFF), the last segment does not use the maximum number - * of pages, and moreover the last used page therein does not use the same - * number of items as previous pages. (Another way to say it is that the - * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page - * has some empty space after that item.) - * - * This constant is the number of members in the last page of the last segment. 
- */ -#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \ - ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1)) - /* page in which a member is to be found */ #define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE) -#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* Location (byte offset within page) of flag word for a given member */ #define MXOffsetToFlagsOffset(xid) \ @@ -216,22 +203,8 @@ typedef struct MultiXactStateData MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; - /* - * Oldest multixact offset that is potentially referenced by a multixact - * referenced by a relation. We don't always know this value, so there's - * a flag here to indicate whether or not we currently do. - */ - MultiXactOffset oldestOffset; - bool oldestOffsetKnown; - /* support for anti-wraparound measures */ MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - - /* support for members anti-wraparound measures */ - MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */ /* * Per-backend data starts here. 
We have two arrays stored in the area @@ -353,19 +326,16 @@ static void mXactCachePut(MultiXactId multi, int nmembers, static char *mxstatus_to_string(MultiXactStatus status); /* management of SLRU infrastructure */ -static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog); -static int ZeroMultiXactMemberPage(int pageno, bool writeXlog); -static bool MultiXactOffsetPagePrecedes(int page1, int page2); -static bool MultiXactMemberPagePrecedes(int page1, int page2); +static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog); +static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog); +static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2); +static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2); static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); -static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, - MultiXactOffset start, uint32 distance); -static bool SetOffsetVacuumLimit(bool is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); -static void WriteMZeroPageXlogRec(int pageno, uint8 info); +static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startTruncOff, MultiXactId endTruncOff, @@ -397,6 +367,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); + /* memset members array because with 64-bit xids it has a padding hole */ + MemSet(members, 0, sizeof(members)); + /* * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs * are still running. 
In typical usage, xid2 will be our own XID and the @@ -451,7 +424,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); - debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s", + debug_elog5(DEBUG2, "Expand: received multi " XID_FMT ", xid " XID_FMT " status %s", multi, xid, mxstatus_to_string(status)); /* @@ -476,7 +449,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) member.status = status; newMulti = MultiXactIdCreateFromMembers(1, &member); - debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u", + debug_elog4(DEBUG2, "Expand: " XID_FMT " has no members, create singleton " XID_FMT, multi, newMulti); return newMulti; } @@ -490,7 +463,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) if (TransactionIdEquals(members[i].xid, xid) && (members[i].status == status)) { - debug_elog4(DEBUG2, "Expand: %u is already a member of %u", + debug_elog4(DEBUG2, "Expand: " XID_FMT " is already a member of " XID_FMT, xid, multi); pfree(members); return multi; @@ -511,7 +484,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) * end of the loop. 
*/ newMembers = (MultiXactMember *) - palloc(sizeof(MultiXactMember) * (nmembers + 1)); + palloc0(sizeof(MultiXactMember) * (nmembers + 1)); for (i = 0, j = 0; i < nmembers; i++) { @@ -526,12 +499,13 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) newMembers[j].xid = xid; newMembers[j++].status = status; + newMulti = MultiXactIdCreateFromMembers(j, newMembers); pfree(members); pfree(newMembers); - debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti); + debug_elog3(DEBUG2, "Expand: returning new multi " XID_FMT, newMulti); return newMulti; } @@ -554,7 +528,7 @@ MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly) int nmembers; int i; - debug_elog3(DEBUG2, "IsRunning %u?", multi); + debug_elog3(DEBUG2, "IsRunning " XID_FMT "?", multi); /* * "false" here means we assume our callers have checked that the given @@ -594,7 +568,7 @@ MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly) { if (TransactionIdIsInProgress(members[i].xid)) { - debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running", + debug_elog4(DEBUG2, "IsRunning: member %d (" XID_FMT ") is running", i, members[i].xid); pfree(members); return true; @@ -603,7 +577,7 @@ MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly) pfree(members); - debug_elog3(DEBUG2, "IsRunning: %u is not running", multi); + debug_elog3(DEBUG2, "IsRunning: " XID_FMT " is not running", multi); return false; } @@ -657,7 +631,7 @@ MultiXactIdSetOldestMember(void) LWLockRelease(MultiXactGenLock); - debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u", + debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = " XID_FMT, MyBackendId, nextMXact); } } @@ -710,7 +684,7 @@ MultiXactIdSetOldestVisible(void) LWLockRelease(MultiXactGenLock); - debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u", + debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = " XID_FMT, MyBackendId, oldestMXact); } } @@ -729,9 +703,6 @@ ReadNextMultiXactId(void) mxid = 
MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - if (mxid < FirstMultiXactId) - mxid = FirstMultiXactId; - return mxid; } @@ -862,8 +833,8 @@ static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members) { - int pageno; - int prev_pageno; + int64 pageno; + int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -899,8 +870,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; - uint32 *flagsptr; - uint32 flagsval; + uint64 *flagsptr; + uint64 flagsval; int bshift; int flagsoff; int memberoff; @@ -921,14 +892,15 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, memberptr = (TransactionId *) (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff); + *memberptr = members[i].xid; - flagsptr = (uint32 *) + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; - flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); - flagsval |= (members[i].status << bshift); + flagsval &= ~((uint64)((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); + flagsval |= ((uint64)members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; @@ -981,8 +953,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * If we're past multiVacLimit or the safe threshold for member storage * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. - * If we're past multiWarnLimit, start issuing warnings. - * If we're past multiStopLimit, refuse to create new MultiXactIds. * * Note these are pretty much the same protections in GetNewTransactionId. *---------- @@ -996,41 +966,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. 
*/ - MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit; - MultiXactId multiStopLimit = MultiXactState->multiStopLimit; - MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit; - Oid oldest_datoid = MultiXactState->oldestMultiXactDB; LWLockRelease(MultiXactGenLock); - if (IsUnderPostmaster && - !MultiXactIdPrecedes(result, multiStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* - * Immediately kick autovacuum into action as we're already in - * ERROR territory. - */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* * To avoid swamping the postmaster with signals, we issue the autovac * request only once per 64K multis generated. 
This still gives @@ -1039,36 +977,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) if (IsUnderPostmaster && (result % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (!MultiXactIdPrecedes(result, multiWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datname, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datoid, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); result = MultiXactState->nextMXact; - if (result < FirstMultiXactId) - result = FirstMultiXactId; } /* Make sure there is room for the MXID in the file. */ @@ -1088,78 +999,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) else *offset = nextOffset; - /*---------- - * Protect against overrun of the members space as well, with the - * following rules: - * - * If we're past offsetStopLimit, refuse to generate more multis. - * If we're close to offsetStopLimit, emit a warning. - * - * Arbitrarily, we start emitting warnings when we're 20 segments or less - * from offsetStopLimit. 
- * - * Note we haven't updated the shared state yet, so if we fail at this - * point, the multixact ID we grabbed can still be used by the next guy. - * - * Note that there is no point in forcing autovacuum runs here: the - * multixact freeze settings would have to be reduced for that to have any - * effect. - *---------- - */ -#define OFFSET_WARN_SEGMENTS 20 - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, - nmembers)) - { - /* see comment in the corresponding offsets wraparound case */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("multixact \"members\" limit exceeded"), - errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.", - "This command would create a multixact with %u members, but the remaining space is only enough for %u members.", - MultiXactState->offsetStopLimit - nextOffset - 1, - nmembers, - MultiXactState->offsetStopLimit - nextOffset - 1), - errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", - MultiXactState->oldestMultiXactDB))); - } - - /* - * Check whether we should kick autovacuum into action, to prevent members - * wraparound. NB we use a much larger window to trigger autovacuum than - * just the warning limit. The warning is just a measure of last resort - - * this is in line with GetNewTransactionId's behaviour. - */ - if (!MultiXactState->oldestOffsetKnown || - (MultiXactState->nextOffset - MultiXactState->oldestOffset - > MULTIXACT_MEMBER_SAFE_THRESHOLD)) - { - /* - * To avoid swamping the postmaster with signals, we issue the autovac - * request only when crossing a segment boundary. With default - * compilation settings that's roughly after 50k members. 
This still - * gives plenty of chances before we get into real trouble. - */ - if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) != - (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT)) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - } - - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, - nextOffset, - nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) - ereport(WARNING, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used", - "database with OID %u must be vacuumed before %d more multixact members are used", - MultiXactState->offsetStopLimit - nextOffset + nmembers, - MultiXactState->oldestMultiXactDB, - MultiXactState->offsetStopLimit - nextOffset + nmembers), - errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings."))); - ExtendMultiXactMember(nextOffset, nmembers); /* @@ -1188,7 +1027,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) LWLockRelease(MultiXactGenLock); - debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset); + debug_elog4(DEBUG2, "GetNew: returning " XID_FMT " offset " INT64_FORMAT, result, *offset); return result; } @@ -1223,8 +1062,8 @@ int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool onlyLock) { - int pageno; - int prev_pageno; + int64 pageno; + int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -1232,13 +1071,12 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, int length; int truelength; int i; - MultiXactId oldestMXact; MultiXactId nextMXact; MultiXactId tmpMXact; MultiXactOffset nextOffset; MultiXactMember *ptr; - debug_elog3(DEBUG2, "GetMembers: asked for %u", multi); + debug_elog3(DEBUG2, "GetMembers: asked for 
" XID_FMT, multi); if (!MultiXactIdIsValid(multi) || from_pgupgrade) { @@ -1288,24 +1126,11 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, */ LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMXact = MultiXactState->oldestMultiXactId; nextMXact = MultiXactState->nextMXact; nextOffset = MultiXactState->nextOffset; LWLockRelease(MultiXactGenLock); - if (MultiXactIdPrecedes(multi, oldestMXact)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u does no longer exist -- apparent wraparound", - multi))); - - if (!MultiXactIdPrecedes(multi, nextMXact)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u has not been created yet -- apparent wraparound", - multi))); - /* * Find out the offset at which we need to start reading MultiXactMembers * and the number of members in the multixact. We determine the latter as @@ -1350,7 +1175,9 @@ retry: offptr += entryno; offset = *offptr; - Assert(offset != 0); + if (offset == 0) + ereport(ERROR, + (errmsg("found invalid zero offset in multixact " XID_FMT, multi))); /* * Use the same increment rule as GetNewMultiXactId(), that is, don't @@ -1367,10 +1194,6 @@ retry: { MultiXactOffset nextMXOffset; - /* handle wraparound if needed */ - if (tmpMXact < FirstMultiXactId) - tmpMXact = FirstMultiXactId; - prev_pageno = pageno; pageno = MultiXactIdToOffsetPage(tmpMXact); @@ -1397,7 +1220,7 @@ retry: LWLockRelease(MultiXactOffsetSLRULock); - ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember)); + ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember)); /* Now get the members themselves. 
*/ LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -1407,7 +1230,7 @@ retry: for (i = 0; i < length; i++, offset++) { TransactionId *xactptr; - uint32 *flagsptr; + uint64 *flagsptr; int flagsoff; int bshift; int memberoff; @@ -1433,7 +1256,7 @@ retry: flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); - flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; @@ -1517,7 +1340,7 @@ mXactCacheGetBySet(int nmembers, MultiXactMember *members) */ if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0) { - debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi); + debug_elog3(DEBUG2, "CacheGet: found " XID_FMT, entry->multi); dlist_move_head(&MXactCache, iter.cur); return entry->multi; } @@ -1540,7 +1363,7 @@ mXactCacheGetById(MultiXactId multi, MultiXactMember **members) { dlist_iter iter; - debug_elog3(DEBUG2, "CacheGet: looking for %u", multi); + debug_elog3(DEBUG2, "CacheGet: looking for " XID_FMT, multi); dlist_foreach(iter, &MXactCache) { @@ -1621,7 +1444,7 @@ mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members) MXactCacheMembers--; entry = dlist_container(mXactCacheEnt, node, node); - debug_elog3(DEBUG2, "CachePut: pruning cached multi %u", + debug_elog3(DEBUG2, "CachePut: pruning cached multi " XID_FMT, entry->multi); pfree(entry); @@ -1663,11 +1486,11 @@ mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members) initStringInfo(&buf); - appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid, + appendStringInfo(&buf, XID_FMT " %d[" XID_FMT " (%s)", multi, nmembers, members[0].xid, mxstatus_to_string(members[0].status)); for (i = 1; i < nmembers; i++) - appendStringInfo(&buf, ", %u (%s)", members[i].xid, + appendStringInfo(&buf, ", " 
XID_FMT " (%s)", members[i].xid, mxstatus_to_string(members[i].status)); appendStringInfoChar(&buf, ']'); @@ -1894,6 +1717,8 @@ void BootStrapMultiXact(void) { int slotno; + int64 multiOffsetPageno; + int64 multiMemberPageno; LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); @@ -1904,6 +1729,17 @@ BootStrapMultiXact(void) SimpleLruWritePage(MultiXactOffsetCtl, slotno); Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + multiOffsetPageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact - 1); + if (multiOffsetPageno != 0) + { + /* Create and zero the first page of the offsets log */ + slotno = ZeroMultiXactOffsetPage(multiOffsetPageno, false); + + /* Make sure it's written out */ + SimpleLruWritePage(MultiXactOffsetCtl, slotno); + Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + } + LWLockRelease(MultiXactOffsetSLRULock); LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -1915,7 +1751,32 @@ BootStrapMultiXact(void) SimpleLruWritePage(MultiXactMemberCtl, slotno); Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]); + multiMemberPageno = MXOffsetToMemberPage(MultiXactState->nextOffset - 1); + if (multiMemberPageno != 0) + { + /* Create and zero the first page of the members log */ + slotno = ZeroMultiXactMemberPage(multiMemberPageno, false); + + /* Make sure it's written out */ + SimpleLruWritePage(MultiXactMemberCtl, slotno); + Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]); + } + LWLockRelease(MultiXactMemberSLRULock); + + /* + * If we're starting not from zero offset, initilize dummy multixact to + * evade too long loop in PerformMembersTruncation(). + */ + if (MultiXactState->nextOffset > 0 && MultiXactState->nextMXact > 0) + { + RecordNewMultiXact(FirstMultiXactId, + MultiXactState->nextOffset, + 0, NULL); + RecordNewMultiXact(MultiXactState->nextMXact - 1, + MultiXactState->nextOffset, + 0, NULL); + } } /* @@ -1928,7 +1789,7 @@ BootStrapMultiXact(void) * Control lock must be held at entry, and will be held at exit. 
*/ static int -ZeroMultiXactOffsetPage(int pageno, bool writeXlog) +ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog) { int slotno; @@ -1944,7 +1805,7 @@ ZeroMultiXactOffsetPage(int pageno, bool writeXlog) * Ditto, for MultiXactMember */ static int -ZeroMultiXactMemberPage(int pageno, bool writeXlog) +ZeroMultiXactMemberPage(int64 pageno, bool writeXlog) { int slotno; @@ -1974,7 +1835,7 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog) static void MaybeExtendOffsetSlru(void) { - int pageno; + int64 pageno; pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact); @@ -2009,7 +1870,7 @@ StartupMultiXact(void) { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; - int pageno; + int64 pageno; /* * Initialize offset's idea of the latest page number. @@ -2034,7 +1895,7 @@ TrimMultiXact(void) MultiXactOffset offset; MultiXactId oldestMXact; Oid oldestMXactDB; - int pageno; + int64 pageno; int entryno; int flagsoff; @@ -2144,7 +2005,7 @@ MultiXactGetCheckptMulti(bool is_shutdown, LWLockRelease(MultiXactGenLock); debug_elog6(DEBUG2, - "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u", + "MultiXact: checkpoint is nextMulti " XID_FMT ", nextOffset " INT64_FORMAT ", oldestMulti " XID_FMT " in DB %u", *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB); } @@ -2179,7 +2040,7 @@ void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset) { - debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u", + debug_elog4(DEBUG2, "MultiXact: setting next multi to " XID_FMT " offset " INT64_FORMAT, nextMulti, nextMultiOffset); LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->nextMXact = nextMulti; @@ -2214,47 +2075,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup) { MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - MultiXactId curMulti; 
- bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); - /* - * We pretend that a wrap will happen halfway through the multixact ID - * space, but that's not really true, because multixacts wrap differently - * from transaction IDs. Note that, separately from any concern about - * multixact IDs wrapping, we must ensure that multixact members do not - * wrap. Limits for that are set in SetOffsetVacuumLimit, not here. - */ - multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1); - if (multiWrapLimit < FirstMultiXactId) - multiWrapLimit += FirstMultiXactId; - - /* - * We'll refuse to continue assigning MultiXactIds once we get within 3M - * multi of data loss. See SetTransactionIdLimit. - */ - multiStopLimit = multiWrapLimit - 3000000; - if (multiStopLimit < FirstMultiXactId) - multiStopLimit -= FirstMultiXactId; - - /* - * We'll start complaining loudly when we get within 40M multis of data - * loss. This is kind of arbitrary, but if you let your gas gauge get - * down to 2% of full, would you be looking for the next gas station? We - * need to be fairly liberal about this number because there are lots of - * scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - multiWarnLimit = multiWrapLimit - 40000000; - if (multiWarnLimit < FirstMultiXactId) - multiWarnLimit -= FirstMultiXactId; - /* * We'll start trying to force autovacuums when oldest_datminmxid gets to * be more than autovacuum_multixact_freeze_max_age mxids old. @@ -2264,25 +2087,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, * its value. See SetTransactionIdLimit. 
*/ multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age; - if (multiVacLimit < FirstMultiXactId) - multiVacLimit += FirstMultiXactId; /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; MultiXactState->oldestMultiXactDB = oldest_datoid; MultiXactState->multiVacLimit = multiVacLimit; - MultiXactState->multiWarnLimit = multiWarnLimit; - MultiXactState->multiStopLimit = multiStopLimit; - MultiXactState->multiWrapLimit = multiWrapLimit; - curMulti = MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u", - multiWrapLimit, oldest_datoid))); - /* * Computing the actual limits is only possible once the data directory is * in a consistent state. There's no need to compute the limits while @@ -2294,59 +2106,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, return; Assert(!InRecovery); - - /* Set limits for offset vacuum. */ - needs_offset_vacuum = SetOffsetVacuumLimit(is_startup); - - /* - * If past the autovacuum force point, immediately signal an autovac - * request. The reason for this is that autovac only processes one - * database per invocation. Once it's finished cleaning up the oldest - * database, it'll call here, and we'll signal the postmaster to start - * another iteration immediately if there are still any old databases. - */ - if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - needs_offset_vacuum) && IsUnderPostmaster) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (MultiXactIdPrecedes(multiWarnLimit, curMulti)) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). 
In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. - */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datname, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datoid, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } /* @@ -2365,12 +2124,12 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti, LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti)) { - debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti); + debug_elog3(DEBUG2, "MultiXact: setting next multi to " XID_FMT, minMulti); MultiXactState->nextMXact = minMulti; } if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset)) { - debug_elog3(DEBUG2, "MultiXact: setting next offset to %u", + debug_elog3(DEBUG2, "MultiXact: setting next offset to " INT64_FORMAT, minMultiOffset); 
MultiXactState->nextOffset = minMultiOffset; } @@ -2403,7 +2162,7 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB) static void ExtendMultiXactOffset(MultiXactId multi) { - int pageno; + int64 pageno; /* * No work except at first MultiXactId of a page. But beware: just after @@ -2452,7 +2211,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) flagsbit = MXOffsetToFlagsBitShift(offset); if (flagsoff == 0 && flagsbit == 0) { - int pageno; + int64 pageno; pageno = MXOffsetToMemberPage(offset); @@ -2464,23 +2223,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) LWLockRelease(MultiXactMemberSLRULock); } - /* - * Compute the number of items till end of current page. Careful: if - * addition of unsigned ints wraps around, we're at the last page of - * the last segment; since that page holds a different number of items - * than other pages, we need to do it differently. - */ - if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset) - { - /* - * This is the last page of the last segment; we can compute the - * number of items left to allocate in it without modulo - * arithmetic. - */ - difference = MaxMultiXactOffset - offset + 1; - } - else - difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; + difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; /* * Advance to next page, taking care to properly handle the wraparound @@ -2544,183 +2287,6 @@ GetOldestMultiXactId(void) return oldestMXact; } -/* - * Determine how aggressively we need to vacuum in order to prevent member - * wraparound. - * - * To do so determine what's the oldest member offset and install the limit - * info in MultiXactState, where it can be used to prevent overrun of old data - * in the members SLRU area. - * - * The return value is true if emergency autovacuum is required and false - * otherwise. 
- */ -static bool -SetOffsetVacuumLimit(bool is_startup) -{ - MultiXactId oldestMultiXactId; - MultiXactId nextMXact; - MultiXactOffset oldestOffset = 0; /* placate compiler */ - MultiXactOffset prevOldestOffset; - MultiXactOffset nextOffset; - bool oldestOffsetKnown = false; - bool prevOldestOffsetKnown; - MultiXactOffset offsetStopLimit = 0; - MultiXactOffset prevOffsetStopLimit; - - /* - * NB: Have to prevent concurrent truncation, we might otherwise try to - * lookup an oldestMulti that's concurrently getting truncated away. - */ - LWLockAcquire(MultiXactTruncationLock, LW_SHARED); - - /* Read relevant fields from shared memory. */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMXact = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; - prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown; - prevOldestOffset = MultiXactState->oldestOffset; - prevOffsetStopLimit = MultiXactState->offsetStopLimit; - Assert(MultiXactState->finishedStartup); - LWLockRelease(MultiXactGenLock); - - /* - * Determine the offset of the oldest multixact. Normally, we can read - * the offset from the multixact itself, but there's an important special - * case: if there are no multixacts in existence at all, oldestMXact - * obviously can't point to one. It will instead point to the multixact - * ID that will be assigned the next time one is needed. - */ - if (oldestMultiXactId == nextMXact) - { - /* - * When the next multixact gets created, it will be stored at the next - * offset. - */ - oldestOffset = nextOffset; - oldestOffsetKnown = true; - } - else - { - /* - * Figure out where the oldest existing multixact's offsets are - * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, - * the supposedly-earliest multixact might not really exist. We are - * careful not to fail in that case. 
- */ - oldestOffsetKnown = - find_multixact_start(oldestMultiXactId, &oldestOffset); - - if (oldestOffsetKnown) - ereport(DEBUG1, - (errmsg_internal("oldest MultiXactId member is at offset %u", - oldestOffset))); - else - ereport(LOG, - (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk", - oldestMultiXactId))); - } - - LWLockRelease(MultiXactTruncationLock); - - /* - * If we can, compute limits (and install them MultiXactState) to prevent - * overrun of old data in the members SLRU area. We can only do so if the - * oldest offset is known though. - */ - if (oldestOffsetKnown) - { - /* move back to start of the corresponding segment */ - offsetStopLimit = oldestOffset - (oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); - - /* always leave one segment before the wraparound point */ - offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); - - if (!prevOldestOffsetKnown && !is_startup) - ereport(LOG, - (errmsg("MultiXact member wraparound protections are now enabled"))); - - ereport(DEBUG1, - (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u", - offsetStopLimit, oldestMultiXactId))); - } - else if (prevOldestOffsetKnown) - { - /* - * If we failed to get the oldest offset this time, but we have a - * value from a previous pass through this function, use the old - * values rather than automatically forcing an emergency autovacuum - * cycle again. - */ - oldestOffset = prevOldestOffset; - oldestOffsetKnown = true; - offsetStopLimit = prevOffsetStopLimit; - } - - /* Install the computed values */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - MultiXactState->oldestOffset = oldestOffset; - MultiXactState->oldestOffsetKnown = oldestOffsetKnown; - MultiXactState->offsetStopLimit = offsetStopLimit; - LWLockRelease(MultiXactGenLock); - - /* - * Do we need an emergency autovacuum? If we're not sure, assume yes. 
- */ - return !oldestOffsetKnown || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); -} - -/* - * Return whether adding "distance" to "start" would move past "boundary". - * - * We use this to determine whether the addition is "wrapping around" the - * boundary point, hence the name. The reason we don't want to use the regular - * 2^31-modulo arithmetic here is that we want to be able to use the whole of - * the 2^32-1 space here, allowing for more multixacts than would fit - * otherwise. - */ -static bool -MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, - uint32 distance) -{ - MultiXactOffset finish; - - /* - * Note that offset number 0 is not used (see GetMultiXactIdMembers), so - * if the addition wraps around the UINT_MAX boundary, skip that value. - */ - finish = start + distance; - if (finish < start) - finish++; - - /*----------------------------------------------------------------------- - * When the boundary is numerically greater than the starting point, any - * value numerically between the two is not wrapped: - * - * <----S----B----> - * [---) = F wrapped past B (and UINT_MAX) - * [---) = F not wrapped - * [----] = F wrapped past B - * - * When the boundary is numerically less than the starting point (i.e. the - * UINT_MAX wraparound occurs somewhere in between) then all values in - * between are wrapped: - * - * <----B----S----> - * [---) = F not wrapped past B (but wrapped past UINT_MAX) - * [---) = F wrapped past B (and UINT_MAX) - * [----] = F not wrapped - *----------------------------------------------------------------------- - */ - if (start < boundary) - return finish >= boundary || finish < start; - else - return finish >= boundary && finish < start; -} - /* * Find the starting offset of the given MultiXactId. 
* @@ -2734,7 +2300,7 @@ static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; - int pageno; + int64 pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -2764,97 +2330,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result) return true; } -/* - * Determine how many multixacts, and how many multixact members, currently - * exist. Return false if unable to determine. - */ -static bool -ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) -{ - MultiXactOffset nextOffset; - MultiXactOffset oldestOffset; - MultiXactId oldestMultiXactId; - MultiXactId nextMultiXactId; - bool oldestOffsetKnown; - - LWLockAcquire(MultiXactGenLock, LW_SHARED); - nextOffset = MultiXactState->nextOffset; - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMultiXactId = MultiXactState->nextMXact; - oldestOffset = MultiXactState->oldestOffset; - oldestOffsetKnown = MultiXactState->oldestOffsetKnown; - LWLockRelease(MultiXactGenLock); - - if (!oldestOffsetKnown) - return false; - - *members = nextOffset - oldestOffset; - *multixacts = nextMultiXactId - oldestMultiXactId; - return true; -} - -/* - * Multixact members can be removed once the multixacts that refer to them - * are older than every datminmxid. autovacuum_multixact_freeze_max_age and - * vacuum_multixact_freeze_table_age work together to make sure we never have - * too many multixacts; we hope that, at least under normal circumstances, - * this will also be sufficient to keep us from using too many offsets. - * However, if the average multixact has many members, we might exhaust the - * members space while still using few enough members that these limits fail - * to trigger full table scans for relminmxid advancement. At that point, - * we'd have no choice but to start failing multixact-creating operations - * with an error. 
- * - * To prevent that, if more than a threshold portion of the members space is - * used, we effectively reduce autovacuum_multixact_freeze_max_age and - * to a value just less than the number of multixacts in use. We hope that - * this will quickly trigger autovacuuming on the table or tables with the - * oldest relminmxid, thus allowing datminmxid values to advance and removing - * some members. - * - * As the fraction of the member space currently in use grows, we become - * more aggressive in clamping this value. That not only causes autovacuum - * to ramp up, but also makes any manual vacuums the user issues more - * aggressive. This happens because vacuum_set_xid_limits() clamps the - * freeze table and the minimum freeze age based on the effective - * autovacuum_multixact_freeze_max_age this function returns. In the worst - * case, we'll claim the freeze_max_age to zero, and every vacuum of any - * table will try to freeze every multixact. - * - * It's possible that these thresholds should be user-tunable, but for now - * we keep it simple. - */ -int -MultiXactMemberFreezeThreshold(void) -{ - MultiXactOffset members; - uint32 multixacts; - uint32 victim_multixacts; - double fraction; - - /* If we can't determine member space utilization, assume the worst. */ - if (!ReadMultiXactCounts(&multixacts, &members)) - return 0; - - /* If member space utilization is low, no special action is required. */ - if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) - return autovacuum_multixact_freeze_max_age; - - /* - * Compute a target for relminmxid advancement. The number of multixacts - * we try to eliminate from the system is based on how far we are past - * MULTIXACT_MEMBER_SAFE_THRESHOLD. 
- */ - fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / - (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); - victim_multixacts = multixacts * fraction; - - /* fraction could be > 1.0, but lowest possible freeze age is zero */ - if (victim_multixacts > multixacts) - return 0; - return multixacts - victim_multixacts; -} - typedef struct mxtruncinfo { int earliestExistingPage; @@ -2865,7 +2340,7 @@ typedef struct mxtruncinfo * This callback determines the earliest existing page number. */ static bool -SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data) { mxtruncinfo *trunc = (mxtruncinfo *) data; @@ -2881,35 +2356,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data) /* * Delete members segments [oldest, newOldest) - * - * The members SLRU can, in contrast to the offsets one, be filled to almost - * the full range at once. This means SimpleLruTruncate() can't trivially be - * used - instead the to-be-deleted range is computed using the offsets - * SLRU. C.f. TruncateMultiXact(). */ static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset) { - const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset); - int startsegment = MXOffsetToMemberSegment(oldestOffset); - int endsegment = MXOffsetToMemberSegment(newOldestOffset); - int segment = startsegment; - - /* - * Delete all the segments but the last one. The last segment can still - * contain, possibly partially, valid data. 
- */ - while (segment != endsegment) - { - elog(DEBUG2, "truncating multixact members segment %x", segment); - SlruDeleteSegment(MultiXactMemberCtl, segment); - - /* move to next segment, handling wraparound correctly */ - if (segment == maxsegment) - segment = 0; - else - segment += 1; - } + SimpleLruTruncate(MultiXactMemberCtl, + MXOffsetToMemberPage(newOldestOffset)); } /* @@ -3027,7 +2479,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) else if (!find_multixact_start(oldestMulti, &oldestOffset)) { ereport(LOG, - (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation", + (errmsg("oldest MultiXact %" PRIu64 " not found, earliest MultiXact %" PRIu64 ", skipping truncation", oldestMulti, earliest))); LWLockRelease(MultiXactTruncationLock); return; @@ -3045,21 +2497,27 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) else if (!find_multixact_start(newOldestMulti, &newOldestOffset)) { ereport(LOG, - (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation", + (errmsg("cannot truncate up to MultiXact %" PRIu64 " because it does not exist on disk, skipping truncation", newOldestMulti))); LWLockRelease(MultiXactTruncationLock); return; } elog(DEBUG1, "performing multixact truncation: " - "offsets [%u, %u), offsets segments [%x, %x), " - "members [%u, %u), members segments [%x, %x)", + "offsets [" XID_FMT ", " XID_FMT "), " + "offsets segments [%04x%08x, %04x%08x), " + "members [" INT64_FORMAT ", " INT64_FORMAT "), " + "members segments [%04x%08x, %04x%08x)", oldestMulti, newOldestMulti, - MultiXactIdToOffsetSegment(oldestMulti), - MultiXactIdToOffsetSegment(newOldestMulti), + (uint32) (MultiXactIdToOffsetSegment(oldestMulti) >> 32), + (uint32) MultiXactIdToOffsetSegment(oldestMulti), + (uint32) (MultiXactIdToOffsetSegment(newOldestMulti) >> 32), + (uint32) MultiXactIdToOffsetSegment(newOldestMulti), oldestOffset, newOldestOffset, - 
MXOffsetToMemberSegment(oldestOffset), - MXOffsetToMemberSegment(newOldestOffset)); + (uint32) (MXOffsetToMemberSegment(oldestOffset) >> 32), + (uint32) MXOffsetToMemberSegment(oldestOffset), + (uint32) (MXOffsetToMemberSegment(newOldestOffset) >> 32), + (uint32) MXOffsetToMemberSegment(newOldestOffset)); /* * Do truncation, and the WAL logging of the truncation, in a critical @@ -3112,81 +2570,40 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) * Decide whether a MultiXactOffset page number is "older" for truncation * purposes. Analogous to CLOGPagePrecedes(). * - * Offsetting the values is optional, because MultiXactIdPrecedes() has - * translational symmetry. + * With 64xid this function is just "<", but we left it as a function in order + * for its calls remain "vanilla" like. */ static bool -MultiXactOffsetPagePrecedes(int page1, int page2) +MultiXactOffsetPagePrecedes(int64 page1, int64 page2) { - MultiXactId multi1; - MultiXactId multi2; - - multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; - multi1 += FirstMultiXactId + 1; - multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; - multi2 += FirstMultiXactId + 1; - - return (MultiXactIdPrecedes(multi1, multi2) && - MultiXactIdPrecedes(multi1, - multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1)); + return page1 < page2; } /* - * Decide whether a MultiXactMember page number is "older" for truncation - * purposes. There is no "invalid offset number" so use the numbers verbatim. - */ -static bool -MultiXactMemberPagePrecedes(int page1, int page2) -{ - MultiXactOffset offset1; - MultiXactOffset offset2; - - offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; - offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; - - return (MultiXactOffsetPrecedes(offset1, offset2) && - MultiXactOffsetPrecedes(offset1, - offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1)); -} - -/* - * Decide which of two MultiXactIds is earlier. + * Decide which of two offsets is earlier. 
* - * XXX do we need to do something special for InvalidMultiXactId? - * (Doesn't look like it.) + * With 64xid this function is just "<", but we left it as a function in order + * for its calls remain "vanilla" like. */ -bool -MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) +static bool +MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (offset1 - offset2); return (diff < 0); } /* - * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2? + * Decide whether a MultiXactMember page number is "older" for truncation + * purposes. There is no "invalid offset number" so use the numbers verbatim. * - * XXX do we need to do something special for InvalidMultiXactId? - * (Doesn't look like it.) - */ -bool -MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) -{ - int32 diff = (int32) (multi1 - multi2); - - return (diff <= 0); -} - - -/* - * Decide which of two offsets is earlier. + * With 64xid this function is just "<", but we left it as a function in order + * for its calls remain "vanilla" like. 
*/ static bool -MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) +MultiXactMemberPagePrecedes(int64 page1, int64 page2) { - int32 diff = (int32) (offset1 - offset2); - - return (diff < 0); + return page1 < page2; } /* @@ -3194,10 +2611,10 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) * OFFSETs page (info shows which) */ static void -WriteMZeroPageXlogRec(int pageno, uint8 info) +WriteMZeroPageXlogRec(int64 pageno, uint8 info) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_MULTIXACT_ID, info); } @@ -3242,10 +2659,10 @@ multixact_redo(XLogReaderState *record) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); @@ -3257,10 +2674,10 @@ multixact_redo(XLogReaderState *record) } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -3302,20 +2719,26 @@ multixact_redo(XLogReaderState *record) else if (info == XLOG_MULTIXACT_TRUNCATE_ID) { xl_multixact_truncate xlrec; - int pageno; + int64 pageno; memcpy(&xlrec, XLogRecGetData(record), SizeOfMultiXactTruncate); elog(DEBUG1, "replaying multixact truncation: " - "offsets [%u, %u), offsets segments [%x, %x), " - "members [%u, %u), members segments [%x, %x)", + "offsets [" XID_FMT ", " XID_FMT "), " + "offsets segments [%04x%08x, %04x%08x), " + "members [" INT64_FORMAT ", " INT64_FORMAT "), " + "members segments [%04x%08x, %04x%08x)", xlrec.startTruncOff, xlrec.endTruncOff, - MultiXactIdToOffsetSegment(xlrec.startTruncOff), - 
MultiXactIdToOffsetSegment(xlrec.endTruncOff), + (uint32) (MultiXactIdToOffsetSegment(xlrec.startTruncOff) >> 32), + (uint32) MultiXactIdToOffsetSegment(xlrec.startTruncOff), + (uint32) (MultiXactIdToOffsetSegment(xlrec.endTruncOff) >> 32), + (uint32) MultiXactIdToOffsetSegment(xlrec.endTruncOff), xlrec.startTruncMemb, xlrec.endTruncMemb, - MXOffsetToMemberSegment(xlrec.startTruncMemb), - MXOffsetToMemberSegment(xlrec.endTruncMemb)); + (uint32) (MXOffsetToMemberSegment(xlrec.startTruncMemb) >> 32), + (uint32) MXOffsetToMemberSegment(xlrec.startTruncMemb), + (uint32) (MXOffsetToMemberSegment(xlrec.endTruncMemb) >> 32), + (uint32) MXOffsetToMemberSegment(xlrec.endTruncMemb)); /* should not be required, but more than cheap enough */ LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE); @@ -3359,7 +2782,7 @@ pg_get_multixact_members(PG_FUNCTION_ARGS) if (mxid < FirstMultiXactId) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid MultiXactId: %u", mxid))); + errmsg("invalid MultiXactId: %" PRIu64, mxid))); if (SRF_IS_FIRSTCALL()) { @@ -3395,7 +2818,7 @@ pg_get_multixact_members(PG_FUNCTION_ARGS) HeapTuple tuple; char *values[2]; - values[0] = psprintf("%u", multi->members[multi->iter].xid); + values[0] = psprintf(XID_FMT, multi->members[multi->iter].xid); values[1] = mxstatus_to_string(multi->members[multi->iter].status); tuple = BuildTupleFromCStrings(funccxt->attinmeta, values); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 7585ae24ce9..6771455a44a 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -59,9 +59,11 @@ #include "pgstat.h" #include "storage/fd.h" #include "storage/shmem.h" +#include "utils/builtins.h" #define SlruFileName(ctl, path, seg) \ - snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) + snprintf(path, MAXPGPATH, "%s/%04X%08X", (ctl)->Dir, \ + (uint32) ((seg) >> 32), (uint32) ((seg) & (int64)0xFFFFFFFF)) /* * During SimpleLruWriteAll(), we 
will usually not need to write more than one @@ -75,7 +77,7 @@ typedef struct SlruWriteAllData { int num_files; /* # files actually open */ int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */ - int segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */ + int64 segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */ } SlruWriteAllData; typedef struct SlruWriteAllData *SlruWriteAll; @@ -138,15 +140,16 @@ static int slru_errno; static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata); -static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); -static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, +static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno); +static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata); -static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); -static int SlruSelectLRUPage(SlruCtl ctl, int pageno); +static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid); +static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, - int segpage, void *data); -static void SlruInternalDeleteSegment(SlruCtl ctl, int segno); + int64 segpage, void *data); +static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno); + /* * Initialization of shared memory @@ -162,7 +165,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */ sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */ sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ - sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ + sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */ @@ -225,8 +228,8 
@@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(SlruPageStatus)); shared->page_dirty = (bool *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(bool)); - shared->page_number = (int *) (ptr + offset); - offset += MAXALIGN(nslots * sizeof(int)); + shared->page_number = (int64 *) (ptr + offset); + offset += MAXALIGN(nslots * sizeof(int64)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); @@ -277,7 +280,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, * Control lock must be held at entry, and will be held at exit. */ int -SimpleLruZeroPage(SlruCtl ctl, int pageno) +SimpleLruZeroPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; int slotno; @@ -392,7 +395,7 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno) * Control lock must be held at entry, and will be held at exit. */ int -SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, +SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid) { SlruShared shared = ctl->shared; @@ -492,7 +495,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, * It is unspecified whether the lock will be shared or exclusive. */ int -SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) +SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid) { SlruShared shared = ctl->shared; int slotno; @@ -539,7 +542,7 @@ static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; - int pageno = shared->page_number[slotno]; + int64 pageno = shared->page_number[slotno]; bool ok; /* If a write is in progress, wait for it to finish */ @@ -623,11 +626,11 @@ SimpleLruWritePage(SlruCtl ctl, int slotno) * large enough to contain the given page. 
*/ bool -SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) +SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno) { - int segno = pageno / SLRU_PAGES_PER_SEGMENT; - int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; - int offset = rpageno * BLCKSZ; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 rpageno = pageno % SLRU_PAGES_PER_SEGMENT; + int64 offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; bool result; @@ -681,11 +684,11 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) * read/write operations. We could cache one virtual file pointer ... */ static bool -SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) +SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno) { SlruShared shared = ctl->shared; - int segno = pageno / SLRU_PAGES_PER_SEGMENT; - int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 rpageno = pageno % SLRU_PAGES_PER_SEGMENT; off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; @@ -753,11 +756,11 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * SimpleLruWriteAll. */ static bool -SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) +SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; - int segno = pageno / SLRU_PAGES_PER_SEGMENT; - int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 rpageno = pageno % SLRU_PAGES_PER_SEGMENT; off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd = -1; @@ -928,11 +931,11 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) * SlruPhysicalWritePage. Call this after cleaning up shared-memory state. 
*/ static void -SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) +SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid) { - int segno = pageno / SLRU_PAGES_PER_SEGMENT; - int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; - int offset = rpageno * BLCKSZ; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 rpageno = pageno % SLRU_PAGES_PER_SEGMENT; + int64 offset = rpageno * BLCKSZ; char path[MAXPGPATH]; SlruFileName(ctl, path, segno); @@ -942,52 +945,52 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) case SLRU_OPEN_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %" PRIu64, xid), errdetail("Could not open file \"%s\": %m.", path))); break; case SLRU_SEEK_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), - errdetail("Could not seek in file \"%s\" to offset %u: %m.", + errmsg("could not access status of transaction %" PRIu64, xid), + errdetail("Could not seek in file \"%s\" to offset %" PRIu64 ": %m.", path, offset))); break; case SLRU_READ_FAILED: if (errno) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), - errdetail("Could not read from file \"%s\" at offset %u: %m.", + errmsg("could not access status of transaction %" PRIu64, xid), + errdetail("Could not read from file \"%s\" at offset %" PRIu64 ": %m.", path, offset))); else ereport(ERROR, - (errmsg("could not access status of transaction %u", xid), - errdetail("Could not read from file \"%s\" at offset %u: read too few bytes.", path, offset))); + (errmsg("could not access status of transaction %" PRIu64, xid), + errdetail("Could not read from file \"%s\" at offset %" PRIu64 ": read too few bytes.", path, offset))); break; case SLRU_WRITE_FAILED: if (errno) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction 
%u", xid), - errdetail("Could not write to file \"%s\" at offset %u: %m.", + errmsg("could not access status of transaction %" PRIu64, xid), + errdetail("Could not write to file \"%s\" at offset %" PRIu64 ": %m.", path, offset))); else ereport(ERROR, - (errmsg("could not access status of transaction %u", xid), - errdetail("Could not write to file \"%s\" at offset %u: wrote too few bytes.", + (errmsg("could not access status of transaction %" PRIu64, xid), + errdetail("Could not write to file \"%s\" at offset %" PRIu64 ": wrote too few bytes.", path, offset))); break; case SLRU_FSYNC_FAILED: ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %" PRIu64, xid), errdetail("Could not fsync file \"%s\": %m.", path))); break; case SLRU_CLOSE_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %" PRIu64, xid), errdetail("Could not close file \"%s\": %m.", path))); break; @@ -1013,7 +1016,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) * Control lock must be held at entry, and will be held at exit. 
*/ static int -SlruSelectLRUPage(SlruCtl ctl, int pageno) +SlruSelectLRUPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; @@ -1024,10 +1027,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) int cur_count; int bestvalidslot = 0; /* keep compiler quiet */ int best_valid_delta = -1; - int best_valid_page_number = 0; /* keep compiler quiet */ + int64 best_valid_page_number = 0; /* keep compiler quiet */ int bestinvalidslot = 0; /* keep compiler quiet */ int best_invalid_delta = -1; - int best_invalid_page_number = 0; /* keep compiler quiet */ + int64 best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer assigned */ for (slotno = 0; slotno < shared->num_slots; slotno++) @@ -1068,7 +1071,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) for (slotno = 0; slotno < shared->num_slots; slotno++) { int this_delta; - int this_page_number; + int64 this_page_number; if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; @@ -1158,7 +1161,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied) SlruShared shared = ctl->shared; SlruWriteAllData fdata; int slotno; - int pageno = 0; + int64 pageno = 0; int i; bool ok; @@ -1223,7 +1226,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied) * after it has accrued freshly-written data. */ void -SimpleLruTruncate(SlruCtl ctl, int cutoffPage) +SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage) { SlruShared shared = ctl->shared; int slotno; @@ -1301,7 +1304,7 @@ restart:; * they either can't yet contain anything, or have already been cleaned out. */ static void -SlruInternalDeleteSegment(SlruCtl ctl, int segno) +SlruInternalDeleteSegment(SlruCtl ctl, int64 segno) { char path[MAXPGPATH]; @@ -1324,7 +1327,7 @@ SlruInternalDeleteSegment(SlruCtl ctl, int segno) * Delete an individual SLRU segment, identified by the segment number. 
*/ void -SlruDeleteSegment(SlruCtl ctl, int segno) +SlruDeleteSegment(SlruCtl ctl, int64 segno) { SlruShared shared = ctl->shared; int slotno; @@ -1399,6 +1402,7 @@ SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) } #ifdef USE_ASSERT_CHECKING +#if 0 /* XXX remove unit tests */ static void SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) { @@ -1471,6 +1475,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestPage % SLRU_PAGES_PER_SEGMENT), oldestPage)); } +#endif /* * Unit-test a PagePrecedes function. @@ -1484,10 +1489,12 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) { +#if 0 /* XXX remove unit tests */ /* Test first, middle and last entries of a page. */ SlruPagePrecedesTestOffset(ctl, per_page, 0); SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2); SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1); +#endif } #endif @@ -1497,9 +1504,9 @@ SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) * one containing the page passed as "data". */ bool -SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, void *data) { - int cutoffPage = *(int *) data; + int64 cutoffPage = *(int64 *) data; if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) return true; /* found one; don't iterate any more */ @@ -1512,9 +1519,9 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data * This callback deletes segments prior to the one passed in as "data". 
*/ static bool -SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data) { - int cutoffPage = *(int *) data; + int64 cutoffPage = *(int64 *) data; if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); @@ -1527,7 +1534,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) * This callback deletes all segments. */ bool -SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data) { SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); @@ -1555,8 +1562,8 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) bool retval = false; DIR *cldir; struct dirent *clde; - int segno; - int segpage; + int64 segno; + int64 segpage; cldir = AllocateDir(ctl->Dir); while ((clde = ReadDir(cldir, ctl->Dir)) != NULL) @@ -1565,10 +1572,10 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) len = strlen(clde->d_name); - if ((len == 4 || len == 5 || len == 6) && + if ((len == 12 || len == 13 || len == 14) && strspn(clde->d_name, "0123456789ABCDEF") == len) { - segno = (int) strtol(clde->d_name, NULL, 16); + segno = pg_strtouint64(clde->d_name, NULL, 16); segpage = segno * SLRU_PAGES_PER_SEGMENT; elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s", diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 6a8e521f894..0cf4a1e78e2 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -63,8 +63,8 @@ static SlruCtlData SubTransCtlData; #define SubTransCtl (&SubTransCtlData) -static int ZeroSUBTRANSPage(int pageno); -static bool SubTransPagePrecedes(int page1, int page2); +static bool SubTransPagePrecedes(int64 page1, int64 page2); +static int 
ZeroSUBTRANSPage(int64 pageno); /* @@ -73,7 +73,7 @@ static bool SubTransPagePrecedes(int page1, int page2); void SubTransSetParent(TransactionId xid, TransactionId parent) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; @@ -108,7 +108,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent) TransactionId SubTransGetParent(TransactionId xid) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; @@ -168,7 +168,7 @@ SubTransGetTopmostTransaction(TransactionId xid) * structure that could lead to an infinite loop, so exit. */ if (!TransactionIdPrecedes(parentXid, previousXid)) - elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u", + elog(ERROR, "pg_subtrans contains invalid entry: xid " XID_FMT " points to parent xid " XID_FMT, previousXid, parentXid); } @@ -211,11 +211,14 @@ void BootStrapSUBTRANS(void) { int slotno; + int64 pageno; + + pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid)); LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE); /* Create and zero the first page of the subtrans log */ - slotno = ZeroSUBTRANSPage(0); + slotno = ZeroSUBTRANSPage(pageno); /* Make sure it's written out */ SimpleLruWritePage(SubTransCtl, slotno); @@ -233,7 +236,7 @@ BootStrapSUBTRANS(void) * Control lock must be held at entry, and will be held at exit. 
*/ static int -ZeroSUBTRANSPage(int pageno) +ZeroSUBTRANSPage(int64 pageno) { return SimpleLruZeroPage(SubTransCtl, pageno); } @@ -249,8 +252,8 @@ void StartupSUBTRANS(TransactionId oldestActiveXID) { FullTransactionId nextXid; - int startPage; - int endPage; + int64 startPage; + int64 endPage; /* * Since we don't expect pg_subtrans to be valid across crashes, we @@ -268,9 +271,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID) { (void) ZeroSUBTRANSPage(startPage); startPage++; - /* must account for wraparound */ - if (startPage > TransactionIdToPage(MaxTransactionId)) - startPage = 0; } (void) ZeroSUBTRANSPage(startPage); @@ -307,7 +307,7 @@ CheckPointSUBTRANS(void) void ExtendSUBTRANS(TransactionId newestXact) { - int pageno; + int64 pageno; /* * No work except at first XID of a page. But beware: just after @@ -337,7 +337,7 @@ ExtendSUBTRANS(TransactionId newestXact) void TruncateSUBTRANS(TransactionId oldestXact) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We @@ -347,6 +347,13 @@ TruncateSUBTRANS(TransactionId oldestXact) * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. */ + + if (oldestXact <= FirstNormalTransactionId) + { + SimpleLruTruncate(SubTransCtl, 0); + return; + } + TransactionIdRetreat(oldestXact); cutoffPage = TransactionIdToPage(oldestXact); @@ -357,18 +364,12 @@ TruncateSUBTRANS(TransactionId oldestXact) /* * Decide whether a SUBTRANS page number is "older" for truncation purposes. * Analogous to CLOGPagePrecedes(). + * + * With 64xid this function is just "<", but we left it as a function in order + * for its calls to remain "vanilla" like.
*/ static bool -SubTransPagePrecedes(int page1, int page2) +SubTransPagePrecedes(int64 page1, int64 page2) { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId + 1; - xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId + 1; - - return (TransactionIdPrecedes(xid1, xid2) && - TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1)); + return page1 < page2; } diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 1ba4bbead55..e05cbbc6153 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -157,7 +157,7 @@ TransactionIdDidCommit(TransactionId transactionId) parentXid = SubTransGetParent(transactionId); if (!TransactionIdIsValid(parentXid)) { - elog(WARNING, "no pg_subtrans entry for subcommitted XID %u", + elog(WARNING, "no pg_subtrans entry for subcommitted XID " XID_FMT, transactionId); return false; } @@ -206,7 +206,7 @@ TransactionIdDidAbort(TransactionId transactionId) if (!TransactionIdIsValid(parentXid)) { /* see notes in TransactionIdDidCommit */ - elog(WARNING, "no pg_subtrans entry for subcommitted XID %u", + elog(WARNING, "no pg_subtrans entry for subcommitted XID " XID_FMT, transactionId); return true; } @@ -293,70 +293,6 @@ TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids) TRANSACTION_STATUS_ABORTED, InvalidXLogRecPtr); } -/* - * TransactionIdPrecedes --- is id1 logically < id2? - */ -bool -TransactionIdPrecedes(TransactionId id1, TransactionId id2) -{ - /* - * If either ID is a permanent XID then we can just do unsigned - * comparison. If both are normal, do a modulo-2^32 comparison. - */ - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 < id2); - - diff = (int32) (id1 - id2); - return (diff < 0); -} - -/* - * TransactionIdPrecedesOrEquals --- is id1 logically <= id2? 
- */ -bool -TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) -{ - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 <= id2); - - diff = (int32) (id1 - id2); - return (diff <= 0); -} - -/* - * TransactionIdFollows --- is id1 logically > id2? - */ -bool -TransactionIdFollows(TransactionId id1, TransactionId id2) -{ - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 > id2); - - diff = (int32) (id1 - id2); - return (diff > 0); -} - -/* - * TransactionIdFollowsOrEquals --- is id1 logically >= id2? - */ -bool -TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2) -{ - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 >= id2); - - diff = (int32) (id1 - id2); - return (diff >= 0); -} - /* * TransactionIdLatest --- get latest XID among a main xact and its children diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index eb6ecaef474..f1a83efb45f 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -848,7 +848,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ - elog(ERROR, "failed to find GlobalTransaction for xid %u", xid); + elog(ERROR, "failed to find GlobalTransaction for xid " XID_FMT, xid); cached_xid = xid; cached_gxact = result; @@ -940,7 +940,7 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) /************************************************************************/ #define TwoPhaseFilePath(path, xid) \ - snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid) + snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", (uint32)(xid >> 32), (uint32)xid) /* * 2PC state file format: @@ -1847,13 +1847,13 @@ restoreTwoPhaseData(void) cldir = AllocateDir(TWOPHASE_DIR); while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL) { - if 
(strlen(clde->d_name) == 8 && - strspn(clde->d_name, "0123456789ABCDEF") == 8) + if (strlen(clde->d_name) == 16 && + strspn(clde->d_name, "0123456789ABCDEF") == 16) { TransactionId xid; char *buf; - xid = (TransactionId) strtoul(clde->d_name, NULL, 16); + xid = (TransactionId) pg_strtouint64(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); @@ -2055,7 +2055,7 @@ RecoverPreparedTransactions(void) continue; ereport(LOG, - (errmsg("recovering prepared transaction %u from shared memory", xid))); + (errmsg("recovering prepared transaction %" PRIu64 " from shared memory", xid))); hdr = (TwoPhaseFileHeader *) buf; Assert(TransactionIdEquals(hdr->xid, xid)); @@ -2146,14 +2146,14 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", + (errmsg("removing stale two-phase state file for transaction \"%" PRIu64 "\"", xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", + (errmsg("removing stale two-phase state from memory for transaction \"%" PRIu64 "\"", xid))); PrepareRedoRemove(xid, true); } @@ -2166,14 +2166,14 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", + (errmsg("removing future two-phase state file for transaction \"%" PRIu64 "\"", xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, - (errmsg("removing future two-phase state from memory for transaction %u", + (errmsg("removing future two-phase state from memory for transaction \"%" PRIu64 "\"", xid))); PrepareRedoRemove(xid, true); } @@ -2182,7 +2182,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { - /* Read and validate file */ buf = ReadTwoPhaseFile(xid, false); } else @@ -2198,12 +2197,12 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) ereport(ERROR, 
(errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state file for transaction %u", + errmsg("corrupted two-phase state file for transaction %" PRIu64, xid))); else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state in memory for transaction %u", + errmsg("corrupted two-phase state in memory for transaction %" PRIu64, xid))); } @@ -2359,7 +2358,7 @@ RecordTransactionAbortPrepared(TransactionId xid, * RecordTransactionCommitPrepared ... */ if (TransactionIdDidCommit(xid)) - elog(PANIC, "cannot abort transaction %u, it was already committed", + elog(PANIC, "cannot abort transaction " XID_FMT ", it was already committed", xid); START_CRIT_SECTION(); @@ -2466,7 +2465,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, false /* backward */ , false /* WAL */ ); } - elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid); + elog(DEBUG2, "added 2PC data in shared memory for transaction " XID_FMT, gxact->xid); } /* @@ -2509,7 +2508,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) /* * And now we can clean up any files we may have left. 
*/ - elog(DEBUG2, "removing 2PC data for transaction %u", xid); + elog(DEBUG2, "removing 2PC data for transaction " XID_FMT, xid); if (gxact->ondisk) RemoveTwoPhaseFile(xid, giveWarning); RemoveGXact(gxact); diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index a6e98e71bd1..f52dbc92924 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -68,7 +68,7 @@ GetNewTransactionId(bool isSubXact) Assert(!isSubXact); MyProc->xid = BootstrapTransactionId; ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId; - return FullTransactionIdFromEpochAndXid(0, BootstrapTransactionId); + return FullTransactionIdFromXid(BootstrapTransactionId); } /* safety check, we should never get this far in a HS standby */ @@ -102,11 +102,6 @@ GetNewTransactionId(bool isSubXact) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ - TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit; - TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit; - TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit; - Oid oldest_datoid = ShmemVariableCache->oldestXidDB; - LWLockRelease(XidGenLock); /* @@ -117,48 +112,6 @@ GetNewTransactionId(bool isSubXact) if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, xidStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - 
ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); full_xid = ShmemVariableCache->nextXid; @@ -270,7 +223,7 @@ ReadNextFullTransactionId(void) } /* - * Advance nextXid to the value after a given xid. The epoch is inferred. + * Advance nextXid to the value after a given xid. * This must only be called during recovery or from two-phase start-up code. 
*/ void @@ -278,7 +231,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) { FullTransactionId newNextFullXid; TransactionId next_xid; - uint32 epoch; /* * It is safe to read nextXid without a lock, because this is only called @@ -292,19 +244,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) if (!TransactionIdFollowsOrEquals(xid, next_xid)) return; - /* - * Compute the FullTransactionId that comes after the given xid. To do - * this, we preserve the existing epoch, but detect when we've wrapped - * into a new epoch. This is necessary because WAL records and 2PC state - * currently contain 32 bit xids. The wrap logic is safe in those cases - * because the span of active xids cannot exceed one epoch at any given - * point in the WAL stream. - */ + /* Compute the FullTransactionId that comes after the given xid. */ TransactionIdAdvance(xid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - if (unlikely(xid < next_xid)) - ++epoch; - newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid); + newNextFullXid = FullTransactionIdFromXid(xid); /* * We still need to take a lock to modify the value when there are @@ -345,54 +287,10 @@ void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; - TransactionId xidWarnLimit; - TransactionId xidStopLimit; - TransactionId xidWrapLimit; TransactionId curXid; Assert(TransactionIdIsNormal(oldest_datfrozenxid)); - /* - * The place where we actually get into deep trouble is halfway around - * from the oldest potentially-existing XID. (This calculation is - * probably off by one or two counts, because the special XIDs reduce the - * size of the loop a little bit. But we throw in plenty of slop below, - * so it doesn't matter.) 
- */ - xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1); - if (xidWrapLimit < FirstNormalTransactionId) - xidWrapLimit += FirstNormalTransactionId; - - /* - * We'll refuse to continue assigning XIDs in interactive mode once we get - * within 3M transactions of data loss. This leaves lots of room for the - * DBA to fool around fixing things in a standalone backend, while not - * being significant compared to total XID space. (VACUUM requires an XID - * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA - * might do by reflex, assigns an XID. Hence, we had better be sure - * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two - * completely-idle segments. In the event of edge-case bugs involving - * page or segment arithmetic, idle segments render the bugs unreachable - * outside of single-user mode. - */ - xidStopLimit = xidWrapLimit - 3000000; - if (xidStopLimit < FirstNormalTransactionId) - xidStopLimit -= FirstNormalTransactionId; - - /* - * We'll start complaining loudly when we get within 40M transactions of - * data loss. This is kind of arbitrary, but if you let your gas gauge - * get down to 2% of full, would you be looking for the next gas station? - * We need to be fairly liberal about this number because there are lots - * of scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - xidWarnLimit = xidWrapLimit - 40000000; - if (xidWarnLimit < FirstNormalTransactionId) - xidWarnLimit -= FirstNormalTransactionId; - /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. 
@@ -416,18 +314,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->oldestXid = oldest_datfrozenxid; ShmemVariableCache->xidVacLimit = xidVacLimit; - ShmemVariableCache->xidWarnLimit = xidWarnLimit; - ShmemVariableCache->xidStopLimit = xidStopLimit; - ShmemVariableCache->xidWrapLimit = xidWrapLimit; ShmemVariableCache->oldestXidDB = oldest_datoid; curXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); LWLockRelease(XidGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("transaction ID wrap limit is %u, limited by database with OID %u", - xidWrapLimit, oldest_datoid))); - /* * If past the autovacuum force point, immediately signal an autovac * request. The reason for this is that autovac only processes one @@ -438,41 +328,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. 
- */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index e7b0bc804d8..d1c9412d9b0 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1726,7 +1726,7 @@ RecordTransactionAbort(bool isSubXact) * Check that we haven't aborted halfway through RecordTransactionCommit. 
*/ if (TransactionIdDidCommit(xid)) - elog(PANIC, "cannot abort transaction %u, it was already committed", + elog(PANIC, "cannot abort transaction " XID_FMT ", it was already committed", xid); /* Fetch the data we need for the abort record */ @@ -5436,22 +5436,22 @@ ShowTransactionStateRec(const char *str, TransactionState s) { int i; - appendStringInfo(&buf, ", children: %u", s->childXids[0]); + appendStringInfo(&buf, ", children: " XID_FMT, s->childXids[0]); for (i = 1; i < s->nChildXids; i++) - appendStringInfo(&buf, " %u", s->childXids[i]); + appendStringInfo(&buf, " " XID_FMT, s->childXids[i]); } if (s->parent) ShowTransactionStateRec(str, s->parent); ereport(DEBUG5, - (errmsg_internal("%s(%d) name: %s; blockState: %s; state: %s, xid/subid/cid: %u/%u/%u%s%s", + (errmsg_internal("%s(%d) name: %s; blockState: %s; state: %s, xid/subid/cid: " XID_FMT "/" XID_FMT "/%u%s%s", str, s->nestingLevel, PointerIsValid(s->name) ? s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), - (unsigned int) XidFromFullTransactionId(s->fullTransactionId), - (unsigned int) s->subTransactionId, + XidFromFullTransactionId(s->fullTransactionId), + s->subTransactionId, (unsigned int) currentCommandId, currentCommandIdUsed ? 
" (used)" : "", buf.data))); @@ -5634,6 +5634,17 @@ XactLogCommitRecord(TimestampTz commit_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32)(twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32)(twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; @@ -5647,16 +5658,6 @@ XactLogCommitRecord(TimestampTz commit_time, xl_invals.nmsgs = nmsgs; } - if (TransactionIdIsValid(twophase_xid)) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; - Assert(twophase_gid != NULL); - - if (XLogLogicalInfoActive()) - xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; - } - /* dump transaction origin information */ if (replorigin_session_origin != InvalidRepOriginId) { @@ -5767,23 +5768,24 @@ XactLogAbortRecord(TimestampTz abort_time, xl_subxacts.nsubxacts = nsubxacts; } - if (nrels > 0) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; - xl_relfilenodes.nrels = nrels; - info |= XLR_SPECIAL_REL_UPDATE; - } - if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; + xl_twophase.xid_lo = (uint32)(twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32)(twophase_xid >> 32); Assert(twophase_gid != NULL); if (XLogLogicalInfoActive()) xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; } + if (nrels > 0) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; + xl_relfilenodes.nrels = nrels; + info |= XLR_SPECIAL_REL_UPDATE; + } + if (TransactionIdIsValid(twophase_xid) && XLogLogicalInfoActive()) { xl_xinfo.xinfo |= XACT_XINFO_HAS_DBINFO; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1e1fbe957fa..346ae2ae6b1 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -116,6 +116,10 @@ int 
wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; bool track_wal_io_timing = false; +TransactionId start_xid = 0; +MultiXactId start_mx_id = 0; +MultiXactOffset start_mx_offset = 0; + #ifdef WAL_DEBUG bool XLOG_DEBUG = false; #endif @@ -5395,13 +5399,13 @@ BootStrapXLOG(void) checkPoint.PrevTimeLineID = BootstrapTimeLineID; checkPoint.fullPageWrites = fullPageWrites; checkPoint.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + FullTransactionIdFromXid(Max(FirstNormalTransactionId + 1, start_xid)); checkPoint.nextOid = FirstGenbkiObjectId; - checkPoint.nextMulti = FirstMultiXactId; - checkPoint.nextMultiOffset = 0; - checkPoint.oldestXid = FirstNormalTransactionId; + checkPoint.nextMulti = Max(FirstMultiXactId + 1, start_mx_id); + checkPoint.nextMultiOffset = start_mx_offset; + checkPoint.oldestXid = XidFromFullTransactionId(checkPoint.nextXid) - 1; checkPoint.oldestXidDB = TemplateDbOid; - checkPoint.oldestMulti = FirstMultiXactId; + checkPoint.oldestMulti = checkPoint.nextMulti - 1; checkPoint.oldestMultiDB = TemplateDbOid; checkPoint.oldestCommitTsXid = InvalidTransactionId; checkPoint.newestCommitTsXid = InvalidTransactionId; @@ -6048,14 +6052,14 @@ recoveryStopsBefore(XLogReaderState *record) if (isCommit) { ereport(LOG, - (errmsg("recovery stopping before commit of transaction %u, time %s", + (errmsg("recovery stopping before commit of transaction %" PRIu64 ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } else { ereport(LOG, - (errmsg("recovery stopping before abort of transaction %u, time %s", + (errmsg("recovery stopping before abort of transaction %" PRIu64 ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } @@ -6193,7 +6197,7 @@ recoveryStopsAfter(XLogReaderState *record) xact_info == XLOG_XACT_COMMIT_PREPARED) { ereport(LOG, - (errmsg("recovery stopping after commit of transaction %u, time %s", + (errmsg("recovery stopping after commit of transaction %" PRIu64 ", 
time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } @@ -6201,7 +6205,7 @@ recoveryStopsAfter(XLogReaderState *record) xact_info == XLOG_XACT_ABORT_PREPARED) { ereport(LOG, - (errmsg("recovery stopping after abort of transaction %u, time %s", + (errmsg("recovery stopping after abort of transaction %" PRIu64 ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } @@ -6237,7 +6241,7 @@ getRecoveryStopReason(void) if (recoveryTarget == RECOVERY_TARGET_XID) snprintf(reason, sizeof(reason), - "%s transaction %u", + "%s transaction " XID_FMT, recoveryStopAfter ? "after" : "before", recoveryStopXid); else if (recoveryTarget == RECOVERY_TARGET_TIME) @@ -6832,7 +6836,7 @@ StartupXLOG(void) (errmsg("entering standby mode"))); else if (recoveryTarget == RECOVERY_TARGET_XID) ereport(LOG, - (errmsg("starting point-in-time recovery to XID %u", + (errmsg("starting point-in-time recovery to XID %" PRIu64, recoveryTargetXid))); else if (recoveryTarget == RECOVERY_TARGET_TIME) ereport(LOG, @@ -7129,16 +7133,16 @@ StartupXLOG(void) U64FromFullTransactionId(checkPoint.nextXid), checkPoint.nextOid))); ereport(DEBUG1, - (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u", + (errmsg_internal("next MultiXactId: " XID_FMT "; next MultiXactOffset: " INT64_FORMAT, checkPoint.nextMulti, checkPoint.nextMultiOffset))); ereport(DEBUG1, - (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u", + (errmsg_internal("oldest unfrozen transaction ID: " XID_FMT ", in database %u", checkPoint.oldestXid, checkPoint.oldestXidDB))); ereport(DEBUG1, - (errmsg_internal("oldest MultiXactId: %u, in database %u", + (errmsg_internal("oldest MultiXactId: " XID_FMT ", in database %u", checkPoint.oldestMulti, checkPoint.oldestMultiDB))); ereport(DEBUG1, - (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u", + (errmsg_internal("commit timestamp Xid oldest/newest: " XID_FMT "/" XID_FMT, checkPoint.oldestCommitTsXid, 
checkPoint.newestCommitTsXid))); if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid))) @@ -9394,7 +9398,7 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -10411,7 +10415,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -10470,7 +10474,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 3a7de025657..7181b79adf2 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1656,37 +1656,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) return true; } - -#ifndef FRONTEND - -/* - * Extract the FullTransactionId from a WAL record. - */ -FullTransactionId -XLogRecGetFullXid(XLogReaderState *record) -{ - TransactionId xid, - next_xid; - uint32 epoch; - - /* - * This function is only safe during replay, because it depends on the - * replay state. See AdvanceNextFullTransactionIdPastXid() for more. 
- */ - Assert(AmStartupProcess() || !IsUnderPostmaster); - - xid = XLogRecGetXid(record); - next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - - /* - * If xid is numerically greater than next_xid, it has to be from the last - * epoch. - */ - if (unlikely(xid > next_xid)) - --epoch; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - -#endif diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 48615c0ebcb..cde5413b2f6 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -120,7 +120,7 @@ static const struct typinfo TypInfo[] = { F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid, F_TIDIN, F_TIDOUT}, - {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_DOUBLE, TYPSTORAGE_PLAIN, InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_CIDIN, F_CIDOUT}, @@ -221,7 +221,11 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) argv++; argc--; - while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:-:")) != -1) + start_xid = 0; + start_mx_id = 0; + start_mx_offset = 0; + + while ((flag = getopt(argc, argv, "B:c:d:D:Fkm:o:r:X:Z:-:")) != -1) { switch (flag) { @@ -250,9 +254,30 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) case 'k': bootstrap_data_checksum_version = PG_DATA_CHECKSUM_VERSION; break; + case 'm': + if (sscanf(optarg, XID_FMT, &start_mx_id) != 1 + || !StartMultiXactIdIsValid(start_mx_id)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid start multixact id value"))); + break; + case 'o': + if (sscanf(optarg, XID_FMT, &start_mx_offset) != 1 + || !StartMultiXactOffsetIsValid(start_mx_offset)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid start multixact offset value"))); + break; 
case 'r': strlcpy(OutputFileName, optarg, MAXPGPATH); break; + case 'Z': + if (sscanf(optarg, XID_FMT, &start_xid) != 1 + || !StartTransactionIdIsValid(start_xid)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid start xid value"))); + break; case 'X': { int WalSegSz = strtoul(optarg, NULL, 0); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 6780ec53b7c..0bd28fef992 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -171,8 +171,8 @@ static const FormData_pg_attribute a2 = { .attnum = MinTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_DOUBLE, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, @@ -199,8 +199,8 @@ static const FormData_pg_attribute a4 = { .attnum = MaxTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_DOUBLE, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index ae990d48776..0d485ebc92a 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -146,7 +146,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached, TransactionId xmin; Snapshot snap; - xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data); + xmin = HeapTupleGetXmin(inheritsTuple); snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 85570085450..de1dc2975bd 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -186,8 +186,8 @@ typedef struct AsyncQueueEntry char data[NAMEDATALEN + NOTIFY_PAYLOAD_MAX_LENGTH]; } AsyncQueueEntry; -/* Currently, no field of AsyncQueueEntry requires more than int 
alignment */ -#define QUEUEALIGN(len) INTALIGN(len) +/* AsyncQueueEntry.xid requires 8-byte alignment */ +#define QUEUEALIGN(len) MAXALIGN(len) #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) @@ -444,7 +444,7 @@ bool Trace_notify = false; /* local function prototypes */ static int asyncQueuePageDiff(int p, int q); -static bool asyncQueuePagePrecedes(int p, int q); +static bool asyncQueuePagePrecedes(int64 p, int64 q); static void queue_listen(ListenActionKind action, const char *channel); static void Async_UnlistenOnExit(int code, Datum arg); static void Exec_ListenPreCommit(void); @@ -497,16 +497,10 @@ asyncQueuePageDiff(int p, int q) return diff; } -/* - * Is p < q, accounting for wraparound? - * - * Since asyncQueueIsFull() blocks creation of a page that could precede any - * extant page, we need not assess entries within a page. - */ static bool -asyncQueuePagePrecedes(int p, int q) +asyncQueuePagePrecedes(int64 p, int64 q) { - return asyncQueuePageDiff(p, q) < 0; + return p < q; } /* diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 72bfdc07a49..5fa2d144604 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -55,16 +55,6 @@ */ #define SEQ_LOG_VALS 32 -/* - * The "special area" of a sequence's buffer page looks like this. - */ -#define SEQ_MAGIC 0x1717 - -typedef struct sequence_magic -{ - uint32 magic; -} sequence_magic; - /* * We store a SeqTable item for every sequence we have touched in the current * session. This is needed to hold onto nextval/currval state. 
(We can't @@ -342,10 +332,9 @@ ResetSequence(Oid seq_relid) static void fill_seq_with_data(Relation rel, HeapTuple tuple) { - Buffer buf; - Page page; - sequence_magic *sm; - OffsetNumber offnum; + Buffer buf; + Page page; + OffsetNumber offnum; /* Initialize first page of relation with special magic number */ @@ -354,9 +343,8 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) page = BufferGetPage(buf); - PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(page); - sm->magic = SEQ_MAGIC; + PageInit(page, BufferGetPageSize(buf), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; /* Now insert sequence tuple */ @@ -369,10 +357,10 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway. */ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); + HeapTupleSetXmin(tuple, FrozenTransactionId); HeapTupleHeaderSetXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(tuple, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); @@ -1170,25 +1158,19 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) { Page page; ItemId lp; - sequence_magic *sm; Form_pg_sequence_data seq; *buf = ReadBuffer(rel, 0); LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(*buf); - sm = (sequence_magic *) PageGetSpecialPointer(page); - - if (sm->magic != SEQ_MAGIC) - elog(ERROR, "bad magic number in sequence \"%s\": %08X", - RelationGetRelationName(rel), sm->magic); - lp = PageGetItemId(page, FirstOffsetNumber); Assert(ItemIdIsNormal(lp)); /* Note we currently only bother to set these two fields of *seqdatatuple */ seqdatatuple->t_data = (HeapTupleHeader) 
PageGetItem(page, lp); seqdatatuple->t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(seqdatatuple, page); /* * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on @@ -1199,9 +1181,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + if (HeapTupleGetRawXmax(seqdatatuple) != InvalidTransactionId) { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(seqdatatuple, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); @@ -1863,7 +1845,6 @@ seq_redo(XLogReaderState *record) char *item; Size itemsz; xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); - sequence_magic *sm; if (info != XLOG_SEQ_LOG) elog(PANIC, "seq_redo: unknown op code %u", info); @@ -1882,9 +1863,8 @@ seq_redo(XLogReaderState *record) */ localpage = (Page) palloc(BufferGetPageSize(buffer)); - PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(localpage); - sm->magic = SEQ_MAGIC; + PageInit(localpage, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(localpage)->pd_xid_base = RecentXmin - FirstNormalTransactionId; item = (char *) xlrec + sizeof(xl_seq_rec); itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c94c187d360..fe48e8edbfa 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -63,13 +63,12 @@ /* * GUC parameters */ -int vacuum_freeze_min_age; -int vacuum_freeze_table_age; -int vacuum_multixact_freeze_min_age; -int vacuum_multixact_freeze_table_age; -int vacuum_failsafe_age; -int 
vacuum_multixact_failsafe_age; - +int64 vacuum_freeze_min_age; +int64 vacuum_freeze_table_age; +int64 vacuum_multixact_freeze_min_age; +int64 vacuum_multixact_freeze_table_age; +int64 vacuum_failsafe_age; +int64 vacuum_multixact_failsafe_age; /* A few variables that don't seem worth passing around as parameters */ static MemoryContext vac_context = NULL; @@ -962,24 +961,26 @@ get_all_vacuum_rels(int options) */ void vacuum_set_xid_limits(Relation rel, - int freeze_min_age, - int freeze_table_age, - int multixact_freeze_min_age, - int multixact_freeze_table_age, + int64 freeze_min_age, + int64 freeze_table_age, + int64 multixact_freeze_min_age, + int64 multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit) { - int freezemin; - int mxid_freezemin; - int effective_multixact_freeze_max_age; + int64 freezemin; + int64 mxid_freezemin; + int64 effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; + TransactionId nextXid; MultiXactId oldestMxact; MultiXactId mxactLimit; MultiXactId safeMxactLimit; + MultiXactId nextMxactId; /* * We can always ignore processes running lazy vacuum. This is because we @@ -1028,8 +1029,10 @@ vacuum_set_xid_limits(Relation rel, /* * Compute the cutoff XID, being careful not to generate a "permanent" XID */ - limit = *oldestXmin - freezemin; - if (!TransactionIdIsNormal(limit)) + limit = *oldestXmin; + if (limit > FirstNormalTransactionId + freezemin) + limit -= freezemin; + else limit = FirstNormalTransactionId; /* @@ -1037,15 +1040,17 @@ vacuum_set_xid_limits(Relation rel, * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum * freeze age of zero. 
*/ - safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age; - if (!TransactionIdIsNormal(safeLimit)) + nextXid = ReadNextTransactionId(); + if (nextXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + safeLimit = nextXid - autovacuum_freeze_max_age; + else safeLimit = FirstNormalTransactionId; if (TransactionIdPrecedes(limit, safeLimit)) { ereport(WARNING, (errmsg("oldest xmin is far in the past"), - errhint("Close open transactions soon to avoid wraparound problems.\n" + errhint("Close open transactions soon to enable SLRU truncation.\n" "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); limit = *oldestXmin; } @@ -1057,7 +1062,7 @@ vacuum_set_xid_limits(Relation rel, * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Determine the minimum multixact freeze age to use: as specified by @@ -1078,16 +1083,23 @@ vacuum_set_xid_limits(Relation rel, if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; - safeMxactLimit = - ReadNextMultiXactId() - effective_multixact_freeze_max_age; - if (safeMxactLimit < FirstMultiXactId) + nextMxactId = ReadNextMultiXactId(); + if (nextMxactId > FirstMultiXactId + effective_multixact_freeze_max_age) + safeMxactLimit = nextMxactId - effective_multixact_freeze_max_age; + else safeMxactLimit = FirstMultiXactId; if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit)) { ereport(WARNING, - (errmsg("oldest multixact is far in the past"), - errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); + (errmsg("oldest multixact is far in the past: " + INT64_FORMAT " " INT64_FORMAT " " + INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT " " + INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT " ", + multixact_freeze_min_age, 
vacuum_multixact_freeze_min_age, + mxactLimit, mxid_freezemin, oldestMxact, + safeMxactLimit, effective_multixact_freeze_max_age, nextMxactId), + errhint("Close open transactions with multixacts soon to enable SLRU truncation."))); /* Use the safe limit, unless an older mxact is still running */ if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit)) mxactLimit = oldestMxact; @@ -1099,7 +1111,7 @@ vacuum_set_xid_limits(Relation rel, if (xidFullScanLimit != NULL) { - int freezetable; + int64 freezetable; Assert(mxactFullScanLimit != NULL); @@ -1120,8 +1132,10 @@ vacuum_set_xid_limits(Relation rel, * Compute XID limit causing a full-table vacuum, being careful not to * generate a "permanent" XID. */ - limit = ReadNextTransactionId() - freezetable; - if (!TransactionIdIsNormal(limit)) + limit = ReadNextTransactionId(); + if (limit > FirstNormalTransactionId + freezetable) + limit -= freezetable; + else limit = FirstNormalTransactionId; *xidFullScanLimit = limit; @@ -1145,8 +1159,10 @@ vacuum_set_xid_limits(Relation rel, * Compute MultiXact limit causing a full-table vacuum, being careful * to generate a valid MultiXact value. 
*/ - mxactLimit = ReadNextMultiXactId() - freezetable; - if (mxactLimit < FirstMultiXactId) + mxactLimit = ReadNextMultiXactId(); + if (mxactLimit > FirstMultiXactId + freezetable) + mxactLimit -= freezetable; + else mxactLimit = FirstMultiXactId; *mxactFullScanLimit = mxactLimit; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index eb49817cee4..f2ec8ae70da 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3154,6 +3154,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuphdr; heap_deform_tuple(&tmptup, tupDesc, diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 4ab1302313f..ca4ae69d888 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1070,6 +1070,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuple; result = heap_getattr(&tmptup, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index d328856ae5b..fac14e51bcf 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2538,6 +2538,7 @@ ExecModifyTable(PlanState *pstate) HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ + HeapTupleSetZeroBase(&oldtupdata); oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? 
InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 0568ae123f0..47fe8f9283e 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1061,6 +1061,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(mtuple, tuple); } else { diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 91a89b6d51f..2fa23e46a88 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2771,8 +2771,8 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_STRING_FIELD(idxcomment); WRITE_OID_FIELD(indexOid); WRITE_OID_FIELD(oldNode); - WRITE_UINT_FIELD(oldCreateSubid); - WRITE_UINT_FIELD(oldFirstRelfilenodeSubid); + WRITE_UINT64_FIELD(oldCreateSubid); + WRITE_UINT64_FIELD(oldFirstRelfilenodeSubid); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(primary); WRITE_BOOL_FIELD(isconstraint); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 564a38a13e9..62c69962f5a 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -227,7 +227,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * src/backend/access/heap/README.HOT for discussion. 
*/ if (index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple), TransactionXmin)) { root->glob->transientPlan = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index f6d05628764..78691e12456 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -121,8 +121,8 @@ int autovacuum_vac_ins_thresh; double autovacuum_vac_ins_scale; int autovacuum_anl_thresh; double autovacuum_anl_scale; -int autovacuum_freeze_max_age; -int autovacuum_multixact_freeze_max_age; +int64 autovacuum_freeze_max_age; +int64 autovacuum_multixact_freeze_max_age; double autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; @@ -148,10 +148,10 @@ static TransactionId recentXid; static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ -static int default_freeze_min_age; -static int default_freeze_table_age; -static int default_multixact_freeze_min_age; -static int default_multixact_freeze_table_age; +static int64 default_freeze_min_age; +static int64 default_freeze_table_age; +static int64 default_multixact_freeze_min_age; +static int64 default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; @@ -327,15 +327,15 @@ static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age); + int64 effective_multixact_freeze_max_age); static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, 
PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1157,6 +1157,7 @@ do_start_worker(void) ListCell *cell; TransactionId xidForceLimit; MultiXactId multiForceLimit; + int64 multiMembersThreshold; bool for_xid_wrap; bool for_multi_wrap; avw_dbase *avdb; @@ -1196,17 +1197,18 @@ do_start_worker(void) * particular tables, but not loosened.) */ recentXid = ReadNextTransactionId(); - xidForceLimit = recentXid - autovacuum_freeze_max_age; - /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ - /* this can cause the limit to go backwards by 3, but that's OK */ - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + xidForceLimit = recentXid - autovacuum_freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + multiMembersThreshold = autovacuum_multixact_freeze_max_age; + if (recentMulti > FirstMultiXactId + multiMembersThreshold) + multiForceLimit = recentMulti - multiMembersThreshold; + else + multiForceLimit = FirstMultiXactId; /* * Choose a database to connect to. 
We pick the database that was least @@ -1971,7 +1973,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; - int effective_multixact_freeze_max_age; + int64 effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; @@ -2007,7 +2009,7 @@ do_autovacuum(void) * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Find the pg_database entry and select the default freeze ages. We use @@ -2803,7 +2805,7 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age) + int64 effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2872,10 +2874,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, /* OK, it needs something done */ if (doanalyze || dovacuum) { - int freeze_min_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_table_age; int vac_cost_limit; double vac_cost_delay; int log_min_duration; @@ -2995,7 +2997,7 @@ static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound) @@ -3064,7 +3066,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool 
*doanalyze, @@ -3093,8 +3095,8 @@ relation_needs_vacanalyze(Oid relid, anltuples; /* freeze parameters */ - int freeze_max_age; - int multixact_freeze_max_age; + int64 freeze_max_age; + int64 multixact_freeze_max_age; TransactionId xidForceLimit; MultiXactId multiForceLimit; @@ -3144,17 +3146,19 @@ relation_needs_vacanalyze(Oid relid, av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ - xidForceLimit = recentXid - freeze_max_age; - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + freeze_max_age) + xidForceLimit = recentXid - freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) && TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit)); if (!force_vacuum) { - multiForceLimit = recentMulti - multixact_freeze_max_age; - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + if (recentMulti > FirstMultiXactId + multixact_freeze_max_age) + multiForceLimit = recentMulti - multixact_freeze_max_age; + else + multiForceLimit = FirstMultiXactId; force_vacuum = MultiXactIdIsValid(classForm->relminmxid) && MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit); } diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 59aed6cee6c..66f178aa4f1 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -179,6 +179,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *recor case RM_COMMIT_TS_ID: case RM_REPLORIGIN_ID: case RM_GENERIC_ID: + case RM_HEAP3_ID: /* just deal with xid, and done */ ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(record), buf.origptr); @@ -909,8 +910,12 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileNode 
target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_insert *) XLogRecGetData(r); + if (isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_insert *) rec_data; /* * Ignore insert records without new tuples (this does happen when @@ -966,8 +971,12 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferChange *change; char *data; RelFileNode target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_update *) XLogRecGetData(r); + if (isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_update *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); @@ -1127,8 +1136,12 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) char *tupledata; Size tuplelen; RelFileNode rnode; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); + if (isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_multi_insert *) rec_data; /* * Ignore insert records without new tuples. This happens when a @@ -1185,6 +1198,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We can only figure this out after reassembling the transactions. 
*/ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&(tuple->tuple)); tuple->tuple.t_len = datalen + SizeofHeapTupleHeader; @@ -1276,6 +1290,7 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple) /* we can only figure this out after reassembling the transactions */ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&(tuple->tuple)); /* data is not stored aligned, copy to aligned storage */ memcpy((char *) &xlhdr, diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index 10cbdea124c..7a110fd2d7c 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -1614,7 +1614,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin) SpinLockRelease(&slot->mutex); if (got_new_xmin) - elog(DEBUG1, "got new catalog xmin %u at %X/%X", xmin, + elog(DEBUG1, "got new catalog xmin " XID_FMT " at %X/%X", xmin, LSN_FORMAT_ARGS(current_lsn)); /* candidate already valid with the current flush position, apply */ diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index 9f5bf4b639f..fe02a2e5140 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -50,7 +50,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn) /* fixed fields */ pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); } /* @@ -64,7 +64,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data) if (begin_data->final_lsn == InvalidXLogRecPtr) elog(ERROR, "final_lsn not set in begin message"); begin_data->committime = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); } diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 7aa5647a2c6..c803a1dbf9c 100644 --- 
a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2442,7 +2442,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, /* this is just a sanity check against bad output plugin behaviour */ if (GetCurrentTransactionIdIfAny() != InvalidTransactionId) - elog(ERROR, "output plugin used XID %u", + elog(ERROR, "output plugin used XID " XID_FMT, GetCurrentTransactionId()); /* @@ -2881,7 +2881,7 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid) if (TransactionIdPrecedes(txn->xid, oldestRunningXid)) { - elog(DEBUG2, "aborting old transaction %u", txn->xid); + elog(DEBUG2, "aborting old transaction " XID_FMT, txn->xid); /* remove potential on-disk data, and deallocate this tx */ ReorderBufferCleanupTXN(rb, txn); @@ -3523,7 +3523,7 @@ ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) Size spilled = 0; Size size = txn->size; - elog(DEBUG2, "spill %u changes in XID %u to disk", + elog(DEBUG2, "spill %u changes in XID " XID_FMT " to disk", (uint32) txn->nentries_mem, txn->xid); /* do the same to all child TXs */ @@ -3800,7 +3800,7 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, errno = save_errno ? 
save_errno : ENOSPC; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write to data file for XID %u: %m", + errmsg("could not write to data file for XID %" PRIu64 ": %m", txn->xid))); } pgstat_report_wait_end(); @@ -4443,7 +4443,7 @@ ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid XLogSegNoOffsetToRecPtr(segno, 0, wal_segment_size, recptr); - snprintf(path, MAXPGPATH, "pg_replslot/%s/xid-%u-lsn-%X-%X.spill", + snprintf(path, MAXPGPATH, "pg_replslot/%s/xid-" XID_FMT "-lsn-%X-%X.spill", NameStr(MyReplicationSlot->data.name), xid, LSN_FORMAT_ARGS(recptr)); } @@ -5024,8 +5024,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; - uint32 f_hi, - f_lo; + uint32 f_lsn_hi, + f_lsn_lo, + f_mapped_xid_hi, + f_mapped_xid_lo, + f_create_xid_hi, + f_create_xid_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -5037,11 +5041,14 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &f_dboid, &f_relid, &f_hi, &f_lo, - &f_mapped_xid, &f_create_xid) != 6) + &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo, + &f_mapped_xid_hi, &f_mapped_xid_lo, + &f_create_xid_hi, &f_create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - f_lsn = ((uint64) f_hi) << 32 | f_lo; + f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo; + f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo; + f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo; /* mapping for another database */ if (f_dboid != dboid) @@ -5074,7 +5081,7 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) { RewriteMappingFile *f = (RewriteMappingFile *) lfirst(file); - elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname, + elog(DEBUG1, "applying mapping: \"%s\" in " XID_FMT, f->fname, snapshot->subxip[0]); 
ApplyLogicalMappingFile(tuplecid_data, relid, f->fname); pfree(f); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index dbdc172a2bd..3131a6e8f52 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -847,7 +847,7 @@ SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn) if (rbtxn_prepared(txn) || rbtxn_skip_prepared(txn)) continue; - elog(DEBUG2, "adding a new snapshot to %u at %X/%X", + elog(DEBUG2, "adding a new snapshot to " XID_FMT " at %X/%X", txn->xid, LSN_FORMAT_ARGS(lsn)); /* @@ -922,7 +922,7 @@ SnapBuildPurgeCommittedTxn(SnapBuild *builder) memcpy(builder->committed.xip, workspace, surviving_xids * sizeof(TransactionId)); - elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u", + elog(DEBUG3, "purged committed transactions from %u to %u, xmin: " XID_FMT ", xmax: " XID_FMT, (uint32) builder->committed.xcnt, (uint32) surviving_xids, builder->xmin, builder->xmax); builder->committed.xcnt = surviving_xids; @@ -988,7 +988,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, sub_needs_timetravel = true; needs_snapshot = true; - elog(DEBUG1, "found subtransaction %u:%u with catalog changes", + elog(DEBUG1, "found subtransaction " XID_FMT ":" XID_FMT " with catalog changes", xid, subxid); SnapBuildAddCommittedTxn(builder, subxid); @@ -1014,7 +1014,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, /* if top-level modified catalog, it'll need a snapshot */ if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid)) { - elog(DEBUG2, "found top level transaction %u, with catalog changes", + elog(DEBUG2, "found top level transaction " XID_FMT ", with catalog changes", xid); needs_snapshot = true; needs_timetravel = true; @@ -1027,7 +1027,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, } else if (needs_timetravel) { - elog(DEBUG2, 
"forced transaction %u to do timetravel", xid); + elog(DEBUG2, "forced transaction " XID_FMT " to do timetravel", xid); SnapBuildAddCommittedTxn(builder, xid); } @@ -1149,7 +1149,7 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact xmin = ReorderBufferGetOldestXmin(builder->reorder); if (xmin == InvalidTransactionId) xmin = running->oldestRunningXid; - elog(DEBUG3, "xmin: %u, xmax: %u, oldest running: %u, oldest xmin: %u", + elog(DEBUG3, "xmin: " XID_FMT ", xmax: " XID_FMT ", oldest running: " XID_FMT ", oldest xmin: " XID_FMT, builder->xmin, builder->xmax, running->oldestRunningXid, xmin); LogicalIncreaseXminForSlot(lsn, xmin); @@ -1239,7 +1239,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn ereport(DEBUG1, (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", LSN_FORMAT_ARGS(lsn)), - errdetail_internal("initial xmin horizon of %u vs the snapshot's %u", + errdetail_internal("initial xmin horizon of " XID_FMT " vs the snapshot's " XID_FMT, builder->initial_xmin_horizon, running->oldestRunningXid))); @@ -1322,7 +1322,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn ereport(LOG, (errmsg("logical decoding found initial starting point at %X/%X", LSN_FORMAT_ARGS(lsn)), - errdetail("Waiting for transactions (approximately %d) older than %u to end.", + errdetail("Waiting for transactions (approximately %d) older than %" PRIu64 " to end.", running->xcnt, running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); @@ -1346,7 +1346,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn ereport(LOG, (errmsg("logical decoding found initial consistent point at %X/%X", LSN_FORMAT_ARGS(lsn)), - errdetail("Waiting for transactions (approximately %d) older than %u to end.", + errdetail("Waiting for transactions (approximately %d) older than %" PRIu64 " to end.", running->xcnt, 
running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 2e79302a48a..e1054a6e940 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1044,7 +1044,7 @@ apply_handle_stream_prepare(StringInfo s) logicalrep_read_stream_prepare(s, &prepare_data); set_apply_error_context_xact(prepare_data.xid, prepare_data.prepare_time); - elog(DEBUG1, "received prepare for streamed transaction %u", prepare_data.xid); + elog(DEBUG1, "received prepare for streamed transaction " XID_FMT, prepare_data.xid); /* Replay all the spooled operations. */ apply_spooled_messages(prepare_data.xid, prepare_data.prepare_lsn); @@ -1426,7 +1426,7 @@ apply_handle_stream_commit(StringInfo s) xid = logicalrep_read_stream_commit(s, &commit_data); set_apply_error_context_xact(xid, commit_data.committime); - elog(DEBUG1, "received commit for streamed transaction %u", xid); + elog(DEBUG1, "received commit for streamed transaction " XID_FMT, xid); apply_spooled_messages(xid, commit_data.commit_lsn); @@ -3155,14 +3155,14 @@ subxact_info_add(TransactionId xid) static inline void subxact_filename(char *path, Oid subid, TransactionId xid) { - snprintf(path, MAXPGPATH, "%u-%u.subxacts", subid, xid); + snprintf(path, MAXPGPATH, "%u-" XID_FMT ".subxacts", subid, xid); } /* format filename for file containing serialized changes */ static inline void changes_filename(char *path, Oid subid, TransactionId xid) { - snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid); + snprintf(path, MAXPGPATH, "%u-" XID_FMT ".changes", subid, xid); } /* @@ -3324,7 +3324,7 @@ TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid) (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg_internal("invalid two-phase transaction ID"))); - snprintf(gid, szgid, "pg_gid_%u_%u", subid, xid); + snprintf(gid, szgid, "pg_gid_%u_" XID_FMT, subid, xid); } /* Logical 
Replication Apply worker entry point */ @@ -3641,7 +3641,7 @@ apply_error_callback(void *arg) /* append transaction information */ if (TransactionIdIsNormal(errarg->remote_xid)) { - appendStringInfo(&buf, _(" in transaction %u"), errarg->remote_xid); + appendStringInfo(&buf, _(" in transaction " XID_FMT), errarg->remote_xid); if (errarg->ts != 0) appendStringInfo(&buf, _(" at %s"), timestamptz_to_str(errarg->ts)); diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 7a7eb3784e7..20f8f5e4c7a 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1142,10 +1142,6 @@ static void XLogWalRcvSendHSFeedback(bool immed) { TimestampTz now; - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 xmin_epoch, - catalog_xmin_epoch; TransactionId xmin, catalog_xmin; static TimestampTz sendTime = 0; @@ -1202,30 +1198,15 @@ XLogWalRcvSendHSFeedback(bool immed) catalog_xmin = InvalidTransactionId; } - /* - * Get epoch and adjust if nextXid and oldestXmin are different sides of - * the epoch boundary. - */ - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - xmin_epoch = EpochFromFullTransactionId(nextFullXid); - catalog_xmin_epoch = xmin_epoch; - if (nextXid < xmin) - xmin_epoch--; - if (nextXid < catalog_xmin) - catalog_xmin_epoch--; - - elog(DEBUG2, "sending hot standby feedback xmin %u epoch %u catalog_xmin %u catalog_xmin_epoch %u", - xmin, xmin_epoch, catalog_xmin, catalog_xmin_epoch); + elog(DEBUG2, "sending hot standby feedback xmin " XID_FMT " catalog_xmin " XID_FMT, + xmin, catalog_xmin); /* Construct the message and send it. 
*/ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); - pq_sendint32(&reply_message, xmin); - pq_sendint32(&reply_message, xmin_epoch); - pq_sendint32(&reply_message, catalog_xmin); - pq_sendint32(&reply_message, catalog_xmin_epoch); + pq_sendint64(&reply_message, xmin); + pq_sendint64(&reply_message, catalog_xmin); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) primary_has_standby_xmin = true; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 84915ed95bd..6fd3c6776b8 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -252,7 +252,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); -static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p); @@ -2136,44 +2135,6 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac } } -/* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. - * - * Epoch of nextXid should be same as standby, or if the counter has - * wrapped, then one greater than standby. - * - * This check doesn't care about whether clog exists for these xids - * at all. 
- */ -static bool -TransactionIdInRecentPast(TransactionId xid, uint32 epoch) -{ - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 nextEpoch; - - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - nextEpoch = EpochFromFullTransactionId(nextFullXid); - - if (xid <= nextXid) - { - if (epoch != nextEpoch) - return false; - } - else - { - if (epoch + 1 != nextEpoch) - return false; - } - - if (!TransactionIdPrecedesOrEquals(xid, nextXid)) - return false; /* epoch OK, but it's wrapped around */ - - return true; -} - /* * Hot Standby feedback */ @@ -2181,9 +2142,7 @@ static void ProcessStandbyHSFeedbackMessage(void) { TransactionId feedbackXmin; - uint32 feedbackEpoch; TransactionId feedbackCatalogXmin; - uint32 feedbackCatalogEpoch; TimestampTz replyTime; /* @@ -2192,10 +2151,8 @@ ProcessStandbyHSFeedbackMessage(void) * of this message. */ replyTime = pq_getmsgint64(&reply_message); - feedbackXmin = pq_getmsgint(&reply_message, 4); - feedbackEpoch = pq_getmsgint(&reply_message, 4); - feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); - feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); + feedbackXmin = pq_getmsgint64(&reply_message); + feedbackCatalogXmin = pq_getmsgint64(&reply_message); if (message_level_is_interesting(DEBUG2)) { @@ -2204,11 +2161,9 @@ ProcessStandbyHSFeedbackMessage(void) /* Copy because timestamptz_to_str returns a static buffer */ replyTimeStr = pstrdup(timestamptz_to_str(replyTime)); - elog(DEBUG2, "hot standby feedback xmin %u epoch %u, catalog_xmin %u epoch %u reply_time %s", + elog(DEBUG2, "hot standby feedback xmin " XID_FMT ", catalog_xmin " XID_FMT " reply_time %s", feedbackXmin, - feedbackEpoch, feedbackCatalogXmin, - feedbackCatalogEpoch, replyTimeStr); pfree(replyTimeStr); @@ -2239,18 +2194,6 @@ ProcessStandbyHSFeedbackMessage(void) return; } - /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already 
wrapped around. Ignore if not. - */ - if (TransactionIdIsNormal(feedbackXmin) && - !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch)) - return; - - if (TransactionIdIsNormal(feedbackCatalogXmin) && - !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch)) - return; - /* * Set the WalSender's xmin equal to the standby's requested xmin, so that * the xmin will be taken into account by GetSnapshotData() / diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 69ca52094f9..11143fdc590 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -2428,6 +2428,7 @@ statext_expressions_load(Oid stxoid, int idx) ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = td; + HeapTupleSetZeroBase(&tmptup); tup = heap_copytuple(&tmptup); diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile index fd7c40dcb08..ffcc0fc290e 100644 --- a/src/backend/storage/buffer/Makefile +++ b/src/backend/storage/buffer/Makefile @@ -17,6 +17,7 @@ OBJS = \ buf_table.o \ bufmgr.o \ freelist.o \ - localbuf.o + localbuf.o \ + heap_convert.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b4532948d3f..580eee3ebfd 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -457,7 +457,7 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) ) -static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, +static Buffer ReadBuffer_common(Relation rel, SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit); @@ -759,7 +759,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * miss. 
*/ pgstat_count_buffer_read(reln); - buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence, + buf = ReadBuffer_common(reln, RelationGetSmgr(reln), + reln->rd_rel->relpersistence, forkNum, blockNum, mode, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); @@ -787,7 +788,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, Assert(InRecovery); - return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum, + return ReadBuffer_common(NULL, smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum, mode, strategy, &hit); } @@ -798,7 +799,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, * *hit is set to true if the request was satisfied from shared buffer cache. */ static Buffer -ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, +ReadBuffer_common(Relation rel, SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) { @@ -1030,6 +1031,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, blockNum, relpath(smgr->smgr_rnode, forkNum)))); } + + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION && + !PageIsNew((Page) bufBlock)) + { + Buffer buf = BufferDescriptorGetBuffer(bufHdr); + + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + convert_page(rel, bufBlock, buf, blockNum); + LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); + } } } diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c new file mode 100644 index 00000000000..6c9c5456b04 --- /dev/null +++ b/src/backend/storage/buffer/heap_convert.c @@ -0,0 +1,496 @@ +/* + * heap_convert.c + * Heap page converter from 32bit to 64bit xid format + * + * Copyright (c) 2017, Postgres Professional + * + * src/backend/storage/buffer/heap_convert.c + */ + +#include "postgres.h" + +#include "access/brin_page.h" +#include 
"access/ginblock.h" +#include "access/generic_xlog.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/nbtree.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/pg_am.h" +#include "catalog/pg_control.h" +#include "common/controldata_utils.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/checksum.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +/* + * Sorting support for repack_tuples() + */ +typedef struct itemData +{ + uint16 index; /* linpointer array index */ + int16 offset; /* page offset of item data */ + uint16 alignedlen; /* MAXALIGN(item data len) */ +} itemData; + +/* Initialize special heap page area */ +static void +InitHeapPageSpecial(PageHeader new, TransactionId xid_base, + MultiXactId multi_base, TransactionId prune_xid) +{ + HeapPageSpecial special; + + new->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)); + + special = (HeapPageSpecial) ((char *) new + new->pd_special); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + HeapPageSetPruneXid(new, prune_xid); +} + +/* Used by get_previous_edition() */ +ControlFileData *ControlFile = NULL; + +static bool was_32bit_xid(PageHeader page); +static void convert_heap(Relation rel, Page page, Buffer buf, BlockNumber blkno); +static void repack_heap_tuples(Relation rel, Page page, Buffer buf, + BlockNumber blkno, bool double_xmax); +static void tuple_set_double_xmax(HeapTupleHeader tuple); + + +/* + * itemoffcompare + * Sorting support for repack_tuples() + */ +int +itemoffcompare(const void *item1, const void *item2) +{ + /* Sort in decreasing itemoff order */ + return ((itemIdCompactData *) item2)->itemoff - + ((itemIdCompactData *) item1)->itemoff; +} + +static bool +was_32bit_xid(PageHeader page) +{ + return PageGetPageLayoutVersion(page) < 5; +} + +void +convert_page(Relation rel, Page page, Buffer buf, BlockNumber 
blkno) +{ + PageHeader hdr = (PageHeader) page; + GenericXLogState *state = NULL; + Page tmp_page = page; + uint16 checksum; + + if (!rel) + return; + + /* Verify checksum */ + if (hdr->pd_checksum) + { + checksum = pg_checksum_page((char *) page, blkno); + if (checksum != hdr->pd_checksum) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("page verification failed, calculated checksum %u but expected %u", + checksum, hdr->pd_checksum))); + } + + /* Start xlog record */ + if (!XactReadOnly && XLogIsNeeded() && RelationNeedsWAL(rel)) + { + state = GenericXLogStart(rel); + tmp_page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE); + } + + PageSetPageSizeAndVersion((hdr), PageGetPageSize(hdr), + PG_PAGE_LAYOUT_VERSION); + + if (was_32bit_xid(hdr)) + { + switch (rel->rd_rel->relkind) + { + case 'r': + case 'p': + case 't': + case 'm': + convert_heap(rel, tmp_page, buf, blkno); + break; + case 'i': + /* no need to convert index */ + case 'S': + /* no real need to convert sequences */ + break; + default: + elog(ERROR, + "Conversion for relkind '%c' is not implemented", + rel->rd_rel->relkind); + } + } + + /* + * Mark buffer dirty unless this is a read-only transaction (e.g. query + * is running on hot standby instance) + */ + if (!XactReadOnly) + { + /* Finish xlog record */ + if (XLogIsNeeded() && RelationNeedsWAL(rel)) + { + Assert(state != NULL); + GenericXLogFinish(state); + } + + MarkBufferDirty(buf); + } + + hdr = (PageHeader) page; + hdr->pd_checksum = pg_checksum_page((char *) page, blkno); +} + +static void +convert_heap(Relation rel, Page page, Buffer buf, BlockNumber blkno) +{ + PageHeader page_hdr = (PageHeader) page; + bool heap_special_fits; + + /* Is there enough space to fit new page format? 
*/ + heap_special_fits = page_hdr->pd_upper - page_hdr->pd_lower >= SizeOfPageSpecial; + repack_heap_tuples(rel, page, buf, blkno, !heap_special_fits); +} + +/* + * Convert possibly wrapped around heap tuple's transaction and + * multixact IDs after pg_upgrade. + */ +static void +convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base, + MultiXactId mxid_base, bool double_xmax) +{ + TransactionId xid; + + /* Convert xmin xid */ + if (double_xmax) + tuple_set_double_xmax(tuple); + else + { + /* Subtract xid_base from normal xmin */ + xid = tuple->t_choice.t_heap.t_xmin; + + if (TransactionIdIsNormal(xid)) + { + Assert(xid >= xid_base + FirstNormalTransactionId); + tuple->t_choice.t_heap.t_xmin = xid - xid_base; + } + } + + /* If tuple has multixact flag, handle mxid wraparound */ + if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) && + !(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + MultiXactId mxid = tuple->t_choice.t_heap.t_xmax; + + /* Handle mxid wraparound */ + if (mxid < mxid_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= mxid_base); + } + + if (double_xmax) + { + /* Save converted mxid into xmin/max */ + HeapTupleHeaderSetDoubleXmax(tuple, mxid); + } + else + { + /* + * Save converted mxid offset relative to (minmxid - 1), + * which will be page's mxid base. 
+ */ + Assert(mxid - mxid_base + FirstMultiXactId <= PG_UINT32_MAX); + tuple->t_choice.t_heap.t_xmax = + (uint32) (mxid - mxid_base + FirstMultiXactId); + } + } + /* Convert xmax xid */ + else if (!(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + xid = tuple->t_choice.t_heap.t_xmax; + + if (double_xmax) + { + /* Save converted mxid into xmin/max */ + HeapTupleHeaderSetDoubleXmax(tuple, xid); + } + else if (TransactionIdIsNormal(xid)) + { + /* Subtract xid_base from normal xmax */ + Assert(xid >= xid_base + FirstNormalTransactionId); + tuple->t_choice.t_heap.t_xmax = xid - xid_base; + } + } +} + +/* + * Compute page's [m]xid min/max values for based/"double xmax" + * format conversions + */ +static void +compute_xid_min_max(HeapTuple tuple, MultiXactId mxid_base, + TransactionId *xid_min, TransactionId *xid_max, + MultiXactId *mxid_min, MultiXactId *mxid_max) +{ + if (!HeapTupleHeaderXminInvalid(tuple->t_data) && + !HeapTupleHeaderXminFrozen(tuple->t_data)) + { + TransactionId xid = HeapTupleGetRawXmin(tuple); + + if (TransactionIdIsNormal(xid)) + { + if (*xid_max < xid) + *xid_max = xid; + if (*xid_min > xid) + *xid_min = xid; + } + } + + if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xid; + + if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + { + MultiXactId mxid = HeapTupleGetRawXmax(tuple); + + Assert(MultiXactIdIsValid(mxid)); + + /* Handle mxid wraparound */ + if (mxid < mxid_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= mxid_base); + } + + if (*mxid_max < mxid) + *mxid_max = mxid; + if (*mxid_min > mxid) + *mxid_min = mxid; + + /* + * Also take into account hidden update xid, which can be + * extracted by the vacuum. 
+ */ + if (tuple->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY) + xid = InvalidTransactionId; + else + xid = HeapTupleGetUpdateXid(tuple); + } + else + { + xid = HeapTupleGetRawXmax(tuple); + } + + if (TransactionIdIsNormal(xid)) + { + if (*xid_max < xid) + *xid_max = xid; + if (*xid_min > xid) + *xid_min = xid; + } + } +} + +/* Returns true, if "double xmax" format */ +static bool +init_heap_page_header(Relation rel, BlockNumber blkno, PageHeader new_hdr, + TransactionId prune_xid, bool header_fits, + TransactionId xid_min, TransactionId xid_max, + MultiXactId mxid_min, MultiXactId mxid_max, + TransactionId *xid_base, MultiXactId *mxid_base) +{ + if (header_fits && + (xid_max == InvalidTransactionId || + xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId) && + (mxid_max == InvalidMultiXactId || + mxid_max - mxid_min <= MaxShortTransactionId - FirstMultiXactId)) + { + Assert(xid_max == InvalidTransactionId || xid_max >= xid_min); + Assert(mxid_max == InvalidMultiXactId || mxid_max >= mxid_min); + *xid_base = xid_max == InvalidTransactionId ? InvalidTransactionId : xid_min - FirstNormalTransactionId; + *mxid_base = mxid_max == InvalidMultiXactId ? 
InvalidMultiXactId : mxid_min - FirstMultiXactId; + + InitHeapPageSpecial(new_hdr, *xid_base, *mxid_base, prune_xid); + return false; + } + else + { + /* No space for special area, switch to "double xmax" format */ + new_hdr->pd_special = BLCKSZ; + + *xid_base = InvalidTransactionId; + *mxid_base = InvalidMultiXactId; + + elog(DEBUG2, "convert heap page %u of relation %u to double xmax format", + blkno, RelationGetRelid(rel)); + return true; + } +} + +/* + * repack_heap_tuples + * Convert heap page format reusing space of dead tuples + */ +static void +repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno, + bool perhaps_double_xmax) +{ + itemIdCompactData items[MaxHeapTuplesPerPage]; + itemIdCompact itemPtr = items; + ItemId lp; + int nitems = 0; + int maxoff = PageGetMaxOffsetNumber(page); + Offset upper; + int idx; + bool double_xmax; + + PageHeader hdr; + PageHeader new_hdr; + char new_page[BLCKSZ]; + MultiXactId mxid_base = rel->rd_rel->relminmxid; + MultiXactId mxid_min = MaxMultiXactId; + MultiXactId mxid_max = InvalidMultiXactId; + TransactionId xid_base = rel->rd_rel->relfrozenxid; + TransactionId xid_min = MaxTransactionId; + TransactionId xid_max = InvalidTransactionId; + + int occupied_space = 0; + + hdr = (PageHeader) page; + + if (TransactionIdIsNormal(hdr->pd_prune_xid)) + xid_min = xid_max = hdr->pd_prune_xid; + + for (idx = 0; idx < maxoff; idx++) + { + HeapTupleData tuple; + + lp = PageGetItemId(page, idx + 1); + + /* Skip redirects and items without storage */ + if (!ItemIdHasStorage(lp)) + continue; + + /* Build in-memory tuple representation */ + tuple.t_tableOid = 1; /* doesn't matter in this case */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); + tuple.t_xid_base = 0; + tuple.t_multi_base = 0; + tuple.t_len = ItemIdGetLength(lp); + ItemPointerSet(&(tuple.t_self), blkno, ItemIdGetOffset(lp)); + + /* + * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or + * HEAPTUPLE_RECENTLY_DEAD. 
And since this is the first time we read + * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See + * HeapTupleSatisfiesVacuum() for details + */ + if (perhaps_double_xmax && + HeapTupleSatisfiesVacuum(&tuple, FirstUpgradedTransactionId, buf) == HEAPTUPLE_DEAD) + { + ItemIdSetDead(lp); + } + + if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp)) + { + itemPtr->offsetindex = idx; + itemPtr->itemoff = ItemIdGetOffset(lp); + if (unlikely(itemPtr->itemoff < hdr->pd_upper || + itemPtr->itemoff >= hdr->pd_special)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item pointer: %u", + itemPtr->itemoff))); + itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp)); + occupied_space += itemPtr->alignedlen; + nitems++; + itemPtr++; + if (perhaps_double_xmax) + { + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleHeaderSetXminFrozen(tuple.t_data); + } + + compute_xid_min_max(&tuple, mxid_base, + &xid_min, &xid_max, + &mxid_min, &mxid_max); + } + } + + /* Write new header */ + new_hdr = (PageHeader) new_page; + *new_hdr = *hdr; + new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData); + + double_xmax = init_heap_page_header(rel, blkno, new_hdr, + hdr->pd_prune_xid, + BLCKSZ - new_hdr->pd_lower - occupied_space >= sizeof(HeapPageSpecialData), + xid_min, xid_max, + mxid_min, mxid_max, + &xid_base, &mxid_base); + if (!perhaps_double_xmax && double_xmax) + return repack_heap_tuples(rel, page, buf, blkno, true); + + /* Copy ItemIds with an offset */ + memcpy(new_page + SizeOfPageHeaderData, + page + SizeOfPageHeaderData, + hdr->pd_lower - SizeOfPageHeaderData); + + /* Move live tuples */ + upper = new_hdr->pd_special; + for (idx = 0; idx < nitems; idx++) + { + HeapTupleHeader tuple; + + itemPtr = &items[idx]; + lp = PageGetItemId(new_page, itemPtr->offsetindex + 1); + upper -= itemPtr->alignedlen; + + memcpy((char *) new_page + upper, + (char *) page + itemPtr->itemoff, + itemPtr->alignedlen); + + tuple = (HeapTupleHeader) (((char 
*) new_page) + upper); + + convert_heap_tuple_xids(tuple, xid_base, mxid_base, double_xmax); + + lp->lp_off = upper; + + occupied_space -= itemPtr->alignedlen; + } + Assert(occupied_space == 0); + + new_hdr->pd_upper = upper; + if (new_hdr->pd_lower > new_hdr->pd_upper) + elog(ERROR, "cannot convert block %u of relation '%s'", + blkno, RelationGetRelationName(rel)); + + memcpy(page, new_page, BLCKSZ); +} + +/* + * Convert tuple for "double xmax" page format. + */ +static void +tuple_set_double_xmax(HeapTupleHeader tuple) +{ + tuple->t_infomask |= HEAP_XMIN_FROZEN; + tuple->t_choice.t_heap.t_xmin = 0; +} diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index a9945c80eb4..ab13a35e7fe 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -66,7 +66,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" -#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) +#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var)))) /* Our shared memory area */ typedef struct ProcArrayStruct @@ -351,9 +351,6 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); static void MaintainLatestCompletedXid(TransactionId latestXid); static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); - -static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, - TransactionId xid); static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons); /* @@ -953,10 +950,7 @@ MaintainLatestCompletedXid(TransactionId latestXid) Assert(LWLockHeldByMe(ProcArrayLock)); if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) - { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(cur_latest, latestXid); - } + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); Assert(IsBootstrapProcessingMode() || 
FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); @@ -969,7 +963,6 @@ static void MaintainLatestCompletedXidRecovery(TransactionId latestXid) { FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid; - FullTransactionId rel; Assert(AmStartupProcess() || !IsUnderPostmaster); Assert(LWLockHeldByMe(ProcArrayLock)); @@ -979,14 +972,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid) * latestCompletedXid to be initialized in recovery. But in recovery it's * safe to access nextXid without a lock for the startup process. */ - rel = ShmemVariableCache->nextXid; Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid)); if (!FullTransactionIdIsValid(cur_latest) || TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(rel, latestXid); + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); @@ -1096,7 +1087,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " - "until oldest active xid on standby is at least %u (now %u)", + "until oldest active xid on standby is at least " XID_FMT " (now " XID_FMT ")", standbySnapshotPendingXmin, running->oldestRunningXid); return; @@ -1177,7 +1168,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i])) { elog(DEBUG1, - "found duplicated transaction %u for KnownAssignedXids insertion", + "found duplicated transaction " XID_FMT " for KnownAssignedXids insertion", xids[i]); continue; } @@ -1262,7 +1253,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " - "until oldest active xid on standby is at least %u (now %u)", + "until oldest active xid on standby 
is at least " XID_FMT " (now " XID_FMT ")", standbySnapshotPendingXmin, running->oldestRunningXid); } @@ -1455,7 +1446,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + pxid = UINT64_ACCESS_ONCE(other_xids[pgxactoff]); if (!TransactionIdIsValid(pxid)) continue; @@ -1487,7 +1478,7 @@ TransactionIdIsInProgress(TransactionId xid) for (j = pxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]); + TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]); if (TransactionIdEquals(cxid, xid)) { @@ -1611,7 +1602,7 @@ TransactionIdIsActive(TransactionId xid) TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[i]); + pxid = UINT64_ACCESS_ONCE(other_xids[i]); if (!TransactionIdIsValid(pxid)) continue; @@ -1762,8 +1753,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionId xmin; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); - xmin = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(other_xids[index]); + xmin = UINT64_ACCESS_ONCE(proc->xmin); /* * Consider both the transaction's Xmin, and its Xid. @@ -2304,7 +2295,7 @@ GetSnapshotData(Snapshot snapshot) for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + TransactionId xid = UINT64_ACCESS_ONCE(other_xids[pgxactoff]); uint8 statusFlags; Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff); @@ -2454,12 +2445,7 @@ GetSnapshotData(Snapshot snapshot) FullTransactionId def_vis_fxid_data; FullTransactionId oldestfxid; - /* - * Converting oldestXid is only safe when xid horizon cannot advance, - * i.e. holding locks. 
While we don't hold the lock anymore, all the - * necessary data has been gathered with lock held. - */ - oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); + oldestfxid = FullTransactionIdFromXid(oldestxid); /* apply vacuum_defer_cleanup_age */ def_vis_xid_data = @@ -2482,8 +2468,8 @@ GetSnapshotData(Snapshot snapshot) def_vis_xid = TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid); - def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid); - def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data); + def_vis_fxid = FullTransactionIdFromXid(def_vis_xid); + def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data); /* * Check if we can increase upper bound. As a previous @@ -2502,7 +2488,7 @@ GetSnapshotData(Snapshot snapshot) /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ if (TransactionIdIsNormal(myxid)) GlobalVisTempRels.definitely_needed = - FullXidRelativeTo(latest_completed, myxid); + FullTransactionIdFromXid(myxid); else { GlobalVisTempRels.definitely_needed = latest_completed; @@ -2609,7 +2595,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, /* * Likewise, let's just make real sure its xmin does cover us. */ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(proc->xmin); if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -2664,7 +2650,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * can't go backwards. Also, make sure it's running in the same database, * so that the per-database xmin cannot go backwards. 
*/ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(proc->xmin); if (proc->databaseId == MyDatabaseId && TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, xmin)) @@ -2782,7 +2768,7 @@ GetRunningTransactionData(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); /* * We don't need to store transactions that don't have a TransactionId @@ -2921,7 +2907,7 @@ GetOldestActiveTransactionId(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); if (!TransactionIdIsNormal(xid)) continue; @@ -3019,7 +3005,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); if (!TransactionIdIsNormal(xid)) continue; @@ -3301,7 +3287,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = UINT64_ACCESS_ONCE(proc->xmin); if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -3396,7 +3382,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once - can't change on us, but good coding */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = UINT64_ACCESS_ONCE(proc->xmin); /* * We ignore an invalid pxmin because this means that backend has @@ -3978,7 +3964,7 @@ XidCacheRemoveRunningXids(TransactionId xid, * debug warning. 
*/ if (j < 0 && !MyProc->subxidStatus.overflowed) - elog(WARNING, "did not find subXID %u in MyProc", anxid); + elog(WARNING, "did not find subXID " XID_FMT " in MyProc", anxid); } for (j = MyProc->subxidStatus.count - 1; j >= 0; j--) @@ -3994,7 +3980,7 @@ XidCacheRemoveRunningXids(TransactionId xid, } /* Ordinarily we should have found it, unless the cache has overflowed */ if (j < 0 && !MyProc->subxidStatus.overflowed) - elog(WARNING, "did not find subXID %u in MyProc", xid); + elog(WARNING, "did not find subXID " XID_FMT " in MyProc", xid); /* Also advance global latestCompletedXid while holding the lock */ MaintainLatestCompletedXid(latestXid); @@ -4101,17 +4087,13 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) { GlobalVisSharedRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->shared_oldest_nonremovable); + FullTransactionIdFromXid(horizons->shared_oldest_nonremovable); GlobalVisCatalogRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->catalog_oldest_nonremovable); + FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable); GlobalVisDataRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->data_oldest_nonremovable); + FullTransactionIdFromXid(horizons->data_oldest_nonremovable); GlobalVisTempRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->temp_oldest_nonremovable); + FullTransactionIdFromXid(horizons->temp_oldest_nonremovable); /* * In longer running transactions it's possible that transactions we @@ -4200,15 +4182,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid) { FullTransactionId fxid; - /* - * Convert 32 bit argument to FullTransactionId. We can do so safely - * because we know the xid has to, at the very least, be between - * [oldestXid, nextFullXid), i.e. within 2 billion of xid. 
To avoid taking - * a lock to determine either, we can just compare with - * state->definitely_needed, which was based on those value at the time - * the current snapshot was built. - */ - fxid = FullXidRelativeTo(state->definitely_needed, xid); + fxid = FullTransactionIdFromXid(xid); return GlobalVisTestIsRemovableFullXid(state, fxid); } @@ -4271,32 +4245,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) return GlobalVisTestIsRemovableXid(state, xid); } -/* - * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it - * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). - * - * Be very careful about when to use this function. It can only safely be used - * when there is a guarantee that xid is within MaxTransactionId / 2 xids of - * rel. That e.g. can be guaranteed if the caller assures a snapshot is - * held by the backend and xid is from a table (where vacuum/freezing ensures - * the xid has to be within that range), or if xid is from the procarray and - * prevents xid wraparound that way. 
- */ -static inline FullTransactionId -FullXidRelativeTo(FullTransactionId rel, TransactionId xid) -{ - TransactionId rel_xid = XidFromFullTransactionId(rel); - - Assert(TransactionIdIsValid(xid)); - Assert(TransactionIdIsValid(rel_xid)); - - /* not guaranteed to find issues, but likely to catch mistakes */ - AssertTransactionIdInAllowableRange(xid); - - return FullTransactionIdFromU64(U64FromFullTransactionId(rel) - + (int32) (xid - rel_xid)); -} - /* ---------------------------------------------- * KnownAssignedTransactionIds sub-module @@ -4371,7 +4319,7 @@ RecordKnownAssignedTransactionIds(TransactionId xid) Assert(TransactionIdIsValid(xid)); Assert(TransactionIdIsValid(latestObservedXid)); - elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u", + elog(trace_recovery(DEBUG4), "record known xact " XID_FMT " latestObservedXid " XID_FMT, xid, latestObservedXid); /* @@ -4882,7 +4830,7 @@ KnownAssignedXidsRemove(TransactionId xid) { Assert(TransactionIdIsValid(xid)); - elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); + elog(trace_recovery(DEBUG4), "remove KnownAssignedXid " XID_FMT, xid); /* * Note: we cannot consider it an error to remove an XID that's not @@ -4942,7 +4890,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid) return; } - elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid); + elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to " XID_FMT, removeXid); /* * Mark entries invalid starting at the tail. 
Since array is sorted, we @@ -5130,7 +5078,7 @@ KnownAssignedXidsDisplay(int trace_level) if (KnownAssignedXidsValid[i]) { nxids++; - appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]); + appendStringInfo(&buf, "[%d]=" XID_FMT " ", i, KnownAssignedXids[i]); } } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 14968559255..6495b157da4 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -36,7 +36,7 @@ #include "utils/timestamp.h" /* User-settable GUC parameters */ -int vacuum_defer_cleanup_age; +int64 vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; bool log_recovery_conflict_waits = false; @@ -994,13 +994,13 @@ StandbyReleaseLockList(List *locks) LOCKTAG locktag; elog(trace_recovery(DEBUG4), - "releasing recovery lock: xid %u db %u rel %u", + "releasing recovery lock: xid " XID_FMT " db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) { elog(LOG, - "RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u", + "RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid " XID_FMT " database %u relation %u", lock->xid, lock->dbOid, lock->relOid); Assert(false); } @@ -1305,7 +1305,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), - "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", + "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid " XID_FMT " latest complete " XID_FMT " next xid " XID_FMT ")", CurrRunningXacts->xcnt, LSN_FORMAT_ARGS(recptr), CurrRunningXacts->oldestRunningXid, @@ -1313,7 +1313,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) 
CurrRunningXacts->nextXid); else elog(trace_recovery(DEBUG2), - "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)", + "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid " XID_FMT " latest complete " XID_FMT " next xid " XID_FMT ")", CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt, LSN_FORMAT_ARGS(recptr), CurrRunningXacts->oldestRunningXid, diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 2db0424ad94..a8d34452cb4 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -1135,10 +1135,17 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1); break; case LOCKTAG_TRANSACTION: - appendStringInfo(buf, - _("transaction %u"), - tag->locktag_field1); + { + char xid_str[32]; + + /* make translatable string */ + snprintf(xid_str, sizeof(xid_str), XID_FMT, + (TransactionId) tag->locktag_field1 | + ((TransactionId) tag->locktag_field2 << 32)); + + appendStringInfo(buf, _("transaction %s"), xid_str); break; + } case LOCKTAG_VIRTUALTRANSACTION: appendStringInfo(buf, _("virtual transaction %d/%u"), diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 4f4d5b0d20f..2cf89234278 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -438,7 +438,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool SerialPagePrecedesLogically(int page1, int page2); +static bool SerialPagePrecedesLogically(int64 page1, int64 page2); static void SerialInit(void); static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid); @@ -788,27 +788,21 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) * Analogous to CLOGPagePrecedes(). 
*/ static bool -SerialPagePrecedesLogically(int page1, int page2) +SerialPagePrecedesLogically(int64 page1, int64 page2) { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * SERIAL_ENTRIESPERPAGE; - xid1 += FirstNormalTransactionId + 1; - xid2 = ((TransactionId) page2) * SERIAL_ENTRIESPERPAGE; - xid2 += FirstNormalTransactionId + 1; - - return (TransactionIdPrecedes(xid1, xid2) && - TransactionIdPrecedes(xid1, xid2 + SERIAL_ENTRIESPERPAGE - 1)); + return page1 < page2; } #ifdef USE_ASSERT_CHECKING +#define SerialPagePrecedesLogically(p1, p2) ((p1) < (p2)) + static void SerialPagePrecedesLogicallyUnitTests(void) { +#if 0 /* XXX remove unit tests */ int per_page = SERIAL_ENTRIESPERPAGE, offset = per_page / 2; - int newestPage, + int64 newestPage, oldestPage, headPage, targetPage; @@ -856,9 +850,11 @@ SerialPagePrecedesLogicallyUnitTests(void) #if 0 Assert(SerialPagePrecedesLogically(headPage, targetPage)); #endif +#endif } #endif + /* * Initialize for the tracking of old serializable committed xids. 
*/ @@ -4168,7 +4164,7 @@ CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot s ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), - errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid), + errdetail_internal("Reason code: Canceled on conflict out to old pivot " XID_FMT ".", xid), errhint("The transaction might succeed if retried."))); if (SxactHasSummaryConflictIn(MySerializableXact) @@ -4176,7 +4172,7 @@ CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot s ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), - errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid), + errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction " XID_FMT ".", xid), errhint("The transaction might succeed if retried."))); MySerializableXact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT; @@ -4806,7 +4802,7 @@ OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader, ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), - errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid), + errdetail_internal("Reason code: Canceled on conflict out to pivot " XID_FMT ", during read.", writer->topXid), errhint("The transaction might succeed if retried."))); } writer->flags |= SXACT_FLAG_DOOMED; diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index a5c94b0a7ee..df614744ea1 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -21,11 +21,26 @@ #include "storage/checksum.h" 
#include "utils/memdebug.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" /* GUC variable */ bool ignore_checksum_failure = false; +/* + * HeapPageSpecialData used when pd_special == BLCKSZ. This is special format + * used when page with 32-bit xids doesn't fit HeapPageSpecialData. Then + * all xmin's are frozen (can do this for all live tuples after pg_upgrade), + * while 64-bit xmax is stored in both t_heap.t_xmin and t_heap.t_xmax. + * This is so-called "double xmax" format. + */ +static HeapPageSpecialData doubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId, + .pd_multi_base = MaxTransactionId +}; +HeapPageSpecial doubleXmaxSpecial = &doubleXmaxSpecialData; + /* ---------------------------------------------------------------- * Page support functions @@ -432,15 +447,131 @@ PageRestoreTempPage(Page tempPage, Page oldPage) } /* - * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + * Get minimum and maximum values of xid and multixact on "double xmax" page. 
*/ -typedef struct itemIdCompactData +static void +heap_page_double_xmax_get_min_max(Page page, + TransactionId *xid_min, + TransactionId *xid_max, + MultiXactId *multi_min, + MultiXactId *multi_max) { - uint16 offsetindex; /* linp array index */ - int16 itemoff; /* page offset of item data */ - uint16 alignedlen; /* MAXALIGN(item data len) */ -} itemIdCompactData; -typedef itemIdCompactData *itemIdCompact; + bool xid_found = false, + multi_found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + TransactionId xmax; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + xmax = HeapTupleHeaderGetDoubleXmax(htup); + + if (!TransactionIdIsNormal(xmax)) + continue; + + if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!xid_found) + { + *xid_min = *xid_max = xmax; + xid_found = true; + } + else + { + *xid_min = Min(*xid_min, xmax); + *xid_max = Max(*xid_max, xmax); + } + } + else + { + if (!multi_found) + { + *multi_min = *multi_max = xmax; + multi_found = true; + } + else + { + *multi_min = Min(*multi_min, xmax); + *multi_max = Max(*multi_max, xmax); + } + } + } +} + +/* + * Add special area to heap page, so convert from "double xmax" to normal + * format. 
+ */ +static void +heap_page_add_special_area(itemIdCompact itemidbase, int nitems, Page page, + TransactionId xid_base, MultiXactId multi_base) +{ + char newPage[BLCKSZ]; + PageHeader phdr = (PageHeader) page; + PageHeader new_phdr = (PageHeader) newPage; + HeapPageSpecial special; + Offset upper; + int i; + + memcpy(newPage, page, phdr->pd_lower); + + /* Add special area */ + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData); + special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + + /* sort itemIdSortData array into decreasing itemoff order */ + qsort((char *) itemidbase, nitems, sizeof(itemIdCompactData), + itemoffcompare); + + upper = new_phdr->pd_special; + for (i = 0; i < nitems; i++) + { + itemIdCompact itemidptr = &itemidbase[i]; + ItemId lp; + HeapTupleHeader old_htup; + HeapTupleHeader new_htup; + TransactionId xmax; + + lp = PageGetItemId(page, itemidptr->offsetindex + 1); + old_htup = (HeapTupleHeader) PageGetItem(page, lp); + upper -= itemidptr->alignedlen; + memcpy((Pointer) newPage + upper, + (Pointer) page + itemidptr->itemoff, + itemidptr->alignedlen); + lp = PageGetItemId(newPage, itemidptr->offsetindex + 1); + lp->lp_off = upper; + new_htup = (HeapTupleHeader) PageGetItem(newPage, lp); + + /* Convert xmax value */ + new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId; + xmax = HeapTupleHeaderGetDoubleXmax(old_htup); + if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI)) + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax); + else + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax); + } + + new_phdr->pd_upper = upper; + + memcpy(page, newPage, PageGetPageSize(newPage)); + elog(DEBUG2, "convert heap page from double xmax to normal format"); +} /* * After removing or marking some line pointers unused, move the tuples to @@ -471,7 +602,8 @@ typedef itemIdCompactData 
*itemIdCompact; * Callers must ensure that nitems is > 0 */ static void -compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted) +compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, + bool presorted, bool addspecial) { PageHeader phdr = (PageHeader) page; Offset upper; @@ -483,9 +615,33 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte /* Code within will not work correctly if nitems == 0 */ Assert(nitems > 0); - if (presorted) + /* Add special area to the heap page if possible */ + if (addspecial) { + TransactionId xid_min = FirstNormalTransactionId, + xid_max = FirstNormalTransactionId; + MultiXactId multi_min = FirstNormalTransactionId, + multi_max = FirstNormalTransactionId; + + Assert(phdr->pd_special == PageGetPageSize(page)); + heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max, + &multi_min, &multi_max); + + if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) && + multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId)) + { + Assert(xid_min >= FirstNormalTransactionId); + Assert(multi_min >= FirstNormalTransactionId); + heap_page_add_special_area(itemidbase, nitems, page, + xid_min - FirstNormalTransactionId, + multi_min - FirstNormalTransactionId); + return; + } + } + + if (presorted) + { #ifdef USE_ASSERT_CHECKING { /* @@ -783,11 +939,21 @@ PageRepairFragmentation(Page page) nstorage = itemidptr - itemidbase; if (nstorage == 0) { + if (pd_special == PageGetPageSize(page)) + { + pd_special = PageGetPageSize(page) - sizeof(HeapPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + HeapPageGetSpecial(page)->pd_xid_base = 0; + HeapPageGetSpecial(page)->pd_multi_base = 0; + } + /* Page is completely empty, so just reset it quickly */ ((PageHeader) page)->pd_upper = pd_special; } else { + bool addspecial = false; + /* Need to compact the page the hard way */ if (totallen > (Size) (pd_special - 
pd_lower)) ereport(ERROR, @@ -795,7 +961,15 @@ PageRepairFragmentation(Page page) errmsg("corrupted item lengths: total %u, available space %u", (unsigned int) totallen, pd_special - pd_lower))); - compactify_tuples(itemidbase, nstorage, page, presorted); + /* + * Try to add special area to the heap page if it has enough of free + * space. + */ + if (pd_special == PageGetPageSize(page) && + (Size) (pd_special - pd_lower) - totallen >= sizeof(HeapPageSpecialData)) + addspecial = true; + + compactify_tuples(itemidbase, nstorage, page, presorted, addspecial); } /* Set hint bit for PageAddItemExtended */ @@ -985,6 +1159,9 @@ PageGetHeapFreeSpace(Page page) { Size space; + if (HeapPageIsDoubleXmax(page)) + return 0; + space = PageGetFreeSpace(page); if (space > 0) { @@ -1268,7 +1445,7 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) /* and compactify the tuple data */ if (nused > 0) - compactify_tuples(itemidbase, nused, page, presorted); + compactify_tuples(itemidbase, nused, page, presorted, false); else phdr->pd_upper = pd_special; } diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index 0d892132a84..299dd6698c5 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup) * Usually, a row would get hinted as committed when it's read or loaded * into syscache; but just in case not, let's check the xmin directly. 
*/ - xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); + xmin = HeapTupleGetXmin(enumval_tup); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 6335845d08e..c952f98d953 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -3340,6 +3340,7 @@ populate_record(TupleDesc tupdesc, tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = defaultval; /* Break down the tuple into fields */ @@ -3776,6 +3777,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj) tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = tuphead; tuplestore_puttuple(state->tuple_store, &tuple); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 5dc0a5882cf..f2a2f33c26e 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -77,9 +77,9 @@ VXIDGetDatum(BackendId bid, LocalTransactionId lxid) * The representation is "/", decimal and unsigned decimal * respectively. Note that elog.c also knows how to format a vxid. 
*/ - char vxidstr[32]; + char vxidstr[64]; - snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid); + snprintf(vxidstr, sizeof(vxidstr), "%d/" XID_FMT, bid, lxid); return CStringGetTextDatum(vxidstr); } @@ -292,7 +292,9 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_TRANSACTION: values[6] = - TransactionIdGetDatum(instance->locktag.locktag_field1); + TransactionIdGetDatum( + (TransactionId)instance->locktag.locktag_field1 | + ((TransactionId)instance->locktag.locktag_field2 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; @@ -304,7 +306,8 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, - instance->locktag.locktag_field2); + (TransactionId)instance->locktag.locktag_field2 | + ((TransactionId)instance->locktag.locktag_field3 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 6a9c00fdd3e..b93096f288f 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -606,3 +606,25 @@ pg_ultostr(char *str, uint32 value) return str + len; } + +/* + * pg_strtouint64 + * Converts 'str' into an unsigned 64-bit integer. + * + * This has the identical API to strtoul(3), except that it will handle + * 64-bit ints even where "long" is narrower than that. + * + * For the moment it seems sufficient to assume that the platform has + * such a function somewhere; let's not roll our own. 
+ */ +uint64 +pg_strtouint64(const char *str, char **endptr, int base) +{ +#ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, endptr, base); +#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, endptr, base); +#else + return strtoul(str, endptr, base); +#endif +} diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index f529c1561ab..6c51dc5f795 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/xact.h" #include "access/xlog.h" #include "catalog/pg_authid.h" #include "catalog/pg_type.h" diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index 1a71fdbc33f..37c903b163f 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -327,6 +327,7 @@ record_out(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* @@ -694,6 +695,7 @@ record_send(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* @@ -844,10 +846,12 @@ record_cmp(FunctionCallInfo fcinfo) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple1); tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple2); tuple2.t_data = record2; /* @@ -1089,10 +1093,12 @@ record_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = 
HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1351,10 +1357,12 @@ record_image_cmp(FunctionCallInfo fcinfo) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1597,10 +1605,12 @@ record_image_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1800,6 +1810,7 @@ hash_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroBase(&tuple); /* * We arrange to look up the needed hashing info just once per series of @@ -1921,6 +1932,7 @@ hash_record_extended(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroBase(&tuple); /* * We arrange to look up the needed hashing info just once per series of diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index a09096d0187..92753557cd9 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -32,16 +32,16 @@ xidin(PG_FUNCTION_ARGS) { char *str = PG_GETARG_CSTRING(0); - PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0)); + 
PG_RETURN_TRANSACTIONID((TransactionId) pg_strtouint64(str, NULL, 0)); } Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); - char *result = (char *) palloc(16); + char *result = (char *) palloc(32); - snprintf(result, 16, "%lu", (unsigned long) transactionId); + snprintf(result, 32, XID_FMT, transactionId); PG_RETURN_CSTRING(result); } @@ -52,8 +52,12 @@ Datum xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint32 lo, hi; + + lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); + hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); - PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); + PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32)); } /* @@ -64,9 +68,14 @@ xidsend(PG_FUNCTION_ARGS) { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; + uint32 lo, hi; + + lo = (uint32) (arg1 & 0xFFFFFFFF); + hi = (uint32) (arg1 >> 32); pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); + pq_sendint(&buf, lo, sizeof(lo)); + pq_sendint(&buf, hi, sizeof(hi)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -105,9 +114,9 @@ xid_age(PG_FUNCTION_ARGS) /* Permanent XIDs are always infinitely old */ if (!TransactionIdIsNormal(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* @@ -120,9 +129,9 @@ mxid_age(PG_FUNCTION_ARGS) MultiXactId now = ReadNextMultiXactId(); if (!MultiXactIdIsValid(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 68985dca5a6..abeaa33d901 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -79,8 +79,7 @@ typedef struct * It is an ERROR if the xid is in the future. 
Otherwise, returns true if * the transaction is still new enough that we can determine whether it * committed and false otherwise. If *extracted_xid is not NULL, it is set - * to the low 32 bits of the transaction ID (i.e. the actual XID, without the - * epoch). + * to the actual transaction ID. * * The caller must hold XactTruncationLock since it's dealing with arbitrary * XIDs, and must continue to hold it until it's done with any clog lookups @@ -89,15 +88,10 @@ typedef struct static bool TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) { - uint32 xid_epoch = EpochFromFullTransactionId(fxid); TransactionId xid = XidFromFullTransactionId(fxid); - uint32 now_epoch; - TransactionId now_epoch_next_xid; FullTransactionId now_fullxid; now_fullxid = ReadNextFullTransactionId(); - now_epoch_next_xid = XidFromFullTransactionId(now_fullxid); - now_epoch = EpochFromFullTransactionId(now_fullxid); if (extracted_xid != NULL) *extracted_xid = xid; @@ -127,48 +121,15 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) Assert(LWLockHeldByMe(XactTruncationLock)); /* - * If the transaction ID has wrapped around, it's definitely too old to - * determine the commit status. Otherwise, we can compare it to - * ShmemVariableCache->oldestClogXid to determine whether the relevant - * CLOG entry is guaranteed to still exist. + * We compare xid to ShmemVariableCache->oldestClogXid to determine whether + * the relevant CLOG entry is guaranteed to still exist. */ - if (xid_epoch + 1 < now_epoch - || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid) - || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) + if (TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) return false; return true; } -/* - * Convert a TransactionId obtained from a snapshot held by the caller to a - * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that - * we can compute the high order bits. 
It must have been obtained by the - * caller with ReadNextFullTransactionId() after the snapshot was created. - */ -static FullTransactionId -widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid) -{ - TransactionId next_xid = XidFromFullTransactionId(next_fxid); - uint32 epoch = EpochFromFullTransactionId(next_fxid); - - /* Special transaction ID. */ - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - /* - * The 64 bit result must be <= next_fxid, since next_fxid hadn't been - * issued yet when the snapshot was created. Every TransactionId in the - * snapshot must therefore be from the same epoch as next_fxid, or the - * epoch before. We know this because next_fxid is never allow to get - * more than one epoch ahead of the TransactionIds in any snapshot. - */ - if (xid > next_xid) - epoch--; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - /* * txid comparator for qsort/bsearch */ @@ -397,7 +358,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS) uint32 nxip, i; Snapshot cur; - FullTransactionId next_fxid = ReadNextFullTransactionId(); cur = GetActiveSnapshot(); if (cur == NULL) @@ -415,11 +375,11 @@ pg_current_snapshot(PG_FUNCTION_ARGS) snap = palloc(PG_SNAPSHOT_SIZE(nxip)); /* fill */ - snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid); - snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid); + snap->xmin = FullTransactionIdFromXid(cur->xmin); + snap->xmax = FullTransactionIdFromXid(cur->xmax); snap->nxip = nxip; for (i = 0; i < nxip; i++) - snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid); + snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]); /* * We want them guaranteed to be in ascending order. This also removes @@ -655,8 +615,7 @@ pg_snapshot_xip(PG_FUNCTION_ARGS) * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * - * The passed epoch-qualified xid is treated as a normal xid, not a - * multixact id. 
+ * The passed xid is treated as a normal xid, not a multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 4fbdc62d8c7..e1ff4ef7be3 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -1845,6 +1845,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, memcpy((char *) ct->tuple.t_data, (const char *) dtp->t_data, dtp->t_len); + HeapTupleCopyBase(&ct->tuple, dtp); MemoryContextSwitchTo(oldcxt); if (dtp != ntp) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 105d8d4601c..4ecaa2ee7d3 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2284,8 +2284,9 @@ RelationReloadIndexInfo(Relation relation) relation->rd_index->indislive = index->indislive; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ - HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, - HeapTupleHeaderGetXmin(tuple->t_data)); + relation->rd_indextuple->t_xid_base = tuple->t_xid_base; + HeapTupleSetXmin(relation->rd_indextuple, + HeapTupleGetXmin(tuple)); ReleaseSysCache(tuple); } diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index f33729513a0..83fa8ede63f 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -2691,12 +2691,12 @@ log_line_prefix(StringInfo buf, ErrorData *edata) { char strfbuf[128]; - snprintf(strfbuf, sizeof(strfbuf) - 1, "%d/%u", + snprintf(strfbuf, sizeof(strfbuf) - 1, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); appendStringInfo(buf, "%*s", padding, strfbuf); } else - appendStringInfo(buf, "%d/%u", MyProc->backendId, MyProc->lxid); + appendStringInfo(buf, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); } else if (padding != 0) appendStringInfoSpaces(buf, @@ 
-2704,9 +2704,9 @@ log_line_prefix(StringInfo buf, ErrorData *edata) break; case 'x': if (padding != 0) - appendStringInfo(buf, "%*u", padding, GetTopTransactionIdIfAny()); + appendStringInfo(buf, "%*" INT64_MODIFIER "u", padding, GetTopTransactionIdIfAny()); else - appendStringInfo(buf, "%u", GetTopTransactionIdIfAny()); + appendStringInfo(buf, XID_FMT, GetTopTransactionIdIfAny()); break; case 'e': if (padding != 0) @@ -2861,11 +2861,11 @@ write_csvlog(ErrorData *edata) /* Virtual transaction id */ /* keep VXID format in sync with lockfuncs.c */ if (MyProc != NULL && MyProc->backendId != InvalidBackendId) - appendStringInfo(&buf, "%d/%u", MyProc->backendId, MyProc->lxid); + appendStringInfo(&buf, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); appendStringInfoChar(&buf, ','); /* Transaction id */ - appendStringInfo(&buf, "%u", GetTopTransactionIdIfAny()); + appendStringInfo(&buf, XID_FMT, GetTopTransactionIdIfAny()); appendStringInfoChar(&buf, ','); /* Error severity */ diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 3dfe6e58252..ce66431e86e 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -540,7 +540,7 @@ lookup_C_func(HeapTuple procedureTuple) NULL); if (entry == NULL) return NULL; /* no such entry */ - if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) && + if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) && ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ @@ -576,7 +576,7 @@ record_C_func(HeapTuple procedureTuple, HASH_ENTER, &found); /* OID is already filled in */ - entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data); + entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 8761fe068cf..ea117a34608 100644 --- 
a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2639,74 +2639,6 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, - { - {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a table row."), - NULL - }, - &vacuum_freeze_min_age, - 50000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), - NULL - }, - &vacuum_multixact_freeze_min_age, - 5000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_multixact_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY, - gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), - NULL - }, - &vacuum_defer_cleanup_age, - 0, 0, 1000000, /* see ComputeXidHorizons */ - NULL, NULL, NULL - }, - { - {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - { - {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_multixact_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - /* * See 
also CheckRequiredParameterValues() if this parameter changes */ @@ -3305,28 +3237,6 @@ static struct config_int ConfigureNamesInt[] = 50, 0, INT_MAX, NULL, NULL, NULL }, - { - /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), - NULL - }, - &autovacuum_freeze_max_age, - - /* see vacuum_failsafe_age if you change the upper-limit value. */ - 200000000, 100000, 2000000000, - NULL, NULL, NULL - }, - { - /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), - NULL - }, - &autovacuum_multixact_freeze_max_age, - 400000000, 10000, 2000000000, - NULL, NULL, NULL - }, { /* see max_connections */ {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM, @@ -3873,6 +3783,100 @@ static struct config_real ConfigureNamesReal[] = static struct config_int64 ConfigureNamesInt64[] = { + { + {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a table row."), + NULL + }, + &vacuum_freeze_min_age, + INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), + NULL + }, + &vacuum_multixact_freeze_min_age, + INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, 
+ + { + {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_multixact_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY, + gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), + NULL + }, + &vacuum_defer_cleanup_age, + INT64CONST(0), INT64CONST(0), INT64CONST(1000000), + NULL, NULL, NULL + }, + + { + {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_multixact_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + + { + /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), + NULL + }, + &autovacuum_freeze_max_age, + + /* see vacuum_failsafe_age if you change the upper-limit value. 
*/ + INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), + NULL + }, + &autovacuum_multixact_freeze_max_age, + INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c index d97243ddc8b..310b32ec2fb 100644 --- a/src/backend/utils/misc/help_config.c +++ b/src/backend/utils/misc/help_config.c @@ -33,6 +33,7 @@ typedef union struct config_bool _bool; struct config_real real; struct config_int integer; + struct config_int64 integer8; struct config_string string; struct config_enum _enum; } mixedStruct; @@ -107,7 +108,12 @@ printMixedStruct(mixedStruct *structToPrint) structToPrint->integer.min, structToPrint->integer.max); break; - + case PGC_INT64: + printf("INT64\t" INT64_FORMAT "\t" INT64_FORMAT "\t" INT64_FORMAT "\t", + structToPrint->integer8.reset_val, + structToPrint->integer8.min, + structToPrint->integer8.max); + break; case PGC_REAL: printf("REAL\t%g\t%g\t%g\t", structToPrint->real.reset_val, diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index b1db9a8d07e..023573a9a57 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -164,8 +164,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) values[5] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); nulls[5] = false; - values[6] = CStringGetTextDatum(psprintf("%u:%u", - EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), + values[6] = CStringGetTextDatum(psprintf(XID_FMT, 
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid))); nulls[6] = false; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a1acd46b611..6a08a6f6fec 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -639,9 +639,9 @@ #autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table # size before insert vacuum #autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze -#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum +#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum # (change requires restart) -#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age +#autovacuum_multixact_freeze_max_age = 20000000000 # maximum multixact age # before forced vacuum # (change requires restart) #autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 90e26745dff..9417c6244cc 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -4078,11 +4078,13 @@ static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) { HeapTuple tuple = (HeapTuple) stup->tuple; - unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + 2 * sizeof(TransactionId) + sizeof(int); /* We need to store t_self, but not other fields of HeapTupleData */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(tape, &tuple->t_xid_base, sizeof(TransactionId)); + LogicalTapeWrite(tape, &tuple->t_multi_base, sizeof(TransactionId)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); if (state->randomAccess) /* need trailing length word? 
*/ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); @@ -4098,7 +4100,7 @@ static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int tuplen) { - unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + unsigned int t_len = tuplen - sizeof(ItemPointerData) - 2 * sizeof(TransactionId) - sizeof(int); HeapTuple tuple = (HeapTuple) readtup_alloc(state, t_len + HEAPTUPLESIZE); @@ -4106,6 +4108,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); tuple->t_len = t_len; LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeReadExact(tape, &tuple->t_xid_base, sizeof(TransactionId)); + LogicalTapeReadExact(tape, &tuple->t_multi_base, sizeof(TransactionId)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 44fe2f3dbe1..538582d6b59 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid); */ CommandId -HeapTupleHeaderGetCmin(HeapTupleHeader tup) +HeapTupleGetCmin(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); - Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup))); + Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); @@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(HeapTupleHeader tup) } CommandId -HeapTupleHeaderGetCmax(HeapTupleHeader tup) +HeapTupleGetCmax(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); @@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) 
* things too much. */ Assert(CritSectionCount > 0 || - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup))); + TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); @@ -150,7 +152,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) * changes the tuple in shared buffers. */ void -HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, +HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo) { @@ -160,10 +162,10 @@ HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ - if (!HeapTupleHeaderXminCommitted(tup) && - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup))) + if (!HeapTupleHeaderXminCommitted(tup->t_data) && + TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup))) { - CommandId cmin = HeapTupleHeaderGetCmin(tup); + CommandId cmin = HeapTupleGetCmin(tup); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 5001efdf7a2..fe89f030743 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -1172,8 +1172,9 @@ ExportSnapshot(Snapshot snapshot) * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. 
*/ - snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", - MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1); + snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d", + MyProc->backendId, (uint32) (MyProc->lxid >> 32), + (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the @@ -1200,14 +1201,14 @@ ExportSnapshot(Snapshot snapshot) */ initStringInfo(&buf); - appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid); + appendStringInfo(&buf, "vxid:%d/" XID_FMT "\n", MyProc->backendId, MyProc->lxid); appendStringInfo(&buf, "pid:%d\n", MyProcPid); appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId); appendStringInfo(&buf, "iso:%d\n", XactIsoLevel); appendStringInfo(&buf, "ro:%d\n", XactReadOnly); - appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin); - appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax); + appendStringInfo(&buf, "xmin:" XID_FMT "\n", snapshot->xmin); + appendStringInfo(&buf, "xmax:" XID_FMT "\n", snapshot->xmax); /* * We must include our own top transaction ID in the top-xid data, since @@ -1224,9 +1225,9 @@ ExportSnapshot(Snapshot snapshot) TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0; appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid); for (i = 0; i < snapshot->xcnt; i++) - appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]); + appendStringInfo(&buf, "xip:" XID_FMT "\n", snapshot->xip[i]); if (addTopXid) - appendStringInfo(&buf, "xip:%u\n", topXid); + appendStringInfo(&buf, "xip:" XID_FMT "\n", topXid); /* * Similarly, we add our subcommitted child XIDs to the subxid data. 
Here, @@ -1240,9 +1241,9 @@ ExportSnapshot(Snapshot snapshot) appendStringInfoString(&buf, "sof:0\n"); appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); for (i = 0; i < snapshot->subxcnt; i++) - appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]); + appendStringInfo(&buf, "sxp:" XID_FMT "\n", snapshot->subxip[i]); for (i = 0; i < nchildren; i++) - appendStringInfo(&buf, "sxp:%u\n", children[i]); + appendStringInfo(&buf, "sxp:" XID_FMT "\n", children[i]); } appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery); @@ -1345,7 +1346,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%u", &val) != 1) + if (sscanf(ptr, XID_FMT, &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1370,7 +1371,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2) + if (sscanf(ptr, "%d/" XID_FMT, &vxid->backendId, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 03b80f95757..a566937ad3e 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -148,6 +148,9 @@ static bool data_checksums = false; static char *xlog_dir = NULL; static char *str_wal_segment_size_mb = NULL; static int wal_segment_size_mb; +static TransactionId start_xid = 0; +static MultiXactId start_mx_id = 0; +static MultiXactOffset start_mx_offset = 0; /* internal vars */ @@ -1408,10 +1411,13 @@ bootstrap_template1(void) 
unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -X %d %s %s %s %s", + "\"%s\" --boot -X %d %s %s " XID_FMT " %s " XID_FMT " %s " XID_FMT " %s %s %s", backend_exec, wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", + "-Z", start_xid, + "-m", start_mx_id, + "-o", start_mx_offset, boot_options, extra_options, debug ? "-d 5" : ""); @@ -2193,13 +2199,21 @@ usage(const char *progname) printf(_(" -W, --pwprompt prompt for a password for the new superuser\n")); printf(_(" -X, --waldir=WALDIR location for the write-ahead log directory\n")); printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n")); + printf(_(" -x, --xid=START_XID specify start xid value in decimal format for new db instance to test 64-bit xids,\n" + " default value is 0, max value is 2^62-1\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); printf(_(" --discard-caches set debug_discard_caches=1\n")); printf(_(" -L DIRECTORY where to find the input files\n")); + printf(_(" -m, --multixact-id=START_MX_ID\n" + " specify start multixact id value in decimal format for new db instance\n" + " to test 64-bit xids, default value is 0, max value is 2^62-1\n")); printf(_(" -n, --no-clean do not clean up after errors\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); + printf(_(" -o, --multixact-offset=START_MX_OFFSET\n" + " specify start multixact offset value in decimal format for new db instance\n" + " to test 64-bit xids, default value is 0, max value is 2^62-1\n")); printf(_(" -s, --show show internal settings\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); printf(_("\nOther options:\n")); @@ -2876,6 +2890,10 @@ main(int argc, char *argv[]) {"data-checksums", no_argument, NULL, 'k'}, {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", 
no_argument, NULL, 14}, + {"no-data-checksums", no_argument, NULL, 15}, + {"xid", required_argument, NULL, 'x'}, + {"multixact-id", required_argument, NULL, 'm'}, + {"multixact-offset", required_argument, NULL, 'o'}, {NULL, 0, NULL, 0} }; @@ -2917,7 +2935,7 @@ main(int argc, char *argv[]) /* process command-line options */ - while ((c = getopt_long(argc, argv, "A:dD:E:gkL:nNsST:U:WX:", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "A:dD:E:gkL:m:nNo:sST:U:Wx:X:", long_options, &option_index)) != -1) { switch (c) { @@ -2956,6 +2974,20 @@ main(int argc, char *argv[]) debug = true; printf(_("Running in debug mode.\n")); break; + case 'm': + if (sscanf(optarg, XID_FMT, &start_mx_id) != 1) + { + fprintf(stderr, "%s: invalid decimal START_MX_ID value\n", + progname); + exit(1); + } + if (!StartMultiXactIdIsValid(start_mx_id)) + { + fprintf(stderr, "%s: out-of-range START_MX_ID value (the value must be less than 2^62)\n", + progname); + exit(1); + } + break; case 'n': noclean = true; printf(_("Running in no-clean mode. 
Mistakes will not be cleaned up.\n")); @@ -2963,6 +2995,20 @@ main(int argc, char *argv[]) case 'N': do_sync = false; break; + case 'o': + if (sscanf(optarg, XID_FMT, &start_mx_offset) != 1) + { + fprintf(stderr, "%s: invalid decimal START_MX_OFFSET value\n", + progname); + exit(1); + } + if (!StartMultiXactOffsetIsValid(start_mx_offset)) + { + fprintf(stderr, "%s: out-of-range START_MX_OFFSET value (the value must be less than 2^62)\n", + progname); + exit(1); + } + break; case 'S': sync_only = true; break; @@ -3022,6 +3068,23 @@ main(int argc, char *argv[]) extra_options, "-c debug_discard_caches=1"); break; + case 15: + data_checksums = false; + break; + case 'x': + if (sscanf(optarg, XID_FMT, &start_xid) != 1) + { + fprintf(stderr, "%s: invalid decimal START_XID value\n", + progname); + exit(1); + } + if (!StartTransactionIdIsValid(start_xid)) + { + fprintf(stderr, "%s: out-of-range START_XID value (the value must be less than 2^62)\n", + progname); + exit(1); + } + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index 4ca7ed297c0..7975dbfe8e4 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -10,6 +10,8 @@ use PostgreSQL::Test::Utils; use Fcntl qw(:seek); use Test::More; +use Data::Dumper; + # This regression test demonstrates that the pg_amcheck binary correctly # identifies specific kinds of corruption within pages. To test this, we need # a mechanism to create corrupt pages with predictable, repeatable corruption. 
@@ -86,6 +88,60 @@ use Test::More; use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL'; use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size +use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ'; +use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16; +use constant HEAPPAGE_SIZE => 8192; + +# Some #define constants from access/htup_details.h for use while corrupting. +use constant HEAP_HASNULL => 0x0001; +use constant HEAP_XMAX_LOCK_ONLY => 0x0080; +use constant HEAP_XMIN_COMMITTED => 0x0100; +use constant HEAP_XMIN_INVALID => 0x0200; +use constant HEAP_XMAX_COMMITTED => 0x0400; +use constant HEAP_XMAX_INVALID => 0x0800; +use constant HEAP_NATTS_MASK => 0x07FF; +use constant HEAP_XMAX_IS_MULTI => 0x1000; +use constant HEAP_KEYS_UPDATED => 0x2000; + +use constant FIRST_NORMAL_TRANSACTION_ID => 3; + +# Read page special data +sub read_special_data +{ + my ($fh, $offset) = @_; + my ($buffer, %special); + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + seek($fh, $offset, SEEK_SET) + or BAIL_OUT("seek failed: $!"); + defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("sysread failed: $!"); + + @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer); + %special = ( + pd_xid_base => shift, + pd_multi_base => shift); + return \%special; +} + +# Write page special data +sub write_special_data +{ + my ($fh, $offset, $special) = @_; + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + my $buffer = pack( + HEAPPAGE_SPECIAL_PACK_CODE, + $special->{pd_xid_base}, $special->{pd_multi_base}); + + seek($fh, $offset, SEEK_SET) + or BAIL_OUT("seek failed: $!"); + defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("syswrite failed: $!"); + return; +} # Read a tuple of our table from a heap page. 
# @@ -97,7 +153,7 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size # sub read_tuple { - my ($fh, $offset) = @_; + my ($fh, $offset, $raw) = @_; my ($buffer, %tup); seek($fh, $offset, SEEK_SET) or BAIL_OUT("seek failed: $!"); @@ -134,6 +190,18 @@ sub read_tuple c_va_toastrelid => shift); # Stitch together the text for column 'b' $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7)); + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + $tup{t_xmin} += $special->{pd_xid_base}; + my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup{t_xmax} += !$is_multi ? + $special->{pd_xid_base} : + $special->{pd_multi_base}; + } + return \%tup; } @@ -149,7 +217,32 @@ sub read_tuple # sub write_tuple { - my ($fh, $offset, $tup) = @_; + my ($fh, $offset, $tup, $raw) = @_; + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + my $xmin = $tup->{t_xmin} - $special->{pd_xid_base}; + die "tuple x_min $tup->{t_xmin} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmin < 3; + $tup->{t_xmin} = $xmin; + + if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0) + { + my $xmax = $tup->{t_xmax} - $special->{pd_xid_base}; + die "tuple x_max $tup->{t_xmax} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + else + { + my $xmax = $tup->{t_xmax} - $special->{pd_multi_base}; + die "tuple multi x_max $tup->{t_xmax} is too small for pd_multi_base $special->{pd_multi_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + } + my $buffer = pack( HEAPTUPLE_PACK_CODE, $tup->{t_xmin}, $tup->{t_xmax}, @@ -172,6 +265,41 @@ sub write_tuple return; } +# move pd_xid_base and pd_multi_base to more suitable position for tests.
+sub fixup_page +{ + my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_; + my $offset = $page * HEAPPAGE_SIZE; + my $special = read_special_data($fh, $offset); + + die "xid_base $xid_base should not be greater than existing $special->{pd_xid_base}" + if ($xid_base > $special->{pd_xid_base}); + die "multi_base $multi_base should not be greater than existing $special->{pd_multi_base}" + if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0); + return if ($xid_base == $special->{pd_xid_base} && + $multi_base == $special->{pd_multi_base}); + + my $xid_delta = $special->{pd_xid_base} - $xid_base; + my $multi_delta = $special->{pd_multi_base} - $multi_base; + + for my $off (@$lp_off) + { + # change only tuples on this page. + next if ($off < $offset || $off >= $offset + HEAPPAGE_SIZE); + + my $tup = read_tuple($fh, $off, 1); + $tup->{t_xmin} += $xid_delta; + my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta; + write_tuple($fh, $off, $tup, 1); + } + + $special->{pd_xid_base} = $xid_base; + $special->{pd_multi_base} = $multi_base; + + write_special_data($fh, $offset, $special); +} + # Set umask so test directories and files are created with default permissions umask(0077); @@ -234,6 +362,10 @@ my $relfrozenxid = $node->safe_psql('postgres', q(select relfrozenxid from pg_class where relname = 'test')); my $datfrozenxid = $node->safe_psql('postgres', q(select datfrozenxid from pg_database where datname = 'postgres')); +my $datminmxid = $node->safe_psql('postgres', + q(select datminmxid from pg_database where datname = 'postgres')); +my $txid_current = $node->safe_psql('postgres', + q(select txid_current())); # Sanity check that our 'test' table has a relfrozenxid newer than the # datfrozenxid for the database, and that the datfrozenxid is greater than the @@ -292,6 +424,11 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) # Determine endianness of current platform from the 1-byte varlena
header $ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big"; } + +# Set 64bit xid bases a bit in the past therefore we can set xmin/xmax a bit +# in the past +fixup_page($file, 0, $datfrozenxid - 100, $datminmxid - 100, \@lp_off); + close($file) or BAIL_OUT("close failed: $!"); $node->start; @@ -310,17 +447,6 @@ $node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ], $node->stop; -# Some #define constants from access/htup_details.h for use while corrupting. -use constant HEAP_HASNULL => 0x0001; -use constant HEAP_XMAX_LOCK_ONLY => 0x0080; -use constant HEAP_XMIN_COMMITTED => 0x0100; -use constant HEAP_XMIN_INVALID => 0x0200; -use constant HEAP_XMAX_COMMITTED => 0x0400; -use constant HEAP_XMAX_INVALID => 0x0800; -use constant HEAP_NATTS_MASK => 0x07FF; -use constant HEAP_XMAX_IS_MULTI => 0x1000; -use constant HEAP_KEYS_UPDATED => 0x2000; - # Helper function to generate a regular expression matching the header we # expect verify_heapam() to return given which fields we expect to be non-null. sub header @@ -364,39 +490,39 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) # Expected corruption report push @expected, - qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/; + qr/${header}xmin $xmin precedes relation freeze threshold \d+/; } if ($offnum == 2) { # Corruptly set xmin < datfrozenxid - my $xmin = 3; + my $xmin = $datfrozenxid - 10; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin $xmin precedes oldest valid transaction ID 0:\d+/; + qr/${$header}xmin $xmin precedes oldest valid transaction ID \d+/; } elsif ($offnum == 3) { - # Corruptly set xmin < datfrozenxid, further back, noting circularity - # of xid comparison. For a new cluster with epoch = 0, the corrupt - # xmin will be interpreted as in the future - $tup->{t_xmin} = 4026531839; + # Corruptly set xmin > next transaction id. 
+ my $xmin = $relfrozenxid + 1000000; + $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + qr/${$header}xmin $xmin equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 4) { - # Corruptly set xmax < relminmxid; - $tup->{t_xmax} = 4026531839; + # Corruptly set xmax > next transaction id. + my $xmax = $relfrozenxid + 1000000; + $tup->{t_xmax} = $xmax; $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; push @expected, - qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + qr/${$header}xmax $xmax equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 5) { @@ -499,20 +625,22 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; - $tup->{t_xmax} = 4; + my $xmax = $datminmxid + 1000000; + $tup->{t_xmax} = $xmax; push @expected, - qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/; + qr/${header}multitransaction ID $xmax equals or exceeds next valid multitransaction ID \d+/; } elsif ($offnum == 15) # Last offnum must equal ROWCOUNT { # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; - $tup->{t_xmax} = 4000000000; + my $xmax = $datminmxid - 10; + $tup->{t_xmax} = $xmax; push @expected, - qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; + qr/${header}multitransaction ID $xmax precedes relation minimum multitransaction ID threshold \d+/; } write_tuple($file, $offset, $tup); } diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index f911f98d946..997df822e8e 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ 
b/src/bin/pg_controldata/pg_controldata.c @@ -247,28 +247,27 @@ main(int argc, char *argv[]) ControlFile->checkPointCopy.PrevTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off")); - printf(_("Latest checkpoint's NextXID: %u:%u\n"), - EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), + printf(_("Latest checkpoint's NextXID: " XID_FMT "\n"), XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); - printf(_("Latest checkpoint's NextMultiXactId: %u\n"), + printf(_("Latest checkpoint's NextMultiXactId: " XID_FMT "\n"), ControlFile->checkPointCopy.nextMulti); - printf(_("Latest checkpoint's NextMultiOffset: %u\n"), + printf(_("Latest checkpoint's NextMultiOffset: " XID_FMT "\n"), ControlFile->checkPointCopy.nextMultiOffset); - printf(_("Latest checkpoint's oldestXID: %u\n"), + printf(_("Latest checkpoint's oldestXID: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile->checkPointCopy.oldestXidDB); - printf(_("Latest checkpoint's oldestActiveXID: %u\n"), + printf(_("Latest checkpoint's oldestActiveXID: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestActiveXid); - printf(_("Latest checkpoint's oldestMultiXid: %u\n"), + printf(_("Latest checkpoint's oldestMultiXid: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestMulti); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile->checkPointCopy.oldestMultiDB); - printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"), + printf(_("Latest checkpoint's oldestCommitTsXid:" XID_FMT "\n"), ControlFile->checkPointCopy.oldestCommitTsXid); - printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), + printf(_("Latest checkpoint's newestCommitTsXid:" XID_FMT "\n"), ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Time of latest checkpoint: %s\n"), 
ckpttime_str); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b52f3ccda25..44de9924b3b 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -306,6 +306,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, static char *get_synchronized_snapshot(Archive *fout); static void setupDumpWorker(Archive *AHX); static TableInfo *getRootTableInfo(const TableInfo *tbinfo); +static uint64 pg_strtouint64(const char *str, char **endptr, int base); int @@ -2760,7 +2761,7 @@ dumpDatabase(Archive *fout) *datistemplate, *datconnlimit, *tablespace; - uint32 frozenxid, + uint64 frozenxid, minmxid; char *qdatname; @@ -2824,8 +2825,8 @@ dumpDatabase(Archive *fout) encoding = PQgetvalue(res, 0, i_encoding); collate = PQgetvalue(res, 0, i_collate); ctype = PQgetvalue(res, 0, i_ctype); - frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); - minmxid = atooid(PQgetvalue(res, 0, i_minmxid)); + frozenxid = pg_strtouint64(PQgetvalue(res, 0, i_frozenxid), NULL, 0); + minmxid = pg_strtouint64(PQgetvalue(res, 0, i_minmxid), NULL, 0); dbdacl.acl = PQgetvalue(res, 0, i_datacl); dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault); datistemplate = PQgetvalue(res, 0, i_datistemplate); @@ -3010,7 +3011,7 @@ dumpDatabase(Archive *fout) { appendPQExpBufferStr(creaQry, "\n-- For binary upgrade, set datfrozenxid and datminmxid.\n"); appendPQExpBuffer(creaQry, "UPDATE pg_catalog.pg_database\n" - "SET datfrozenxid = '%u', datminmxid = '%u'\n" + "SET datfrozenxid = '" XID_FMT "', datminmxid = '" XID_FMT "'\n" "WHERE datname = ", frozenxid, minmxid); appendStringLiteralAH(creaQry, datname, fout); @@ -3060,10 +3061,10 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n"); appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '%s', relminmxid = '%s'\n" "WHERE oid = %u;\n", - 
atooid(PQgetvalue(lo_res, 0, i_relfrozenxid)), - atooid(PQgetvalue(lo_res, 0, i_relminmxid)), + (PQgetvalue(lo_res, 0, i_relfrozenxid)), + (PQgetvalue(lo_res, 0, i_relminmxid)), LargeObjectRelationId); ArchiveEntry(fout, nilCatalogId, createDumpId(), ARCHIVE_OPTS(.tag = "pg_largeobject", @@ -6231,11 +6232,11 @@ getTables(Archive *fout, int *numTables) tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident)); tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0); tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0); - tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid)); - tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid)); + tblinfo[i].frozenxid = pg_strtouint64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0); + tblinfo[i].toast_frozenxid = pg_strtouint64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0); tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid)); - tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid)); - tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid)); + tblinfo[i].minmxid = pg_strtouint64(PQgetvalue(res, i, i_relminmxid), NULL, 0); + tblinfo[i].toast_minmxid = pg_strtouint64(PQgetvalue(res, i, i_toastminmxid), NULL, 0); tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions)); if (PQgetisnull(res, i, i_checkoption)) tblinfo[i].checkoption = NULL; @@ -15310,7 +15311,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) { appendPQExpBufferStr(q, "\n-- For binary upgrade, set heap's relfrozenxid and relminmxid\n"); appendPQExpBuffer(q, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '" XID_FMT "', relminmxid = '" XID_FMT "'\n" "WHERE oid = ", tbinfo->frozenxid, tbinfo->minmxid); appendStringLiteralAH(q, qualrelname, fout); @@ -15324,7 +15325,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) */ appendPQExpBufferStr(q, "\n-- For binary upgrade, set 
toast's relfrozenxid and relminmxid\n"); appendPQExpBuffer(q, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '" XID_FMT "', relminmxid = '" XID_FMT "'\n" "WHERE oid = '%u';\n", tbinfo->toast_frozenxid, tbinfo->toast_minmxid, tbinfo->toast_oid); @@ -17883,3 +17884,27 @@ appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, if (!res) pg_log_warning("could not parse %s array", "reloptions"); } + +/* + * Copied from src/backend/utils/adt/numutils.c + * + * pg_strtouint64 + * Converts 'str' into an unsigned 64-bit integer. + * + * This has the identical API to strtoul(3), except that it will handle + * 64-bit ints even where "long" is narrower than that. + * + * For the moment it seems sufficient to assume that the platform has + * such a function somewhere; let's not roll our own. + */ +static uint64 +pg_strtouint64(const char *str, char **endptr, int base) +{ +#ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, endptr, base); +#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, endptr, base); +#else + return strtoul(str, endptr, base); +#endif +} diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index f011ace8a80..ed58487ed81 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -298,11 +298,11 @@ typedef struct _tableInfo bool rowsec; /* is row security enabled? */ bool forcerowsec; /* is row security forced? */ bool hasoids; /* does it have OIDs? 
*/ - uint32 frozenxid; /* table's relfrozenxid */ - uint32 minmxid; /* table's relminmxid */ + uint64 frozenxid; /* table's relfrozenxid */ + uint64 minmxid; /* table's relminmxid */ Oid toast_oid; /* toast table's OID, or 0 if none */ - uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */ - uint32 toast_minmxid; /* toast table's relminmxid */ + uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */ + uint64 toast_minmxid; /* toast table's relminmxid */ int ncheck; /* # of CHECK expressions */ Oid reltype; /* OID of table's composite type, if any */ Oid reloftype; /* underlying type for typed table */ diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index c0ab392c3a2..c24b3bebab4 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */ static XLogSegNo newXlogSegNo; /* new XLOG segment # */ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; -static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_oldest_xid = 0; static TransactionId set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; @@ -87,6 +86,7 @@ static void KillExistingXLOG(void); static void KillExistingArchiveStatus(void); static void WriteEmptyXLOG(void); static void usage(void); +static uint64 str2uint64(const char *str, char **endptr, int base); int @@ -95,7 +95,6 @@ main(int argc, char *argv[]) static struct option long_options[] = { {"commit-timestamp-ids", required_argument, NULL, 'c'}, {"pgdata", required_argument, NULL, 'D'}, - {"epoch", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"next-wal-file", required_argument, NULL, 'l'}, {"multixact-ids", required_argument, NULL, 'm'}, @@ -137,7 +136,7 @@ main(int argc, char *argv[]) } - while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1) + while ((c = getopt_long(argc, 
argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1) { switch (c) { @@ -153,27 +152,9 @@ main(int argc, char *argv[]) noupdate = true; break; - case 'e': - errno = 0; - set_xid_epoch = strtoul(optarg, &endptr, 0); - if (endptr == optarg || *endptr != '\0' || errno != 0) - { - /*------ - translator: the second %s is a command line argument (-e, etc) */ - pg_log_error("invalid argument for option %s", "-e"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); - } - if (set_xid_epoch == -1) - { - pg_log_error("transaction ID epoch (-e) must not be -1"); - exit(1); - } - break; - case 'u': errno = 0; - set_oldest_xid = strtoul(optarg, &endptr, 0); + set_oldest_xid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-u"); @@ -182,14 +163,14 @@ main(int argc, char *argv[]) } if (!TransactionIdIsNormal(set_oldest_xid)) { - pg_log_error("oldest transaction ID (-u) must be greater than or equal to %u", FirstNormalTransactionId); + pg_log_error("oldest transaction ID (-u) must be greater than or equal to " XID_FMT, FirstNormalTransactionId); exit(1); } break; case 'x': errno = 0; - set_xid = strtoul(optarg, &endptr, 0); + set_xid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-x"); @@ -198,21 +179,21 @@ main(int argc, char *argv[]) } if (!TransactionIdIsNormal(set_xid)) { - pg_log_error("transaction ID (-x) must be greater than or equal to %u", FirstNormalTransactionId); + pg_log_error("transaction ID (-x) must be greater than or equal to " XID_FMT, FirstNormalTransactionId); exit(1); } break; case 'c': errno = 0; - set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0); + set_oldest_commit_ts_xid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); fprintf(stderr, _("Try 
\"%s --help\" for more information.\n"), progname); exit(1); } - set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0); + set_newest_commit_ts_xid = str2uint64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); @@ -253,7 +234,7 @@ main(int argc, char *argv[]) case 'm': errno = 0; - set_mxid = strtoul(optarg, &endptr, 0); + set_mxid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-m"); @@ -261,7 +242,7 @@ main(int argc, char *argv[]) exit(1); } - set_oldestmxid = strtoul(endptr + 1, &endptr2, 0); + set_oldestmxid = str2uint64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-m"); @@ -287,7 +268,7 @@ main(int argc, char *argv[]) case 'O': errno = 0; - set_mxoff = strtoul(optarg, &endptr, 0); + set_mxoff = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-O"); @@ -448,11 +429,6 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) 
*/ - if (set_xid_epoch != -1) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(set_xid_epoch, - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - if (set_oldest_xid != 0) { ControlFile.checkPointCopy.oldestXid = set_oldest_xid; @@ -460,9 +436,7 @@ main(int argc, char *argv[]) } if (set_xid != 0) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - set_xid); + ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid); if (set_oldest_commit_ts_xid != 0) ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid; @@ -702,7 +676,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.PrevTimeLineID = 1; ControlFile.checkPointCopy.fullPageWrites = false; ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + FullTransactionIdFromXid(FirstNormalTransactionId); ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; ControlFile.checkPointCopy.nextMultiOffset = 0; @@ -753,10 +727,17 @@ GuessControlValues(void) * * NB: this display should be just those fields that will not be * reset by RewriteControlFile(). + * + * Special macros help to make translatable strings. */ static void PrintControlValues(bool guessed) { + char xid_str[32]; + +#define FORMAT_XID(val) \ + (snprintf(xid_str, sizeof(xid_str), XID_FMT, (val)), xid_str) + if (guessed) printf(_("Guessed pg_control values:\n\n")); else @@ -772,29 +753,28 @@ PrintControlValues(bool guessed) ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? 
_("on") : _("off")); - printf(_("Latest checkpoint's NextXID: %u:%u\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); + printf(_("Latest checkpoint's NextXID: %s\n"), + FORMAT_XID(XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid))); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); - printf(_("Latest checkpoint's NextMultiXactId: %u\n"), - ControlFile.checkPointCopy.nextMulti); - printf(_("Latest checkpoint's NextMultiOffset: %u\n"), + printf(_("Latest checkpoint's NextMultiXactId: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.nextMulti)); + printf(_("Latest checkpoint's NextMultiOffset: " INT64_FORMAT "\n"), ControlFile.checkPointCopy.nextMultiOffset); - printf(_("Latest checkpoint's oldestXID: %u\n"), - ControlFile.checkPointCopy.oldestXid); + printf(_("Latest checkpoint's oldestXID: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestXid)); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); - printf(_("Latest checkpoint's oldestActiveXID: %u\n"), - ControlFile.checkPointCopy.oldestActiveXid); - printf(_("Latest checkpoint's oldestMultiXid: %u\n"), - ControlFile.checkPointCopy.oldestMulti); + printf(_("Latest checkpoint's oldestActiveXID: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestActiveXid)); + printf(_("Latest checkpoint's oldestMultiXid: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestMulti)); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); - printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"), - ControlFile.checkPointCopy.oldestCommitTsXid); - printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), - ControlFile.checkPointCopy.newestCommitTsXid); + printf(_("Latest checkpoint's oldestCommitTsXid:%s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestCommitTsXid)); + printf(_("Latest checkpoint's 
newestCommitTsXid:%s\n"), + FORMAT_XID(ControlFile.checkPointCopy.newestCommitTsXid)); printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); /* we don't print floatFormat since can't say much useful about it */ @@ -830,6 +810,7 @@ PrintControlValues(bool guessed) static void PrintNewControlValues(void) { + char xid_str[32]; char fname[MAXFNAMELEN]; /* This will be always printed in order to keep format same. */ @@ -841,18 +822,18 @@ PrintNewControlValues(void) if (set_mxid != 0) { - printf(_("NextMultiXactId: %u\n"), - ControlFile.checkPointCopy.nextMulti); - printf(_("OldestMultiXid: %u\n"), - ControlFile.checkPointCopy.oldestMulti); + printf(_("NextMultiXactId: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.nextMulti)); + printf(_("OldestMultiXid: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestMulti)); printf(_("OldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); } if (set_mxoff != -1) { - printf(_("NextMultiOffset: %u\n"), - ControlFile.checkPointCopy.nextMultiOffset); + printf(_("NextMultiOffset: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.nextMultiOffset)); } if (set_oid != 0) @@ -863,29 +844,23 @@ PrintNewControlValues(void) if (set_xid != 0) { - printf(_("NextXID: %u\n"), - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - printf(_("OldestXID: %u\n"), - ControlFile.checkPointCopy.oldestXid); + printf(_("NextXID: %s\n"), + FORMAT_XID(XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid))); + printf(_("OldestXID: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.oldestXid)); printf(_("OldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); } - if (set_xid_epoch != -1) - { - printf(_("NextXID epoch: %u\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - } - if (set_oldest_commit_ts_xid != 0) { - printf(_("oldestCommitTsXid: %u\n"), - ControlFile.checkPointCopy.oldestCommitTsXid); + printf(_("oldestCommitTsXid: %s\n"), + 
FORMAT_XID(ControlFile.checkPointCopy.oldestCommitTsXid)); } if (set_newest_commit_ts_xid != 0) { - printf(_("newestCommitTsXid: %u\n"), - ControlFile.checkPointCopy.newestCommitTsXid); + printf(_("newestCommitTsXid: %s\n"), + FORMAT_XID(ControlFile.checkPointCopy.newestCommitTsXid)); } if (set_wal_segsize != 0) @@ -1228,7 +1203,6 @@ usage(void) " set oldest and newest transactions bearing\n" " commit timestamp (zero means no change)\n")); printf(_(" [-D, --pgdata=]DATADIR data directory\n")); - printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n")); printf(_(" -f, --force force update to be done\n")); printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n")); printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n")); @@ -1243,3 +1217,20 @@ usage(void) printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); } + +/* + * str2uint64() + * + * convert string to 64-bit unsigned int + */ +static uint64 +str2uint64(const char *str, char **endptr, int base) +{ +#ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, endptr, base); +#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, endptr, base); +#else + return strtoul(str, endptr, base); +#endif +} diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index 44d06be5a61..77bf5fc955d 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -20,6 +20,7 @@ OBJS = \ parallel.o \ pg_upgrade.o \ relfilenode.o \ + segresize.o \ server.o \ tablespace.o \ util.o \ diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index bc5fbd93c6c..597a832731e 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -31,6 +31,7 @@ static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(ClusterInfo *new_cluster); static void check_for_user_defined_encoding_conversions(ClusterInfo 
*cluster); static char *get_canonical_locale_name(int category, const char *locale); +static void check_for_32bit_xid_usage(ClusterInfo *cluster); /* @@ -159,6 +160,17 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903) old_9_3_check_for_line_data_type_usage(&old_cluster); + /* Prepare for 64bit xid */ + if (!ALREADY_64bit_XID(old_cluster)) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -233,6 +245,17 @@ issue_warnings_and_set_wal_level(void) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) old_9_6_invalidate_hash_indexes(&new_cluster, false); + /* Reindex for 64bit xid */ + if (!ALREADY_64bit_XID(old_cluster)) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + report_extension_updates(&new_cluster); stop_postmaster(false); @@ -1323,3 +1346,94 @@ get_canonical_locale_name(int category, const char *locale) return res; } + +/* + * check_for_32bit_xid_usage() + * + * Postgres Pro Enterprise changes xid storage format to 64-bit. Check if + * xid type is used in tables.
+ */ +static void +check_for_32bit_xid_usage(ClusterInfo *cluster) +{ + int dbnum; + FILE *script = NULL; + bool found = false; + char output_path[MAXPGPATH]; + + prep_status("Checking for incompatible \"xid\" data type"); + + snprintf(output_path, sizeof(output_path), "tables_using_xid.txt"); + + for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + PGresult *res; + bool db_used = false; + int ntups; + int rowno; + int i_nspname, + i_relname, + i_attname; + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + + /* + * While several relkinds don't store any data, e.g. views, they can + * be used to define data types of other columns, so we check all + * relkinds. + */ + res = executeQueryOrDie(conn, + "SELECT n.nspname, c.relname, a.attname " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_namespace n, " + " pg_catalog.pg_attribute a " + "WHERE c.oid = a.attrelid AND " + " a.attnum >= 1 AND " + " a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND " + " c.relnamespace = n.oid AND " + /* exclude possible orphaned temp tables */ + " n.nspname !~ '^pg_temp_' AND " + " n.nspname NOT IN ('pg_catalog', 'information_schema')"); + + ntups = PQntuples(res); + i_nspname = PQfnumber(res, "nspname"); + i_relname = PQfnumber(res, "relname"); + i_attname = PQfnumber(res, "attname"); + for (rowno = 0; rowno < ntups; rowno++) + { + found = true; + if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %s\n", + output_path, strerror(errno)); + if (!db_used) + { + fprintf(script, "Database: %s\n", active_db->db_name); + db_used = true; + } + fprintf(script, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } + + PQclear(res); + + PQfinish(conn); + } + + if (script) + fclose(script); + + if (found) + { + pg_log(PG_REPORT, "fatal\n"); + pg_fatal("Your installation contains the 
\"xid\" data type in user tables.\n" + "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n" + "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n" + "You can remove the problem tables and restart the upgrade.\n" + "A list of the problem columns is in the file:\n" + " %s\n\n", output_path); + } + else + check_ok(); +} \ No newline at end of file diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index a4b6375403a..c5b6b198342 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -269,9 +269,11 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtepoch = str2uint(p); + cluster->controldata.chkpnt_nxtxid = str2uint64(p); /* + * Try to read 32-bit XID format 'epoch:xid'. + * * Delimiter changed from '/' to ':' in 9.6. We don't test for * the catalog version of the change because the catalog version * is pulled from pg_controldata too, and it isn't worth adding an @@ -284,11 +286,19 @@ get_control_data(ClusterInfo *cluster, bool live_check) else p = NULL; - if (p == NULL || strlen(p) <= 1) - pg_fatal("%d: controldata retrieval problem\n", __LINE__); + if (p) + { + /* Read lowest 32 bits of xid, epoch was read previously */ + if (strlen(p) <= 1) + pg_fatal("%d: controldata retrieval problem\n", __LINE__); + + p++; /* remove '/' or ':' char */ + + Assert((cluster->controldata.chkpnt_nxtxid >> 32) == 0); + cluster->controldata.chkpnt_nxtxid <<= 32; + cluster->controldata.chkpnt_nxtxid |= str2uint(p); + } - p++; /* remove '/' or ':' char */ - cluster->controldata.chkpnt_nxtxid = str2uint(p); got_xid = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextOID:")) != NULL) @@ -310,7 +320,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove 
':' char */ - cluster->controldata.chkpnt_nxtmulti = str2uint(p); + cluster->controldata.chkpnt_nxtmulti = str2uint64(p); got_multi = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL) @@ -321,7 +331,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstxid = str2uint(p); + cluster->controldata.chkpnt_oldstxid = str2uint64(p); got_oldestxid = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL) @@ -332,7 +342,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstMulti = str2uint(p); + cluster->controldata.chkpnt_oldstMulti = str2uint64(p); got_oldestmulti = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL) @@ -343,7 +353,7 @@ get_control_data(ClusterInfo *cluster, bool live_check) pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtmxoff = str2uint(p); + cluster->controldata.chkpnt_nxtmxoff = str2uint64(p); got_mxoff = true; } else if ((p = strstr(bufin, "First log segment after reset:")) != NULL) diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 1b34ee09fa6..0dafbde9abd 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -25,6 +25,7 @@ #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/checksum_impl.h" +#include "storage/fsm_internals.h" /* @@ -152,6 +153,112 @@ linkFile(const char *src, const char *dst, schemaName, relName, src, dst, strerror(errno)); } +/* Context for file rewriting */ +typedef struct FileRewriteContext +{ + const char *fromfile; + const char *tofile; + const char *schemaName; + const char *relName; + int src_fd; + int dst_fd; + ssize_t src_filesize; + ssize_t 
totalBytesRead; + BlockNumber last_blkno; + bool old_lastblk; +} FileRewriteContext; + +/* Initialize context for file rewriting */ +static void +rewriteFileInit(FileRewriteContext *cxt, + const char *fromfile, const char *tofile, + const char *schemaName, const char *relName) +{ + struct stat statbuf; + + cxt->fromfile = fromfile; + cxt->tofile = tofile; + cxt->schemaName = schemaName; + cxt->relName = relName; + cxt->totalBytesRead = 0; + cxt->last_blkno = InvalidBlockNumber; + cxt->old_lastblk = false; + + /* Open old and new files */ + if ((cxt->src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n", + schemaName, relName, fromfile, strerror(errno)); + + if (fstat(cxt->src_fd, &statbuf) != 0) + pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n", + schemaName, relName, fromfile, strerror(errno)); + + if ((cxt->dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + pg_file_create_mode)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n", + schemaName, relName, tofile, strerror(errno)); + + /* Save old file size */ + cxt->src_filesize = statbuf.st_size; +} + +/* Clean up file rewriting context */ +static void +rewriteFileCleanup(FileRewriteContext *cxt) +{ + close(cxt->dst_fd); + close(cxt->src_fd); +} + +/* Read old page of the rewritten file */ +static ssize_t +rewriteFileReadPage(FileRewriteContext *cxt, Page page) +{ + ssize_t bytesRead; + + if (cxt->totalBytesRead >= cxt->src_filesize) + return 0; + + if ((bytesRead = read(cxt->src_fd, page, BLCKSZ)) != BLCKSZ) + { + if (bytesRead < 0) + pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n", + cxt->schemaName, cxt->relName, cxt->fromfile, strerror(errno)); + else + pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n", + cxt->schemaName, cxt->relName, cxt->fromfile); + } + + 
cxt->totalBytesRead += BLCKSZ; + cxt->old_lastblk = (cxt->totalBytesRead == cxt->src_filesize); + + return bytesRead; +} + +/* Write new page of the rewritten file */ +static void +rewriteFileWritePage(FileRewriteContext *cxt, Page page, BlockNumber blkno) +{ + /* Set new checksum for page, if enabled */ + if (new_cluster.controldata.data_checksum_version != 0) + ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno); + + /* Write page */ + errno = 0; + + if ((blkno != (cxt->last_blkno == InvalidBlockNumber ? 0 : cxt->last_blkno + 1) && + lseek(cxt->dst_fd, (off_t) BLCKSZ * blkno, SEEK_SET) != (off_t) BLCKSZ * blkno) || + write(cxt->dst_fd, page, BLCKSZ) != BLCKSZ) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n", + cxt->schemaName, cxt->relName, cxt->tofile, strerror(errno)); + } + + cxt->last_blkno = blkno; +} /* * rewriteVisibilityMap() @@ -171,36 +278,19 @@ linkFile(const char *src, const char *dst, */ void rewriteVisibilityMap(const char *fromfile, const char *tofile, - const char *schemaName, const char *relName) + const char *schemaName, const char *relName) { - int src_fd; - int dst_fd; + FileRewriteContext cxt; PGAlignedBlock buffer; PGAlignedBlock new_vmbuf; - ssize_t totalBytesRead = 0; - ssize_t src_filesize; int rewriteVmBytesPerPage; BlockNumber new_blkno = 0; - struct stat statbuf; + ssize_t bytesRead; /* Compute number of old-format bytes per new page */ rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2; - if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) - pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n", - schemaName, relName, fromfile, strerror(errno)); - - if (fstat(src_fd, &statbuf) != 0) - pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n", - schemaName, relName, fromfile, strerror(errno)); - - if 
((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, - pg_file_create_mode)) < 0) - pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n", - schemaName, relName, tofile, strerror(errno)); - - /* Save old file size */ - src_filesize = statbuf.st_size; + rewriteFileInit(&cxt, fromfile, tofile, schemaName, relName); /* * Turn each visibility map page into 2 pages one by one. Each new page @@ -208,27 +298,12 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, * last page is empty, we skip it, mostly to avoid turning one-page * visibility maps for small relations into two pages needlessly. */ - while (totalBytesRead < src_filesize) + while ((bytesRead = rewriteFileReadPage(&cxt, buffer.data)) > 0) { - ssize_t bytesRead; char *old_cur; char *old_break; char *old_blkend; PageHeaderData pageheader; - bool old_lastblk; - - if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ) - { - if (bytesRead < 0) - pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n", - schemaName, relName, fromfile, strerror(errno)); - else - pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n", - schemaName, relName, fromfile); - } - - totalBytesRead += BLCKSZ; - old_lastblk = (totalBytesRead == src_filesize); /* Save the page header data */ memcpy(&pageheader, buffer.data, SizeOfPageHeaderData); @@ -253,7 +328,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData); /* Rewriting the last part of the last old page? 
*/ - old_lastpart = old_lastblk && (old_break == old_blkend); + old_lastpart = cxt.old_lastblk && (old_break == old_blkend); new_cur = new_vmbuf.data + SizeOfPageHeaderData; @@ -287,20 +362,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, if (old_lastpart && empty) break; - /* Set new checksum for visibility map page, if enabled */ - if (new_cluster.controldata.data_checksum_version != 0) - ((PageHeader) new_vmbuf.data)->pd_checksum = - pg_checksum_page(new_vmbuf.data, new_blkno); - - errno = 0; - if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n", - schemaName, relName, tofile, strerror(errno)); - } + rewriteFileWritePage(&cxt, new_vmbuf.data, new_blkno); /* Advance for next new page */ old_break += rewriteVmBytesPerPage; @@ -308,9 +370,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, } } - /* Clean up */ - close(dst_fd); - close(src_fd); + rewriteFileCleanup(&cxt); } void @@ -371,4 +431,4 @@ check_hard_link(void) strerror(errno)); unlink(new_link_file); -} +} \ No newline at end of file diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index f85cb2e2620..5299fa9a1a5 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -41,6 +41,9 @@ #include #endif +#include "access/multixact.h" +#include "access/transam.h" +#include "access/xlog_internal.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -260,7 +263,6 @@ setup(char *argv0, bool *live_check) } } - static void prepare_new_cluster(void) { @@ -403,11 +405,10 @@ create_new_objects(void) check_ok(); /* - * We don't have minmxids for databases or relations in pre-9.3 clusters, - * so set those after we have restored the schema. 
+ * Refix datfrozenxid and datminmxid */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902) - set_frozenxids(true); + if (ALREADY_64bit_XID(old_cluster) != ALREADY_64bit_XID(new_cluster)) + set_frozenxids(false); /* update new_cluster info now that we have objects in the databases */ get_db_and_rel_infos(&new_cluster); @@ -461,18 +462,36 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) static void copy_xact_xlog_xid(void) { - /* - * Copy old commit logs to new data dir. pg_clog has been renamed to - * pg_xact in post-10 clusters. - */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact"); + TransactionId next_xid; + +#define GetClogDirName(cluster) \ + GET_MAJOR_VERSION(cluster.major_version) <= 906 ? "pg_clog" : "pg_xact" + + /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */ + next_xid = ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster) ? + old_cluster.controldata.chkpnt_nxtxid : + FirstUpgradedTransactionId; + + if (ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster)) + { + /* + * Copy old commit logs to new data dir. pg_clog has been renamed to + * pg_xact in post-10 clusters. 
+ */ + copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster)); + } + else + { + /* Convert commit logs and copy to the new data dir */ + prep_status("Transforming commit log segments"); + convert_clog(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)), + psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster))); + check_ok(); + } prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/pg_resetwal\" -f -u %u \"%s\"", + "\"%s/pg_resetwal\" -f -u " XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_oldstxid, new_cluster.pgdata); check_ok(); @@ -480,19 +499,21 @@ copy_xact_xlog_xid(void) /* set the next transaction id and epoch of the new cluster */ prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/pg_resetwal\" -f -x %u \"%s\"", - new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, + "\"%s/pg_resetwal\" -f -x " XID_FMT " \"%s\"", + new_cluster.bindir, next_xid, new_cluster.pgdata); +#ifdef NOT_USED exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); +#endif /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/pg_resetwal\" -f -c %u,%u \"%s\"", + "\"%s/pg_resetwal\" -f -c " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, - old_cluster.controldata.chkpnt_nxtxid, - old_cluster.controldata.chkpnt_nxtxid, + next_xid, + next_xid, new_cluster.pgdata); check_ok(); @@ -505,8 +526,45 @@ copy_xact_xlog_xid(void) if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER && new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) { - copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); - copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + uint64 oldest_mxid = 
old_cluster.controldata.chkpnt_oldstMulti; + uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff; + + if (ALREADY_64bit_XID(old_cluster)) + { + copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); + copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + } + else + { + MultiXactOffset oldest_mxoff; + + remove_new_subdir("pg_multixact/offsets", false); + oldest_mxoff = convert_multixact_offsets("pg_multixact/offsets", "pg_multixact/offsets"); + + remove_new_subdir("pg_multixact/members", false); + convert_multixact_members("pg_multixact/members", "pg_multixact/members", oldest_mxoff); + + /* + * Handle wraparound if we're upgrading from 32 bit postgres. + * Invalid 0 mxids/offsets are skipped, so 1 becomes 2^32. + */ + if (oldest_mxoff) + { + if (next_mxid < oldest_mxid) + next_mxid += ((uint64) 1 << 32) - FirstMultiXactId; + + if (next_mxoff < oldest_mxoff) + next_mxoff += ((uint64) 1 << 32) - 1; + + /* Offsets and members were rewritten, oldest_mxoff = 1 */ + next_mxoff -= oldest_mxoff - 1; + oldest_mxoff = 1; + + /* Save converted next_mxid for possible usage in set_frozenxids() */ + old_cluster.controldata.chkpnt_nxtmulti = next_mxid; + } + } prep_status("Setting next multixact ID and offset for new cluster"); @@ -515,11 +573,9 @@ copy_xact_xlog_xid(void) * counters here and the oldest multi present on system. */ exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"", + "\"%s/pg_resetwal\" -O " XID_FMT " -m " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, - old_cluster.controldata.chkpnt_nxtmxoff, - old_cluster.controldata.chkpnt_nxtmulti, - old_cluster.controldata.chkpnt_oldstMulti, + next_mxoff, next_mxid, oldest_mxid, new_cluster.pgdata); check_ok(); } @@ -543,7 +599,7 @@ copy_xact_xlog_xid(void) * next=MaxMultiXactId, but multixact.c can cope with that just fine. 
*/ exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/pg_resetwal\" -m %u,%u \"%s\"", + "\"%s/pg_resetwal\" -m " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtmulti + 1, old_cluster.controldata.chkpnt_nxtmulti, @@ -593,6 +649,8 @@ set_frozenxids(bool minmxid_only) int ntups; int i_datname; int i_datallowconn; + TransactionId frozen_xid; + MultiXactId minmxid; if (!minmxid_only) prep_status("Setting frozenxid and minmxid counters in new cluster"); @@ -601,18 +659,24 @@ set_frozenxids(bool minmxid_only) conn_template1 = connectToServer(&new_cluster, "template1"); + frozen_xid = ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster) ? + old_cluster.controldata.chkpnt_nxtxid : + FirstNormalTransactionId; + + minmxid = old_cluster.controldata.chkpnt_nxtmulti; + if (!minmxid_only) /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " - "SET datfrozenxid = '%u'", - old_cluster.controldata.chkpnt_nxtxid)); + "SET datfrozenxid = '" XID_FMT "'", + frozen_xid)); /* set pg_database.datminmxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " - "SET datminmxid = '%u'", - old_cluster.controldata.chkpnt_nxtmulti)); + "SET datminmxid = '" XID_FMT "'", + minmxid)); /* get database names */ dbres = executeQueryOrDie(conn_template1, @@ -646,24 +710,24 @@ set_frozenxids(bool minmxid_only) /* set pg_class.relfrozenxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " - "SET relfrozenxid = '%u' " + "SET relfrozenxid = '" XID_FMT "' " /* only heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - old_cluster.controldata.chkpnt_nxtxid)); + frozen_xid)); /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " - "SET relminmxid = '%u' " + "SET relminmxid = '" XID_FMT "' " /* only 
heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - old_cluster.controldata.chkpnt_nxtmulti)); + minmxid)); PQfinish(conn); /* Reset datallowconn flag */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 22169f10021..a93dfe72c7a 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -21,6 +21,7 @@ #define MESSAGE_WIDTH 60 #define GET_MAJOR_VERSION(v) ((v) / 100) +#define ALREADY_64bit_XID(cluster) (GET_MAJOR_VERSION((cluster).major_version) >= 1500) /* contains both global db information and CREATE DATABASE commands */ #define GLOBALS_DUMP_FILE "pg_upgrade_dump_globals.sql" @@ -190,13 +191,13 @@ typedef struct uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; - uint32 chkpnt_nxtxid; - uint32 chkpnt_nxtepoch; + uint64 chkpnt_nxtxid; + uint32 chkpnt_nxtepoch; /* for 32bit xids only */ uint32 chkpnt_nxtoid; - uint32 chkpnt_nxtmulti; - uint32 chkpnt_nxtmxoff; - uint32 chkpnt_oldstMulti; - uint32 chkpnt_oldstxid; + uint64 chkpnt_nxtmulti; + uint64 chkpnt_nxtmxoff; + uint64 chkpnt_oldstMulti; + uint64 chkpnt_oldstxid; uint32 align; uint32 blocksz; uint32 largesz; @@ -426,6 +427,7 @@ void end_progress_output(void); void prep_status(const char *fmt,...) 
pg_attribute_printf(1, 2); void check_ok(void); unsigned int str2uint(const char *str); +uint64 str2uint64(const char *str); /* version.c */ @@ -444,6 +446,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void report_extension_updates(ClusterInfo *cluster); +void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode); + /* parallel.c */ void parallel_exec_prog(const char *log_file, const char *opt_log_file, const char *fmt,...) pg_attribute_printf(3, 4); @@ -451,3 +457,9 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr char *old_pgdata, char *new_pgdata, char *old_tablespace); bool reap_child(bool wait_for_child); + +/* segresize.c */ +void convert_clog(const char *olddir, const char *newdir); +MultiXactOffset convert_multixact_offsets(const char *olddir, const char *newdir); +void convert_multixact_members(const char *olddir, const char *newdir, + MultiXactOffset oldest_mxoff); \ No newline at end of file diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c new file mode 100644 index 00000000000..dc3eb1f5915 --- /dev/null +++ b/src/bin/pg_upgrade/segresize.c @@ -0,0 +1,571 @@ +/* + * segresize.c + * Segment resize utility + * + * Copyright (c) 2015-2016, Postgres Professional + * src/bin/pg_upgrade/segresize.c + */ + + +#include "postgres_fe.h" + +#include "pg_upgrade.h" +#include "access/multixact.h" +#include "access/transam.h" + + +#define OldFileName(path, seg) \ + psprintf("%s/%04X", path, seg) + +#define NewFileName(path, seg) \ + psprintf("%s/%04X%08X", path, \ + (uint32) ((seg) >> 32), (uint32) ((seg) & (int64)0xFFFFFFFF)) + +#define SLRU_PAGES_PER_SEGMENT_OLD 32 +#define SLRU_PAGES_PER_SEGMENT_NEW 2048 /* XXX SLRU_PAGES_PER_SEGMENT */ + + +static FILE* 
+create_target_file(const char *dir, int64 segno, char **pfn) +{ + char *fn; + FILE *filedesc; + + fn = NewFileName(dir, segno); + filedesc = fopen(fn, "wb"); + + if (!filedesc) + pg_fatal("Cannot create file: %s", fn); + + if (pfn) + { + if (*pfn) + pfree(*pfn); + *pfn = fn; + } + else + pfree(fn); + + return filedesc; +} + +typedef struct SLRUSegmentState +{ + const char *dir; + char *fn; + FILE *file; + uint64 segno; + uint64 pageno; + bool leading_gap; +} SLRUSegmentState; + +static void +close_segment(SLRUSegmentState *state) +{ + if (state->file != NULL) + { + fclose(state->file); + state->file = NULL; + } + + if (state->fn) + { + pfree(state->fn); + state->fn = NULL; + } +} + +static int +read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty) +{ + size_t len; + + /* Open next segment file, if needed */ + if (!state->fn) + { + if (!state->segno) + state->leading_gap = true; + state->fn = OldFileName(state->dir, (uint32) state->segno); + state->file = fopen(state->fn, "rb"); + + /* Set position to the needed page */ + if (state->file && state->pageno > 0) + { + if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET)) + { + fclose(state->file); + state->file = NULL; + } + } + } + + if (state->file) + { + /* Segment file does exist, read page from it */ + state->leading_gap = false; + + len = fread(buf, sizeof(char), BLCKSZ, state->file); + + /* Are we done or was there an error? 
*/ + if (len <= 0) + { + if (ferror(state->file)) + pg_fatal("Error reading file: %s", state->fn); + + if (feof(state->file)) + { + *is_empty = true; + len = -1; + fclose(state->file); + state->file = NULL; + } + } + else + *is_empty = false; + } + else if (!state->leading_gap) + { + /* We reached the last segment */ + len = -1; + *is_empty = true; + } + else + { + /* Skip the first few segments if they were frozen and removed */ + len = BLCKSZ; + *is_empty = true; + } + + state->pageno++; + + if (state->pageno >= SLRU_PAGES_PER_SEGMENT_OLD) + { + /* Start new segment */ + state->segno++; + state->pageno = 0; + close_segment(state); + } + + return (int) len; +} + +static void +write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty) +{ + /* + * Create a new segment file if we have not done so yet. Creation is + * postponed until the first non-empty page is found. This helps + * avoid creating completely empty segments. + */ + if (!state->file && !is_empty) + { + state->file = create_target_file(state->dir, state->segno, &state->fn); + + /* Write zeroes to the previously skipped prefix */ + if (state->pageno > 0) + { + char zerobuf[BLCKSZ] = {0}; + + for (int64 i = 0; i < state->pageno; i++) + { + if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("Could not write file: %s", state->fn); + } + } + } + + /* Write page to the new segment (if it was created) */ + if (state->file) + { + if (is_empty) + memset(buf, 0, BLCKSZ); + + if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("Could not write file: %s", state->fn); + } + + state->pageno++; + + /* + * Did we reach the maximum page number? 
Then close segment file + * and create a new one on the next iteration + */ + if (state->pageno >= SLRU_PAGES_PER_SEGMENT_NEW) + { + state->segno++; + state->pageno = 0; + close_segment(state); + } +} + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) + +#define MaxTransactionIdOld ((TransactionId) 0xFFFFFFFF) + +/* + * Convert pg_xact segments. + */ +void +convert_clog(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid; + TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid; + TransactionId xid; + uint64 pageno; + char buf[BLCKSZ] = {0}; + + oldseg.dir = old_subdir; + newseg.dir = new_subdir; + + pageno = oldest_xid / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_NEW; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_NEW; + + if (next_xid < oldest_xid) + next_xid += FirstUpgradedTransactionId; /* wraparound */ + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + int len; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionIdOld / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD) { + pageno = (MaxTransactionIdOld + 1) / CLOG_XACTS_PER_PAGE; + + Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(!oldseg.pageno); + Assert(!oldseg.file && !oldseg.fn); + oldseg.segno = 0; + + Assert(newseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_NEW); + Assert(!newseg.pageno); + Assert(!newseg.file); + newseg.segno = 0; + } + + len = read_old_segment_page(&oldseg, buf, &is_empty); + + /* + * Ignore read errors, copy all 
existing segment pages in the + * interesting xid range. + */ + is_empty |= len <= 0; + + if (!is_empty && len < BLCKSZ) + memset(&buf[len], 0, BLCKSZ - len); + + write_new_segment_page(&newseg, buf, is_empty); + } + + /* Release resources */ + close_segment(&oldseg); + close_segment(&newseg); +} + +typedef uint32 MultiXactIdOld; +typedef uint64 MultiXactIdNew; + +typedef uint32 MultiXactOffsetOld; +typedef uint64 MultiXactOffsetNew; + +#define MaxMultiXactIdOld ((MultiXactIdOld) 0xFFFFFFFF) +#define MaxMultiXactOffsetOld ((MultiXactOffsetOld) 0xFFFFFFFF) + +#define MXACT_OFFSETS_PER_BLOCK_OLD (BLCKSZ / sizeof(MultiXactOffsetOld)) +#define MXACT_OFFSETS_PER_BLOCK_NEW (BLCKSZ / sizeof(MultiXactOffsetNew)) + +/* + * Convert pg_multixact/offsets segments and return oldest mxid offset. + */ +MultiXactOffsetNew +convert_multixact_offsets(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + MultiXactOffsetOld oldbuf[MXACT_OFFSETS_PER_BLOCK_OLD] = {0}; + MultiXactOffsetNew newbuf[MXACT_OFFSETS_PER_BLOCK_NEW] = {0}; + MultiXactOffsetOld oldest_mxoff = 0; + MultiXactId oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + MultiXactId next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + MultiXactId mxid; + uint64 old_entry; + uint64 new_entry; + bool oldest_mxoff_known = false; + + oldseg.dir = psprintf("%s/%s", old_cluster.pgdata, old_subdir); + newseg.dir = psprintf("%s/%s", new_cluster.pgdata, new_subdir); + + old_entry = oldest_mxid % MXACT_OFFSETS_PER_BLOCK_OLD; + oldseg.pageno = oldest_mxid / MXACT_OFFSETS_PER_BLOCK_OLD; + oldseg.segno = oldseg.pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno %= SLRU_PAGES_PER_SEGMENT_OLD; + + new_entry = oldest_mxid % MXACT_OFFSETS_PER_BLOCK_NEW; + newseg.pageno = oldest_mxid / MXACT_OFFSETS_PER_BLOCK_NEW; + newseg.segno = newseg.pageno / SLRU_PAGES_PER_SEGMENT_NEW; + newseg.pageno %= SLRU_PAGES_PER_SEGMENT_NEW; + + if (next_mxid < oldest_mxid) + next_mxid += 
(uint64) 1 << 32; /* wraparound */ + + prep_status("Converting old %s to new format", old_subdir); + + /* Copy mxid offsets reading only needed segment pages */ + for (mxid = oldest_mxid; mxid < next_mxid; old_entry = 0) + { + int oldlen; + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxMultiXactIdOld / MXACT_OFFSETS_PER_BLOCK_OLD / SLRU_PAGES_PER_SEGMENT_OLD) /* 0xFFFF */ + oldseg.segno = 0; + + oldlen = read_old_segment_page(&oldseg, oldbuf, &is_empty); + + if (oldlen <= 0 || is_empty) + { + char pageno_str[32]; + + snprintf(pageno_str, sizeof(pageno_str), UINT64_FORMAT, oldseg.pageno); + pg_fatal("Cannot read page %s from segment: %s\n", + pageno_str, oldseg.fn); + } + + if (oldlen < BLCKSZ) + memset((char *) oldbuf + oldlen, 0, BLCKSZ - oldlen); + + /* Save oldest mxid offset */ + if (!oldest_mxoff_known) + { + oldest_mxoff = oldbuf[old_entry]; + oldest_mxoff_known = true; + } + + /* Skip wrapped-around invalid MultiXactIds */ + if (mxid == (MultiXactId) 1 << 32) + { + Assert(oldseg.segno == 0); + Assert(oldseg.pageno == 1); + Assert(old_entry == 0); + mxid += FirstMultiXactId; + old_entry = FirstMultiXactId; + } + + /* Copy entries to the new page */ + for (; mxid < next_mxid && old_entry < MXACT_OFFSETS_PER_BLOCK_OLD; + mxid++, old_entry++) + { + MultiXactOffsetNew mxoff = oldbuf[old_entry]; + + /* Handle possible offset wraparound (1 becomes 2^32) */ + if (mxoff < oldest_mxoff) + mxoff += ((uint64) 1 << 32) - 1; + + /* Subtract oldest_mxoff, so new offsets will start from 1 */ + newbuf[new_entry++] = mxoff - oldest_mxoff + 1; + + if (new_entry >= MXACT_OFFSETS_PER_BLOCK_NEW) + { + /* Write new page */ + write_new_segment_page(&newseg, newbuf, false); + new_entry = 0; + } + } + } + + /* Write the last incomplete page */ + if (new_entry > 0 || oldest_mxid == next_mxid) + { + memset(&newbuf[new_entry], 0, + sizeof(newbuf[0]) * (MXACT_OFFSETS_PER_BLOCK_NEW - new_entry)); + write_new_segment_page(&newseg, newbuf, false); + } + 
+ /* Use next_mxoff as oldest_mxoff, if oldest_mxid == next_mxid */ + if (!oldest_mxoff_known) + { + Assert(oldest_mxid == next_mxid); + oldest_mxoff = (MultiXactOffsetNew) old_cluster.controldata.chkpnt_nxtmxoff; + } + + /* Release resources */ + close_segment(&oldseg); + close_segment(&newseg); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + check_ok(); + + return oldest_mxoff; +} + +typedef uint32 TransactionIdOld; +typedef uint64 TransactionIdNew; + +#define MXACT_MEMBERS_FLAG_BYTES 1 + +#define MXACT_MEMBERS_PER_GROUP_OLD 4 +#define MXACT_MEMBERS_GROUP_SIZE_OLD (MXACT_MEMBERS_PER_GROUP_OLD * (sizeof(TransactionIdOld) + MXACT_MEMBERS_FLAG_BYTES)) +#define MXACT_MEMBER_GROUPS_PER_PAGE_OLD (BLCKSZ / MXACT_MEMBERS_GROUP_SIZE_OLD) +#define MXACT_MEMBERS_PER_PAGE_OLD (MXACT_MEMBERS_PER_GROUP_OLD * MXACT_MEMBER_GROUPS_PER_PAGE_OLD) +#define MXACT_MEMBER_FLAG_BYTES_PER_GROUP_OLD MXACT_MEMBERS_FLAG_BYTES * MXACT_MEMBERS_PER_GROUP_OLD + +#define MXACT_MEMBERS_PER_GROUP_NEW 8 +#define MXACT_MEMBERS_GROUP_SIZE_NEW (MXACT_MEMBERS_PER_GROUP_NEW * (sizeof(TransactionIdNew) + MXACT_MEMBERS_FLAG_BYTES)) +#define MXACT_MEMBER_GROUPS_PER_PAGE_NEW (BLCKSZ / MXACT_MEMBERS_GROUP_SIZE_NEW) + +/* + * Convert pg_multixact/members segments, offsets will start from 1. 
+ */ +void +convert_multixact_members(const char *old_subdir, const char *new_subdir, + MultiXactOffset oldest_mxoff) +{ + MultiXactOffsetNew next_mxoff = (MultiXactOffsetNew) old_cluster.controldata.chkpnt_nxtmxoff; + MultiXactOffsetNew mxoff; + SLRUSegmentState oldseg = { 0 }; + SLRUSegmentState newseg = { 0 }; + char oldbuf[BLCKSZ] = { 0 }; + char newbuf[BLCKSZ] = { 0 }; + int newgroup; + int newmember; + char *newflag = newbuf; + TransactionIdNew *newxid = (TransactionIdNew *)(newflag + MXACT_MEMBERS_FLAG_BYTES * MXACT_MEMBERS_PER_GROUP_NEW); + int newidx; + int oldidx; + + oldseg.dir = psprintf("%s/%s", old_cluster.pgdata, old_subdir); + newseg.dir = psprintf("%s/%s", new_cluster.pgdata, new_subdir); + + prep_status("Converting old %s to new format", old_subdir); + + if (next_mxoff < oldest_mxoff) + next_mxoff += (uint64) 1 << 32; + + /* Initialize old starting position */ + oldidx = oldest_mxoff % MXACT_MEMBERS_PER_PAGE_OLD; + oldseg.pageno = oldest_mxoff / MXACT_MEMBERS_PER_PAGE_OLD; + oldseg.segno = oldseg.pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno %= SLRU_PAGES_PER_SEGMENT_OLD; + + /* Initialize new starting position (skip invalid zero offset) */ + newgroup = 0; + newidx = 1; + newmember = 1; + newflag++; + newxid++; + + /* Iterate through the original directory */ + for (mxoff = oldest_mxoff; mxoff < next_mxoff; oldidx = 0) + { + bool old_is_empty; + int oldlen = read_old_segment_page(&oldseg, oldbuf, &old_is_empty); + int ngroups; + int oldgroup; + int oldmember; + + if (old_is_empty || oldlen <= 0) + { + char pageno_str[32]; + + snprintf(pageno_str, sizeof(pageno_str), UINT64_FORMAT, oldseg.pageno); + pg_fatal("Cannot read page %s from segment: %s\n", + pageno_str, oldseg.fn); + } + + if (oldlen < BLCKSZ) + { + memset(oldbuf + oldlen, 0, BLCKSZ - oldlen); + oldlen = BLCKSZ; + } + + ngroups = oldlen / MXACT_MEMBERS_GROUP_SIZE_OLD; + + /* Iterate through old member groups */ + for (oldgroup = oldidx / MXACT_MEMBERS_PER_GROUP_OLD, + oldmember = 
oldidx % MXACT_MEMBERS_PER_GROUP_OLD; + oldgroup < ngroups && mxoff < next_mxoff; + oldgroup++, oldmember = 0) + { + char *oldflag = (char *) oldbuf + oldgroup * MXACT_MEMBERS_GROUP_SIZE_OLD; + TransactionIdOld *oldxid = (TransactionIdOld *)(oldflag + MXACT_MEMBER_FLAG_BYTES_PER_GROUP_OLD); + + oldxid += oldmember; + oldflag += oldmember; + + /* Iterate through old members */ + for (int j = 0; + j < MXACT_MEMBERS_PER_GROUP_OLD && mxoff < next_mxoff; + j++) + { + /* Copy member's xid and flags to the new page */ + *newflag++ = *oldflag++; + *newxid++ = (TransactionIdNew) *oldxid++; + + newidx++; + oldidx++; + mxoff++; + + if (++newmember >= MXACT_MEMBERS_PER_GROUP_NEW) + { + /* Start next member group */ + newmember = 0; + + if (++newgroup >= MXACT_MEMBER_GROUPS_PER_PAGE_NEW) + { + /* Write current page and start new */ + newgroup = 0; + newidx = 0; + write_new_segment_page(&newseg, newbuf, false); + memset(newbuf, 0, BLCKSZ); + } + + newflag = (char *) newbuf + newgroup * MXACT_MEMBERS_GROUP_SIZE_NEW; + newxid = (TransactionIdNew *)(newflag + MXACT_MEMBERS_FLAG_BYTES * MXACT_MEMBERS_PER_GROUP_NEW); + } + + /* Handle offset wraparound */ + if (mxoff > MaxMultiXactOffsetOld) + { + Assert(mxoff == (uint64) 1 << 32); + Assert(oldseg.segno == MaxMultiXactOffsetOld / MXACT_MEMBERS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldseg.pageno == MaxMultiXactOffsetOld / MXACT_MEMBERS_PER_PAGE_OLD % SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldmember == MaxMultiXactOffsetOld % MXACT_MEMBERS_PER_PAGE_OLD); + + /* Switch to segment 0000 */ + close_segment(&oldseg); + oldseg.segno = 0; + oldseg.pageno = 0; + + oldidx = 1; /* skip invalid zero mxid offset */ + } + } + } + } + + /* Write last page, unless it is empty */ + if (newflag > (char *) newbuf || oldest_mxoff == next_mxoff) + write_new_segment_page(&newseg, newbuf, false); + + /* Release resources */ + close_segment(&oldseg); + close_segment(&newseg); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + 
check_ok(); +} diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index d6a318367ab..c6361e3c085 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -24,7 +24,7 @@ standard_initdb() { # without increasing test runtime, run these tests with a custom setting. # Also, specify "-A trust" explicitly to suppress initdb's warning. # --allow-group-access and --wal-segsize have been added in v11. - "$1" -N --wal-segsize 1 --allow-group-access -A trust + "$1" -N --wal-segsize 1 --allow-group-access -A trust -x 21000000000 if [ -n "$TEMP_CONFIG" -a -r "$TEMP_CONFIG" ] then cat "$TEMP_CONFIG" >> "$PGDATA/postgresql.conf" @@ -195,6 +195,12 @@ if "$MAKE" -C "$oldsrc" installcheck-parallel; then fi fi + psql -X -d regression << EOF + CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4)); + INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000); +EOF + psql -X -d regression -c"SELECT relfrozenxid, relminmxid FROM pg_class WHERE relname = 't1';" > "$temp_root"/old_xids.txt + pg_dumpall $extra_dump_options --no-sync \ -f "$temp_root"/dump1.sql || pg_dumpall1_status=$? @@ -255,6 +261,17 @@ esac pg_ctl start -l "$logdir/postmaster2.log" -o "$POSTMASTER_OPTS" -w +psql -X -d regression -c"SELECT relfrozenxid, relminmxid FROM pg_class WHERE relname = 't1';" > "$temp_root"/new_xids.txt + +if diff -u "$temp_root"/new_xids.txt "$temp_root"/old_xids.txt > "$temp_root"/xids.diff; then + rm "$temp_root"/xids.diff + echo "xids are identical, PASSED" +else + echo "Files $temp_root/new_xids.txt and $temp_root/old_xids.txt differ" + echo "See $temp_root/xids.diff" + exit 1 +fi + pg_dumpall $extra_dump_options --no-sync \ -f "$temp_root"/dump2.sql || pg_dumpall2_status=$? 
pg_ctl -m fast stop diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c index fc20472fe7b..a04b04aa34c 100644 --- a/src/bin/pg_upgrade/util.c +++ b/src/bin/pg_upgrade/util.c @@ -241,3 +241,20 @@ str2uint(const char *str) { return strtoul(str, NULL, 10); } + +/* + * str2uint64() + * + * convert string to 64-bit unsigned int + */ +uint64 +str2uint64(const char *str) +{ +#ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, NULL, 10); +#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, NULL, 10); +#else + return strtoul(str, NULL, 10); +#endif +} diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 0c00bc542c6..105d9b0ebf4 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,6 +9,7 @@ #include "postgres_fe.h" +#include "access/transam.h" #include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" @@ -238,19 +239,21 @@ old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) } /* - * old_9_6_invalidate_hash_indexes() - * 9.6 -> 10 - * Hash index binary format has changed from 9.6->10.0 + * invalidate_indexes() + * Invalidates all indexes satisfying given predicate. 
*/ -void -old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +static void +invalidate_indexes(ClusterInfo *cluster, bool check_mode, + const char *name, const char *pred) { int dbnum; FILE *script = NULL; bool found = false; - char *output_path = "reindex_hash.sql"; + char output_path[MAXPGPATH]; + + snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name); - prep_status("Checking for hash indexes"); + prep_status("Checking for %s indexes", name); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) { @@ -263,18 +266,28 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; PGconn *conn = connectToServer(cluster, active_db->db_name); - /* find hash indexes */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_index i, " - " pg_catalog.pg_am a, " - " pg_catalog.pg_namespace n " - "WHERE i.indexrelid = c.oid AND " - " c.relam = a.oid AND " - " c.relnamespace = n.oid AND " - " a.amname = 'hash'" - ); + + /* + * Find indexes satisfying predicate. + * + * System indexes (with oids < FirstNormalObjectId) are excluded from + * the search as they are recreated in the new cluster during initdb. 
+ */ + res = executeQueryOrDie( + conn, + "SELECT n.nspname, c.relname, i.indexrelid " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_index i, " + " pg_catalog.pg_am a, " + " pg_catalog.pg_namespace n " + "WHERE i.indexrelid = c.oid AND " + " c.relam = a.oid AND " + " c.relnamespace = n.oid AND " + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s " + "ORDER BY i.indexrelid ASC", + FirstNormalObjectId, + pred); ntups = PQntuples(res); i_nspname = PQfnumber(res, "nspname"); @@ -307,17 +320,26 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) if (!check_mode && db_used) { - /* mark hash indexes as invalid */ - PQclear(executeQueryOrDie(conn, - "UPDATE pg_catalog.pg_index i " - "SET indisvalid = false " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_am a, " - " pg_catalog.pg_namespace n " - "WHERE i.indexrelid = c.oid AND " - " c.relam = a.oid AND " - " c.relnamespace = n.oid AND " - " a.amname = 'hash'")); + /* + * Mark indexes satisfying predicate as invalid. + * + * System indexes (with oids < FirstNormalObjectId) are excluded + * from the search (see above). + */ + PQclear(executeQueryOrDie( + conn, + "UPDATE pg_catalog.pg_index i " + "SET indisvalid = false " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_am a, " + " pg_catalog.pg_namespace n " + "WHERE i.indexrelid = c.oid AND " + " c.relam = a.oid AND " + " c.relnamespace = n.oid AND " + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s", + FirstNormalObjectId, + pred)); } PQfinish(conn); @@ -331,24 +353,37 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) report_status(PG_WARNING, "warning"); if (check_mode) pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. 
After upgrading, you will be given\n" - "REINDEX instructions.\n\n"); + "REINDEX instructions.\n\n", + name); else pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. The file\n" " %s\n" "when executed by psql by the database superuser will recreate all invalid\n" "indexes; until then, none of these indexes will be used.\n\n", + name, output_path); } else check_ok(); } +/* + * old_9_6_invalidate_hash_indexes() + * 9.6 -> 10 + * Hash index binary format has changed from 9.6->10.0 + */ +void +old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'"); +} + /* * old_11_check_for_sql_identifier_data_type_usage() * 11 -> 12 @@ -458,3 +493,36 @@ report_extension_updates(ClusterInfo *cluster) else check_ok(); } + +/* + * invalidate_spgist_indexes() + * 32bit -> 64bit + * SP-GIST contains xids. + */ +void +invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'"); +} + +/* + * invalidate_gin_indexes() + * 32bit -> 64bit + * Gin indexes contains xids in deleted pages. 
+ */ +void +invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'"); +} + +/* + * invalidate_external_indexes() + * Generate script to REINDEX non standard external indexes (like RUM etc) + */ +void +invalidate_external_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "external", + "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')"); +} diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl index 7a8c5d7a040..276d761e695 100644 --- a/src/bin/pg_verifybackup/t/003_corruption.pl +++ b/src/bin/pg_verifybackup/t/003_corruption.pl @@ -176,7 +176,7 @@ sub mutilate_extra_tablespace_file sub mutilate_missing_file { my ($backup_path) = @_; - my $pathname = "$backup_path/pg_xact/0000"; + my $pathname = "$backup_path/pg_xact/0000000048C0"; unlink($pathname) || die "$pathname: $!"; return; } diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f66b5a8dba0..c92d876e5d1 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -490,7 +490,7 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) XLogDumpRecordLen(record, &rec_len, &fpi_len); - printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", + printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: " XID_FMT ", lsn: %X/%08X, prev %X/%08X, ", desc->rm_name, rec_len, XLogRecGetTotalLen(record), XLogRecGetXid(record), @@ -950,7 +950,7 @@ main(int argc, char **argv) } break; case 'x': - if (sscanf(optarg, "%u", &config.filter_by_xid) != 1) + if (sscanf(optarg, XID_FMT, &config.filter_by_xid) != 1) { pg_log_error("could not parse \"%s\" as a transaction ID", optarg); diff --git a/src/include/access/clog.h b/src/include/access/clog.h index 39b8e4afa8a..2c98ad2b906 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -31,7 
+31,7 @@ typedef int XidStatus; typedef struct xl_clog_truncate { - int pageno; + int64 pageno; TransactionId oldestXact; Oid oldestXactDb; } xl_clog_truncate; diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h index a1538978c62..43de856904c 100644 --- a/src/include/access/commit_ts.h +++ b/src/include/access/commit_ts.h @@ -60,7 +60,7 @@ typedef struct xl_commit_ts_set typedef struct xl_commit_ts_truncate { - int pageno; + int64 pageno; TransactionId oldestXid; } xl_commit_ts_truncate; diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h index 37d650ac2a0..32e21daa747 100644 --- a/src/include/access/ginblock.h +++ b/src/include/access/ginblock.h @@ -133,8 +133,15 @@ typedef struct GinMetaPageData * We should reclaim deleted page only once every transaction started before * its deletion is over. */ -#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageGetDeleteXid(page) ( \ + (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? 
\ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \ + InvalidTransactionId ) +#define GinPageSetDeleteXid(page, xid) \ + do { \ + ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \ + } while (false) extern bool GinPageIsRecyclable(Page page); /* diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 4b06575d987..79cfb07bd28 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -223,7 +223,7 @@ GistPageGetDeleteXid(Page page) return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; } else - return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + return FullTransactionIdFromXid(FirstNormalTransactionId); } /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index f3fb1e93a59..0fb28841a76 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -146,6 +146,10 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); +extern bool heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi); +extern bool rewrite_page_prepare_for_xid(Page page, TransactionId xid, + bool multi); extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); @@ -164,10 +168,10 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, struct TM_FailureData *tmfd); extern void heap_inplace_update(Relation relation, HeapTuple tuple); -extern bool heap_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, 
TransactionId cutoff_multi); -extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, +extern bool heap_tuple_needs_freeze(HeapTuple tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); @@ -187,11 +191,13 @@ extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId old_snap_xmin, TimestampTz old_snap_ts_ts, int *nnewlpdead, - OffsetNumber *off_loc); + OffsetNumber *off_loc, + bool repairFragmentation); extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused); + OffsetNumber *nowunused, int nunused, + bool repairFragmentation); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/vacuumlazy.c */ @@ -210,7 +216,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple stup, Buffer buffer TransactionId *dead_after); extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); -extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool HeapTupleIsOnlyLocked(HeapTuple htup); extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index ab9e873bc0d..9d9d53959c8 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -59,6 +59,8 @@ #define XLOG_HEAP2_LOCK_UPDATED 0x60 #define XLOG_HEAP2_NEW_CID 0x70 +#define XLOG_HEAP3_BASE_SHIFT 0x00 + /* * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available. 
*/ @@ -389,7 +391,16 @@ typedef struct xl_heap_rewrite_mapping XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; -extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, +/* shift the base of xids on heap page */ +typedef struct xl_heap_base_shift +{ + int64 delta; /* delta value to shift the base */ + bool multi; /* true to shift multixact base */ +} xl_heap_base_shift; + +#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, multi) + sizeof(bool)) + +extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid); extern void heap_redo(XLogReaderState *record); @@ -399,19 +410,24 @@ extern void heap_mask(char *pagedata, BlockNumber blkno); extern void heap2_redo(XLogReaderState *record); extern void heap2_desc(StringInfo buf, XLogReaderState *record); extern const char *heap2_identify(uint8 info); +extern void heap3_redo(XLogReaderState *record); +extern void heap3_desc(StringInfo buf, XLogReaderState *record); +extern const char *heap3_identify(uint8 info); extern void heap_xlog_logical_rewrite(XLogReaderState *r); extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples); -extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_prepare_freeze_tuple(HeapTuple htup, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen); -extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, +extern void heap_execute_freeze_tuple(HeapTuple tuple, + xl_heap_freeze_tuple *xlrec_tp); +extern void heap_execute_freeze_tuple_page(Page page, HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); diff --git a/src/include/access/heaptoast.h 
b/src/include/access/heaptoast.h index 8b29f1a9862..db8cbf5bb79 100644 --- a/src/include/access/heaptoast.h +++ b/src/include/access/heaptoast.h @@ -22,7 +22,7 @@ */ #define MaximumBytesPerTuple(tuplesPerPage) \ MAXALIGN_DOWN((BLCKSZ - \ - MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \ / (tuplesPerPage)) /* diff --git a/src/include/access/htup.h b/src/include/access/htup.h index cf0bbd70455..d8812c0efd4 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -54,6 +54,10 @@ typedef MinimalTupleData *MinimalTuple; * this can't be told apart from case #1 by inspection; code setting up * or destroying this representation has to know what it's doing. * + * t_xid_base and t_multi_base are base values for calculation of transaction + * identifiers from t_xmin and t_xmax in heap tuple header. Normally they + * are copied from page header or another tuple. + * * t_len should always be valid, except in the pointer-to-nothing case. * t_self and t_tableOid should be valid if the HeapTupleData points to * a disk buffer, or if it represents a copy of a tuple on disk. 
They @@ -62,9 +66,11 @@ typedef MinimalTupleData *MinimalTuple; typedef struct HeapTupleData { uint32 t_len; /* length of *t_data */ + TransactionId t_xid_base; /* base value for normal transaction ids */ + TransactionId t_multi_base; /* base value for multixact */ ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ -#define FIELDNO_HEAPTUPLEDATA_DATA 3 +#define FIELDNO_HEAPTUPLEDATA_DATA 5 HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; @@ -78,12 +84,12 @@ typedef HeapTupleData *HeapTuple; #define HeapTupleIsValid(tuple) PointerIsValid(tuple) /* HeapTupleHeader functions implemented in utils/time/combocid.c */ -extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); -extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); -extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, +extern CommandId HeapTupleGetCmin(HeapTuple tup); +extern CommandId HeapTupleGetCmax(HeapTuple tup); +extern void HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ -extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple); +extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple); #endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 960772f76b9..519362cf25c 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -120,13 +120,13 @@ typedef struct HeapTupleFields { - TransactionId t_xmin; /* inserting xact ID */ - TransactionId t_xmax; /* deleting or locking xact ID */ + ShortTransactionId t_xmin; /* inserting xact ID */ + ShortTransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting command ID, or both */ - TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; @@ -222,7 +222,7 @@ 
struct HeapTupleHeaderData * HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is * not a multi and the EXCL_LOCK bit is set. * - * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible + * See also HeapTupleIsOnlyLocked, which also checks for a possible * aborted updater transaction. * * Beware of multiple evaluations of the argument. @@ -297,6 +297,63 @@ struct HeapTupleHeaderData * macros evaluate their other argument only once. */ +/* + * Copy base values for xid and multixacts from page to heap tuple. Should be + * called each time tuple is read from page. Otherwise, it would be impossible + * to correctly read tuple xmin and xmax. + */ +#define HeapTupleCopyBaseFromPage(tup, page) \ +{ \ + (tup)->t_xid_base = HeapPageGetSpecial(page)->pd_xid_base; \ + (tup)->t_multi_base = HeapPageGetSpecial(page)->pd_multi_base; \ +} + +/* + * Copy base values for xid and multixacts from one heap tuple to heap tuple. + * Should be called on tuple copy or making dest tuple on the basis of src tuple + * saving visibility information. + */ +#define HeapTupleCopyBase(dest, src) \ +{ \ + (dest)->t_xid_base = (src)->t_xid_base; \ + (dest)->t_multi_base = (src)->t_multi_base; \ +} + +/* + * Set base values for tuple xids/multixacts to zero. Used when visibility + * information is negligible or will be set later. + */ +#define HeapTupleSetZeroBase(tup) \ +{ \ + (tup)->t_xid_base = InvalidTransactionId; \ + (tup)->t_multi_base = InvalidTransactionId; \ +} + +/* + * Macros for accessing "double xmax". On pg_upgraded instances, it might + * happen that we can't fit new special area to the page. But we still + * might need to write xmax of tuples for updates and deletes. The trick is + * that we actually don't need xmin field. After pg_upgrade (which implies + * restart) no insertions went to this page yet (otherwise special area could + * fit). So, if tuple is visible (otherwise it would be deleted), then it's + * visible for everybody. 
Thus, t_xmin isn't needed. Therefore, we can use + * both t_xmin and t_xmax to store 64-bit xmax. + * + * See heap_convert.c for details. + */ +#define HeapTupleIsDoubleXmax(tup) \ + ((tup)->t_xid_base == MaxTransactionId) + +#define HeapTupleHeaderGetDoubleXmax(tup) \ + ((TransactionId)(tup)->t_choice.t_heap.t_xmax + \ + ((TransactionId)(tup)->t_choice.t_heap.t_xmin << 32)) + +#define HeapTupleHeaderSetDoubleXmax(tup, xid) \ +do { \ + (tup)->t_choice.t_heap.t_xmax = (TransactionId) (xid) & 0xFFFFFFFF; \ + (tup)->t_choice.t_heap.t_xmin = ((TransactionId) (xid) >> 32) & 0xFFFFFFFF; \ +} while (0) + /* * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid * originally used to insert the tuple. However, the tuple might actually @@ -305,20 +362,29 @@ struct HeapTupleHeaderData * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ -#define HeapTupleHeaderGetRawXmin(tup) \ +#define HeapTupleGetRawXmin(tup) \ ( \ - (tup)->t_choice.t_heap.t_xmin \ + HeapTupleIsDoubleXmax(tup) ? \ + FrozenTransactionId : \ + ShortTransactionIdToNormal((tup)->t_xid_base, (tup)->t_data->t_choice.t_heap.t_xmin) \ ) -#define HeapTupleHeaderGetXmin(tup) \ +#define HeapTupleGetXmin(tup) \ ( \ - HeapTupleHeaderXminFrozen(tup) ? \ - FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \ + HeapTupleHeaderXminFrozen((tup)->t_data) ? 
\ + FrozenTransactionId : HeapTupleGetRawXmin(tup) \ ) -#define HeapTupleHeaderSetXmin(tup, xid) \ +#define HeapTupleSetXmin(tup, xid) \ ( \ - (tup)->t_choice.t_heap.t_xmin = (xid) \ + AssertMacro(!HeapTupleIsDoubleXmax(tup)), \ + (tup)->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort((tup)->t_xid_base, (xid)) \ +) + +#define HeapTupleHeaderSetXmin(page, tup, xid) \ +( \ + AssertMacro(!HeapPageIsDoubleXmax(page)), \ + (tup)->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (xid)) \ ) #define HeapTupleHeaderXminCommitted(tup) \ @@ -362,30 +428,60 @@ struct HeapTupleHeaderData * to resolve the MultiXactId if necessary. This might involve multixact I/O, * so it should only be used if absolutely necessary. */ -#define HeapTupleHeaderGetUpdateXid(tup) \ +#define HeapTupleGetUpdateXidAny(tup) \ ( \ - (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \ - ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \ - !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ + (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \ + !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ HeapTupleGetUpdateXid(tup) \ : \ - HeapTupleHeaderGetRawXmax(tup) \ + HeapTupleGetRawXmax(tup) \ ) -#define HeapTupleHeaderGetRawXmax(tup) \ +#define HeapTupleGetRawXmax(tup) \ ( \ - (tup)->t_choice.t_heap.t_xmax \ + HeapTupleIsDoubleXmax(tup) ? \ + HeapTupleHeaderGetDoubleXmax((tup)->t_data) : \ + ShortTransactionIdToNormal( \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? (tup)->t_multi_base : (tup)->t_xid_base, \ + (tup)->t_data->t_choice.t_heap.t_xmax) \ ) -#define HeapTupleHeaderSetXmax(tup, xid) \ +#define HeapTupleHeaderGetRawXmax(page, tup) \ ( \ - (tup)->t_choice.t_heap.t_xmax = (xid) \ + HeapPageIsDoubleXmax(page) ? \ + HeapTupleHeaderGetDoubleXmax(tup) : \ + ShortTransactionIdToNormal( \ + ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? 
HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ + (tup)->t_choice.t_heap.t_xmax) \ ) +#define HeapTupleSetXmax(tup, xid) \ +do { \ + if (HeapTupleIsDoubleXmax(tup)) \ + HeapTupleHeaderSetDoubleXmax((tup)->t_data, (xid)); \ + else \ + (tup)->t_data->t_choice.t_heap.t_xmax = \ + NormalTransactionIdToShort( \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? (tup)->t_multi_base : (tup)->t_xid_base, \ + (xid)); \ +} while (0) + +#define HeapTupleHeaderSetXmax(page, tup, xid) \ +do { \ + if (HeapPageIsDoubleXmax(tup)) \ + HeapTupleHeaderSetDoubleXmax((tup), (xid)); \ + else \ + (tup)->t_choice.t_heap.t_xmax = \ + NormalTransactionIdToShort( \ + ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ + (xid)); \ +} while (0) + /* * HeapTupleHeaderGetRawCommandId will give you what's in the header whether - * it is useful or not. Most code should use HeapTupleHeaderGetCmin or - * HeapTupleHeaderGetCmax instead, but note that those Assert that you can + * it is useful or not. Most code should use HeapTupleGetCmin or + * HeapTupleGetCmax instead, but note that those Assert that you can * get a legitimate result, ie you are in the originating transaction! */ #define HeapTupleHeaderGetRawCommandId(tup) \ @@ -556,7 +652,7 @@ do { \ * ItemIds and tuples have different alignment requirements, don't assume that * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. 
*/ -#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) +#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) #define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader) /* diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4bbb035eaea..0f7597fc755 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -18,16 +18,16 @@ /* * The first two MultiXactId values are reserved to store the truncation Xid - * and epoch of the first segment, so we start assigning multixact values from + * and base of the first segment, so we start assigning multixact values from * 2. */ -#define InvalidMultiXactId ((MultiXactId) 0) -#define FirstMultiXactId ((MultiXactId) 1) -#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) +#define InvalidMultiXactId UINT64CONST(0) +#define FirstMultiXactId UINT64CONST(1) +#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) -#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) +#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF) /* Number of SLRU buffers to use for multixact */ #define NUM_MULTIXACTOFFSET_BUFFERS 8 @@ -114,9 +114,6 @@ extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly); extern void MultiXactIdSetOldestMember(void); extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids, bool allow_old, bool isLockOnly); -extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2); -extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, - MultiXactId multi2); extern int multixactoffsetssyncfiletag(const FileTag *ftag, char *path); extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path); @@ -146,7 +143,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti, extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset 
minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); -extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 30a216e4c0d..496b9f39dbb 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -62,8 +62,10 @@ typedef uint16 BTCycleId; typedef struct BTPageOpaqueData { BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ + /* ... or next transaction ID (lower part) */ BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ uint32 btpo_level; /* tree level --- zero for leaf pages */ + /* ... or next transaction ID (lower part) */ uint16 btpo_flags; /* flag bits, see below */ BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ } BTPageOpaqueData; @@ -90,6 +92,14 @@ typedef BTPageOpaqueData *BTPageOpaque; */ #define MAX_BT_CYCLE_ID 0xFF7F +/* Macros for access xact */ +#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) opaque)->btpo_prev << 32) | \ + (uint64) ((BTPageOpaque) opaque)->btpo_level) +#define BTP_SET_XACT(opaque, xact) \ +do { \ + ((BTPageOpaque) opaque)->btpo_prev = (uint32) (xact >> 32); \ + ((BTPageOpaque) opaque)->btpo_level = (uint32) xact; \ +} while (0) /* * The Meta page is always the first page in the btree index. 
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index 121f5524051..57f5f5db578 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData * 6) xid of the xact performing the mapping * --- */ -#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x" void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index f582cf535f6..367b8cebf06 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL) +PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask) diff --git a/src/include/access/slru.h b/src/include/access/slru.h index dd52e8cec7e..81466f54d8d 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -31,7 +31,7 @@ * take no explicit notice of that fact in slru.c, except when comparing * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). */ -#define SLRU_PAGES_PER_SEGMENT 32 +#define SLRU_PAGES_PER_SEGMENT 2048 /* * Page status codes. Note that these do not include the "dirty" bit. 
@@ -64,7 +64,7 @@ typedef struct SlruSharedData char **page_buffer; SlruPageStatus *page_status; bool *page_dirty; - int *page_number; + int64 *page_number; int *page_lru_count; LWLockPadded *buffer_locks; @@ -95,7 +95,7 @@ typedef struct SlruSharedData * this is not critical data, since we use it only to avoid swapping out * the latest page. */ - int latest_page_number; + int64 latest_page_number; /* SLRU's index for statistics purposes (might not be unique) */ int slru_stats_idx; @@ -127,7 +127,7 @@ typedef struct SlruCtlData * the behavior of this callback has no functional implications.) Use * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria. */ - bool (*PagePrecedes) (int, int); + bool (*PagePrecedes) (int64, int64); /* * Dir is set during SimpleLruInit and does not change thereafter. Since @@ -143,10 +143,10 @@ extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler); -extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); -extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, +extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno); +extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid); -extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, +extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied); @@ -155,20 +155,20 @@ extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); #else #define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) #endif -extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); -extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); +extern void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage); +extern bool 
SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno); -typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, - void *data); +typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int64 segpage, + void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); -extern void SlruDeleteSegment(SlruCtl ctl, int segno); +extern void SlruDeleteSegment(SlruCtl ctl, int64 segno); extern int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, - int segpage, void *data); -extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, + int64 segpage, void *data); +extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data); #endif /* SLRU_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 808c144a914..5c251974488 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -118,7 +118,7 @@ typedef enum TM_Result * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the * tuple); otherwise cmax is zero. (We make this restriction because - * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * HeapTupleGetCmax doesn't work for tuples outdated in other * transactions.) */ typedef struct TM_FailureData diff --git a/src/include/access/transam.h b/src/include/access/transam.h index d22de19c94c..e0f81e76e3f 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -17,6 +17,10 @@ #include "access/xlogdefs.h" +#ifndef FRONTEND +#include "utils/elog.h" +#endif + /* ---------------- * Special transaction ID values * @@ -28,11 +32,15 @@ * Note: if you need to change it, you must change pg_class.h as well. 
* ---------------- */ -#define InvalidTransactionId ((TransactionId) 0) -#define BootstrapTransactionId ((TransactionId) 1) -#define FrozenTransactionId ((TransactionId) 2) -#define FirstNormalTransactionId ((TransactionId) 3) -#define MaxTransactionId ((TransactionId) 0xFFFFFFFF) +#define InvalidTransactionId UINT64CONST(0) +#define BootstrapTransactionId UINT64CONST(1) +#define FrozenTransactionId UINT64CONST(2) +#define FirstNormalTransactionId UINT64CONST(3) +#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF) +#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF) + +/* First TransactionId after upgrade from 32bit xid */ +#define FirstUpgradedTransactionId ((TransactionId) 1 << 32) /* ---------------- * transaction ID manipulation macros @@ -44,8 +52,37 @@ #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) -#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) -#define XidFromFullTransactionId(x) ((uint32) (x).value) +/* + * Convert short xid from/to full xid. The assertion should fail if the full + * xid doesn't fit into the xid base. + */ +#define ShortTransactionIdToNormal(base, xid) \ + (TransactionIdIsNormal(xid) ? (TransactionId)(xid) + (base) : (TransactionId)(xid)) + +#ifdef USE_ASSERT_CHECKING +static inline ShortTransactionId +NormalTransactionIdToShort(TransactionId base, TransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (ShortTransactionId)(xid); + +#ifndef FRONTEND + if (xid < base + FirstNormalTransactionId || + xid > base + MaxShortTransactionId) + elog(PANIC, "Xid " XID_FMT " does not fit into valid range for base " XID_FMT, xid, base); +#endif + + return (ShortTransactionId)(xid - base); +} +#else +#define NormalTransactionIdToShort(base, xid) \ + (TransactionIdIsNormal(xid) ? 
(ShortTransactionId)( \ + AssertMacro((xid) >= (base) + FirstNormalTransactionId), \ + AssertMacro((xid) <= (base) + MaxShortTransactionId), \ + (xid) - (base)) : (ShortTransactionId)(xid)) +#endif + +#define XidFromFullTransactionId(x) ((x).value) #define U64FromFullTransactionId(x) ((x).value) #define FullTransactionIdEquals(a, b) ((a).value == (b).value) #define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) @@ -53,8 +90,8 @@ #define FullTransactionIdFollows(a, b) ((a).value > (b).value) #define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value) #define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) -#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) -#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId) +#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId) +#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId) #define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId) /* @@ -68,11 +105,11 @@ typedef struct FullTransactionId } FullTransactionId; static inline FullTransactionId -FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid) +FullTransactionIdFromXid(TransactionId xid) { FullTransactionId result; - result.value = ((uint64) epoch) << 32 | xid; + result.value = xid; return result; } @@ -91,8 +128,7 @@ FullTransactionIdFromU64(uint64 value) #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ - if ((dest) < FirstNormalTransactionId) \ - (dest) = FirstNormalTransactionId; \ + Assert((dest) > FirstNormalTransactionId); \ } while(0) /* @@ -140,18 +176,19 @@ FullTransactionIdAdvance(FullTransactionId *dest) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ + Assert((dest) > FirstNormalTransactionId); \ (dest)--; \ - } while ((dest) < 
FirstNormalTransactionId) + } while(0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) < 0) + (int64) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) > 0) + (int64) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. @@ -221,9 +258,6 @@ typedef struct VariableCacheData TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ - TransactionId xidWarnLimit; /* start complaining here */ - TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ - TransactionId xidWrapLimit; /* where the world ends */ Oid oldestXidDB; /* database with minimum datfrozenxid */ /* @@ -277,10 +311,6 @@ extern bool TransactionIdIsKnownCompleted(TransactionId transactionId); extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); -extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); -extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); extern TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids); extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 5546d334ad1..e616588bce2 
100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -226,7 +226,7 @@ typedef struct xl_xact_xinfo * four so following records don't have to care about alignment. Commit * records can be large, so copying large portions isn't attractive. */ - uint32 xinfo; + uint64 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo @@ -258,7 +258,12 @@ typedef struct xl_xact_invals typedef struct xl_xact_twophase { - TransactionId xid; + /* + * TransactionId is split into 32-bit parts because + * xl_xact_twophase is only int-aligned. + */ + uint32 xid_lo; + uint32 xid_hi; } xl_xact_twophase; typedef struct xl_xact_origin @@ -276,7 +281,7 @@ typedef struct xl_xact_commit /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; @@ -291,7 +296,7 @@ typedef struct xl_xact_abort /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* No invalidation messages needed. */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
*/ } xl_xact_abort; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 34f6c89f067..82d701e3169 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -149,6 +149,14 @@ typedef enum RecoveryPauseState extern PGDLLIMPORT int wal_level; +/* + * These parameters specify the starting xid, multixact id and multixact offset + * for testing 64 bit xids + */ +extern TransactionId start_xid; +extern MultiXactId start_mx_id; +extern MultiXactOffset start_mx_offset; + /* Is WAL archiving enabled (always or only while server is running normally)? */ #define XLogArchivingActive() \ (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF) diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index de6fd791fe6..f151554589a 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -327,10 +327,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, #define XLogRecBlockImageApply(decoder, block_id) \ ((decoder)->blocks[block_id].apply_image) -#ifndef FRONTEND -extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); -#endif - extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, diff --git a/src/include/c.h b/src/include/c.h index 98c0b053c98..6a9ab9d3020 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -75,6 +75,10 @@ #include #endif +#if HAVE_INTTYPES_H +#include "inttypes.h" +#endif + /* ---------------------------------------------------------------- * Section 1: compiler characteristics @@ -483,6 +487,16 @@ typedef unsigned long long int uint64; #define INT64_FORMAT "%" INT64_MODIFIER "d" #define UINT64_FORMAT "%" INT64_MODIFIER "u" +/* + * Used to make translatable strings. 
+ * Also, on macOS the externally defined PRIu64 doesn't match the postgres + * int64 definition, causing a storm of printf warnings. So redefine it anyway. -- sk. + */ +#ifdef PRIu64 +#undef PRIu64 +#endif +#define PRIu64 INT64_MODIFIER "u" + /* * 128-bit signed and unsigned integers * There currently is only limited support for such types. @@ -584,19 +598,41 @@ typedef double float8; typedef Oid regproc; typedef regproc RegProcedure; -typedef uint32 TransactionId; +#define MAX_START_XID UINT64CONST(0x3fffffffffffffff) + +typedef uint64 TransactionId; -typedef uint32 LocalTransactionId; +#define TransactionIdPrecedes(id1, id2) ((id1) < (id2)) +#define TransactionIdPrecedesOrEquals(id1, id2) ((id1) <= (id2)) +#define TransactionIdFollows(id1, id2) ((id1) > (id2)) +#define TransactionIdFollowsOrEquals(id1, id2) ((id1) >= (id2)) -typedef uint32 SubTransactionId; +#define StartTransactionIdIsValid(start_xid) ((start_xid) <= MAX_START_XID) + +typedef uint32 ShortTransactionId; + +typedef uint64 LocalTransactionId; + +typedef uint64 SubTransactionId; #define InvalidSubTransactionId ((SubTransactionId) 0) #define TopSubTransactionId ((SubTransactionId) 1) +#define XID_FMT UINT64_FORMAT + /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; -typedef uint32 MultiXactOffset; +#define MultiXactIdPrecedes(id1, id2) ((id1) < (id2)) +#define MultiXactIdPrecedesOrEquals(id1, id2) ((id1) <= (id2)) +#define MultiXactIdFollows(id1, id2) ((id1) > (id2)) +#define MultiXactIdFollowsOrEquals(id1, id2) ((id1) >= (id2)) + +#define StartMultiXactIdIsValid(start_mx_id) ((start_mx_id) <= MAX_START_XID) + +typedef uint64 MultiXactOffset; + +#define StartMultiXactOffsetIsValid(start_mx_offset) ((start_mx_offset) <= MAX_START_XID) typedef uint32 CommandId; diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 5460aa24222..520a848b999 100644 --- a/src/include/catalog/pg_amproc.dat +++ 
b/src/include/catalog/pg_amproc.dat @@ -403,9 +403,9 @@ amprocrighttype => 'bytea', amprocnum => '2', amproc => 'hashvarlenaextended' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint8extended' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 749bce0cc6f..99feb7ffb6b 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -249,4 +249,10 @@ typedef struct ControlFileData */ #define PG_CONTROL_FILE_SIZE 8192 +#define CONTROLFILE_GET_OLDEDITION(control) \ + ((control)->pg_old_version >> 16) + +#define CONTROLFILE_SET_OLDEDITION(control, v) \ + (control)->pg_old_version = ((v) << 16) + #endif /* PG_CONTROL_H */ diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 2bc7cc35484..daff1ed4ff4 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -183,16 +183,16 @@ oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)', oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '353', descr => 'equal', - oprname => '=', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel', + oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '3315', descr => 'not 
equal', oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool', oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '3316', descr => 'not equal', - oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel', + oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '5068', descr => 'equal', oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 4d992dc2241..0487695b278 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -2360,10 +2360,10 @@ { oid => '1181', descr => 'age of a transaction ID, in transactions before current transaction', proname => 'age', provolatile => 's', proparallel => 'r', - prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' }, + prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' }, { oid => '3939', descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction', - proname => 'mxid_age', provolatile => 's', prorettype => 'int4', + proname => 'mxid_age', provolatile => 's', prorettype => 'int8', proargtypes => 'xid', prosrc => 'mxid_age' }, { oid => '1188', @@ -2698,11 +2698,11 @@ prosrc => 'bpcharlen' }, { oid => '1319', - proname => 'xideqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xideq' }, + proname => 'xideqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xideq' }, { oid => '3309', - proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xidneq' }, + proname => 'xidneqint8', proleakproof => 't', 
prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xidneq' }, { oid => '1326', proname => 'interval_div', prorettype => 'interval', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index f3d94f3cf5d..b649ee9fb96 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -95,9 +95,9 @@ typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv', typsend => 'tidsend', typalign => 's' }, { oid => '28', array_type_oid => '1011', descr => 'transaction id', - typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U', + typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U', typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv', - typsend => 'xidsend', typalign => 'i' }, + typsend => 'xidsend', typalign => 'd' }, { oid => '29', array_type_oid => '1012', descr => 'command identifier type, sequence in transaction id', typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U', diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 5a36049be6f..713b4220260 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -213,12 +213,12 @@ typedef enum VacOptValue */ typedef struct VacuumParams { - bits32 options; /* bitmask of VACOPT_* */ - int freeze_min_age; /* min freeze age, -1 to use default */ - int freeze_table_age; /* age at which to scan whole table */ - int multixact_freeze_min_age; /* min multixact freeze age, -1 to + bits32 options; /* bitmask of VacuumOption */ + int64 freeze_min_age; /* min freeze age, -1 to use default */ + int64 freeze_table_age; /* age at which to scan whole table */ + int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ - int multixact_freeze_table_age; /* multixact age at which to scan + int64 multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int 
log_min_duration; /* minimum execution threshold in ms at @@ -252,12 +252,12 @@ typedef struct VacDeadItems /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ -extern int vacuum_freeze_min_age; -extern int vacuum_freeze_table_age; -extern int vacuum_multixact_freeze_min_age; -extern int vacuum_multixact_freeze_table_age; -extern int vacuum_failsafe_age; -extern int vacuum_multixact_failsafe_age; +extern int64 vacuum_freeze_min_age; +extern int64 vacuum_freeze_table_age; +extern int64 vacuum_multixact_freeze_min_age; +extern int64 vacuum_multixact_freeze_table_age; +extern int64 vacuum_failsafe_age; +extern int64 vacuum_multixact_failsafe_age; /* Variables for cost-based parallel vacuum */ extern pg_atomic_uint32 *VacuumSharedCostBalance; @@ -285,9 +285,9 @@ extern void vac_update_relstats(Relation relation, MultiXactId minmulti, bool in_outer_xact); extern void vacuum_set_xid_limits(Relation rel, - int freeze_min_age, int freeze_table_age, - int multixact_freeze_min_age, - int multixact_freeze_table_age, + int64 freeze_min_age, int64 freeze_table_age, + int64 multixact_freeze_min_age, + int64 multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, diff --git a/src/include/fmgr.h b/src/include/fmgr.h index cec663bdff0..61ea0381203 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n)) #define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n)) #define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n)) +#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) /* use this if you want the raw, possibly-toasted input datum: */ #define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n)) /* use this if you want the input datum de-toasted: */ @@ -367,6 +368,7 @@ extern 
struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_RETURN_FLOAT8(x) return Float8GetDatum(x) #define PG_RETURN_INT64(x) return Int64GetDatum(x) #define PG_RETURN_UINT64(x) return UInt64GetDatum(x) +#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) /* RETURN macros for other pass-by-ref types will typically look like this: */ #define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x) #define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 7525c165974..05176b563cc 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -962,6 +962,9 @@ # endif #endif +/* Postgres Pro use 64bit xids */ +#undef XID_IS_64BIT + /* Size of a WAL file block. This need have no particular relation to BLCKSZ. XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O, XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O diff --git a/src/include/postgres.h b/src/include/postgres.h index 0446daa0e61..501a98aa116 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -555,21 +555,21 @@ typedef struct NullableDatum * Returns transaction identifier value of a datum. */ -#define DatumGetTransactionId(X) ((TransactionId) (X)) +#define DatumGetTransactionId(X) (DatumGetUInt64(X)) /* * TransactionIdGetDatum * Returns datum representation for a transaction identifier. */ -#define TransactionIdGetDatum(X) ((Datum) (X)) +#define TransactionIdGetDatum(X) (UInt64GetDatum(X)) /* * MultiXactIdGetDatum * Returns datum representation for a multixact identifier. 
*/ -#define MultiXactIdGetDatum(X) ((Datum) (X)) +#define MultiXactIdGetDatum(X) (UInt64GetDatum(X)) /* * DatumGetCommandId diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index aacdd0f5753..2ef445a57f9 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -37,8 +37,8 @@ extern int autovacuum_vac_ins_thresh; extern double autovacuum_vac_ins_scale; extern int autovacuum_anl_thresh; extern double autovacuum_anl_scale; -extern int autovacuum_freeze_max_age; -extern int autovacuum_multixact_freeze_max_age; +extern int64 autovacuum_freeze_max_age; +extern int64 autovacuum_multixact_freeze_max_age; extern double autovacuum_vac_cost_delay; extern int autovacuum_vac_cost_limit; diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index cfce23ecbc8..3c81e4430f0 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -245,6 +245,8 @@ extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation); extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); extern void FreeAccessStrategy(BufferAccessStrategy strategy); +/* old tuple format support */ +extern void convert_page(Relation rel, Page orig_page, Buffer buf, BlockNumber blkno); /* inline functions */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index c86ccdaf608..739f19f5f11 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -159,12 +159,84 @@ typedef struct PageHeaderData LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; - TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ + ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; + +/* + * 
HeapPageSpecialData -- data that is stored at the end of each heap page. + * + * pd_xid_base - base value for transaction IDs on page + * pd_multi_base - base value for multixact IDs on page + * + * pd_xid_base and pd_multi_base are base values for calculation of transaction + * identifiers from t_xmin and t_xmax in each heap tuple header on the page. + */ +typedef struct HeapPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ + TransactionId pd_multi_base; /* base value for multixact IDs on page */ +} HeapPageSpecialData; + +#define SizeOfPageSpecial MAXALIGN(sizeof(HeapPageSpecialData)) + +typedef HeapPageSpecialData *HeapPageSpecial; + +extern PGDLLIMPORT HeapPageSpecial doubleXmaxSpecial; + +/* + * Get pointer to HeapPageSpecialData without using pd_special of the page + * (for the sake of speed) assuming all heap pages have same size of special + * data. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ. See comment in bufpage.c + * for details. + */ +#define HeapPageGetSpecial(page) ( \ + (((PageHeader) (page))->pd_special == BLCKSZ) ? \ + ((HeapPageSpecial) doubleXmaxSpecial) : \ + (AssertMacro(((PageHeader) (page))->pd_special == BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))), \ + (HeapPageSpecial) ((Pointer) (page) + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)))) \ +) + +/* + * Version of HeapPageGetSpecial() without assertions about pd_special. Used + * for non-consistent reads from non-locked pages. + */ +#define HeapPageGetSpecialNoAssert(page) ( \ + (((PageHeader) (page))->pd_special == BLCKSZ) ? 
\ + ((HeapPageSpecial) doubleXmaxSpecial) : \ + (HeapPageSpecial) ((Pointer) (page) + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))) \ +) + +ShortTransactionId HeapPageSetPruneXidInternal(Page page, TransactionId xid); + +#define HeapPageSetPruneXid(page, xid) \ + HeapPageSetPruneXidInternal((Page)(page), xid) + +#define HeapPageGetPruneXid(page) \ +( \ + ShortTransactionIdToNormal(HeapPageGetSpecial(page)->pd_xid_base, ((PageHeader) (page))->pd_prune_xid) \ +) + +/* + * Read pd_prune_xid from non-locked page. May return an invalid value, but + * doesn't cause assert failures. + */ +#define HeapPageGetPruneXidNoAssert(page) \ +( \ + ShortTransactionIdToNormal(HeapPageGetSpecialNoAssert(page)->pd_xid_base, ((PageHeader) (page))->pd_prune_xid) \ +) + +#define XidFitsPage(page, xid) \ +( \ + (xid) >= HeapPageGetSpecial(page)->pd_xid_base + FirstNormalTransactionId && \ + (xid) <= HeapPageGetSpecial(page)->pd_xid_base + MaxShortTransactionId \ +) + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. @@ -192,11 +264,13 @@ typedef PageHeaderData *PageHeader; * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and * added the pd_flags field (by stealing some bits from pd_tli), * as well as adding the pd_prune_xid field (which enlarges the header). + * PgPro Enterprise 10 uses version number (0x00FF - 1), and should not + * collide with vanilla versions due to page conversion after pg_upgrade. * * As of Release 9.3, the checksum version must also be considered when * handling pages. 
*/ -#define PG_PAGE_LAYOUT_VERSION 4 +#define PG_PAGE_LAYOUT_VERSION 5 #define PG_DATA_CHECKSUM_VERSION 1 /* ---------------------------------------------------------------- @@ -389,16 +463,18 @@ PageValidateSpecialPointer(Page page) #define PageClearAllVisible(page) \ (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE) +/* Check if page is in "double xmax" format */ +#define HeapPageIsDoubleXmax(page) \ + (((PageHeader) (page))->pd_special == BLCKSZ) + #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ - if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ - TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ - ((PageHeader) (page))->pd_prune_xid = (xid); \ + if (!HeapPageIsDoubleXmax(page) && \ + (!TransactionIdIsValid(HeapPageGetPruneXid(page)) || \ + TransactionIdPrecedes(xid, HeapPageGetPruneXid(page)))) \ + HeapPageSetPruneXid(page, xid); \ } while (0) -#define PageClearPrunable(page) \ - (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) - /* ---------------------------------------------------------------- * extern declarations @@ -432,6 +508,19 @@ do { \ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); +/* + * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + */ +typedef struct itemIdCompactData +{ + uint16 offsetindex; /* linp array index */ + int16 itemoff; /* page offset of item data */ + uint16 alignedlen; /* MAXALIGN(item data len) */ +} itemIdCompactData; +typedef itemIdCompactData *itemIdCompact; + +extern int itemoffcompare(const void *item1, const void *item2); + extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h index d1f0e382485..0ecdc1150ce 
100644 --- a/src/include/storage/itemid.h +++ b/src/include/storage/itemid.h @@ -78,6 +78,8 @@ typedef uint16 ItemLength; #define ItemIdGetRedirect(itemId) \ ((itemId)->lp_off) +#define ItemIdGetTupleEnd(itemId) \ + (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId))) /* * ItemIdIsValid * True iff item identifier is valid. diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index a5286fab893..e2ddac783b0 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -227,8 +227,8 @@ typedef struct LOCKTAG /* ID info for a transaction is its TransactionId */ #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ @@ -237,8 +237,8 @@ typedef struct LOCKTAG /* ID info for a virtual transaction is its VirtualTransactionId */ #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).backendId, \ - (locktag).locktag_field2 = (vxid).localTransactionId, \ - (locktag).locktag_field3 = 0, \ + (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \ + (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) @@ -248,9 +248,9 @@ typedef struct LOCKTAG * its speculative insert counter. 
*/ #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = (token), \ - (locktag).locktag_field3 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ + (locktag).locktag_field3 = (token), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 44b477f49d7..a7a6046f8b0 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -238,7 +238,7 @@ struct PGPROC TransactionId clogGroupMemberXid; /* transaction id of clog group member */ XidStatus clogGroupMemberXidStatus; /* transaction status of clog * group member */ - int clogGroupMemberPage; /* clog page corresponding to + int64 clogGroupMemberPage; /* clog page corresponding to * transaction id of clog group member */ XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog * group member */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 38fd85a4316..e7fdab4889d 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -21,7 +21,7 @@ #include "storage/standbydefs.h" /* User-settable GUC parameters */ -extern int vacuum_defer_cleanup_age; +extern int64 vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; extern bool log_recovery_conflict_waits; diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h index 6fd50cfa7b7..5696a85e404 100644 --- a/src/include/storage/sync.h +++ b/src/include/storage/sync.h @@ -52,7 +52,7 @@ typedef struct FileTag int16 handler; /* SyncRequestHandler value, saving space */ int16 forknum; /* ForkNumber, saving space */ RelFileNode rnode; - uint32 segno; + uint64 segno; } FileTag; extern void InitSync(void); diff --git 
a/src/include/utils/builtins.h b/src/include/utils/builtins.h index b07eefaf1ed..9f62f24f5da 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -53,6 +53,7 @@ extern int pg_ltoa(int32 l, char *a); extern int pg_lltoa(int64 ll, char *a); extern char *pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth); extern char *pg_ultostr(char *str, uint32 value); +extern uint64 pg_strtouint64(const char *s, char **endptr, int base); /* oid.c */ extern oidvector *buildoidvector(const Oid *oids, int n); diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h index 7f2e1f7019d..ef18d5b15d8 100644 --- a/src/include/utils/combocid.h +++ b/src/include/utils/combocid.h @@ -15,7 +15,7 @@ #define COMBOCID_H /* - * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes + * HeapTupleGetCmin and HeapTupleGetCmax function prototypes * are in access/htup.h, because that's where the macro definitions that * those functions replaced used to be. */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 31281279cf9..3a534b24930 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -295,12 +295,12 @@ typedef struct AutoVacOpts int vacuum_ins_threshold; int analyze_threshold; int vacuum_cost_limit; - int freeze_min_age; - int freeze_max_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_max_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_max_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_max_age; + int64 multixact_freeze_table_age; int log_min_duration; float8 vacuum_cost_delay; float8 vacuum_scale_factor; diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 1c77211ac45..d0113fb8972 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -2659,7 +2659,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup) * This is needed because CREATE OR REPLACE 
FUNCTION can modify the * function's pg_proc entry without changing its OID. ************************************************************/ - uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) @@ -2783,7 +2783,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger) MemoryContextSetIdentifier(proc_cxt, prodesc->proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index f5b1d5c4fac..a2f3ef36bb8 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -171,7 +171,7 @@ recheck: if (function) { /* We have a compiled function, but is it still valid? 
*/ - if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (function->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else @@ -348,7 +348,7 @@ do_compile(FunctionCallInfo fcinfo, function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_xmin = HeapTupleGetRawXmin(procTup); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 6dbfdb7be03..d2ac94038f7 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -7349,6 +7349,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup) tmptup->t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup->t_self)); tmptup->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(tmptup); tmptup->t_data = td; /* Extract rowtype info and find a tupdesc */ @@ -7523,6 +7524,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = td; /* Extract rowtype info */ diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c index 494f109b323..9884f74fa78 100644 --- a/src/pl/plpython/plpy_procedure.c +++ b/src/pl/plpython/plpy_procedure.c @@ -178,7 +178,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) proc->proname = pstrdup(NameStr(procStruct->proname)); MemoryContextSetIdentifier(cxt, proc->proname); proc->pyname = pstrdup(procName); - proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + proc->fn_xmin = HeapTupleGetRawXmin(procTup); proc->fn_tid = 
procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; @@ -419,7 +419,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup) return false; /* If the pg_proc tuple has changed, it's not valid */ - if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c index 7c045f45607..52361811dcd 100644 --- a/src/pl/tcl/pltcl.c +++ b/src/pl/tcl/pltcl.c @@ -1426,7 +1426,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, * function's pg_proc entry without changing its OID. ************************************************************/ if (prodesc != NULL && - prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ @@ -1493,7 +1493,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/test/Makefile b/src/test/Makefile index 46275915ff3..ec6558e33ba 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,8 @@ subdir = src/test top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global -SUBDIRS = perl regress isolation modules authentication recovery subscription +SUBDIRS = perl regress isolation modules authentication recovery subscription \ + xid-64 # Test suites that are not safe by default but can be run if selected # by the user via the whitespace-separated list in variable diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index c061e850fb0..0a9c114f772 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -431,6 +431,7 @@ sub init mkdir $self->archive_dir; PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A', 'trust', '-N', + '-x', '1249835483136', '-m', '2422361554944', '-o', '3594887626752', @{ $params{extra} }); PostgreSQL::Test::Utils::system_or_bail($ENV{PG_REGRESS}, '--config-auth', $pgdata, @{ $params{auth_extra} }); diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 0d0636b85c0..e01e67b572e 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1); # Bump the transaction ID epoch. This is useful to stress the portability # of recovery_target_xid parsing. 
-system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir); +system_or_bail('pg_resetwal', $node_primary->data_dir); # Start it $node_primary->start; diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out index ab1fa5e707c..df2c354364a 100644 --- a/src/test/regress/expected/indirect_toast.out +++ b/src/test/regress/expected/indirect_toast.out @@ -153,6 +153,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; ("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 (5 rows) +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; +insert into toasttest_main (select random_string(len) from generate_series(8000,9000) len); DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index 5063a3dc221..fb7a1628332 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; -- -- check indirection (field/array assignment), cf bug #14265 @@ -1002,3 +1002,17 @@ insert into 
returningwrtest values (2, 'foo') returning returningwrtest; (1 row) drop table returningwrtest; +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 562b586d8e0..0c47f94ee0b 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND ORDER BY 1, 2; proargtypes | proargtypes -----------------------------+-------------------------- - integer | xid + bigint | xid timestamp without time zone | timestamp with time zone bit | bit varying txid_snapshot | pg_snapshot @@ -704,7 +704,7 @@ int8(oid) tideq(tid,tid) timestamptz_cmp(timestamp with time zone,timestamp with time zone) interval_cmp(interval,interval) -xideqint4(xid,integer) +xideqint8(xid,bigint) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) @@ -818,7 +818,7 @@ pg_lsn_gt(pg_lsn,pg_lsn) pg_lsn_ne(pg_lsn,pg_lsn) pg_lsn_cmp(pg_lsn,pg_lsn) xidneq(xid,xid) -xidneqint4(xid,integer) +xidneqint8(xid,bigint) sha224(bytea) sha256(bytea) sha384(bytea) diff --git 
a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452bd..d3be84754c1 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -2,9 +2,22 @@ -- SELECT_VIEWS -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C"; name | thepath | cname ------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------- + 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland + 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland + 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette + 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley + 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland + 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette + 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley + 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley + 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland + 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland + 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland + 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland + 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland @@ -22,8 +35,8 @@ SELECT * FROM street; Arroyo Las Positas | 
[(-121.7973,37.997),(-121.7957,37.005)] | Oakland Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland - Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley + Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley @@ -37,14 +50,14 @@ SELECT * FROM street; Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | Berkeley + Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland - Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland - California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley California St | [(-122.2032,37.005),(-122.2016,37.996)] | Lafayette + California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley Cameron Ave | [(-122.1316,37.502),(-122.1327,37.481)] | Oakland Campus Dr | [(-122.1704,37.905),(-122.1678,37.868),(-122.1671,37.865)] | Berkeley Capricorn Ave | [(-122.2176,37.404),(-122.2164,37.384)] | Lafayette @@ -55,8 +68,8 @@ SELECT * FROM street; Central Ave | [(-122.2343,37.602),(-122.2331,37.595)] | Berkeley Chambers Dr | [(-122.2004,37.352),(-122.1972,37.368)] | Lafayette Chambers Lane | [(-122.2001,37.359),(-122.1975,37.371)] | Lafayette - Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley Champion 
St | [(-122.214,37.991),(-122.2147,37.002)] | Lafayette + Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland @@ -77,9 +90,9 @@ SELECT * FROM street; Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley D St | [(-122.1811,37.505),(-122.1805,37.497)] | Oakland + Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland - Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette @@ -117,12 +130,12 @@ SELECT * FROM street; I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland - I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley + I- 
580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley - I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette + I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 Ramp | 
[(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland I- 580 Ramp | [(-121.8743,37.014),(-121.8722,37.999),(-121.8714,37.999)] | Oakland @@ -136,8 +149,8 @@ SELECT * FROM street; I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley - I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley + I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland @@ -158,16 +171,16 @@ SELECT * FROM street; I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland - I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | 
[(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley - I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland + I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley + I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland I- 880 | 
[(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley - I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley + I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley I- 880 | 
[(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland @@ -202,28 +215,28 @@ SELECT * FROM street; Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Berkeley Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Lafayette Lakehurst Cir | [(-122.284729,37.89025),(-122.286096,37.90364)] | Berkeley - Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Lafayette + Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland - Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Lafayette + Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland - Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley + Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley Mattos Dr | 
[(-122.0005,37.502),(-122.000898,37.49683)] | Oakland - Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley - McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland + Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley + McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley @@ -242,8 +255,8 @@ SELECT * FROM street; Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland - Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley + Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland @@ -254,11 +267,11 @@ SELECT * FROM street; Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland - Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley + Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley - Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley + Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland 
Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley @@ -266,8 +279,8 @@ SELECT * FROM street; Saginaw Ct | [(-121.8803,37.898),(-121.8806,37.901)] | Oakland San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland - Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley + Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley @@ -317,27 +330,14 @@ SELECT * FROM street; Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland - Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley + Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley - 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland - 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland - 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette - 19th Ave | 
[(-122.2366,37.897),(-122.2359,37.905)] | Berkeley - 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland - 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette - 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley - 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley - 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland - 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland - 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland - 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland - 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette (333 rows) SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out index 95ba66e95ee..2ea4434f513 100644 --- a/src/test/regress/expected/txid.out +++ b/src/test/regress/expected/txid.out @@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3'; (1 row) SELECT txid_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT txid_snapshot '1:9223372036854775808:3'; - ^ + txid_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test txid_current_if_assigned BEGIN; SELECT txid_current_if_assigned() IS NULL; diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out index b7a1ed0f9ee..a26aa0c47bc 100644 --- a/src/test/regress/expected/xid.out +++ b/src/test/regress/expected/xid.out @@ -8,9 +8,9 @@ select '010'::xid, '42'::xid8, '0xffffffffffffffff'::xid8, '-1'::xid8; - xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 ------+-----+------------+------------+------+------+----------------------+---------------------- - 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615 + xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 
+-----+-----+------------+----------------------+------+------+----------------------+---------------------- + 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615 (1 row) -- garbage values are not yet rejected (perhaps they should be) @@ -373,9 +373,11 @@ SELECT pg_snapshot '1:9223372036854775807:3'; (1 row) SELECT pg_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT pg_snapshot '1:9223372036854775808:3'; - ^ + pg_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test pg_current_xact_id_if_assigned BEGIN; SELECT pg_current_xact_id_if_assigned() IS NULL; diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 589357ba59c..efec897db41 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -2292,7 +2292,7 @@ regression_main(int argc, char *argv[], /* initdb */ header(_("initializing database system")); snprintf(buf, sizeof(buf), - "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1", + "\"%s%sinitdb\" -D \"%s/data\" -x 1249835483136 -m 2422361554944 -o 3594887626752 --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1", bindir ? bindir : "", bindir ? 
"/" : "", temp_instance, diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 351d79e1f09..c561c3d67df 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -561,6 +561,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); diff --git a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql index 9156a44b7d9..c99c944cadb 100644 --- a/src/test/regress/sql/indirect_toast.sql +++ b/src/test/regress/sql/indirect_toast.sql @@ -65,7 +65,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; VACUUM FREEZE indtoasttest; SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; + +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; + +insert into toasttest_main (select random_string(len) from generate_series(8000,9000) len); + DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index bfaa8a3b277..589a5dcdc67 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; @@ -624,3 +624,18 
@@ alter table returningwrtest2 drop c; alter table returningwrtest attach partition returningwrtest2 for values in (2); insert into returningwrtest values (2, 'foo') returning returningwrtest; drop table returningwrtest; + +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f136990..70e663e350c 100644 --- a/src/test/regress/sql/select_views.sql +++ b/src/test/regress/sql/select_views.sql @@ -3,7 +3,7 @@ -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C"; SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile new file mode 100644 index 00000000000..3b1e50dfc0d --- /dev/null +++ b/src/test/xid-64/Makefile @@ -0,0 +1,22 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/xid-64 +# +# Copyright (c) 2018, Postgres Professional +# +# src/test/xid-64/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/xid-64 +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/xid-64/README b/src/test/xid-64/README new file mode 100644 index 00000000000..01c0a1a1f74 --- /dev/null +++ b/src/test/xid-64/README @@ -0,0 +1,16 @@ +src/test/xid-64/README + +Regression tests for 64-bit XIDs +============================================= + +This directory contains a test suite for 64-bit xids. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes. + +NOTE: This requires the --enable-tap-tests argument to configure. diff --git a/src/test/xid-64/t/001_test_large_xids.pl b/src/test/xid-64/t/001_test_large_xids.pl new file mode 100644 index 00000000000..6a039d60d08 --- /dev/null +++ b/src/test/xid-64/t/001_test_large_xids.pl @@ -0,0 +1,53 @@ +# Tests for large xid values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 9; + +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . 
"\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node with the random xid-related parameters +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]); +$node->start; + +# Initialize master node and check the xid-related parameters +my $pgcd_output = command_output( + [ 'pg_controldata', '-D', $node->data_dir ] ); +print($pgcd_output); print('\n'); +ok($pgcd_output =~ qr/Latest checkpoint's NextXID:\s*(\d+)/, "XID found"); +my ($nextxid) = ($1); +ok($nextxid >= $ixid && $nextxid < $ixid + 1000, + "Latest checkpoint's NextXID ($nextxid) is close to the initial xid ($ixid)."); +ok($pgcd_output =~ qr/Latest checkpoint's NextMultiXactId:\s*(\d+)/, "MultiXactId found"); +my ($nextmxid) = ($1); +ok($nextmxid >= $imxid && $nextmxid < $imxid + 1000, + "Latest checkpoint's NextMultiXactId ($nextmxid) is close to the initial multiXactId ($imxid)."); +ok($pgcd_output =~ qr/Latest checkpoint's NextMultiOffset:\s*(\d+)/, "MultiOffset found"); +my ($nextmoff) = ($1); +ok($nextmoff >= $imoff && $nextmoff < $imoff + 1000, + "Latest checkpoint's NextMultiOffset ($nextmoff) is close to the initial multiOffset ($imoff)."); + +# Run pgbench to check whether the database is working properly +$node->command_ok( + [ qw(pgbench --initialize --no-vacuum --scale=10) ], + 'pgbench finished without errors'); diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl new file mode 100644 index 00000000000..2b76374f012 --- /dev/null +++ b/src/test/xid-64/t/002_test_gucs.pl @@ -0,0 +1,78 @@ +# Tests for guc boundary 
values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 19; + +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . "\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +sub set_guc +{ + my ($node, $guc, $val) = @_; + print("SET $guc = $val\n"); + $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val"); + $node->restart(); +} + +sub test_pgbench +{ + my ($node) = @_; + $node->command_ok( + [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ], + 'pgbench finished without errors'); +} + +my @guc_vals = ( + [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ], + [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ], + [ "vacuum_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_freeze_table_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ] +); + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]); +# Disable logging of all statements to avoid log bloat during pgbench +$node->append_conf('postgresql.conf', "log_statement = none"); +$node->start; + +# Fill the test database with the pgbench data +$node->command_ok( + [ qw(pgbench --initialize --scale=10) ], + 'pgbench finished without errors'); + +# Test all GUCs with minimum, maximum and random value inbetween +# (run pgbench for every configuration setting) +foreach my $gi (0 .. 
$#guc_vals) { + print($guc_vals[$gi][0]); print("\n"); + my $guc = $guc_vals[$gi][0]; + my $minval = $guc_vals[$gi][1]; + my $maxval = $guc_vals[$gi][2]; + set_guc($node, $guc, $minval); + test_pgbench($node); + set_guc($node, $guc, $maxval); + test_pgbench($node); + set_guc($node, $guc, $minval + int(rand($maxval - $minval))); + test_pgbench($node); +} diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl new file mode 100644 index 00000000000..d984907aee8 --- /dev/null +++ b/src/test/xid-64/t/003_test_integrity.pl @@ -0,0 +1,56 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 5; +use File::Compare; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; +use bigint; + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(); +$node->start; + +# Create a database and fill it with the pgbench data +$node->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +$node->command_ok( + [ qw(pgbench --initialize --scale=2 pgbench_db) ], + 'pgbench finished without errors'); +# Dump the database (cluster the main table to put data in a determined order) +$node->safe_psql('pgbench_db', qq( + CREATE INDEX pa_aid_idx ON pgbench_accounts (aid); + CLUSTER pgbench_accounts USING pa_aid_idx)); +$node->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench.sql", "pgbench_db" ], + 'pgdump finished without errors'); +$node->stop('fast'); + +# Initialize second node +my $node2 = PostgreSQL::Test::Cluster->new('master2'); +$node2->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]); +# Disable logging of all 
statements to avoid log bloat during restore +$node2->append_conf('postgresql.conf', "log_statement = none"); +$node2->start; + +# Create a database and restore the previous dump +$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()'); +print("# Initial txid_current: $txid0\n"); +$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]); + +# Dump the database and compare the dumped content with the previous one +$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts'); +$node2->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench2.sql", "pgbench_db" ], + 'pgdump finished without errors'); +ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected"); diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index 2c8cd521e94..f501263d5b5 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -459,6 +459,7 @@ sub GenerateFiles PACKAGE_TARNAME => lc qq{"$package_name"}, PACKAGE_URL => qq{"$package_url"}, PACKAGE_VERSION => qq{"$package_version"}, + XID_IS_64BIT => 1, PG_INT128_TYPE => undef, PG_INT64_TYPE => 'long long int', PG_KRB_SRVNAM => qq{"postgres"}, -- 2.24.3 (Apple Git-128)