From 0c9cff6c06e5f9b719669811c44b886e740a55aa Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Mon, 14 Nov 2022 21:39:14 +0300 Subject: [PATCH v1] Operation log --- src/backend/access/transam/xlog.c | 9 + src/backend/utils/misc/pg_controldata.c | 114 ++++ src/bin/pg_resetwal/pg_resetwal.c | 3 + src/bin/pg_rewind/pg_rewind.c | 5 + src/bin/pg_upgrade/controldata.c | 53 ++ src/bin/pg_upgrade/exec.c | 9 +- src/bin/pg_upgrade/pg_upgrade.c | 2 + src/bin/pg_upgrade/pg_upgrade.h | 2 + src/common/controldata_utils.c | 554 ++++++++++++++++-- src/include/catalog/pg_control.h | 124 ++++ src/include/catalog/pg_proc.dat | 9 + src/include/common/controldata_utils.h | 12 + .../modules/test_misc/t/004_operation_log.pl | 109 ++++ 13 files changed, 964 insertions(+), 41 deletions(-) create mode 100644 src/test/modules/test_misc/t/004_operation_log.pl diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a31fbbff78..f671692c83 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4783,6 +4783,9 @@ BootStrapXLOG(void) /* some additional ControlFile fields are set in WriteControlFile() */ WriteControlFile(); + /* Put information into operation log. */ + put_operation_log_element(DataDir, OLT_BOOTSTRAP); + /* Bootstrap the commit log, too */ BootStrapCLOG(); BootStrapCommitTs(); @@ -5749,6 +5752,12 @@ StartupXLOG(void) SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); + + /* Put information into operation log. */ + if (promoted) + put_operation_log_element(DataDir, OLT_PROMOTED); + put_operation_log_element(DataDir, OLT_STARTUP); + LWLockRelease(ControlFileLock); /* diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 781f8b8758..ef95c6360b 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -342,3 +342,117 @@ pg_control_init(PG_FUNCTION_ARGS) PG_RETURN_DATUM(HeapTupleGetDatum(htup)); } + +/* + * pg_operation_log + * + * Returns list of operation log data. + * NOTE: this is a set-returning-function (SRF). + */ +Datum +pg_operation_log(PG_FUNCTION_ARGS) +{ +#define PG_OPERATION_LOG_COLS 6 + FuncCallContext *funcctx; + OperationLogBuffer *log_buffer; + + /* + * Initialize tuple descriptor & function call context. + */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcxt; + TupleDesc tupdesc; + bool crc_ok; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + oldcxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* read the control file */ + log_buffer = get_controlfile_log(DataDir, &crc_ok); + if (!crc_ok) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); + + tupdesc = CreateTemplateTupleDesc(PG_OPERATION_LOG_COLS); + + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "event", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "edition", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "version", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "lsn", + PG_LSNOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "last", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "count", + INT4OID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* The only state we need is the operation log buffer. */ + funcctx->user_fctx = (void *) log_buffer; + + MemoryContextSwitchTo(oldcxt); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + log_buffer = (OperationLogBuffer *) funcctx->user_fctx; + + if (funcctx->call_cntr < get_operation_log_count(log_buffer)) + { + Datum result; + Datum values[PG_OPERATION_LOG_COLS]; + bool nulls[PG_OPERATION_LOG_COLS]; + HeapTuple tuple; + OperationLogData *data = get_operation_log_element(log_buffer, (uint16) funcctx->call_cntr); + int major_version, + minor_version, + patch_version; + + /* + * Form tuple with appropriate data. + */ + MemSet(nulls, 0, sizeof(nulls)); + MemSet(values, 0, sizeof(values)); + + /* event */ + values[0] = CStringGetTextDatum(get_operation_log_type_name(data->ol_type)); + + /* edition */ + values[1] = CStringGetTextDatum(get_str_edition(data->ol_edition)); + + /* version */ + patch_version = data->ol_version % 100; + minor_version = (data->ol_version / 100) % 100; + major_version = data->ol_version / 10000; + if (major_version < 1000) + values[2] = CStringGetTextDatum(psprintf("%u.%u.%u.%u", major_version / 100, + major_version % 100, + minor_version, patch_version)); + else + values[2] = CStringGetTextDatum(psprintf("%u.%u.%u", major_version / 100, + minor_version, patch_version)); + + /* lsn */ + values[3] = LSNGetDatum(data->ol_lsn); + + /* last */ + values[4] = TimestampTzGetDatum(time_t_to_timestamptz(data->ol_timestamp)); + + /* count */ + values[5] = Int32GetDatum(data->ol_count); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + + /* done when there are no more elements left */ + SRF_RETURN_DONE(funcctx); +} diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 089063f471..622bf55455 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -885,6 +885,9 @@ RewriteControlFile(void) /* The control file gets flushed here. */ update_controlfile(".", &ControlFile, true); + + /* Put information into operation log. */ + put_operation_log_element(".", OLT_RESETWAL); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 3cd77c09b1..4fc4ea8d87 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -672,7 +672,12 @@ perform_rewind(filemap_t *filemap, rewind_source *source, ControlFile_new.minRecoveryPointTLI = endtli; ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY; if (!dry_run) + { update_controlfile(datadir_target, &ControlFile_new, do_sync); + + /* Put information into operation log. */ + put_operation_log_element(datadir_target, OLT_REWIND); + } } static void diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index 73bfd14397..d94d38e2ab 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -15,6 +15,10 @@ #include "common/string.h" +#include "catalog/pg_control.h" + +#include "common/controldata_utils.h" + /* * get_control_data() * @@ -731,3 +735,52 @@ disable_old_cluster(void) "started once the new cluster has been started.", old_cluster.pgdata); } + + +/* + * copy_operation_log() + * + * Copy operation log from the old cluster to the new cluster and put info + * about upgrade. If operation log not exists in the old cluster then put + * startup message with version info of old cluster. + */ +void +copy_operation_log(void) +{ + OperationLogBuffer *log_buffer; + bool log_is_empty; + ClusterInfo *cluster; + bool crc_ok; + + /* Read operation log from the old cluster. */ + log_buffer = get_controlfile_log(old_cluster.pgdata, &crc_ok); + if (!crc_ok) + pg_fatal("pg_control operation log CRC value is incorrect"); + + /* + * Check operation log records in the old cluster. Need to put information + * about old version in case operation log is empty. + */ + log_is_empty = (get_operation_log_count(log_buffer) == 0); + + if (user_opts.transfer_mode == TRANSFER_MODE_LINK) + cluster = &old_cluster; + else + { + cluster = &new_cluster; + + /* Place operation log in the new cluster. */ + update_controlfile_log(cluster->pgdata, log_buffer, true); + } + + /* Put information about the old cluster if needed. */ + if (log_is_empty) + put_operation_log_element_version(cluster->pgdata, OLT_STARTUP, + ED_PG_ORIGINAL, + old_cluster.bin_version_num); + + /* + * Put information about upgrade in the operation log of the old cluster. + */ + put_operation_log_element(cluster->pgdata, OLT_UPGRADE); +} diff --git a/src/bin/pg_upgrade/exec.c b/src/bin/pg_upgrade/exec.c index 23fe50e33d..1bcd26f8b0 100644 --- a/src/bin/pg_upgrade/exec.c +++ b/src/bin/pg_upgrade/exec.c @@ -37,7 +37,8 @@ get_bin_version(ClusterInfo *cluster) FILE *output; int rc; int v1 = 0, - v2 = 0; + v2 = 0, + v3 = 0; snprintf(cmd, sizeof(cmd), "\"%s/pg_ctl\" --version", cluster->bindir); fflush(NULL); @@ -52,18 +53,20 @@ get_bin_version(ClusterInfo *cluster) pg_fatal("could not get pg_ctl version data using %s: %s", cmd, wait_result_to_str(rc)); - if (sscanf(cmd_output, "%*s %*s %d.%d", &v1, &v2) < 1) - pg_fatal("could not get pg_ctl version output from %s", cmd); + if (sscanf(cmd_output, "%*s %*s %d.%d.%d", &v1, &v2, &v3) < 1) + pg_fatal("could not get pg_ctl version output from %s\n", cmd); if (v1 < 10) { /* old style, e.g. 9.6.1 */ cluster->bin_version = v1 * 10000 + v2 * 100; + cluster->bin_version_num = (cluster->bin_version + v3) * 100; } else { /* new style, e.g. 10.1 */ cluster->bin_version = v1 * 10000; + cluster->bin_version_num = (cluster->bin_version + v2) * 100; } } diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 115faa222e..58069691b0 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -194,6 +194,8 @@ main(int argc, char **argv) check_ok(); } + copy_operation_log(); + create_script_for_old_cluster_deletion(&deletion_script_file_name); issue_warnings_and_set_wal_level(); diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 31589b0fdc..a902bfd8d1 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -264,6 +264,7 @@ typedef struct uint32 major_version; /* PG_VERSION of cluster */ char major_version_str[64]; /* string PG_VERSION of cluster */ uint32 bin_version; /* version returned from pg_ctl */ + uint32 bin_version_num; /* full version (incl. minor part) returned from pg_ctl */ const char *tablespace_suffix; /* directory specification */ } ClusterInfo; @@ -348,6 +349,7 @@ void create_script_for_old_cluster_deletion(char **deletion_script_file_name); void get_control_data(ClusterInfo *cluster, bool live_check); void check_control_data(ControlData *oldctrl, ControlData *newctrl); void disable_old_cluster(void); +void copy_operation_log(void); /* dump.c */ diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 2d1f35bbd1..10a62ff0c0 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -40,18 +40,34 @@ #endif /* - * get_controlfile() + * Descriptions of supported operations of operation log. + */ +OperationLogTypeDesc OperationLogTypesDescs[] = { + {OLT_BOOTSTRAP, OLM_INSERT, "bootstrap"}, + {OLT_STARTUP, OLM_MERGE, "startup"}, + {OLT_RESETWAL, OLM_MERGE, "pg_resetwal"}, + {OLT_REWIND, OLM_MERGE, "pg_rewind"}, + {OLT_UPGRADE, OLM_INSERT, "pg_upgrade"}, + {OLT_PROMOTED, OLM_INSERT, "promoted"} +}; + +/* + * get_controlfile_with_log() * - * Get controlfile values. The result is returned as a palloc'd copy of the - * control file data. + * Get controlfile values. The result is a palloc'd copy of the control file + * data. If log_buffer is not null then result is also a palloc'd buffer with + * operation log. * * crc_ok_p can be used by the caller to see whether the CRC of the control * file data is correct. */ -ControlFileData * -get_controlfile(const char *DataDir, bool *crc_ok_p) +static void +get_controlfile_with_log(const char *DataDir, bool *crc_ok_p, + ControlFileData **control_file, + OperationLogBuffer * *log_buffer) { - ControlFileData *ControlFile; + ControlFileData *ControlFile = NULL; + OperationLogBuffer *LogBuffer = NULL; int fd; char ControlFilePath[MAXPGPATH]; pg_crc32c crc; @@ -59,7 +75,6 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) Assert(crc_ok_p); - ControlFile = palloc_object(ControlFileData); snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); #ifndef FRONTEND @@ -74,27 +89,64 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) ControlFilePath); #endif - r = read(fd, ControlFile, sizeof(ControlFileData)); - if (r != sizeof(ControlFileData)) + if (control_file) { - if (r < 0) + ControlFile = palloc(sizeof(ControlFileData)); + + r = read(fd, ControlFile, sizeof(ControlFileData)); + if (r != sizeof(ControlFileData)) + { + if (r < 0) +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", ControlFilePath))); +#else + pg_fatal("could not read file \"%s\": %m", ControlFilePath); +#endif + else +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read file \"%s\": read %d of %zu", + ControlFilePath, r, sizeof(ControlFileData)))); +#else + pg_fatal("could not read file \"%s\": read %d of %zu", + ControlFilePath, r, sizeof(ControlFileData)); +#endif + } + *control_file = ControlFile; + } + + if (log_buffer) + { + if (lseek(fd, PG_OPERATION_LOG_POS, SEEK_SET) != PG_OPERATION_LOG_POS) #ifndef FRONTEND ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", ControlFilePath))); + errmsg("could not seek to position %d of file \"%s\": %m", + PG_OPERATION_LOG_POS, ControlFilePath))); #else - pg_fatal("could not read file \"%s\": %m", ControlFilePath); + pg_fatal("could not seek to position %d of file \"%s\": %m", + PG_OPERATION_LOG_POS, ControlFilePath); #endif - else + + LogBuffer = palloc(PG_OPERATION_LOG_FULL_SIZE); + + r = read(fd, LogBuffer, PG_OPERATION_LOG_FULL_SIZE); + if (r != PG_OPERATION_LOG_FULL_SIZE) + { #ifndef FRONTEND ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("could not read file \"%s\": read %d of %zu", - ControlFilePath, r, sizeof(ControlFileData)))); + errmsg("could not read operation log from the file \"%s\": read %d of %d", + ControlFilePath, r, PG_OPERATION_LOG_FULL_SIZE))); #else - pg_fatal("could not read file \"%s\": read %d of %zu", - ControlFilePath, r, sizeof(ControlFileData)); + pg_fatal("could not read operation log from the file \"%s\": read %d of %d", + ControlFilePath, r, PG_OPERATION_LOG_FULL_SIZE); #endif + } + *log_buffer = LogBuffer; } #ifndef FRONTEND @@ -108,30 +160,85 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) pg_fatal("could not close file \"%s\": %m", ControlFilePath); #endif - /* Check the CRC. */ - INIT_CRC32C(crc); - COMP_CRC32C(crc, - (char *) ControlFile, - offsetof(ControlFileData, crc)); - FIN_CRC32C(crc); + if (control_file) + { + /* Check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(crc); - *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); + *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); - /* Make sure the control file is valid byte order. */ - if (ControlFile->pg_control_version % 65536 == 0 && - ControlFile->pg_control_version / 65536 != 0) + /* Make sure the control file is valid byte order. */ + if (ControlFile->pg_control_version % 65536 == 0 && + ControlFile->pg_control_version / 65536 != 0) #ifndef FRONTEND - elog(ERROR, _("byte ordering mismatch")); + elog(ERROR, _("byte ordering mismatch")); #else - pg_log_warning("possible byte ordering mismatch\n" - "The byte ordering used to store the pg_control file might not match the one\n" - "used by this program. In that case the results below would be incorrect, and\n" - "the PostgreSQL installation would be incompatible with this data directory."); + pg_log_warning("possible byte ordering mismatch\n" + "The byte ordering used to store the pg_control file might not match the one\n" + "used by this program. In that case the results below would be incorrect, and\n" + "the PostgreSQL installation would be incompatible with this data directory."); #endif + } + else + *crc_ok_p = true; + + /* + * Do not check CRC of operation log if CRC of control file is damaged or + * operation log is not initialized. + */ + if (log_buffer && *crc_ok_p && LogBuffer->header.ol_count) + { + /* Check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, + (char *) LogBuffer + sizeof(pg_crc32c), + PG_OPERATION_LOG_FULL_SIZE - sizeof(pg_crc32c)); + FIN_CRC32C(crc); + + *crc_ok_p = EQ_CRC32C(crc, LogBuffer->header.ol_crc); + } +} +/* + * get_controlfile() + * + * Get controlfile values. The result is returned as a palloc'd copy of the + * control file data. + * + * crc_ok_p can be used by the caller to see whether the CRC of the control + * file data is correct. + */ +ControlFileData * +get_controlfile(const char *DataDir, bool *crc_ok_p) +{ + ControlFileData *ControlFile; + + get_controlfile_with_log(DataDir, crc_ok_p, &ControlFile, NULL); return ControlFile; } +/* + * get_controlfile_log() + * + * Get the operation log ring buffer from controlfile. The result is returned + * as a palloc'd copy of operation log buffer. + * + * crc_ok_p can be used by the caller to see whether the CRC of the operation + * log is correct. + */ +OperationLogBuffer * +get_controlfile_log(const char *DataDir, bool *crc_ok_p) +{ + OperationLogBuffer *log_buffer; + + get_controlfile_with_log(DataDir, crc_ok_p, NULL, &log_buffer); + return log_buffer; +} + /* * update_controlfile() * @@ -146,7 +253,7 @@ update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync) { int fd; - char buffer[PG_CONTROL_FILE_SIZE]; + char buffer[PG_CONTROL_FILE_SIZE_WO_LOG]; char ControlFilePath[MAXPGPATH]; /* @@ -168,11 +275,11 @@ update_controlfile(const char *DataDir, FIN_CRC32C(ControlFile->crc); /* - * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding - * the excess over sizeof(ControlFileData), to avoid premature EOF related - * errors when reading it. + * Write out PG_CONTROL_FILE_SIZE_WO_LOG bytes into pg_control by + * zero-padding the excess over sizeof(ControlFileData), to avoid + * premature EOF related errors when reading it. */ - memset(buffer, 0, PG_CONTROL_FILE_SIZE); + memset(buffer, 0, PG_CONTROL_FILE_SIZE_WO_LOG); memcpy(buffer, ControlFile, sizeof(ControlFileData)); snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); @@ -198,7 +305,7 @@ update_controlfile(const char *DataDir, #ifndef FRONTEND pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); #endif - if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) + if (write(fd, buffer, PG_CONTROL_FILE_SIZE_WO_LOG) != PG_CONTROL_FILE_SIZE_WO_LOG) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) @@ -245,3 +352,374 @@ update_controlfile(const char *DataDir, #endif } } + +/* + * update_controlfile_log() + * + * Update the operation log ring buffer. "do_sync" can be optionally used to + * flush the updated control file. Note that it is up to the caller to + * properly lock ControlFileLock when calling this routine in the backend. + */ +void +update_controlfile_log(const char *DataDir, + OperationLogBuffer * log_buffer, bool do_sync) +{ + int fd; + char ControlFilePath[MAXPGPATH]; + + snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); + + /* Recalculate CRC of operation log. */ + INIT_CRC32C(log_buffer->header.ol_crc); + COMP_CRC32C(log_buffer->header.ol_crc, + (char *) log_buffer + sizeof(pg_crc32c), + PG_OPERATION_LOG_FULL_SIZE - sizeof(pg_crc32c)); + FIN_CRC32C(log_buffer->header.ol_crc); + +#ifndef FRONTEND + + /* + * All errors issue a PANIC, so no need to use OpenTransientFile() and to + * worry about file descriptor leaks. + */ + if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + ControlFilePath))); +#else + if ((fd = open(ControlFilePath, O_WRONLY | PG_BINARY, + pg_file_create_mode)) == -1) + pg_fatal("could not open file \"%s\": %m", ControlFilePath); +#endif + + errno = 0; +#ifndef FRONTEND + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); +#endif + if (lseek(fd, PG_OPERATION_LOG_POS, SEEK_SET) != PG_OPERATION_LOG_POS) +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to position %d of file \"%s\": %m", + PG_OPERATION_LOG_POS, ControlFilePath))); +#else + pg_fatal("could not seek to position %d of file \"%s\": %m", + PG_OPERATION_LOG_POS, ControlFilePath); +#endif + + if (write(fd, log_buffer, PG_OPERATION_LOG_FULL_SIZE) != PG_OPERATION_LOG_FULL_SIZE) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + +#ifndef FRONTEND + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write operation log in the file \"%s\": %m", + ControlFilePath))); +#else + pg_fatal("could not write operation log in the file \"%s\": %m", + ControlFilePath); +#endif + } +#ifndef FRONTEND + pgstat_report_wait_end(); +#endif + + if (do_sync) + { +#ifndef FRONTEND + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE); + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + ControlFilePath))); + pgstat_report_wait_end(); +#else + if (fsync(fd) != 0) + pg_fatal("could not fsync file \"%s\": %m", ControlFilePath); +#endif + } + + if (close(fd) != 0) + { +#ifndef FRONTEND + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + ControlFilePath))); +#else + pg_fatal("could not close file \"%s\": %m", ControlFilePath); +#endif + } +} + +/* + * is_enum_value_correct() + * + * Function returns true in case value is correct value of enum. + * + * val - test value; + * minval - first enum value (correct value); + * maxval - last enum value (incorrect value). + */ +static bool +is_enum_value_correct(int16 val, int16 minval, int16 maxval) +{ + Assert(val >= minval || val < maxval); + + if (val < minval || val >= maxval) + return false; + return true; +} + +/* + * get_operation_log_type_desc() + * + * Function returns pointer to OperationLogTypeDesc struct for given type of + * operation ol_type. + */ +static OperationLogTypeDesc * +get_operation_log_type_desc(ol_type_enum ol_type) +{ + return &OperationLogTypesDescs[ol_type - 1]; +} + +/* + * fill_operation_log_element() + * + * Fill new operation log element. Value of ol_lsn is last checkpoint record + * pointer. + */ +static void +fill_operation_log_element(ControlFileData *ControlFile, + OperationLogTypeDesc * desc, + PgNumEdition edition, uint32 version_num, + OperationLogData * data) +{ + data->ol_type = desc->ol_type; + data->ol_edition = edition; + data->ol_count = 1; + data->ol_version = version_num; + data->ol_timestamp = (pg_time_t) time(NULL); + data->ol_lsn = ControlFile->checkPoint; +} + +/* + * find_operation_log_element_for_merge() + * + * Find element into operation log ring buffer by ol_type and version. + * Returns NULL in case element is not found. + */ +static OperationLogData * +find_operation_log_element_for_merge(ol_type_enum ol_type, + OperationLogBuffer * log_buffer, + PgNumEdition edition, uint32 version_num) +{ + uint32 first = log_buffer->header.ol_first; + uint32 count = get_operation_log_count(log_buffer); + OperationLogData *data; + uint32 i; + + Assert(first < PG_OPERATION_LOG_COUNT && count <= PG_OPERATION_LOG_COUNT); + + for (i = 0; i < count; i++) + { + data = &log_buffer->data[(first + i) % PG_OPERATION_LOG_COUNT]; + if (data->ol_type == ol_type && + data->ol_edition == edition && + data->ol_version == version_num) + return data; + } + + return NULL; +} + +/* + * put_operation_log_element(), put_operation_log_element_version() + * + * Put element into operation log ring buffer. + * + * DataDir is the path to the top level of the PGDATA directory tree; + * ol_type is type of operation; + * edition is edition of current PostgreSQL version; + * version_num is number of version (for example 13000802 for v13.8.2). + * + * Note that it is up to the caller to properly lock ControlFileLock when + * calling this routine in the backend. + */ +void +put_operation_log_element_version(const char *DataDir, ol_type_enum ol_type, + PgNumEdition edition, uint32 version_num) +{ + OperationLogBuffer *log_buffer; + ControlFileData *ControlFile; + bool crc_ok; + OperationLogTypeDesc *desc; + + if (!is_enum_value_correct(ol_type, OLT_BOOTSTRAP, OLT_NumberOfTypes)) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid type of operation (%u) for operation log", ol_type))); +#else + pg_fatal("invalid type of operation (%u) for operation log", ol_type); +#endif + } + + desc = get_operation_log_type_desc(ol_type); + + if (!is_enum_value_correct(desc->ol_mode, OLM_MERGE, OLM_NumberOfModes)) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid mode of operation (%u) for operation log", ol_type))); +#else + pg_fatal("invalid mode of operation (%u) for operation log", ol_type); +#endif + } + + get_controlfile_with_log(DataDir, &crc_ok, &ControlFile, &log_buffer); + + if (!crc_ok) +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("pg_control CRC value is incorrect"))); +#else + pg_fatal("pg_control CRC value is incorrect"); +#endif + + switch (desc->ol_mode) + { + case OLM_MERGE: + { + OperationLogData *data; + + data = find_operation_log_element_for_merge(ol_type, log_buffer, + edition, version_num); + if (data) + { + /* + * We just found the element with the same type and the + * same version. Update it. + */ + if (data->ol_count < PG_UINT16_MAX) /* prevent overflow */ + data->ol_count++; + data->ol_timestamp = (pg_time_t) time(NULL); + data->ol_lsn = ControlFile->checkPoint; + break; + } + } + /* FALLTHROUGH */ + + case OLM_INSERT: + { + uint16 first = log_buffer->header.ol_first; + uint16 count = log_buffer->header.ol_count; + uint16 current; + + Assert(first < PG_OPERATION_LOG_COUNT && count <= PG_OPERATION_LOG_COUNT); + + if (count == PG_OPERATION_LOG_COUNT) + { + current = first; + /* Owerflow, shift the first element */ + log_buffer->header.ol_first = (first + 1) % PG_OPERATION_LOG_COUNT; + } + else + { + current = first + count; + /* Increase number of elements: */ + log_buffer->header.ol_count++; + } + + /* Fill operation log element. */ + fill_operation_log_element(ControlFile, desc, edition, version_num, + &log_buffer->data[current]); + break; + } + + default: +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("unexpected operation log mode %d", + desc->ol_mode))); +#else + pg_fatal("unexpected operation log mode %d", desc->ol_mode); +#endif + } + + update_controlfile_log(DataDir, log_buffer, true); + + pfree(log_buffer); + + pfree(ControlFile); +} + +/* + * Helper constant for determine current edition. + * Here can be custom editions. + */ +static const uint8 current_edition = ED_PG_ORIGINAL; + +/* + * Helper constant for determine current version. + * Multiplier 100 used as reserve of last two digits for patch number. + */ +static const uint32 current_version_num = PG_VERSION_NUM * 100; + +void +put_operation_log_element(const char *DataDir, ol_type_enum ol_type) +{ + put_operation_log_element_version(DataDir, ol_type, current_edition, current_version_num); +} + +/* + * get_operation_log_element() + * + * Returns operation log buffer element with number num. + */ +OperationLogData * +get_operation_log_element(OperationLogBuffer * log_buffer, uint16 num) +{ + uint32 first = log_buffer->header.ol_first; +#ifdef USE_ASSERT_CHECKING + uint32 count = get_operation_log_count(log_buffer); + + Assert(num < count); +#endif + + return &log_buffer->data[(first + num) % PG_OPERATION_LOG_COUNT]; +} + +/* + * get_operation_log_count() + * + * Returns number of elements in given operation log buffer. + */ +uint16 +get_operation_log_count(OperationLogBuffer * log_buffer) +{ + return log_buffer->header.ol_count; +} + +/* + * get_operation_log_type_name() + * + * Returns name of given type. + */ +const char * +get_operation_log_type_name(ol_type_enum ol_type) +{ + if (is_enum_value_correct(ol_type, OLT_BOOTSTRAP, OLT_NumberOfTypes)) + return OperationLogTypesDescs[ol_type - 1].ol_name; + else + return psprintf("unknown name %u", ol_type); +} diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 06368e2366..d6ce987fde 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -247,4 +247,128 @@ typedef struct ControlFileData */ #define PG_CONTROL_FILE_SIZE 8192 +/* + * Type of operation for operation log. + */ +typedef enum +{ + OLT_BOOTSTRAP = 1, /* bootstrap */ + OLT_STARTUP, /* server startup */ + OLT_RESETWAL, /* pg_resetwal */ + OLT_REWIND, /* pg_rewind */ + OLT_UPGRADE, /* pg_upgrade */ + OLT_PROMOTED, /* promoted */ + OLT_NumberOfTypes /* should be last */ +} ol_type_enum; + +/* + * Mode of operation processing. + */ +typedef enum +{ + OLM_MERGE = 1, /* insert element only if not exists element + * with the same ol_type and ol_version; + * otherwise update existing element */ + OLM_INSERT, /* insert element into ring buffer 'as is' */ + OLM_NumberOfModes /* should be last */ +} ol_mode_enum; + +/* + * Helper struct for describing supported operations. + */ +typedef struct OperationLogTypeDesc +{ + ol_type_enum ol_type; /* element type */ + ol_mode_enum ol_mode; /* element mode */ + const char *ol_name; /* display name of element */ +} OperationLogTypeDesc; + +/* + * Element of operation log ring buffer (24 bytes). + */ +typedef struct OperationLogData +{ + uint8 ol_type; /* operation type */ + uint8 ol_edition; /* postgres edition */ + uint16 ol_count; /* number of operations */ + uint32 ol_version; /* postgres version */ + pg_time_t ol_timestamp; /* = int64, operation date/time */ + XLogRecPtr ol_lsn; /* = uint64, last check point record ptr */ +} OperationLogData; + +/* + * Header of operation log ring buffer (16 bytes). + */ +typedef struct OperationLogHeader +{ + pg_crc32c ol_crc; /* CRC of operation log ... MUST BE FIRST! */ + uint16 ol_first; /* position of first ring buffer element */ + uint16 ol_count; /* number of elements in ring buffer */ + uint8 ol_pad[8]; /* just for alignment */ +} OperationLogHeader; + +/* + * Whole size of the operation log ring buffer (with header). + */ +#define PG_OPERATION_LOG_FULL_SIZE 4096 + +/* + * Size of elements of the operation log ring buffer. + * Value must be a multiple of sizeof(OperationLogData). + */ +#define PG_OPERATION_LOG_SIZE (PG_OPERATION_LOG_FULL_SIZE - sizeof(OperationLogHeader)) + +/* + * Size of pg_control file without operation log ring buffer. + */ +#define PG_CONTROL_FILE_SIZE_WO_LOG (PG_CONTROL_FILE_SIZE - PG_OPERATION_LOG_FULL_SIZE) + +/* + * Position of the operation log ring buffer in the control file. + */ +#define PG_OPERATION_LOG_POS PG_CONTROL_FILE_SIZE_WO_LOG + +/* + * Number of elements in the operation log. + */ +#define PG_OPERATION_LOG_COUNT (PG_OPERATION_LOG_SIZE / sizeof(OperationLogData)) + +/* + * Operation log ring buffer. + */ +typedef struct OperationLogBuffer +{ + OperationLogHeader header; + OperationLogData data[PG_OPERATION_LOG_COUNT]; + +} OperationLogBuffer; + +/* Enum for postgres edition. */ +typedef enum +{ + ED_PG_ORIGINAL = 0 + /* Here can be custom editions */ +} PgNumEdition; + +#define ED_PG_ORIGINAL_STR "vanilla" +#define ED_UNKNOWN_STR "unknown" + +/* + * get_str_edition() + * + * Returns edition string by edition number. + */ +static inline const char * +get_str_edition(PgNumEdition edition) +{ + switch (edition) + { + case ED_PG_ORIGINAL: + return ED_PG_ORIGINAL_STR; + + /* Here can be custom editions */ + } + return ED_UNKNOWN_STR; +} + #endif /* PG_CONTROL_H */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fd2559442e..da6a886539 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11837,4 +11837,13 @@ prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary', prosrc => 'brin_minmax_multi_summary_send' }, +# operation log function +{ oid => '8110', descr => 'show operation log', + proname => 'pg_operation_log', prorows => '170', proretset => 't', + provolatile => 'v', prorettype => 'record', proargtypes => '', + proallargtypes => '{text,text,text,pg_lsn,timestamptz,int4}', + proargmodes => '{o,o,o,o,o,o}', + proargnames => '{event,edition,version,lsn,last,count}', + prosrc => 'pg_operation_log' }, + ] diff --git a/src/include/common/controldata_utils.h b/src/include/common/controldata_utils.h index b1dab7547c..ebddf7b897 100644 --- a/src/include/common/controldata_utils.h +++ b/src/include/common/controldata_utils.h @@ -16,4 +16,16 @@ extern ControlFileData *get_controlfile(const char *DataDir, bool *crc_ok_p); extern void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync); +extern OperationLogBuffer * get_controlfile_log(const char *DataDir, bool *crc_ok_p); +extern void update_controlfile_log(const char *DataDir, + OperationLogBuffer * log_buffer, bool do_sync); + +extern void put_operation_log_element(const char *DataDir, ol_type_enum ol_type); +extern void put_operation_log_element_version(const char *DataDir, ol_type_enum ol_type, + PgNumEdition edition, uint32 version_num); +extern uint16 get_operation_log_count(OperationLogBuffer * log_buffer); +extern OperationLogData * get_operation_log_element(OperationLogBuffer * log_buffer, + uint16 num); +extern const char *get_operation_log_type_name(ol_type_enum ol_type); + #endif /* COMMON_CONTROLDATA_UTILS_H */ diff --git a/src/test/modules/test_misc/t/004_operation_log.pl b/src/test/modules/test_misc/t/004_operation_log.pl new file mode 100644 index 0000000000..2e5f0e3b8e --- /dev/null +++ b/src/test/modules/test_misc/t/004_operation_log.pl @@ -0,0 +1,109 @@ + +# Copyright (c) 2022, PostgreSQL Global Development Group + +# Test for operation log. +# +# Some events like "bootstrap", "startup", "pg_resetwal", "promoted", "pg_upgrade". +# should be registered in operation log. +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); + +# Create a primary node +$node_primary->start; + +# Get server version +my $server_version = $node_primary->safe_psql("postgres", "SELECT current_setting('server_version_num');") + 0; +my $major_version = $server_version / 10000; +my $minor_version = $server_version % 100; + +# Initialize standby node from backup +$node_primary->backup('primary_backup'); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, 'primary_backup', + has_streaming => 1); + +$node_standby->start; + +# Wait for standby to catch up +$node_primary->wait_for_catchup($node_standby); + +# Promote the standby and stop it +$node_standby->promote; + +# Stop standby +$node_standby->stop; + +# Get first "pg_resetwal" event +system_or_bail('pg_resetwal', '-f', $node_standby->data_dir); + +# Get second "pg_resetwal" event +system_or_bail('pg_resetwal', '-f', $node_standby->data_dir); + +# Initialize a new node for the upgrade. +my $node_new = PostgreSQL::Test::Cluster->new('new'); +$node_new->init; + +my $bindir_new = $node_new->config_data('--bindir'); +my $bindir_standby = $node_standby->config_data('--bindir'); + +# We want to run pg_upgrade in the build directory so that any files generated +# finish in it, like delete_old_cluster.{sh,bat}. +chdir ${PostgreSQL::Test::Utils::tmp_check}; + +# Run pg_upgrade. +command_ok( + [ + 'pg_upgrade', '--no-sync', '-d', $node_standby->data_dir, + '-D', $node_new->data_dir, '-b', $bindir_standby, + '-B', $bindir_new, '-s', $node_new->host, + '-p', $node_standby->port, '-P', $node_new->port + ], + 'run of pg_upgrade for new instance'); + +$node_new->start; + +my $psql_stdout; + +# Check number of event "bootstrap" +$psql_stdout = $node_new->safe_psql('postgres', q( +SELECT + sum(count), count(*), min(split_part(version,'.','1')), min(split_part(version,'.','2')) +FROM pg_operation_log() WHERE event='bootstrap')); +is($psql_stdout, qq(1|1|$major_version|$minor_version), 'check number of event bootstrap'); + +# Check number of event "startup" +$psql_stdout = $node_new->safe_psql('postgres', q( +SELECT + count(*), min(split_part(version,'.','1')), min(split_part(version,'.','2')) +FROM pg_operation_log() WHERE event='startup')); +is($psql_stdout, qq(1|$major_version|$minor_version), 'check number of event startup'); + +# Check number of event "promoted" +$psql_stdout = $node_new->safe_psql('postgres', q( +SELECT + sum(count), count(*), min(split_part(version,'.','1')), min(split_part(version,'.','2')) +FROM pg_operation_log() WHERE event='promoted')); +is($psql_stdout, qq(1|1|$major_version|$minor_version), 'check number of event promoted'); + +# Check number of event "pg_upgrade" +$psql_stdout = $node_new->safe_psql('postgres', q( +SELECT + sum(count), count(*), min(split_part(version,'.','1')), min(split_part(version,'.','2')) +FROM pg_operation_log() WHERE event='pg_upgrade')); +is($psql_stdout, qq(1|1|$major_version|$minor_version), 'check number of event pg_upgrade'); + +# Check number of event "pg_resetwal" +$psql_stdout = $node_new->safe_psql('postgres', q( +SELECT + sum(count), count(*), min(split_part(version,'.','1')), min(split_part(version,'.','2')) +FROM pg_operation_log() WHERE event='pg_resetwal')); +is($psql_stdout, qq(2|1|$major_version|$minor_version), 'check number of event pg_resetwal'); + +done_testing(); -- 2.31.0.windows.1