diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index dd9136a942..d1b1f95400 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -64,9 +64,28 @@ SignalHandlerForConfigReload(SIGNAL_ARGS) } /* - * Simple signal handler for exiting quickly as if due to a crash. + * Simple signal handler for processes which have not yet touched or do not + * touch shared memory to exit quickly. * - * Normally, this would be used for handling SIGQUIT. + * Note that if processes already touched shared memory, use + * SignalHandlerForCrashExit() instead and force the postmaster into + * a system reset cycle because shared memory may be corrupted. + */ +void +SignalHandlerForNonCrashExit(SIGNAL_ARGS) +{ + /* + * Since we don't touch shared memory, we can just pull the plug and exit + * without running any atexit handlers. + */ + _exit(1); +} + +/* + * Simple signal handler for processes which have touched shared memory to + * exit quickly. + * + * Normally, this would be used for handling SIGQUIT as if due to a crash. */ void SignalHandlerForCrashExit(SIGNAL_ARGS) @@ -93,9 +112,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS) * shut down and exit. * * Typically, this handler would be used for SIGTERM, but some processes use - * other signals. In particular, the checkpointer exits on SIGUSR2, the - * stats collector on SIGQUIT, and the WAL writer exits on either SIGINT - * or SIGTERM. + * other signals. In particular, the checkpointer and the stats collector exit + * on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM. * * ShutdownRequestPending should be checked at a convenient place within the * main loop, or else the main loop should call HandleMainLoopInterrupts. 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 60f45ccc4e..fd0af0f289 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -1,7 +1,23 @@ /* ---------- * pgstat.c * - * All the statistics collector stuff hacked up in one big, ugly file. + * All the statistics collector stuff hacked up in one big, ugly file. + * + * The statistics collector is started by the postmaster as soon as the + * startup subprocess finishes, or as soon as the postmaster is ready + * to accept read-only connections during archive recovery. It remains + * alive until the postmaster commands it to terminate. Normal + * termination is by SIGUSR2, sent after the checkpointer has exited, + * which instructs the statistics collector to save the final statistics + * for reuse at next startup and then exit(0). + * Emergency termination is by SIGQUIT; like any backend, the statistics + * collector will exit quickly without saving the final statistics. This is + * OK because the startup process will remove all statistics at next + * startup after emergency termination. + * + * Because the statistics collector doesn't touch shared memory, even if + * the statistics collector exits unexpectedly, the postmaster doesn't + * treat it as a crash. The postmaster will just try to start a new one. * * TODO: - Separate collector, postmaster and backend stuff * into different files. @@ -724,6 +740,7 @@ pgstat_reset_remove_files(const char *directory) snprintf(fname, sizeof(fname), "%s/%s", directory, entry->d_name); + elog(DEBUG2, "removing stats file \"%s\"", fname); unlink(fname); } FreeDir(dir); @@ -4821,17 +4838,31 @@ PgstatCollectorMain(int argc, char *argv[]) /* * Ignore all signals usually bound to some action in the postmaster, - * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to - * support latch operations, because we only use a local latch. + * except SIGHUP, SIGQUIT and SIGUSR2. 
Note we don't need a SIGUSR1 + * handler to support latch operations, because we only use a local latch. + * + * We deliberately ignore SIGTERM and exit in the correct order because we + * want to collect the stats sent during the shutdown from all processes. + * SIGTERM will be received during a standard Unix system shutdown cycle + * because init will SIGTERM all processes at once, and the postmaster + * will SIGTERM all processes at once when recovery_target_action=shutdown + * and the startup process exits after reaching the recovery target. We + * want to wait for the checkpointer, which is the last process to send + * stats, to exit, whereupon the postmaster will tell us it's okay to + * shut down (via SIGUSR2). + * + * If SIGQUIT is received, exit quickly without doing any additional work, + * for example writing stats files. We arrange to do _exit(1) because the + * stats collector doesn't touch shared memory. */ pqsignal(SIGHUP, SignalHandlerForConfigReload); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); - pqsignal(SIGQUIT, SignalHandlerForShutdownRequest); + pqsignal(SIGQUIT, SignalHandlerForNonCrashExit); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); - pqsignal(SIGUSR2, SIG_IGN); + pqsignal(SIGUSR2, SignalHandlerForShutdownRequest); /* Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); PG_SETMASK(&UnBlockSig); @@ -4852,8 +4883,8 @@ PgstatCollectorMain(int argc, char *argv[]) AddWaitEventToSet(wes, WL_SOCKET_READABLE, pgStatSock, NULL, NULL); /* - * Loop to process messages until we get SIGQUIT or detect ungraceful - * death of our parent postmaster. + * Loop to process messages until we get SIGUSR2, SIGQUIT or detect + * ungraceful death of our parent postmaster. 
* * For performance reasons, we don't want to do ResetLatch/WaitLatch after * every message; instead, do that only after a recv() fails to obtain a @@ -4871,7 +4902,7 @@ PgstatCollectorMain(int argc, char *argv[]) ResetLatch(MyLatch); /* - * Quit if we get SIGQUIT from the postmaster. + * Quit if we get SIGUSR2 from the postmaster. */ if (ShutdownRequestPending) break; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ef0be4ca38..08792b0033 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -401,7 +401,6 @@ static void SIGHUP_handler(SIGNAL_ARGS); static void pmdie(SIGNAL_ARGS); static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); -static void process_startup_packet_die(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); static void StartupPacketTimeoutHandler(void); static void CleanupBackend(int pid, int exitstatus); @@ -3085,7 +3084,7 @@ reaper(SIGNAL_ARGS) * nothing left for it to do. */ if (PgStatPID != 0) - signal_child(PgStatPID, SIGQUIT); + signal_child(PgStatPID, SIGUSR2); } else { @@ -4320,8 +4319,14 @@ BackendInitialize(Port *port) * Exiting with _exit(1) is only possible because we have not yet touched * shared memory; therefore no outside-the-process state needs to get * cleaned up. + * + * One might be tempted to try to send a message, or log one, indicating + * why we are disconnecting. However, that would be quite unsafe in + * itself. Also, it seems undesirable to provide clues about the + * database's state to a client that has not yet completed authentication, + * or even sent us a startup packet. 
*/ - pqsignal(SIGTERM, process_startup_packet_die); + pqsignal(SIGTERM, SignalHandlerForNonCrashExit); /* SIGQUIT handler was already set up by InitPostmasterChild */ InitializeTimeouts(); /* establishes SIGALRM handler */ PG_SETMASK(&StartupBlockSig); @@ -5274,25 +5279,6 @@ sigusr1_handler(SIGNAL_ARGS) errno = save_errno; } -/* - * SIGTERM while processing startup packet. - * - * Running proc_exit() from a signal handler would be quite unsafe. - * However, since we have not yet touched shared memory, we can just - * pull the plug and exit without running any atexit handlers. - * - * One might be tempted to try to send a message, or log one, indicating - * why we are disconnecting. However, that would be quite unsafe in itself. - * Also, it seems undesirable to provide clues about the database's state - * to a client that has not yet completed authentication, or even sent us - * a startup packet. - */ -static void -process_startup_packet_die(SIGNAL_ARGS) -{ - _exit(1); -} - /* * Dummy signal handler * @@ -5309,7 +5295,7 @@ dummy_handler(SIGNAL_ARGS) /* * Timeout while processing startup packet. - * As for process_startup_packet_die(), we exit via _exit(1). + * As for SignalHandlerForNonCrashExit(), we exit via _exit(1). */ static void StartupPacketTimeoutHandler(void) diff --git a/src/include/postmaster/interrupt.h b/src/include/postmaster/interrupt.h index 85a1293ef1..3f3dc19e24 100644 --- a/src/include/postmaster/interrupt.h +++ b/src/include/postmaster/interrupt.h @@ -26,6 +26,7 @@ extern PGDLLIMPORT volatile sig_atomic_t ShutdownRequestPending; extern void HandleMainLoopInterrupts(void); extern void SignalHandlerForConfigReload(SIGNAL_ARGS); +extern void SignalHandlerForNonCrashExit(SIGNAL_ARGS); extern void SignalHandlerForCrashExit(SIGNAL_ARGS); extern void SignalHandlerForShutdownRequest(SIGNAL_ARGS);