From 60a6058c1960246b9fd431398bd68a77a0247a9a Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Mon, 12 Aug 2024 15:47:55 -0500 Subject: [PATCH v11 1/1] vacuum_delay with absolute time nanosleep --- src/backend/commands/vacuum.c | 2 +- src/backend/port/win32/signal.c | 10 ++++++ src/include/port.h | 1 + src/include/portability/instr_time.h | 10 ++++++ src/port/pgsleep.c | 49 ++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 48f8eab202..43333c4698 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2384,7 +2384,7 @@ vacuum_delay_point(void) msec = vacuum_cost_delay * 4; pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); - pg_usleep(msec * 1000); + pg_usleep_non_interruptible(msec * 1000); pgstat_report_wait_end(); /* diff --git a/src/backend/port/win32/signal.c b/src/backend/port/win32/signal.c index 285cb611b4..edcb181215 100644 --- a/src/backend/port/win32/signal.c +++ b/src/backend/port/win32/signal.c @@ -73,6 +73,16 @@ pg_usleep(long microsec) } } +/* + * pg_usleep_non_interruptible --- delay the specified number of microseconds. + * + * Unlike pg_usleep, this relies on a non-interruptible sleep. + */ +void +pg_usleep_non_interruptible(long microsec) +{ + SleepEx((microsec < 500 ? 
1 : (microsec + 500) / 1000), FALSE); +} /* Initialization */ void diff --git a/src/include/port.h b/src/include/port.h index c740005267..c8ff23e5ee 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -162,6 +162,7 @@ extern int pg_disable_aslr(void); /* Portable delay handling */ extern void pg_usleep(long microsec); +extern void pg_usleep_non_interruptible(long microsec); /* Portable SQL-like case-independent comparisons and conversions */ extern int pg_strcasecmp(const char *s1, const char *s2); diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index e66ecf34cd..6e4b0f1b17 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -36,6 +36,10 @@ * * INSTR_TIME_GET_NANOSEC(t) convert t to int64 (in nanoseconds) * + * INSTR_TIME_ADD_MICROSEC(x,t) add t (in microseconds) to x + * + * INSTR_TIME_IS_GREATER(x,y) is x greater than y? + * * Note that INSTR_TIME_SUBTRACT and INSTR_TIME_ACCUM_DIFF convert * absolute times to intervals. The INSTR_TIME_GET_xxx operations are * only useful on intervals. @@ -194,4 +198,10 @@ GetTimerFrequency(void) #define INSTR_TIME_GET_MICROSEC(t) \ (INSTR_TIME_GET_NANOSEC(t) / NS_PER_US) +#define INSTR_TIME_ADD_MICROSEC(x,t) \ + ((x).ticks += (t) * NS_PER_US) + +#define INSTR_TIME_IS_GREATER(x,y) \ + ((x).ticks > (y).ticks) + #endif /* INSTR_TIME_H */ diff --git a/src/port/pgsleep.c b/src/port/pgsleep.c index 1284458bfc..63523a6e41 100644 --- a/src/port/pgsleep.c +++ b/src/port/pgsleep.c @@ -14,6 +14,8 @@ #include +#include "portability/instr_time.h" + /* * In a Windows backend, we don't use this implementation, but rather * the signal-aware version in src/backend/port/win32/signal.c. @@ -54,4 +56,51 @@ pg_usleep(long microsec) } } +/* + * pg_usleep_non_interruptible --- delay the specified number of microseconds. + * + * Unlike pg_usleep, this function continues the delay in case of an + * interrupt. 
+ */ +void +pg_usleep_non_interruptible(long microsec) +{ + /* + * We allow nanosleep to handle interrupts and retry with the remaining + * time. However, frequent interruptions and restarts of the nanosleep + * calls can lead to substantial drift in the time at which the sleep + * finally completes. To deal with this, we break out of the loop whenever + * the current time is past the expected end time of the sleep. + */ + if (microsec > 0) + { +#ifndef WIN32 + struct timespec delay; + struct timespec remain; + instr_time end_time; + + INSTR_TIME_SET_CURRENT(end_time); + INSTR_TIME_ADD_MICROSEC(end_time, microsec); + + delay.tv_sec = microsec / 1000000L; + delay.tv_nsec = (microsec % 1000000L) * 1000; + + while (nanosleep(&delay, &remain) == -1 && errno == EINTR) + { + instr_time current_time; + + INSTR_TIME_SET_CURRENT(current_time); + + if (INSTR_TIME_IS_GREATER(current_time, end_time)) + break; + + delay = remain; + } +#else + SleepEx((microsec < 500 ? 1 : (microsec + 500) / 1000), FALSE); +#endif + } +} + + #endif /* defined(FRONTEND) || !defined(WIN32) */ -- 2.39.3 (Apple Git-146)