From d318c4108b9e05a4828f6e7f71af34c3ca89b3ed Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 16 Aug 2024 20:15:43 +0200 Subject: [PATCH v2] Add prefetching support on macOS macOS doesn't have posix_fadvise(), but fcntl() with the F_RDADVISE command does the same thing. Discussion: https://www.postgresql.org/message-id/flat/0827edec-1317-4917-a186-035eb1e3241d%40eisentraut.org --- doc/src/sgml/config.sgml | 14 +++----- doc/src/sgml/wal.sgml | 4 +-- src/backend/commands/variable.c | 4 +-- src/backend/storage/file/fd.c | 59 ++++++++++++++++++++++----------- src/include/pg_config_manual.h | 7 ++-- src/include/port/darwin.h | 5 +++ 6 files changed, 57 insertions(+), 36 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2937384b001..c6d2fa2148e 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2679,11 +2679,10 @@ Asynchronous Behavior - Asynchronous I/O depends on an effective posix_fadvise - function, which some operating systems lack. If the function is not - present then setting this parameter to anything but zero will result - in an error. On some operating systems (e.g., Solaris), the function - is present but does not actually do anything. + Asynchronous I/O depends on effective support by the operating + system, which some operating systems lack. If there is no operating + system support then setting this parameter to anything but zero will + result in an error. @@ -3852,10 +3851,7 @@ Recovery off, on and try (the default). The setting try enables - prefetching only if the operating system provides the - posix_fadvise function, which is currently used - to implement prefetching. Note that some operating systems provide the - function, but it doesn't do anything. + prefetching only if the operating system provides prefetching support. 
Prefetching blocks that will soon be needed can reduce I/O wait times diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index d5df65bc693..72b73dbf113 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -841,8 +841,8 @@ <acronym>WAL</acronym> Configuration The and settings limit prefetching concurrency and distance, respectively. By default, it is set to - try, which enables the feature on systems where - posix_fadvise is available. + try, which enables the feature on systems that have + prefetching support. diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 6202c5ebe44..c1c6c2811c9 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -1212,7 +1212,7 @@ check_effective_io_concurrency(int *newval, void **extra, GucSource source) #ifndef USE_PREFETCH if (*newval != 0) { - GUC_check_errdetail("\"effective_io_concurrency\" must be set to 0 on platforms that lack posix_fadvise()."); + GUC_check_errdetail("\"effective_io_concurrency\" must be set to 0 on platforms that lack prefetching support."); return false; } #endif /* USE_PREFETCH */ @@ -1225,7 +1225,7 @@ check_maintenance_io_concurrency(int *newval, void **extra, GucSource source) #ifndef USE_PREFETCH if (*newval != 0) { - GUC_check_errdetail("\"maintenance_io_concurrency\" must be set to 0 on platforms that lack posix_fadvise()."); + GUC_check_errdetail("\"maintenance_io_concurrency\" must be set to 0 on platforms that lack prefetching support."); return false; } #endif /* USE_PREFETCH */ diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 3944321ff37..2830b310e0b 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2068,40 +2068,61 @@ FileClose(File file) /* * FilePrefetch - initiate asynchronous read of a given range of the file. 
* - * Currently the only implementation of this function is using posix_fadvise - * which is the simplest standardized interface that accomplishes this. - * We could add an implementation using libaio in the future; but note that - * this API is inappropriate for libaio, which wants to have a buffer provided - * to read into. + * Returns 0 on success, otherwise an errno error code (like posix_fadvise()). + * + * posix_fadvise() is the simplest standardized interface that accomplishes + * this. We could add an implementation using libaio in the future; but note + * that this API is inappropriate for libaio, which wants to have a buffer + * provided to read into. */ int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info) { -#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED) - int returnCode; - Assert(FileIsValid(file)); DO_DB(elog(LOG, "FilePrefetch: %d (%s) " INT64_FORMAT " " INT64_FORMAT, file, VfdCache[file].fileName, (int64) offset, (int64) amount)); - returnCode = FileAccess(file); - if (returnCode < 0) - return returnCode; +#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED) + { + int returnCode; + + returnCode = FileAccess(file); + if (returnCode < 0) + return returnCode; retry: - pgstat_report_wait_start(wait_event_info); - returnCode = posix_fadvise(VfdCache[file].fd, offset, amount, - POSIX_FADV_WILLNEED); - pgstat_report_wait_end(); + pgstat_report_wait_start(wait_event_info); + returnCode = posix_fadvise(VfdCache[file].fd, offset, amount, + POSIX_FADV_WILLNEED); + pgstat_report_wait_end(); - if (returnCode == EINTR) - goto retry; + if (returnCode == EINTR) + goto retry; - return returnCode; + return returnCode; + } +#elif defined(__darwin__) + { /* macOS path: issue the hint with fcntl(F_RDADVISE) and map its -1/errno result onto the "0 or errno code" return convention used by the posix_fadvise() path */ + struct radvisory + { + off_t ra_offset; /* offset into the file */ + int ra_count; /* size of the read */ + } ra; + int returnCode; /* NOTE(review): unlike the posix_fadvise() branch above, this branch uses VfdCache[file].fd without first calling FileAccess(file) -- presumably that call is what guarantees the fd is usable; confirm and add it here if so. There is also no EINTR retry in this branch. */ + + ra.ra_offset = offset; + ra.ra_count = amount; /* NOTE(review): off_t amount is narrowed to int ra_count here -- confirm callers never pass a range that does not fit in an int */ + pgstat_report_wait_start(wait_event_info); + returnCode = 
fcntl(VfdCache[file].fd, F_RDADVISE, &ra); + pgstat_report_wait_end(); + if (returnCode != -1) + return 0; + else + return errno; + } #else - Assert(FileIsValid(file)); return 0; #endif } diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index e799c2989b8..d603b87afd3 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -139,11 +139,10 @@ /* * USE_PREFETCH code should be compiled only if we have a way to implement * prefetching. (This is decoupled from USE_POSIX_FADVISE because there - * might in future be support for alternative low-level prefetch APIs. - * If you change this, you probably need to adjust the error message in - * check_effective_io_concurrency.) + * might in future be support for alternative low-level prefetch APIs, + * as well as platform-specific APIs defined elsewhere.) */ -#ifdef USE_POSIX_FADVISE +#if defined(USE_POSIX_FADVISE) #define USE_PREFETCH #endif diff --git a/src/include/port/darwin.h b/src/include/port/darwin.h index 15fb69d6dbb..6aa2ea70f6b 100644 --- a/src/include/port/darwin.h +++ b/src/include/port/darwin.h @@ -6,3 +6,8 @@ #define HAVE_FSYNC_WRITETHROUGH #endif + +/* + * macOS has a platform-specific implementation of prefetching. + */ +#define USE_PREFETCH base-commit: e3ec9dc1bf4983fcedb6f43c71ea12ee26aefc7a -- 2.46.0