From 2542c6830d98e146d79844fb84fe3fb1b2945c25 Mon Sep 17 00:00:00 2001 From: Paul Amonson Date: Tue, 23 Jul 2024 11:23:23 -0700 Subject: [PATCH v10 2/4] Refactor: consolidate x86 ISA and OS runtime checks Move all x86 ISA and OS runtime checks into a single file for improved modularity and easier future maintenance. Signed-off-by: Paul Amonson Signed-off-by: Raghuveer Devulapalli --- src/include/port/pg_bitutils.h | 1 - src/include/port/pg_hw_feat_check.h | 33 ++++++ src/port/Makefile | 1 + src/port/meson.build | 3 + src/port/pg_bitutils.c | 22 +--- src/port/pg_crc32c_sse42_choose.c | 21 +--- src/port/pg_hw_feat_check.c | 163 ++++++++++++++++++++++++++++ src/port/pg_popcount_avx512.c | 78 ------------- 8 files changed, 205 insertions(+), 117 deletions(-) create mode 100644 src/include/port/pg_hw_feat_check.h create mode 100644 src/port/pg_hw_feat_check.c diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index a3cad46afe..461c7c13cf 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -312,7 +312,6 @@ extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int * files. */ #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK -extern bool pg_popcount_avx512_available(void); extern uint64 pg_popcount_avx512(const char *buf, int bytes); extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask); #endif diff --git a/src/include/port/pg_hw_feat_check.h b/src/include/port/pg_hw_feat_check.h new file mode 100644 index 0000000000..58be900b54 --- /dev/null +++ b/src/include/port/pg_hw_feat_check.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * pg_hw_feat_check.h + * Miscellaneous functions for cheing for hardware features at runtime. + * + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * src/include/port/pg_hw_feat_check.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_HW_FEAT_CHECK_H +#define PG_HW_FEAT_CHECK_H + +/* + * Test to see if all hardware features required by SSE 4.2 crc32c (64 bit) + * are available. + */ +extern PGDLLIMPORT bool pg_crc32c_sse42_available(void); + +/* + * Test to see if all hardware features required by SSE 4.1 POPCNT (64 bit) + * are available. + */ +extern PGDLLIMPORT bool pg_popcount_available(void); + +/* + * Test to see if all hardware features required by AVX-512 POPCNT are + * available. + */ +extern PGDLLIMPORT bool pg_popcount_avx512_available(void); +#endif /* PG_HW_FEAT_CHECK_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 4c22431951..6088b56b71 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -45,6 +45,7 @@ OBJS = \ path.o \ pg_bitutils.o \ pg_popcount_avx512.o \ + pg_hw_feat_check.o \ pg_strong_random.o \ pgcheckdir.o \ pgmkdirp.o \ diff --git a/src/port/meson.build b/src/port/meson.build index c5bceed9cd..ec28590473 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -8,6 +8,9 @@ pgport_sources = [ 'path.c', 'pg_bitutils.c', 'pg_popcount_avx512.c', + 'pg_crc32c_sse42_choose.c', + 'pg_crc32c_sse42.c', + 'pg_hw_feat_check.c', 'pg_strong_random.c', 'pgcheckdir.c', 'pgmkdirp.c', diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index c8399981ee..c11b13dca2 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -20,7 +20,7 @@ #endif #include "port/pg_bitutils.h" - +#include "port/pg_hw_feat_check.h" /* * Array giving the position of the left-most set bit for each possible @@ -109,7 +109,6 @@ static uint64 pg_popcount_slow(const char *buf, int bytes); static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask); #ifdef TRY_POPCNT_FAST -static bool pg_popcount_available(void); static int pg_popcount32_choose(uint32 word); static int pg_popcount64_choose(uint64 word); static uint64 pg_popcount_choose(const char *buf, int bytes); @@ -127,25 +126,6 @@ uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) #ifdef TRY_POPCNT_FAST -/* - * Return true if CPUID indicates that the POPCNT instruction is available. - */ -static bool -pg_popcount_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} - /* * These functions get called on the first call to pg_popcount32 etc. * They detect whether we can use the asm implementations, and replace diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 56d600f3a9..c659917af0 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -20,6 +20,7 @@ #include "c.h" +#if defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) #ifdef HAVE__GET_CPUID #include #endif @@ -29,22 +30,7 @@ #endif #include "port/pg_crc32c.h" - -static bool -pg_crc32c_sse42_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ -} +#include "port/pg_hw_feat_check.h" /* * This gets called on the first call. It replaces the function pointer @@ -61,4 +47,5 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) return pg_comp_crc32c(crc, data, len); } -pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; +#endif diff --git a/src/port/pg_hw_feat_check.c b/src/port/pg_hw_feat_check.c new file mode 100644 index 0000000000..260aa60502 --- /dev/null +++ b/src/port/pg_hw_feat_check.c @@ -0,0 +1,163 @@ +/*------------------------------------------------------------------------- + * + * pg_hw_feat_check.c + * Test for hardware features at runtime on x86_64 platforms. + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_hw_feat_check.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) +#include +#endif + +#include + +#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) +#include +#endif + +#include "port/pg_hw_feat_check.h" + +/* Define names for EXX registers to avoid hard to see bugs in code below. */ +typedef unsigned int exx_t; +typedef enum +{ + EAX = 0, + EBX = 1, + ECX = 2, + EDX = 3 +} reg_name; + +/* + * Helper function. + * Test for a bit being set in a exx_t register. + */ +inline static bool is_bit_set_in_exx(exx_t* regs, reg_name ex, int bit) +{ + return ((regs[ex] & (1 << bit)) != 0); +} + +/* + * x86_64 Platform CPUID check for Linux and Visual Studio platforms. + */ +inline static void +pg_getcpuid(unsigned int leaf, exx_t *exx) +{ +#if defined(HAVE__GET_CPUID) + __get_cpuid(leaf, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif +} + +/* + * x86_64 Platform CPUIDEX check for Linux and Visual Studio platforms. + */ +inline static void +pg_getcpuidex(unsigned int leaf, unsigned int subleaf, exx_t *exx) +{ +#if defined(HAVE__GET_CPUID_COUNT) + __get_cpuid_count(leaf, subleaf, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUIDEX) + __cpuidex(exx, 7, 0); +#else +#error cpuid instruction not available +#endif +} + +/* + * Check for CPU support for CPUID: osxsave + */ +inline static bool +osxsave_available(void) +{ +#if defined(HAVE_XSAVE_INTRINSICS) + exx_t exx[4] = {0, 0, 0, 0}; + + pg_getcpuid(1, exx); + + return is_bit_set_in_exx(exx, ECX, 27); /* osxsave */ +#else + return false; +#endif +} + +/* + * Does XGETBV say the ZMM registers are enabled? + * + * NB: Caller is responsible for verifying that osxsave_available() returns true + * before calling this. + */ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif +inline static bool +zmm_regs_available(void) +{ +#if defined(HAVE_XSAVE_INTRINSICS) + return (_xgetbv(0) & 0xe6) == 0xe6; +#else + return false; +#endif +} + +/* + * Does CPUID say there's support for AVX-512 popcount and byte-and-word + * instructions? + */ +inline static bool +avx512_popcnt_available(void) +{ + exx_t exx[4] = {0, 0, 0, 0}; + + pg_getcpuidex(7, 0, exx); + + return is_bit_set_in_exx(exx, ECX, 14) && is_bit_set_in_exx(exx, EBX, 30); +} + +/* + * Return true if CPUID indicates that the POPCNT instruction is available. + */ +bool PGDLLIMPORT pg_popcount_available(void) +{ + exx_t exx[4] = {0, 0, 0, 0}; + + pg_getcpuid(1, exx); + + return is_bit_set_in_exx(exx, ECX, 23); + } + + /* + * Returns true if the CPU supports the instructions required for the AVX-512 + * pg_popcount() implementation. + * + * PA: The call to 'osxsave_available' MUST preceed the call to + * 'zmm_regs_available' function per NB above. + */ +bool PGDLLIMPORT pg_popcount_avx512_available(void) +{ + return osxsave_available() && + zmm_regs_available() && + avx512_popcnt_available(); +} + +/* + * Does CPUID say there's support for SSE 4.2? + */ +bool PGDLLIMPORT pg_crc32c_sse42_available(void) +{ + exx_t exx[4] = {0, 0, 0, 0}; + + pg_getcpuid(1, exx); + + return is_bit_set_in_exx(exx, ECX, 20); +} + diff --git a/src/port/pg_popcount_avx512.c b/src/port/pg_popcount_avx512.c index c8a4f2b19f..1123a1a634 100644 --- a/src/port/pg_popcount_avx512.c +++ b/src/port/pg_popcount_avx512.c @@ -14,16 +14,7 @@ #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK -#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) -#include -#endif - #include - -#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) -#include -#endif - #include "port/pg_bitutils.h" /* @@ -33,75 +24,6 @@ */ #ifdef TRY_POPCNT_FAST -/* - * Does CPUID say there's support for XSAVE instructions? - */ -static inline bool -xsave_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 27)) != 0; /* osxsave */ -} - -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that xsave_available() returns true - * before calling this. - */ -#ifdef HAVE_XSAVE_INTRINSICS -pg_attribute_target("xsave") -#endif -static inline bool -zmm_regs_available(void) -{ -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif -} - -/* - * Does CPUID say there's support for AVX-512 popcount and byte-and-word - * instructions? - */ -static inline bool -avx512_popcnt_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ - (exx[1] & (1 << 30)) != 0; /* avx512-bw */ -} - -/* - * Returns true if the CPU supports the instructions required for the AVX-512 - * pg_popcount() implementation. - */ -bool -pg_popcount_avx512_available(void) -{ - return xsave_available() && - zmm_regs_available() && - avx512_popcnt_available(); -} - /* * pg_popcount_avx512 * Returns the number of 1-bits in buf -- 2.34.1