From 94879f695ff8961255c2daa46b81ce378a55732d Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 13 Aug 2024 14:15:54 +1200 Subject: [PATCH v6 1/3] Provide thread-safe pg_localeconv_r(). This involves four different implementation strategies: 1. For Windows, we now require _configthreadlocale() to be available and work, and the documentation says that the object returned by localeconv() is in thread-local memory. 2. For glibc, we translate to nl_langinfo_l() calls, because it offers the same information that way as an extension, and that API is thread-safe. 3. For macOS/*BSD, use localeconv_l(), which is thread-safe. 4. For everything else, use uselocale() to set the locale for the thread, and use a big ugly lock to defend against the returned object being concurrently clobbered. In practice this currently means only Solaris. The new call is used in pg_locale.c, replacing calls to setlocale() and localeconv(). This patch adds a hard requirement on Windows' _configthreadlocale(). In the past there were said to be MinGW systems that didn't have it, or had it but it didn't work. As far as I can tell, CI (optional MinGW task + mingw cross build warning task) and build farm (fairywren msys) should be happy with this. Fingers crossed. (There are places that use configure checks for that in ECPG; other proposed patches would remove those later.) 
Reviewed-by: Heikki Linnakangas Discussion: https://postgr.es/m/CA%2BhUKGJqVe0%2BPv9dvC9dSums_PXxGo9SWcxYAMBguWJUGbWz-A%40mail.gmail.com --- configure | 2 +- configure.ac | 1 + meson.build | 1 + src/backend/utils/adt/pg_locale.c | 128 ++--------- src/include/pg_config.h.in | 3 + src/include/port.h | 6 + src/port/Makefile | 1 + src/port/meson.build | 1 + src/port/pg_localeconv_r.c | 367 ++++++++++++++++++++++++++++++ 9 files changed, 402 insertions(+), 108 deletions(-) create mode 100644 src/port/pg_localeconv_r.c diff --git a/configure b/configure index 0ffcaeb4367..3e7c5fc91d6 100755 --- a/configure +++ b/configure @@ -14934,7 +14934,7 @@ fi LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -for ac_func in backtrace_symbols copyfile copy_file_range elf_aux_info getauxval getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l +for ac_func in backtrace_symbols copyfile copy_file_range elf_aux_info getauxval getifaddrs getpeerucred inet_pton localeconv_l kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.ac b/configure.ac index f56681e0d91..e56136049e9 100644 --- a/configure.ac +++ b/configure.ac @@ -1710,6 +1710,7 @@ AC_CHECK_FUNCS(m4_normalize([ getifaddrs getpeerucred inet_pton + localeconv_l kqueue mbstowcs_l memset_s diff --git a/meson.build b/meson.build index 1ceadb9a830..a8a6f34f6c6 100644 --- a/meson.build +++ b/meson.build @@ -2634,6 +2634,7 @@ func_checks = [ ['inet_aton'], ['inet_pton'], ['kqueue'], + ['localeconv_l'], ['mbstowcs_l'], ['memset_s'], ['mkdtemp'], diff --git a/src/backend/utils/adt/pg_locale.c 
b/src/backend/utils/adt/pg_locale.c index 7d92f580a57..4dd4313b779 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -547,12 +547,8 @@ PGLC_localeconv(void) static struct lconv CurrentLocaleConv; static bool CurrentLocaleConvAllocated = false; struct lconv *extlconv; - struct lconv worklconv; - char *save_lc_monetary; - char *save_lc_numeric; -#ifdef WIN32 - char *save_lc_ctype; -#endif + struct lconv tmp; + struct lconv worklconv = {0}; /* Did we do it already? */ if (CurrentLocaleConvValid) @@ -566,77 +562,21 @@ PGLC_localeconv(void) } /* - * This is tricky because we really don't want to risk throwing error - * while the locale is set to other than our usual settings. Therefore, - * the process is: collect the usual settings, set locale to special - * setting, copy relevant data into worklconv using strdup(), restore - * normal settings, convert data to desired encoding, and finally stash - * the collected data in CurrentLocaleConv. This makes it safe if we - * throw an error during encoding conversion or run out of memory anywhere - * in the process. All data pointed to by struct lconv members is - * allocated with strdup, to avoid premature elog(ERROR) and to allow - * using a single cleanup routine. + * Use thread-safe method of obtaining a copy of lconv from the operating + * system. */ - memset(&worklconv, 0, sizeof(worklconv)); - - /* Save prevailing values of monetary and numeric locales */ - save_lc_monetary = setlocale(LC_MONETARY, NULL); - if (!save_lc_monetary) - elog(ERROR, "setlocale(NULL) failed"); - save_lc_monetary = pstrdup(save_lc_monetary); - - save_lc_numeric = setlocale(LC_NUMERIC, NULL); - if (!save_lc_numeric) - elog(ERROR, "setlocale(NULL) failed"); - save_lc_numeric = pstrdup(save_lc_numeric); - -#ifdef WIN32 - - /* - * The POSIX standard explicitly says that it is undefined what happens if - * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from - * that implied by LC_CTYPE. 
In practice, all Unix-ish platforms seem to - * believe that localeconv() should return strings that are encoded in the - * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence, - * once we have successfully collected the localeconv() results, we will - * convert them from that codeset to the desired server encoding. - * - * Windows, of course, resolutely does things its own way; on that - * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane - * results. Hence, we must temporarily set that category as well. - */ - - /* Save prevailing value of ctype locale */ - save_lc_ctype = setlocale(LC_CTYPE, NULL); - if (!save_lc_ctype) - elog(ERROR, "setlocale(NULL) failed"); - save_lc_ctype = pstrdup(save_lc_ctype); - - /* Here begins the critical section where we must not throw error */ - - /* use numeric to set the ctype */ - setlocale(LC_CTYPE, locale_numeric); -#endif - - /* Get formatting information for numeric */ - setlocale(LC_NUMERIC, locale_numeric); - extlconv = localeconv(); - - /* Must copy data now in case setlocale() overwrites it */ + if (pg_localeconv_r(locale_monetary, + locale_numeric, + &tmp) != 0) + elog(ERROR, + "could not get lconv for LC_MONETARY = \"%s\", LC_NUMERIC = \"%s\": %m", + locale_monetary, locale_numeric); + + /* Must copy data now now so we can re-encode it. 
*/ + extlconv = &tmp; worklconv.decimal_point = strdup(extlconv->decimal_point); worklconv.thousands_sep = strdup(extlconv->thousands_sep); worklconv.grouping = strdup(extlconv->grouping); - -#ifdef WIN32 - /* use monetary to set the ctype */ - setlocale(LC_CTYPE, locale_monetary); -#endif - - /* Get formatting information for monetary */ - setlocale(LC_MONETARY, locale_monetary); - extlconv = localeconv(); - - /* Must copy data now in case setlocale() overwrites it */ worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol); worklconv.currency_symbol = strdup(extlconv->currency_symbol); worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point); @@ -654,45 +594,19 @@ PGLC_localeconv(void) worklconv.p_sign_posn = extlconv->p_sign_posn; worklconv.n_sign_posn = extlconv->n_sign_posn; - /* - * Restore the prevailing locale settings; failure to do so is fatal. - * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC, - * but proceeding with the wrong value of LC_CTYPE would certainly be bad - * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC - * are almost certainly "C", there's really no reason that restoring those - * should fail. - */ -#ifdef WIN32 - if (!setlocale(LC_CTYPE, save_lc_ctype)) - elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); -#endif - if (!setlocale(LC_MONETARY, save_lc_monetary)) - elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary); - if (!setlocale(LC_NUMERIC, save_lc_numeric)) - elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric); + /* Free the contents of the object populated by pg_localeconv_r(). */ + pg_localeconv_free(&tmp); + + /* If any of the preceding strdup calls failed, complain now. 
*/ + if (!struct_lconv_is_valid(&worklconv)) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - /* - * At this point we've done our best to clean up, and can call functions - * that might possibly throw errors with a clean conscience. But let's - * make sure we don't leak any already-strdup'd fields in worklconv. - */ PG_TRY(); { int encoding; - /* Release the pstrdup'd locale names */ - pfree(save_lc_monetary); - pfree(save_lc_numeric); -#ifdef WIN32 - pfree(save_lc_ctype); -#endif - - /* If any of the preceding strdup calls failed, complain now. */ - if (!struct_lconv_is_valid(&worklconv)) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - /* * Now we must perform encoding conversion from whatever's associated * with the locales into the database encoding. If we can't identify diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 07b2f798abd..efca32c33c0 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -265,6 +265,9 @@ /* Define to 1 if you have the `zstd' library (-lzstd). */ #undef HAVE_LIBZSTD +/* Define to 1 if you have the `localeconv_l' function. */ +#undef HAVE_LOCALECONV_L + /* Define to 1 if you have the header file. 
*/ #undef HAVE_MBARRIER_H diff --git a/src/include/port.h b/src/include/port.h index 703cad868ba..3faae03d246 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -487,6 +487,12 @@ extern void *bsearch_arg(const void *key, const void *base0, int (*compar) (const void *, const void *, void *), void *arg); +/* port/pg_localeconv_r.c */ +extern int pg_localeconv_r(const char *lc_monetary, + const char *lc_numeric, + struct lconv *output); +extern void pg_localeconv_free(struct lconv *lconv); + /* port/chklocale.c */ extern int pg_get_encoding_from_locale(const char *ctype, bool write_message); diff --git a/src/port/Makefile b/src/port/Makefile index 4c224319512..7843d7b67cb 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -44,6 +44,7 @@ OBJS = \ noblock.o \ path.o \ pg_bitutils.o \ + pg_localeconv_r.o \ pg_popcount_avx512.o \ pg_strong_random.o \ pgcheckdir.o \ diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..653539ba5b3 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -7,6 +7,7 @@ pgport_sources = [ 'noblock.c', 'path.c', 'pg_bitutils.c', + 'pg_localeconv_r.c', 'pg_popcount_avx512.c', 'pg_strong_random.c', 'pgcheckdir.c', diff --git a/src/port/pg_localeconv_r.c b/src/port/pg_localeconv_r.c new file mode 100644 index 00000000000..efb98cd127d --- /dev/null +++ b/src/port/pg_localeconv_r.c @@ -0,0 +1,367 @@ +/*------------------------------------------------------------------------- + * + * pg_localeconv_r.c + * Thread-safe implementations of localeconv() + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_localeconv_r.c + * + *------------------------------------------------------------------------- + */ + +#include "c.h" + +#if !defined(WIN32) +#include +#include +#endif + +#include + +#ifdef MON_THOUSANDS_SEP +/* + * One of glibc's extended langinfo items detected. 
Assume that the full set
+ * is present, which means we can use nl_langinfo_l() instead of localeconv().
+ */
+#define TRANSLATE_FROM_LANGINFO
+#endif
+
+/*
+ * Describes one member of struct lconv: whether it is a string or a single
+ * char, which locale category governs it, and where it lives inside the
+ * struct.  When translating from nl_langinfo_l(), it also records the
+ * nl_item that yields the same information.
+ */
+struct lconv_member_info
+{
+	bool		is_string;		/* true: "char *" member; false: "char" member */
+	int			category;		/* LC_NUMERIC or LC_MONETARY */
+	size_t		offset;			/* offsetof() the member in struct lconv */
+#ifdef TRANSLATE_FROM_LANGINFO
+	nl_item		item;			/* equivalent nl_langinfo_l() item */
+#endif
+};
+
+/*
+ * Some macros to declare the lconv members compactly.  Without
+ * TRANSLATE_FROM_LANGINFO the "item" argument is dropped by LCONV_M, so the
+ * nl_item names used in the table below never have to exist on such systems.
+ */
+#ifdef TRANSLATE_FROM_LANGINFO
+#define LCONV_M(is_string, category, name, item) \
+	{ is_string, category, offsetof(struct lconv, name), item }
+#else
+#define LCONV_M(is_string, category, name, item) \
+	{ is_string, category, offsetof(struct lconv, name) }
+#endif
+/* String ("char *") member. */
+#define LCONV_S(c, n, i) LCONV_M(true, c, n, i)
+/* Single-character ("char") member. */
+#define LCONV_C(c, n, i) LCONV_M(false, c, n, i)
+
+/*
+ * The work of populating lconv objects is driven by this table.  Since we
+ * tolerate non-matching encodings in LC_NUMERIC and LC_MONETARY, we have to
+ * call the underlying OS routine multiple times, with the correct locales.
+ * The first column of this table says which locale applies to each struct
+ * member.  The second column is the name of the struct member.  The third
+ * column is the name of the nl_item, if translating from nl_langinfo_l() (it's
+ * always the member name, in upper case).
+ */
+static const struct lconv_member_info table[] = {
+	/* String fields. */
+	LCONV_S(LC_NUMERIC, decimal_point, DECIMAL_POINT),
+	LCONV_S(LC_NUMERIC, thousands_sep, THOUSANDS_SEP),
+	LCONV_S(LC_NUMERIC, grouping, GROUPING),
+	LCONV_S(LC_MONETARY, int_curr_symbol, INT_CURR_SYMBOL),
+	LCONV_S(LC_MONETARY, currency_symbol, CURRENCY_SYMBOL),
+	LCONV_S(LC_MONETARY, mon_decimal_point, MON_DECIMAL_POINT),
+	LCONV_S(LC_MONETARY, mon_thousands_sep, MON_THOUSANDS_SEP),
+	LCONV_S(LC_MONETARY, mon_grouping, MON_GROUPING),
+	LCONV_S(LC_MONETARY, positive_sign, POSITIVE_SIGN),
+	LCONV_S(LC_MONETARY, negative_sign, NEGATIVE_SIGN),
+
+	/* Character fields.
*/
+	LCONV_C(LC_MONETARY, int_frac_digits, INT_FRAC_DIGITS),
+	LCONV_C(LC_MONETARY, frac_digits, FRAC_DIGITS),
+	LCONV_C(LC_MONETARY, p_cs_precedes, P_CS_PRECEDES),
+	LCONV_C(LC_MONETARY, p_sep_by_space, P_SEP_BY_SPACE),
+	LCONV_C(LC_MONETARY, n_cs_precedes, N_CS_PRECEDES),
+	LCONV_C(LC_MONETARY, n_sep_by_space, N_SEP_BY_SPACE),
+	LCONV_C(LC_MONETARY, p_sign_posn, P_SIGN_POSN),
+	LCONV_C(LC_MONETARY, n_sign_posn, N_SIGN_POSN),
+};
+
+/*
+ * Return the address of the string ("char *") member of *lconv described by
+ * table[i].  Only meaningful when table[i].is_string is true.
+ */
+static inline char **
+lconv_string_member(struct lconv *lconv, int i)
+{
+	return (char **) ((char *) lconv + table[i].offset);
+}
+
+/*
+ * Return the address of the single-character member of *lconv described by
+ * table[i].  Only meaningful when table[i].is_string is false.
+ */
+static inline char *
+lconv_char_member(struct lconv *lconv, int i)
+{
+	return (char *) lconv + table[i].offset;
+}
+
+/*
+ * Free the members of a struct lconv populated by pg_localeconv_r().  The
+ * struct itself is in storage provided by the caller of pg_localeconv_r().
+ *
+ * Each freed member is reset to NULL, so the struct is never left holding
+ * dangling pointers and freeing it a second time is harmless.  That matters
+ * because the error paths in this file call this function on the caller's
+ * output struct.
+ */
+void
+pg_localeconv_free(struct lconv *lconv)
+{
+	for (int i = 0; i < lengthof(table); ++i)
+	{
+		if (table[i].is_string)
+		{
+			char	  **member = lconv_string_member(lconv, i);
+
+			free(*member);
+			*member = NULL;
+		}
+	}
+}
+
+#ifdef TRANSLATE_FROM_LANGINFO
+/*
+ * Fill in struct lconv members using the equivalent nl_langinfo_l() items.
+ *
+ * LC_NUMERIC members are read from numeric_locale, all others from
+ * monetary_locale.  String members are strdup'd into dst.  dst is expected
+ * to be zero-filled on entry, since on failure everything is released with
+ * pg_localeconv_free().
+ *
+ * Returns 0 on success.  On out-of-memory, frees whatever was already copied
+ * into dst, sets errno to ENOMEM and returns -1.
+ */
+static int
+pg_localeconv_from_langinfo(struct lconv *dst,
+							locale_t monetary_locale,
+							locale_t numeric_locale)
+{
+	for (int i = 0; i < lengthof(table); ++i)
+	{
+		locale_t	locale;
+
+		locale = table[i].category == LC_NUMERIC ?
+			numeric_locale : monetary_locale;
+
+		if (table[i].is_string)
+		{
+			char	   *string;
+
+			/* The returned buffer belongs to libc, so take a private copy. */
+			string = nl_langinfo_l(table[i].item, locale);
+			if (!(string = strdup(string)))
+			{
+				pg_localeconv_free(dst);
+				errno = ENOMEM;
+				return -1;
+			}
+			*lconv_string_member(dst, i) = string;
+		}
+		else
+		{
+			/* The first byte of the item's string is stored as the value. */
+			*lconv_char_member(dst, i) =
+				*nl_langinfo_l(table[i].item, locale);
+		}
+	}
+
+	return 0;
+}
+#else
+/*
+ * Copy members from a given category.  Note that you have to call this twice
+ * to copy the LC_MONETARY and then LC_NUMERIC members.
+ */
+static int
+pg_localeconv_copy_members(struct lconv *dst,
+						   struct lconv *src,
+						   int category)
+{
+	int			idx;
+
+	for (idx = 0; idx < lengthof(table); idx++)
+	{
+		/* Skip members governed by the other category. */
+		if (table[idx].category != category)
+			continue;
+
+		if (!table[idx].is_string)
+		{
+			/* Plain char member: copy by value. */
+			*lconv_char_member(dst, idx) = *lconv_char_member(src, idx);
+			continue;
+		}
+
+		/* String member: src is not ours to keep, so duplicate it. */
+		{
+			char	   *copy = strdup(*lconv_string_member(src, idx));
+
+			if (copy == NULL)
+			{
+				/* Release everything copied into dst so far. */
+				pg_localeconv_free(dst);
+				errno = ENOMEM;
+				return -1;
+			}
+			*lconv_string_member(dst, idx) = copy;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+/*
+ * A thread-safe routine to get a copy of the lconv struct for a given
+ * LC_NUMERIC and LC_MONETARY.  Different approaches are used on different
+ * OSes, because the standard interface is so multi-threading unfriendly.
+ *
+ * 1.  On Windows, there is no uselocale(), but there is a way to put
+ * setlocale() into a thread-local mode temporarily.  Its localeconv() is
+ * documented as returning a pointer to thread-local storage, so we don't have
+ * to worry about concurrent callers.
+ *
+ * 2.  On Glibc, as an extension, all the information required to populate
+ * struct lconv is also available via nl_langinfo_l(), which is thread-safe.
+ *
+ * 3.  On macOS and *BSD, there is localeconv_l(), so we can create a temporary
+ * locale_t to pass in, and the result is a pointer to storage associated with
+ * the locale_t so we control its lifetime and we don't have to worry about
+ * concurrent calls clobbering it.
+ *
+ * 4.  Otherwise, we wrap plain old localeconv() in uselocale() to avoid
+ * touching the global locale, but the output buffer is allowed by the standard
+ * to be overwritten by concurrent calls to localeconv().  We protect against
+ * _this_ function doing that with a Big Lock, but there isn't much we can do
+ * about code outside our tree that might call localeconv(), given such a poor
+ * interface.
+ *
+ * The POSIX standard explicitly says that it is undefined what happens if
+ * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from that
+ * implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to believe
+ * that localeconv() should return strings that are encoded in the codeset
+ * implied by the LC_MONETARY or LC_NUMERIC locale name.  On Windows, LC_CTYPE
+ * has to match to get sane results.
+ *
+ * To get predictable results on all platforms, we'll call the underlying
+ * routines with LC_ALL set to the appropriate locale for each set of members,
+ * and merge the results.  Three members of the resulting object are therefore
+ * guaranteed to be encoded with LC_NUMERIC's codeset: "decimal_point",
+ * "thousands_sep" and "grouping".  All other members are encoded with
+ * LC_MONETARY's codeset.
+ *
+ * Returns 0 on success.  Returns non-zero on failure, and sets errno.  On
+ * success, the caller is responsible for calling pg_localeconv_free() on the
+ * output struct to free the string members it contains.  On failure, any
+ * members that were already copied have been freed here, so the caller must
+ * not pass the output struct to pg_localeconv_free().
+ */
+int
+pg_localeconv_r(const char *lc_monetary,
+				const char *lc_numeric,
+				struct lconv *output)
+{
+#ifdef WIN32
+	/*
+	 * setlocale(LC_ALL, ...) below changes every category, so every category
+	 * has to be saved and restored.  LC_CTYPE comes first so that it is also
+	 * the first one restored.
+	 */
+	static const int categories[] = {
+		LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_COLLATE, LC_TIME
+	};
+	wchar_t    *saved[lengthof(categories)] = {NULL};
+	int			save_config_thread_locale;
+	int			save_errno;
+	int			result = -1;
+
+	/*
+	 * Put setlocale() into thread-local mode.  If that fails we must not go
+	 * on, because setlocale() would then change the process-wide locale.
+	 */
+	save_config_thread_locale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+	if (save_config_thread_locale == -1)
+	{
+		errno = ENOSYS;
+		return -1;
+	}
+
+	/*
+	 * Capture the current values as wide strings.  Otherwise, we might not be
+	 * able to restore them if their names contain non-ASCII characters and
+	 * the intermediate locale changes the expected encoding.  We don't want
+	 * to leave the caller in an unexpected state by failing to restore, or
+	 * crash the runtime library.
+	 */
+	for (int i = 0; i < lengthof(categories); ++i)
+	{
+		wchar_t    *name = _wsetlocale(categories[i], NULL);
+
+		if (name == NULL)
+		{
+			errno = EINVAL;
+			goto exit;
+		}
+		saved[i] = wcsdup(name);
+		if (saved[i] == NULL)
+		{
+			errno = ENOMEM;
+			goto exit;
+		}
+	}
+
+	memset(output, 0, sizeof(*output));
+
+	/* Copy the LC_MONETARY members. */
+	if (!setlocale(LC_ALL, lc_monetary))
+	{
+		errno = ENOENT;
+		goto exit;
+	}
+	result = pg_localeconv_copy_members(output, localeconv(), LC_MONETARY);
+	if (result != 0)
+		goto exit;
+
+	/* Copy the LC_NUMERIC members. */
+	if (!setlocale(LC_ALL, lc_numeric))
+	{
+		/*
+		 * The monetary members were already copied, so release them and make
+		 * sure we report failure rather than a half-filled struct.
+		 */
+		pg_localeconv_free(output);
+		memset(output, 0, sizeof(*output));
+		errno = ENOENT;
+		result = -1;
+		goto exit;
+	}
+	result = pg_localeconv_copy_members(output, localeconv(), LC_NUMERIC);
+
+exit:
+	/* Restore everything we changed, without disturbing the reported errno. */
+	save_errno = errno;
+	for (int i = 0; i < lengthof(categories); ++i)
+	{
+		if (saved[i])
+		{
+			_wsetlocale(categories[i], saved[i]);
+			free(saved[i]);
+		}
+	}
+	_configthreadlocale(save_config_thread_locale);
+	errno = save_errno;
+
+	return result;
+
+#else
+	locale_t	monetary_locale;
+	locale_t	numeric_locale;
+	int			save_errno;
+	int			result;
+
+	/*
+	 * All variations on Unix require locale_t objects for LC_MONETARY and
+	 * LC_NUMERIC.  We'll set all locale categories, so that we don't have to
+	 * worry about POSIX's undefined behavior if LC_CTYPE's encoding doesn't
+	 * match.
+	 *
+	 * errno is preset before each call so that a failure is reported with a
+	 * sensible value even if newlocale() itself doesn't set one.
+	 */
+	errno = ENOENT;
+	monetary_locale = newlocale(LC_ALL_MASK, lc_monetary, 0);
+	if (monetary_locale == 0)
+		return -1;
+	errno = ENOENT;
+	numeric_locale = newlocale(LC_ALL_MASK, lc_numeric, 0);
+	if (numeric_locale == 0)
+	{
+		save_errno = errno;
+		freelocale(monetary_locale);
+		errno = save_errno;
+		return -1;
+	}
+
+	memset(output, 0, sizeof(*output));
+#if defined(TRANSLATE_FROM_LANGINFO)
+	/* Copy from non-standard nl_langinfo_l() extended items. */
+	result = pg_localeconv_from_langinfo(output,
+										 monetary_locale,
+										 numeric_locale);
+#elif defined(HAVE_LOCALECONV_L)
+	/* Copy the LC_MONETARY members from a thread-safe lconv object. */
+	result = pg_localeconv_copy_members(output,
+										localeconv_l(monetary_locale),
+										LC_MONETARY);
+	if (result == 0)
+	{
+		/* Copy the LC_NUMERIC members from a thread-safe lconv object. */
+		result = pg_localeconv_copy_members(output,
+											localeconv_l(numeric_locale),
+											LC_NUMERIC);
+	}
+#else
+	/* We have nothing better than standard POSIX facilities. */
+	{
+		static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
+		locale_t	save_locale;
+
+		/* Serialize our own callers; localeconv()'s buffer may be shared. */
+		pthread_mutex_lock(&big_lock);
+		/* Copy the LC_MONETARY members. */
+		save_locale = uselocale(monetary_locale);
+		result = pg_localeconv_copy_members(output,
+											localeconv(),
+											LC_MONETARY);
+		if (result == 0)
+		{
+			/* Copy the LC_NUMERIC members. */
+			uselocale(numeric_locale);
+			result = pg_localeconv_copy_members(output,
+												localeconv(),
+												LC_NUMERIC);
+		}
+		save_errno = errno;
+		pthread_mutex_unlock(&big_lock);
+
+		/* Put this thread's previous locale back. */
+		uselocale(save_locale);
+		errno = save_errno;
+	}
+#endif
+
+	/* Keep the errno reported by the copy step across the cleanup. */
+	save_errno = errno;
+	freelocale(monetary_locale);
+	freelocale(numeric_locale);
+	errno = save_errno;
+
+	return result;
+#endif
+}

base-commit: a9258629edabd461f998ec7d06cdf554e8a6a6ec
-- 
2.48.1