From cc883e9de458ab200153173f37edf6f8b0016abd Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 15 Aug 2024 16:45:27 +1200 Subject: [PATCH v6 3/3] Remove setlocale() from check_locale(). Validate locale names with newlocale() or _create_locale() instead, to avoid clobbering global state. This also removes the "canonicalization" of the locale name, which previously had two user-visible effects: 1. CREATE DATABASE ... LOCALE = '' would look up the locale from environment variables, but this was not documented behavior and doesn't seem too useful. A default will normally be inherited from the template if you just leave the option off. (Note that initdb still chooses default values from the server environment, and that would often be the original source of the template database's values.) 2. On Windows only, the setlocale() step reached by CREATE DATABASE ... LOCALE = '...' did accept a wide range of formats and do some canonicalization, when using (deprecated) pre-BCP 47 locale names, but again it seems enough to let that happen in the initdb phase only. Now, CREATE DATABASE and SET lc_XXX reject ''. CREATE COLLATION continues to accept it, as it never recorded canonicalized values so there may be system catalogs containing verbatim '' in the wild. We'll need to research the upgrade implications before banning it there. Thanks to Peter Eisentraut and Tom Lane for the suggestion that we might not really need to preserve those behaviors in the backend, in our hunt for non-thread-safe code. Reviewed-by: Heikki Linnakangas Discussion: https://postgr.es/m/CA%2BhUKGJqVe0%2BPv9dvC9dSums_PXxGo9SWcxYAMBguWJUGbWz-A%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGK57sgUYKO03jB4VarTsswfMyScFAyJpVnYD8c%2Bg12_mg%40mail.gmail.com --- src/backend/commands/dbcommands.c | 7 +- src/backend/utils/adt/pg_locale.c | 111 ++++++++++++++++++------------ src/bin/initdb/initdb.c | 2 - src/include/port/win32_port.h | 1 - src/include/utils/pg_locale.h | 2 +- 5 files changed, 70 insertions(+), 53 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 46310add459..a71785b952d 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -729,7 +729,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) const char *dblocale = NULL; char *dbicurules = NULL; char dblocprovider = '\0'; - char *canonname; int encoding = -1; bool dbistemplate = false; bool dballowconnections = true; @@ -1063,18 +1062,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errmsg("invalid server encoding %d", encoding))); /* Check that the chosen locales are valid, and get canonical spellings */ - if (!check_locale(LC_COLLATE, dbcollate, &canonname)) + if (!check_locale(LC_COLLATE, dbcollate)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); - dbcollate = canonname; - if (!check_locale(LC_CTYPE, dbctype, &canonname)) + if (!check_locale(LC_CTYPE, dbctype)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); - dbctype = canonname; check_encoding_locale_matches(encoding, dbcollate, dbctype); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 84257c2ce74..cb39a0fcdf3 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -172,6 +172,36 @@ static pg_locale_t last_collation_cache_locale = NULL; static char *IsoLocaleName(const char *); #endif +#ifndef WIN32 +/* + * The newlocale() function needs LC_xxx_MASK, but sometimes we have LC_xxx, + * and POSIX doesn't offer a way to translate. + */ +static int +get_lc_category_mask(int category) +{ + switch (category) + { + case LC_COLLATE: + return LC_COLLATE_MASK; + case LC_CTYPE: + return LC_CTYPE_MASK; +#ifdef LC_MESSAGES + case LC_MESSAGES: + return LC_MESSAGES_MASK; +#endif + case LC_MONETARY: + return LC_MONETARY_MASK; + case LC_NUMERIC: + return LC_NUMERIC_MASK; + case LC_TIME: + return LC_TIME_MASK; + default: + return 0; + }; +} +#endif + /* * pg_perm_setlocale * @@ -281,19 +311,11 @@ pg_perm_setlocale(int category, const char *locale) /* * Is the locale name valid for the locale category? - * - * If successful, and canonname isn't NULL, a palloc'd copy of the locale's - * canonical name is stored there. This is especially useful for figuring out - * what locale name "" means (ie, the server environment value). (Actually, - * it seems that on most implementations that's the only thing it's good for; - * we could wish that setlocale gave back a canonically spelled version of - * the locale name, but typically it doesn't.) */ bool -check_locale(int category, const char *locale, char **canonname) +check_locale(int category, const char *locale) { - char *save; - char *res; + locale_t loc; /* Don't let Windows' non-ASCII locale names in. */ if (!pg_is_ascii(locale)) @@ -305,41 +327,42 @@ check_locale(int category, const char *locale, char **canonname) return false; } - if (canonname) - *canonname = NULL; /* in case of failure */ - - save = setlocale(category, NULL); - if (!save) - return false; /* won't happen, we hope */ - - /* save may be pointing at a modifiable scratch variable, see above. */ - save = pstrdup(save); - - /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); - - /* save canonical name if requested. */ - if (res && canonname) - *canonname = pstrdup(res); - - /* restore old value. */ - if (!setlocale(category, save)) - elog(WARNING, "failed to restore old locale \"%s\"", save); - pfree(save); + if (locale[0] == 0) + { + /* + * We only accept explicit locale names, not "". We don't want to + * rely on environment variables in the backend. + */ + return false; + } - /* Don't let Windows' non-ASCII locale names out. */ - if (canonname && *canonname && !pg_is_ascii(*canonname)) + /* + * See if we can open it. Unfortunately we can't always distinguish + * out-of-memory from invalid locale name. + */ + errno = ENOENT; +#ifdef WIN32 + loc = _create_locale(category, locale); + if (loc == (locale_t) 0) + _dosmaperr(GetLastError()); +#else + loc = newlocale(get_lc_category_mask(category), locale, (locale_t) 0); +#endif + if (loc == (locale_t) 0) { - ereport(WARNING, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("locale name \"%s\" contains non-ASCII characters", - *canonname))); - pfree(*canonname); - *canonname = NULL; + if (errno == ENOMEM) + elog(ERROR, "out of memory"); + + /* Otherwise assume the locale doesn't exist. */ return false; } +#ifdef WIN32 + _free_locale(loc); +#else + freelocale(loc); +#endif - return (res != NULL); + return true; } @@ -357,7 +380,7 @@ check_locale(int category, const char *locale, char **canonname) bool check_locale_monetary(char **newval, void **extra, GucSource source) { - return check_locale(LC_MONETARY, *newval, NULL); + return check_locale(LC_MONETARY, *newval); } void @@ -369,7 +392,7 @@ assign_locale_monetary(const char *newval, void *extra) bool check_locale_numeric(char **newval, void **extra, GucSource source) { - return check_locale(LC_NUMERIC, *newval, NULL); + return check_locale(LC_NUMERIC, *newval); } void @@ -381,7 +404,7 @@ assign_locale_numeric(const char *newval, void *extra) bool check_locale_time(char **newval, void **extra, GucSource source) { - return check_locale(LC_TIME, *newval, NULL); + return check_locale(LC_TIME, *newval); } void @@ -417,7 +440,7 @@ check_locale_messages(char **newval, void **extra, GucSource source) * On Windows, we can't even check the value, so accept blindly */ #if defined(LC_MESSAGES) && !defined(WIN32) - return check_locale(LC_MESSAGES, *newval, NULL); + return check_locale(LC_MESSAGES, *newval); #else return true; #endif diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 759672a9b97..69af8340116 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2205,8 +2205,6 @@ locale_date_order(const char *locale) * it seems that on most implementations that's the only thing it's good for; * we could wish that setlocale gave back a canonically spelled version of * the locale name, but typically it doesn't.) - * - * this should match the backend's check_locale() function */ static void check_locale_name(int category, const char *locale, char **canonname) diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index ff7028bdc81..d406ce9206c 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -474,7 +474,6 @@ extern char *pgwin32_setlocale(int category, const char *locale); #define setlocale(a,b) pgwin32_setlocale(a,b) - /* In backend/port/win32/signal.c */ extern PGDLLIMPORT volatile int pg_signal_queue; extern PGDLLIMPORT int pg_signal_mask; diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 0d5f0513ceb..c22f8c762e6 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -35,7 +35,7 @@ extern PGDLLIMPORT char *localized_full_months[]; /* is the databases's LC_CTYPE the C locale? */ extern PGDLLIMPORT bool database_ctype_is_c; -extern bool check_locale(int category, const char *locale, char **canonname); +extern bool check_locale(int category, const char *locale); extern char *pg_perm_setlocale(int category, const char *locale); /* -- 2.48.1