Author: Noah Misch Commit: Noah Misch Reject Windows command lines not convertible to Windows ANSI code page. PostgreSQL can't interpret them correctly without causing mojibake for other arguments. This isn't fixing a vulnerability in PostgreSQL, but Windows programs that form arbitrary CreateProcessW() command lines to execute PostgreSQL programs may have a vulnerability. If those programs exist, this change would mitigate their associated vulnerabilities. A program reaching the new error could use the following steps. Construct a char[] representing the intended PostgreSQL child program command line bytes. Filter that through MultiByteToWideChar(CP_ACP, 0, ...), and pass the result as the CreateProcessW() command line argument. This is both a compatibility break and a bug fix. While PostgreSQL has been misinterpreting the arguments this rejects, the application may be compensating. For example, an inconvertible character in the argument to psql's "--output" has been causing a write to a differently-named file, determined by Windows "best fit" behavior. If other file accesses filter the name through "best fit", the application may be functioning and oblivious to the actual file name written. Hence, no back-patch. Reported by Splitline Huang. Reviewed by FIXME. Discussion: https://postgr.es/m/FIXME diff --git a/src/backend/main/main.c b/src/backend/main/main.c index e286810..20bad17 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -150,6 +150,8 @@ main(int argc, char *argv[]) */ unsetenv("LC_ALL"); + validate_startup(); /* exit if parent called us incorrectly */ + /* * Catch standard options before doing much else, in particular before we * insist on not being root. diff --git a/src/bin/pg_config/pg_config.c b/src/bin/pg_config/pg_config.c index 504e6c5..f588fdd 100644 --- a/src/bin/pg_config/pg_config.c +++ b/src/bin/pg_config/pg_config.c @@ -25,6 +25,7 @@ #include "postgres_fe.h" #include "common/config_info.h" +#include "common/logging.h" static const char *progname; @@ -134,8 +135,8 @@ main(int argc, char **argv) int i; int j; + pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_config")); - progname = get_progname(argv[0]); /* check for --help */ diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index ce7aad4..eede8dd 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -8,6 +8,7 @@ #include +#include "common/logging.h" #include "getopt_long.h" #include "portability/instr_time.h" @@ -27,6 +28,7 @@ main(int argc, char *argv[]) { uint64 loop_count; + pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_timing")); progname = get_progname(argv[0]); diff --git a/src/common/exec.c b/src/common/exec.c index 32fd565..7a5b035 100644 --- a/src/common/exec.c +++ b/src/common/exec.c @@ -42,6 +42,9 @@ #endif #endif +#ifdef FRONTEND +#include "common/logging.h" +#endif #include "common/string.h" /* Inhibit mingw CRT's auto-globbing of command line arguments */ @@ -466,6 +469,178 @@ set_pglocale_pgservice(const char *argv0, const char *app) } } +/* + * Exit if parent process designated unacceptable conditions. + * + * Every installed executable shall call this, most via pg_logging_init(). + * + * Currently, the only unacceptable condition is a Windows command line + * containing characters not surviving conversion to Windows ANSI code page. + * Splitting such an unacceptable command line into "char *argv[]" uses "best + * fit" behavior + * (unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/readme.txt). + * For example, U+FF02 FULLWIDTH QUOTATION MARK becomes U+0022 QUOTATION MARK. + * Since U+0022 is a metacharacter that affects argument boundaries, the + * resulting argument boundaries could differ from what they would be if the + * executable were to use wmain(int argc, wchar_t *argv[]). That isn't a + * vulnerability in PostgreSQL, but it may be a vulnerability in Windows + * programs that construct CreateProcessW() arguments to execute PostgreSQL + * programs. While most "best fit" substitutes aren't metacharacters, they + * still lose parent process intent. + * + * ==== Background ==== + * + * Windows API functions taking string arguments come in two variants. The + * "wide" variant (e.g. CreateProcessW()) takes wchar_t[] strings in UTF-16 + * encoding. The "ANSI" variant (e.g. CreateProcessA()) takes char[] strings + * in the "Windows ANSI code page" encoding. Changing the Control Panel + * "system locale" changes the Windows ANSI code page. + * + * POSIX handles process arguments as an array of encoding-oblivious sequences + * of bytes [0x1,0xFF]. Windows handles any number of arguments through a + * single wchar_t[] "command line". When both parent and child use ANSI APIs, + * process arguments arrive via steps equivalent to the following: + * + * - Parent starts with the CreateProcessA() lpCommandLine argument. + * - Parent's MultiByteToWideChar(CP_ACP, 0, lpCommandLine) produces the + * child-accessible command line (the child's GetCommandLineW()). + * - Child retrieves GetCommandLineA(), which matches from + * WideCharToMultiByte(CP_ACP, 0, GetCommandLineW(), ...). + * - Child parses GetCommandLineA() to construct argv (see + * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). + * + * For code pages other than GetACP()==CP_UTF8, the WideCharToMultiByte() + * losslessly reverses the MultiByteToWideChar(). Hence, the double + * conversion is imperceptible for those code pages. It also follows that a + * CreateProcessW() caller can make arbitrary GetCommandLineA() sequences of + * bytes [0x1,0xFF]. It does that by filtering the intended GetCommandLineA() + * through MultiByteToWideChar() to get the equivalent CreateProcessW() + * lpCommandLine argument. + * + * With CP_UTF8, GetCommandLineW() will contain U+FFFD REPLACEMENT CHARACTER + * for each byte of lpCommandLine not forming valid UTF-8. Hence, + * GetCommandLineA() is always valid UTF8. Other GetCommandLineA() byte + * sequences are infeasible. An administrator elects CP_UTF8 with non-default + * Windows setting "Beta: Use Unicode UTF-8 for worldwide language support". + * + * ==== Design decisions ==== + * + * Conversion from U+0081 to 0x81 is also a "best fit" conversion in that most + * code pages do not have a character named HIGH OCTET PRESET. However, the + * conversion is bidirectional, so it loses no parent process intent. This + * applies to some other characters in [U+0080,U+00FF]. Allowing these + * characters is valuable, because it avoids rejecting any CreateProcessA() + * lpCommandLine value. Only CreateProcessW() reaches failure here. + * PostgreSQL-supplied programs don't use CreateProcessW(). + * + * Environment variables, too, see "best fit" behavior. In PGOPTIONS, an + * unescaped U+FF07 FULLWIDTH APOSTROPHE or U+FF3C FULLWIDTH REVERSE SOLIDUS + * would defeat quoting. In PGDATA, any inconvertible character would defeat + * finding the directory. Even so, we don't check GetEnvironmentStringsW(). + * We don't know which variables this process will retrieve. It doesn't help + * to reject variables outside those. + * + * One can always interconvert UTF-8 and UTF-16 without loss. Hence, one + * could skip this check for CP_UTF8. Since GetACP()==CP_UTF8 is still in + * beta, don't rely on that. + */ +void +validate_startup(void) +{ +#ifdef WIN32 + wchar_t *cmd_w = GetCommandLineW(); + wchar_t *cmd_w_cursor = cmd_w; + wchar_t ch; + bool all_ascii = true; + char *cmd_a; + wchar_t *cmd_w_from_a; + int result; + int n_cmd_w_from_a; + int n_cmd_w; + + /* Return early for all-ASCII args. */ + while ((ch = *cmd_w_cursor++)) + if (ch > 0x7f) + { + all_ascii = false; + break; + } + if (all_ascii) + return; + + /* non-ASCII: check conversion reversibility */ + cmd_a = GetCommandLineA(); + result = MultiByteToWideChar(CP_ACP, 0, cmd_a, -1, NULL, 0); + if (result != 0) + { + n_cmd_w_from_a = result; + cmd_w_from_a = palloc(sizeof(wchar_t) * n_cmd_w_from_a); + result = MultiByteToWideChar(CP_ACP, 0, cmd_a, -1, + cmd_w_from_a, n_cmd_w_from_a); + } + + /* + * Since the system fills GetCommandLineA() by converting + * GetCommandLineW() from UTF-16, there's no known way for those + * MultiByteToWideChar() calls to fail. This failure message doesn't + * deserve translation, but logging.h has no errmsg_internal() equivalent + * to suppress translation. Once we translate the string for logging.h, we + * may as well use the translation here. + */ + if (result == 0) + { +#ifndef FRONTEND + ereport(FATAL, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not convert command line to UTF-16: error code %lu", + GetLastError())); +#else + pg_log_error("could not convert command line to UTF-16: error code %lu", + GetLastError()); + exit(1); +#endif + } + + /* + * Since the system fills GetCommandLineA() by converting + * GetCommandLineW() from UTF-16, there's no known way for the reverse + * conversion to yield a different number of code units. + */ + n_cmd_w = 1 /* terminator */ + wcslen(cmd_w); + if (n_cmd_w_from_a != n_cmd_w) + { +#ifndef FRONTEND + ereport(FATAL, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("command line converted length (%d) did not match wide character command line length (%d)", + n_cmd_w_from_a, n_cmd_w)); +#else + pg_log_error("command line converted length (%d) did not match wide character command line length (%d)", + n_cmd_w_from_a, n_cmd_w); + exit(1); +#endif + } + for (int i = 0; i < n_cmd_w; i++) + { + if (cmd_w[i] == cmd_w_from_a[i]) + continue; +#ifndef FRONTEND + ereport(FATAL, + errcode(ERRCODE_SYSTEM_ERROR), + errmsg("could not convert command line to Windows ANSI code page"), + errdetail("First inconvertible UTF-16 code unit was 0x%04X at offset %d.", + cmd_w[i], i)); +#else + pg_log_error("could not convert command line to Windows ANSI code page"); + pg_log_error_detail("First inconvertible UTF-16 code unit was 0x%04X at offset %d.", + cmd_w[i], i); + exit(1); +#endif + } + /* wide strings were equal, so args are fine */ +#endif +} + #ifdef EXEC_BACKEND /* * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix diff --git a/src/common/logging.c b/src/common/logging.c index 3cf1190..b577d06 100644 --- a/src/common/logging.c +++ b/src/common/logging.c @@ -77,7 +77,8 @@ enable_vt_processing(void) #endif /* WIN32 */ /* - * This should be called before any output happens. + * Every installed frontend should call this before any output happens. Exits + * on error. */ void pg_logging_init(const char *argv0) @@ -157,6 +158,8 @@ pg_logging_init(const char *argv0) sgr_locus = SGR_LOCUS_DEFAULT; } } + + validate_startup(); /* convenient place to affect all frontends */ } /* diff --git a/src/include/port.h b/src/include/port.h index ba9ab0d..380c46e 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -132,6 +132,9 @@ extern void pgfnames_cleanup(char **filenames); /* Portable locale initialization (in exec.c) */ extern void set_pglocale_pgservice(const char *argv0, const char *app); +/* Exit if parent process designated unacceptable conditions (in exec.c) */ +extern void validate_startup(void); + /* Portable way to find and execute binaries (in exec.c) */ extern int validate_exec(const char *path); extern int find_my_exec(const char *argv0, char *retpath); diff --git a/src/interfaces/ecpg/preproc/ecpg.c b/src/interfaces/ecpg/preproc/ecpg.c index 2fcc6f8..36be896 100644 --- a/src/interfaces/ecpg/preproc/ecpg.c +++ b/src/interfaces/ecpg/preproc/ecpg.c @@ -7,6 +7,7 @@ #include +#include "common/logging.h" #include "getopt_long.h" #include "preproc_extern.h" @@ -143,8 +144,8 @@ main(int argc, char *const argv[]) char my_exec_path[MAXPGPATH]; char include_path[MAXPGPATH]; + pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("ecpg")); - progname = get_progname(argv[0]); if (find_my_exec(argv[0], my_exec_path) < 0) diff --git a/src/test/perl/PostgreSQL/Test/Utils.pm b/src/test/perl/PostgreSQL/Test/Utils.pm index 022b44b..7266f01 100644 --- a/src/test/perl/PostgreSQL/Test/Utils.pm +++ b/src/test/perl/PostgreSQL/Test/Utils.pm @@ -936,6 +936,7 @@ sub program_options_handling_ok { local $Test::Builder::Level = $Test::Builder::Level + 1; my ($cmd) = @_; + my ($stdout, $stderr); print("# Running: $cmd --not-a-valid-option\n"); my $result = IPC::Run::run [ $cmd, '--not-a-valid-option' ], '>', @@ -943,6 +944,42 @@ sub program_options_handling_ok '2>', \$stderr; ok(!$result, "$cmd with invalid option nonzero exit code"); isnt($stderr, '', "$cmd with invalid option prints error message"); + + # On Windows, confirm (1) rejection of characters we can't convert to the + # Windows ANSI code page and (2) no rejection of environment variables. + # The $unicode_only test character, U+1FBCC RAISED SMALL LEFT SQUARE + # BRACKET, could have been be any character found only in CP_UTF8 and code + # pages ineligible to be GetACP(). Do accept the wide equivalent of + # CreateProcessA(0x80 0x81), per comments at validate_startup(). + SKIP: + { + skip "wide characters are a special case to Windows only", 1 + if !$windows_os; + local $ENV{PG_TEST_EXE} = $cmd; + my $script = <<'EOPOWERSHELL'; +$want_exit_unicode_only = if ([System.Text.Encoding]::Default -eq + [System.Text.Encoding]::UTF8) { 0 } else { 1 } +$exe = $Env:PG_TEST_EXE +$any_encoding = + [System.Text.Encoding]::Default.GetString([byte[]](0x80, 0x81)) +$unicode_only = [Char]::ConvertFromUtf32(0x1FBCC) +$Env:PG_TEST_WIDE_ENV = $unicode_only +& $exe --help ($any_encoding) > $null +$exit_any_encoding = $LASTEXITCODE +& $exe --help ($unicode_only) > $null +$exit_unicode_only = $LASTEXITCODE +if ($exit_any_encoding -eq 0 -and + $exit_unicode_only -eq $want_exit_unicode_only) +{ + exit 0 +} +exit 1 +EOPOWERSHELL + $result = IPC::Run::run [ qw(powershell -NoProfile -NonInteractive), + '-Command' => $script ]; + ok($result, 'command line rejected iff cannot convert to GetACP()'); + } + return; }