From 6f0b93afb6a4ba1157482e674e71f56cd9c555c9 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 14 Feb 2025 18:31:15 -0500 Subject: [PATCH v3 2/2] Have escape functions process bytes after invalid multi-byte char Reviewed-by: Jeff Davis Backpatch: 13 --- src/fe_utils/string_utils.c | 40 ++++++++++++++++++---------------- src/interfaces/libpq/fe-exec.c | 17 ++++++++------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index b6a7b197087..8621856fbc1 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -206,14 +206,13 @@ fmtIdEnc(const char *rawid, int encoding) * "skip" over quote characters, e.g. when parsing * character-by-character. * - * Replace the bytes corresponding to the invalid character - * with an invalid sequence, for the same reason as above. + * Replace the current byte with an invalid sequence, for the + * same reason as above. * - * It would be a bit faster to verify the whole string the - * first time we encounter a set highbit, but this way we can - * replace just the invalid characters, which probably makes - * it easier for users to find the invalidly encoded portion - * of a larger string. + * It would be a bit faster to verify the whole string the first + * time we encounter a set highbit, but this way we can replace + * just the invalid byte, which probably makes it easier for users + * to find the invalidly encoded portion of a larger string. */ enlargePQExpBuffer(id_return, 2); pg_encoding_set_invalid(encoding, @@ -222,11 +221,13 @@ fmtIdEnc(const char *rawid, int encoding) id_return->data[id_return->len] = '\0'; /* - * Copy the rest of the string after the invalid multi-byte - * character. + * Handle the following bytes as if this byte didn't exist, + * that's safer in case the subsequent bytes contain + * characters that are significant for the caller (e.g. '>' in + * html). 
*/ - remaining -= charlen; - cp += charlen; + remaining -= 1; + cp += 1; } else { @@ -421,23 +422,24 @@ appendStringLiteral(PQExpBuffer buf, const char *str, * over quote characters, e.g. when parsing * character-by-character. * - * Replace the bytes corresponding to the invalid character with - * an invalid sequence, for the same reason as above. + * Replace the current byte with an invalid sequence, for the + * same reason as above. * * It would be a bit faster to verify the whole string the first * time we encounter a set highbit, but this way we can replace - * just the invalid characters, which probably makes it easier for - * users to find the invalidly encoded portion of a larger string. + * just the invalid byte, which probably makes it easier for users + * to find the invalidly encoded portion of a larger string. */ pg_encoding_set_invalid(encoding, target); target += 2; - remaining -= charlen; /* - * Copy the rest of the string after the invalid multi-byte - * character. + * Handle the following bytes as if this byte didn't exist, that's + * safer in case the subsequent bytes contain important characters + * for the caller (e.g. '>' in html). */ - source += charlen; + remaining -= 1; + source += 1; } else { diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 120d4d032ec..53b906f9562 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -4139,13 +4139,13 @@ PQescapeStringInternal(PGconn *conn, * over quote characters, e.g. when parsing * character-by-character. * - * Replace the bytes corresponding to the invalid character with - * an invalid sequence, for the same reason as above. + * Replace the current byte with an invalid sequence, for the + * same reason as above. 
* * It would be a bit faster to verify the whole string the first * time we encounter a set highbit, but this way we can replace - * just the invalid characters, which probably makes it easier for - * users to find the invalidly encoded portion of a larger string. + * just the invalid byte, which probably makes it easier for users + * to find the invalidly encoded portion of a larger string. */ if (error) *error = 1; @@ -4154,13 +4154,14 @@ PQescapeStringInternal(PGconn *conn, pg_encoding_set_invalid(encoding, target); target += 2; - remaining -= charlen; /* - * Copy the rest of the string after the invalid multi-byte - * character. + * Handle the following bytes as if this byte didn't exist, that's + * safer in case the subsequent bytes contain important characters + * for the caller (e.g. '>' in html). */ - source += charlen; + remaining -= 1; + source += 1; } else { -- 2.48.1.76.g4e746b1a31.dirty