From db0352fb7fa463bd7a02f73f29760d1400cef402 Mon Sep 17 00:00:00 2001 From: Steven Niu Date: Wed, 26 Mar 2025 14:43:43 +0800 Subject: [PATCH] Optimize function byteain() to avoid double scanning Decode the traditional escaped format in a single pass instead of two: allocate the result at its maximum possible size (strlen(input) + VARHDRSZ), decode directly into it, and set the actual varlena size once decoding is done. Input/output behavior, including the error reported for an invalid escape sequence, is unchanged; the only trade-off is that the result buffer may be over-allocated when the input contains escape sequences. Author: Steven Niu --- src/backend/utils/adt/varlena.c | 66 +++++++++++---------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 95631eb2099..de422cafbd5 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -291,7 +291,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * ereport(ERROR, ...) if bad form. * * BUGS: - * The input is scanned twice. * The error checking of input is minimal. */ Datum @@ -302,6 +301,7 @@ byteain(PG_FUNCTION_ARGS) char *tp; char *rp; int bc; + size_t input_len; bytea *result; /* Recognize hex input */ @@ -318,45 +318,28 @@ byteain(PG_FUNCTION_ARGS) PG_RETURN_BYTEA_P(result); } - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = inputText; + /* Handle traditional escaped style in a single pass */ + input_len = strlen(inputText); + result = palloc(input_len + VARHDRSZ); /* Allocate max possible size */ rp = VARDATA(result); + tp = inputText; + while (*tp != '\0') { if (tp[0] != '\\') + { *rp++ = 
*tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) + continue; + } + + if (tp[1] == '\\') + { + *rp++ = '\\'; + tp += 2; + } + else if ((tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) { bc = VAL(tp[1]); bc <<= 3; @@ -366,23 +349,18 @@ byteain(PG_FUNCTION_ARGS) tp += 4; } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } else { - /* - * We should never get here. The first pass should not allow it. - */ + /* Invalid escape sequence: report error */ ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "bytea"))); } } + /* Set the actual size of the bytea */ + SET_VARSIZE(result, (rp - VARDATA(result)) + VARHDRSZ); + PG_RETURN_BYTEA_P(result); } -- 2.43.0