From 53d565e66b4456f94409f73482854ab138863166 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 2 Dec 2024 10:35:37 +0100 Subject: [PATCH v2 09/11] jsonpath scanner: reentrant scanner Use the flex %option reentrant to make the generated scanner reentrant and thread-safe. Note: The parser was already pure. Simplify flex scan buffer management: Instead of constructing the buffer from pieces and then using yy_scan_buffer(), we can just use yy_scan_string(), which does the same thing internally. (Actually, we use yy_scan_bytes() here because we already have the length.) Use flex yyextra to handle context information, instead of global variables. This complements the other changes to make the scanner reentrant. Reviewed-by: Heikki Linnakangas Reviewed-by: Andreas Karlsson Discussion: https://www.postgresql.org/message-id/flat/eb6faeac-2a8a-4b69-9189-c33c520e5b7b@eisentraut.org --- src/backend/utils/adt/jsonpath_gram.y | 2 + src/backend/utils/adt/jsonpath_internal.h | 12 +- src/backend/utils/adt/jsonpath_scan.l | 283 ++++++++++------------ 3 files changed, 144 insertions(+), 153 deletions(-) diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 8733a0eac66..de5a455c96d 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -60,8 +60,10 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr, %name-prefix="jsonpath_yy" %parse-param {JsonPathParseResult **result} %parse-param {struct Node *escontext} +%parse-param {yyscan_t yyscanner} %lex-param {JsonPathParseResult **result} %lex-param {struct Node *escontext} +%lex-param {yyscan_t yyscanner} %union { diff --git a/src/backend/utils/adt/jsonpath_internal.h b/src/backend/utils/adt/jsonpath_internal.h index 6cd6d8b652d..dbb5e67fe2b 100644 --- a/src/backend/utils/adt/jsonpath_internal.h +++ b/src/backend/utils/adt/jsonpath_internal.h @@ -22,17 +22,25 @@ typedef struct JsonPathString int total; } JsonPathString; +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif + #include "utils/jsonpath.h" #include "jsonpath_gram.h" #define YY_DECL extern int jsonpath_yylex(YYSTYPE *yylval_param, \ JsonPathParseResult **result, \ - struct Node *escontext) + struct Node *escontext, \ + yyscan_t yyscanner) YY_DECL; extern int jsonpath_yyparse(JsonPathParseResult **result, - struct Node *escontext); + struct Node *escontext, + yyscan_t yyscanner); extern void jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext, + yyscan_t yyscanner, const char *message); #endif /* JSONPATH_INTERNAL_H */ diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index f5a85de36f5..8ed6c7ddf63 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -30,18 +30,17 @@ } %{ -static JsonPathString scanstring; - -/* Handles to the buffer that the lexer uses internally */ -static YY_BUFFER_STATE scanbufhandle; -static char *scanbuf; -static int scanbuflen; +struct jsonpath_yy_extra_type +{ + JsonPathString scanstring; +}; +#define YY_EXTRA_TYPE struct jsonpath_yy_extra_type * -static void addstring(bool init, char *s, int l); -static void addchar(bool init, char c); -static enum yytokentype checkKeyword(void); -static bool parseUnicode(char *s, int l, struct Node *escontext); -static bool parseHexChar(char *s, struct Node *escontext); +static void addstring(bool init, char *s, int l, yyscan_t yyscanner); +static void addchar(bool init, char c, yyscan_t yyscanner); +static enum yytokentype checkKeyword(yyscan_t yyscanner); +static bool parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner); +static bool parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner); /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ #undef fprintf @@ -65,6 +64,7 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyywrap %option warn %option prefix="jsonpath_yy" +%option reentrant %option bison-bridge %option noyyalloc %option noyyrealloc @@ -120,63 +120,63 @@ hex_fail \\x{hexdigit}{0,1} %% {other}+ { - addstring(false, yytext, yyleng); + addstring(false, yytext, yyleng, yyscanner); } {blank}+ { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } \/\* { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN xc; } ({special}|\") { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; yyless(0); BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } <> { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } -\\b { addchar(false, '\b'); } +\\b { addchar(false, '\b', yyscanner); } -\\f { addchar(false, '\f'); } +\\f { addchar(false, '\f', yyscanner); } -\\n { addchar(false, '\n'); } +\\n { addchar(false, '\n', yyscanner); } -\\r { addchar(false, '\r'); } +\\r { addchar(false, '\r', yyscanner); } -\\t { addchar(false, '\t'); } +\\t { addchar(false, '\t', yyscanner); } -\\v { addchar(false, '\v'); } +\\v { addchar(false, '\v', yyscanner); } {unicode}+ { - if (!parseUnicode(yytext, yyleng, escontext)) + if (!parseUnicode(yytext, yyleng, escontext, yyscanner)) yyterminate(); } {hex_char} { - if (!parseHexChar(yytext, escontext)) + if (!parseHexChar(yytext, escontext, yyscanner)) yyterminate(); } {unicode}*{unicodefail} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "invalid Unicode escape sequence"); yyterminate(); } {hex_fail} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "invalid hexadecimal character sequence"); yyterminate(); } @@ -184,37 +184,37 @@ hex_fail \\x{hexdigit}{0,1} {unicode}+\\ { /* throw back the \\, and treat as unicode */ yyless(yyleng - 1); - if (!parseUnicode(yytext, yyleng, escontext)) + if (!parseUnicode(yytext, yyleng, escontext, yyscanner)) yyterminate(); } -\\. { addchar(false, yytext[1]); } +\\. { addchar(false, yytext[1], yyscanner); } \\ { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "unexpected end after backslash"); yyterminate(); } <> { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "unterminated quoted string"); yyterminate(); } \" { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; return STRING_P; } \" { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; return VARIABLE_P; } -[^\\\"]+ { addstring(false, yytext, yyleng); } +[^\\\"]+ { addstring(false, yytext, yyleng, yyscanner); } \*\/ { BEGIN INITIAL; } @@ -223,7 +223,7 @@ hex_fail \\x{hexdigit}{0,1} \* { } <> { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "unexpected end of comment"); yyterminate(); } @@ -250,14 +250,14 @@ hex_fail \\x{hexdigit}{0,1} \> { return GREATER_P; } \${other}+ { - addstring(true, yytext + 1, yyleng - 1); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext + 1, yyleng - 1, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return VARIABLE_P; } \$\" { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xvq; } @@ -266,85 +266,85 @@ hex_fail \\x{hexdigit}{0,1} {blank}+ { /* ignore */ } \/\* { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xc; } {real} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return NUMERIC_P; } {decimal} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return NUMERIC_P; } {decinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {hexinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {octinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {bininteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {realfail} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "invalid numeric literal"); yyterminate(); } {decinteger_junk} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "trailing junk after numeric literal"); yyterminate(); } {decimal_junk} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "trailing junk after numeric literal"); yyterminate(); } {real_junk} { - jsonpath_yyerror(NULL, escontext, + jsonpath_yyerror(NULL, escontext, yyscanner, "trailing junk after numeric literal"); yyterminate(); } \" { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xq; } \\ { yyless(0); - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xnq; } {other}+ { - addstring(true, yytext, yyleng); + addstring(true, yytext, yyleng, yyscanner); BEGIN xnq; } @@ -354,10 +354,17 @@ hex_fail \\x{hexdigit}{0,1} /* LCOV_EXCL_STOP */ +/* see scan.l */ +#undef yyextra +#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r) + void jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext, + yyscan_t yyscanner, const char *message) { + struct yyguts_t * yyg = (struct yyguts_t *) yyscanner; /* needed for yytext macro */ + /* don't overwrite escontext if it's already been set */ if (SOFT_ERROR_OCCURRED(escontext)) return; @@ -427,9 +434,11 @@ static const JsonPathKeyword keywords[] = { { 12,false, TIMESTAMP_TZ_P, "timestamp_tz"}, }; -/* Check if current scanstring value is a keyword */ +/* + * Check if current scanstring value is a keyword + */ static enum yytokentype -checkKeyword() +checkKeyword(yyscan_t yyscanner) { int res = IDENT_P; int diff; @@ -437,18 +446,18 @@ checkKeyword() *StopHigh = keywords + lengthof(keywords), *StopMiddle; - if (scanstring.len > keywords[lengthof(keywords) - 1].len) + if (yyextra->scanstring.len > keywords[lengthof(keywords) - 1].len) return res; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->len == scanstring.len) - diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val, - scanstring.len); + if (StopMiddle->len == yyextra->scanstring.len) + diff = pg_strncasecmp(StopMiddle->keyword, yyextra->scanstring.val, + yyextra->scanstring.len); else - diff = StopMiddle->len - scanstring.len; + diff = StopMiddle->len - yyextra->scanstring.len; if (diff < 0) StopLow = StopMiddle + 1; @@ -457,8 +466,8 @@ checkKeyword() else { if (StopMiddle->lowercase) - diff = strncmp(StopMiddle->keyword, scanstring.val, - scanstring.len); + diff = strncmp(StopMiddle->keyword, yyextra->scanstring.val, + yyextra->scanstring.len); if (diff == 0) res = StopMiddle->val; @@ -470,85 +479,47 @@ checkKeyword() return res; } -/* - * Called before any actual parsing is done - */ -static void -jsonpath_scanner_init(const char *str, int slen) -{ - if (slen <= 0) - slen = strlen(str); - - /* - * Might be left over after ereport() - */ - yy_init_globals(); - - /* - * Make a scan buffer with special termination needed by flex. - */ - - scanbuflen = slen; - scanbuf = palloc(slen + 2); - memcpy(scanbuf, str, slen); - scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; - scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); - - BEGIN(INITIAL); -} - - -/* - * Called after parsing is done to clean up after jsonpath_scanner_init() - */ -static void -jsonpath_scanner_finish(void) -{ - yy_delete_buffer(scanbufhandle); - pfree(scanbuf); -} - /* * Resize scanstring so that it can append string of given length. * Reinitialize if required. */ static void -resizeString(bool init, int appendLen) +resizeString(bool init, int appendLen, yyscan_t yyscanner) { if (init) { - scanstring.total = Max(32, appendLen); - scanstring.val = (char *) palloc(scanstring.total); - scanstring.len = 0; + yyextra->scanstring.total = Max(32, appendLen); + yyextra->scanstring.val = (char *) palloc(yyextra->scanstring.total); + yyextra->scanstring.len = 0; } else { - if (scanstring.len + appendLen >= scanstring.total) + if (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total) { - while (scanstring.len + appendLen >= scanstring.total) - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); + while (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total) + yyextra->scanstring.total *= 2; + yyextra->scanstring.val = repalloc(yyextra->scanstring.val, yyextra->scanstring.total); } } } /* Add set of bytes at "s" of length "l" to scanstring */ static void -addstring(bool init, char *s, int l) +addstring(bool init, char *s, int l, yyscan_t yyscanner) { - resizeString(init, l + 1); - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; + resizeString(init, l + 1, yyscanner); + memcpy(yyextra->scanstring.val + yyextra->scanstring.len, s, l); + yyextra->scanstring.len += l; } /* Add single byte "c" to scanstring */ static void -addchar(bool init, char c) +addchar(bool init, char c, yyscan_t yyscanner) { - resizeString(init, 1); - scanstring.val[scanstring.len] = c; + resizeString(init, 1, yyscanner); + yyextra->scanstring.val[yyextra->scanstring.len] = c; if (c != '\0') - scanstring.len++; + yyextra->scanstring.len++; } /* Interface to jsonpath parser */ @@ -556,20 +527,30 @@ JsonPathParseResult * parsejsonpath(const char *str, int len, struct Node *escontext) { JsonPathParseResult *parseresult; + yyscan_t scanner; + struct jsonpath_yy_extra_type yyext; + + if (jsonpath_yylex_init(&scanner) != 0) + elog(ERROR, "yylex_init() failed: %m"); + + yyset_extra(&yyext, scanner); + + if (len <= 0) + len = strlen(str); - jsonpath_scanner_init(str, len); + jsonpath_yy_scan_bytes(str, len, scanner); - if (jsonpath_yyparse(&parseresult, escontext) != 0) - jsonpath_yyerror(NULL, escontext, "invalid input"); /* shouldn't happen */ + if (jsonpath_yyparse(&parseresult, escontext, scanner) != 0) + jsonpath_yyerror(NULL, escontext, scanner, "invalid input"); /* shouldn't happen */ - jsonpath_scanner_finish(); + jsonpath_yylex_destroy(scanner); return parseresult; } /* Turn hex character into integer */ static bool -hexval(char c, int *result, struct Node *escontext) +hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner) { if (c >= '0' && c <= '9') { @@ -586,13 +567,13 @@ hexval(char c, int *result, struct Node *escontext) *result = c - 'A' + 0xA; return true; } - jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit"); + jsonpath_yyerror(NULL, escontext, yyscanner, "invalid hexadecimal digit"); return false; } /* Add given unicode character to scanstring */ static bool -addUnicodeChar(int ch, struct Node *escontext) +addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner) { if (ch == 0) { @@ -618,14 +599,14 @@ addUnicodeChar(int ch, struct Node *escontext) ereturn(escontext, false, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("could not convert Unicode to server encoding"))); - addstring(false, cbuf, strlen(cbuf)); + addstring(false, cbuf, strlen(cbuf), yyscanner); } return true; } /* Add unicode character, processing any surrogate pairs */ static bool -addUnicode(int ch, int *hi_surrogate, struct Node *escontext) +addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner) { if (is_utf16_surrogate_first(ch)) { @@ -658,7 +639,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext) "surrogate."))); } - return addUnicodeChar(ch, escontext); + return addUnicodeChar(ch, escontext, yyscanner); } /* @@ -666,7 +647,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext) * src/backend/utils/adt/json.c */ static bool -parseUnicode(char *s, int l, struct Node *escontext) +parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner) { int i = 2; int hi_surrogate = -1; @@ -680,7 +661,7 @@ parseUnicode(char *s, int l, struct Node *escontext) { while (s[++i] != '}' && i < l) { - if (!hexval(s[i], &si, escontext)) + if (!hexval(s[i], &si, escontext, yyscanner)) return false; ch = (ch << 4) | si; } @@ -690,13 +671,13 @@ parseUnicode(char *s, int l, struct Node *escontext) { for (j = 0; j < 4 && i < l; j++) { - if (!hexval(s[i++], &si, escontext)) + if (!hexval(s[i++], &si, escontext, yyscanner)) return false; ch = (ch << 4) | si; } } - if (! addUnicode(ch, &hi_surrogate, escontext)) + if (! addUnicode(ch, &hi_surrogate, escontext, yyscanner)) return false; } @@ -714,17 +695,17 @@ parseUnicode(char *s, int l, struct Node *escontext) /* Parse sequence of hex-encoded characters */ static bool -parseHexChar(char *s, struct Node *escontext) +parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner) { int s2, s3, ch; - if (!hexval(s[2], &s2, escontext)) + if (!hexval(s[2], &s2, escontext, yyscanner)) return false; - if (!hexval(s[3], &s3, escontext)) + if (!hexval(s[3], &s3, escontext, yyscanner)) return false; ch = (s2 << 4) | s3; - return addUnicodeChar(ch, escontext); + return addUnicodeChar(ch, escontext, yyscanner); } /* @@ -733,13 +714,13 @@ parseHexChar(char *s, struct Node *escontext) */ void * -jsonpath_yyalloc(yy_size_t bytes) +jsonpath_yyalloc(yy_size_t bytes, yyscan_t yyscanner) { return palloc(bytes); } void * -jsonpath_yyrealloc(void *ptr, yy_size_t bytes) +jsonpath_yyrealloc(void *ptr, yy_size_t bytes, yyscan_t yyscanner) { if (ptr) return repalloc(ptr, bytes); @@ -748,7 +729,7 @@ jsonpath_yyrealloc(void *ptr, yy_size_t bytes) } void -jsonpath_yyfree(void *ptr) +jsonpath_yyfree(void *ptr, yyscan_t yyscanner) { if (ptr) pfree(ptr); -- 2.47.1