From 806f4173418991462bb2f9ad029c219ab8abe3d9 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 2 Dec 2024 10:35:37 +0100 Subject: [PATCH v0 15/15] jsonpath scanner: Use flex yyextra --- src/backend/utils/adt/jsonpath_scan.l | 171 ++++++++++++++------------ 1 file changed, 92 insertions(+), 79 deletions(-) diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 700c17712d0..8ed6c7ddf63 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -30,11 +30,15 @@ } %{ -static JsonPathString scanstring; /* FIXME */ +struct jsonpath_yy_extra_type +{ + JsonPathString scanstring; +}; +#define YY_EXTRA_TYPE struct jsonpath_yy_extra_type * -static void addstring(bool init, char *s, int l); -static void addchar(bool init, char c); -static enum yytokentype checkKeyword(void); +static void addstring(bool init, char *s, int l, yyscan_t yyscanner); +static void addchar(bool init, char c, yyscan_t yyscanner); +static enum yytokentype checkKeyword(yyscan_t yyscanner); static bool parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner); static bool parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner); @@ -116,44 +120,44 @@ hex_fail \\x{hexdigit}{0,1} %% {other}+ { - addstring(false, yytext, yyleng); + addstring(false, yytext, yyleng, yyscanner); } {blank}+ { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } \/\* { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN xc; } ({special}|\") { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; yyless(0); BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } <> { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; - return checkKeyword(); + return checkKeyword(yyscanner); } -\\b { addchar(false, '\b'); } +\\b { addchar(false, '\b', yyscanner); } -\\f { addchar(false, '\f'); } +\\f { addchar(false, '\f', yyscanner); } -\\n { addchar(false, '\n'); } +\\n { addchar(false, '\n', yyscanner); } -\\r { addchar(false, '\r'); } +\\r { addchar(false, '\r', yyscanner); } -\\t { addchar(false, '\t'); } +\\t { addchar(false, '\t', yyscanner); } -\\v { addchar(false, '\v'); } +\\v { addchar(false, '\v', yyscanner); } {unicode}+ { if (!parseUnicode(yytext, yyleng, escontext, yyscanner)) @@ -184,7 +188,7 @@ hex_fail \\x{hexdigit}{0,1} yyterminate(); } -\\. { addchar(false, yytext[1]); } +\\. { addchar(false, yytext[1], yyscanner); } \\ { jsonpath_yyerror(NULL, escontext, yyscanner, @@ -199,18 +203,18 @@ hex_fail \\x{hexdigit}{0,1} } \" { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; return STRING_P; } \" { - yylval->str = scanstring; + yylval->str = yyextra->scanstring; BEGIN INITIAL; return VARIABLE_P; } -[^\\\"]+ { addstring(false, yytext, yyleng); } +[^\\\"]+ { addstring(false, yytext, yyleng, yyscanner); } \*\/ { BEGIN INITIAL; } @@ -246,14 +250,14 @@ hex_fail \\x{hexdigit}{0,1} \> { return GREATER_P; } \${other}+ { - addstring(true, yytext + 1, yyleng - 1); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext + 1, yyleng - 1, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return VARIABLE_P; } \$\" { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xvq; } @@ -262,49 +266,49 @@ hex_fail \\x{hexdigit}{0,1} {blank}+ { /* ignore */ } \/\* { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xc; } {real} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return NUMERIC_P; } {decimal} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return NUMERIC_P; } {decinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {hexinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {octinteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } {bininteger} { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; + addstring(true, yytext, yyleng, yyscanner); + addchar(false, '\0', yyscanner); + yylval->str = yyextra->scanstring; return INT_P; } @@ -329,18 +333,18 @@ hex_fail \\x{hexdigit}{0,1} yyterminate(); } \" { - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xq; } \\ { yyless(0); - addchar(true, '\0'); + addchar(true, '\0', yyscanner); BEGIN xnq; } {other}+ { - addstring(true, yytext, yyleng); + addstring(true, yytext, yyleng, yyscanner); BEGIN xnq; } @@ -350,6 +354,10 @@ hex_fail \\x{hexdigit}{0,1} /* LCOV_EXCL_STOP */ +/* see scan.l */ +#undef yyextra +#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r) + void jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext, yyscan_t yyscanner, @@ -426,9 +434,11 @@ static const JsonPathKeyword keywords[] = { { 12,false, TIMESTAMP_TZ_P, "timestamp_tz"}, }; -/* Check if current scanstring value is a keyword */ +/* + * Check if current scanstring value is a keyword + */ static enum yytokentype -checkKeyword() +checkKeyword(yyscan_t yyscanner) { int res = IDENT_P; int diff; @@ -436,18 +446,18 @@ checkKeyword() *StopHigh = keywords + lengthof(keywords), *StopMiddle; - if (scanstring.len > keywords[lengthof(keywords) - 1].len) + if (yyextra->scanstring.len > keywords[lengthof(keywords) - 1].len) return res; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->len == scanstring.len) - diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val, - scanstring.len); + if (StopMiddle->len == yyextra->scanstring.len) + diff = pg_strncasecmp(StopMiddle->keyword, yyextra->scanstring.val, + yyextra->scanstring.len); else - diff = StopMiddle->len - scanstring.len; + diff = StopMiddle->len - yyextra->scanstring.len; if (diff < 0) StopLow = StopMiddle + 1; @@ -456,8 +466,8 @@ checkKeyword() else { if (StopMiddle->lowercase) - diff = strncmp(StopMiddle->keyword, scanstring.val, - scanstring.len); + diff = strncmp(StopMiddle->keyword, yyextra->scanstring.val, + yyextra->scanstring.len); if (diff == 0) res = StopMiddle->val; @@ -474,42 +484,42 @@ checkKeyword() * Reinitialize if required. */ static void -resizeString(bool init, int appendLen) +resizeString(bool init, int appendLen, yyscan_t yyscanner) { if (init) { - scanstring.total = Max(32, appendLen); - scanstring.val = (char *) palloc(scanstring.total); - scanstring.len = 0; + yyextra->scanstring.total = Max(32, appendLen); + yyextra->scanstring.val = (char *) palloc(yyextra->scanstring.total); + yyextra->scanstring.len = 0; } else { - if (scanstring.len + appendLen >= scanstring.total) + if (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total) { - while (scanstring.len + appendLen >= scanstring.total) - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); + while (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total) + yyextra->scanstring.total *= 2; + yyextra->scanstring.val = repalloc(yyextra->scanstring.val, yyextra->scanstring.total); } } } /* Add set of bytes at "s" of length "l" to scanstring */ static void -addstring(bool init, char *s, int l) +addstring(bool init, char *s, int l, yyscan_t yyscanner) { - resizeString(init, l + 1); - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; + resizeString(init, l + 1, yyscanner); + memcpy(yyextra->scanstring.val + yyextra->scanstring.len, s, l); + yyextra->scanstring.len += l; } /* Add single byte "c" to scanstring */ static void -addchar(bool init, char c) +addchar(bool init, char c, yyscan_t yyscanner) { - resizeString(init, 1); - scanstring.val[scanstring.len] = c; + resizeString(init, 1, yyscanner); + yyextra->scanstring.val[yyextra->scanstring.len] = c; if (c != '\0') - scanstring.len++; + yyextra->scanstring.len++; } /* Interface to jsonpath parser */ @@ -518,10 +528,13 @@ parsejsonpath(const char *str, int len, struct Node *escontext) { JsonPathParseResult *parseresult; yyscan_t scanner; + struct jsonpath_yy_extra_type yyext; if (jsonpath_yylex_init(&scanner) != 0) elog(ERROR, "yylex_init() failed: %m"); + yyset_extra(&yyext, scanner); + if (len <= 0) len = strlen(str); @@ -560,7 +573,7 @@ hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner) /* Add given unicode character to scanstring */ static bool -addUnicodeChar(int ch, struct Node *escontext) +addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner) { if (ch == 0) { @@ -586,14 +599,14 @@ addUnicodeChar(int ch, struct Node *escontext) ereturn(escontext, false, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("could not convert Unicode to server encoding"))); - addstring(false, cbuf, strlen(cbuf)); + addstring(false, cbuf, strlen(cbuf), yyscanner); } return true; } /* Add unicode character, processing any surrogate pairs */ static bool -addUnicode(int ch, int *hi_surrogate, struct Node *escontext) +addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner) { if (is_utf16_surrogate_first(ch)) { @@ -626,7 +639,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext) "surrogate."))); } - return addUnicodeChar(ch, escontext); + return addUnicodeChar(ch, escontext, yyscanner); } /* @@ -664,7 +677,7 @@ parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner) } } - if (! addUnicode(ch, &hi_surrogate, escontext)) + if (! addUnicode(ch, &hi_surrogate, escontext, yyscanner)) return false; } @@ -692,7 +705,7 @@ parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner) ch = (s2 << 4) | s3; - return addUnicodeChar(ch, escontext); + return addUnicodeChar(ch, escontext, yyscanner); } /* -- 2.47.1