From 2d7f41981187df904e3d985f2770d9b5c26e9d7c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 28 Nov 2022 09:24:20 +0100 Subject: [PATCH v11] Non-decimal integer literals Add support for hexadecimal, octal, and binary integer literals: 0x42F 0o273 0b100101 per SQL:202x draft. This adds support in the lexer as well as in the integer type input functions. Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com --- doc/src/sgml/syntax.sgml | 34 ++++ src/backend/catalog/information_schema.sql | 6 +- src/backend/catalog/sql_features.txt | 1 + src/backend/parser/parse_node.c | 37 +++- src/backend/parser/scan.l | 101 ++++++++--- src/backend/utils/adt/numutils.c | 170 ++++++++++++++++-- src/fe_utils/psqlscan.l | 78 +++++++-- src/interfaces/ecpg/preproc/pgc.l | 106 ++++++----- src/test/regress/expected/int2.out | 80 +++++++++ src/test/regress/expected/int4.out | 80 +++++++++ src/test/regress/expected/int8.out | 80 +++++++++ src/test/regress/expected/numerology.out | 193 ++++++++++++++++++++- src/test/regress/sql/int2.sql | 22 +++ src/test/regress/sql/int4.sql | 22 +++ src/test/regress/sql/int8.sql | 22 +++ src/test/regress/sql/numerology.sql | 51 +++++- 16 files changed, 974 insertions(+), 109 deletions(-) diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 93ad71737f51..956182e7c6a8 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -694,6 +694,40 @@ Numeric Constants + + Additionally, non-decimal integer constants can be used in these forms: + +0xhexdigits +0ooctdigits +0bbindigits + + hexdigits is one or more hexadecimal digits + (0-9, A-F), octdigits is one or more octal + digits (0-7), bindigits is one or more binary + digits (0 or 1). Hexadecimal digits and the radix prefixes can be in + upper or lower case. Note that only integers can have non-decimal forms, + not numbers with fractional parts. 
+ + + + These are some examples of this: +0b100101 +0B10011001 +0o273 +0O755 +0x42f +0XFFFF + + + + + + Nondecimal integer constants are currently only supported in the range + of the bigint type (see ). + + + integer bigint diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 18725a02d1fb..95c27a625e7e 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer WHEN 1700 /*numeric*/ THEN CASE WHEN $2 = -1 THEN null - ELSE (($2 - 4) >> 16) & 65535 + ELSE (($2 - 4) >> 16) & 0xFFFF END WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/ WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/ @@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) RETURNS integer WHEN $1 IN (1700) THEN CASE WHEN $2 = -1 THEN null - ELSE ($2 - 4) & 65535 + ELSE ($2 - 4) & 0xFFFF END ELSE null END; @@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod int4) RETURNS integer WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */ THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END WHEN $1 IN (1186) /* interval */ - THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 END + THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 0xFFFF END ELSE null END; diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index 8704a42b60a8..abad216b7ee4 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -527,6 +527,7 @@ T652 SQL-dynamic statements in SQL routines NO T653 SQL-schema statements in external routines YES T654 SQL-dynamic statements in external routines NO T655 Cyclically dependent routines YES +T661 Non-decimal integer literals YES SQL:202x draft T811 Basic SQL/JSON constructor functions NO T812 SQL/JSON: JSON_OBJECTAGG NO T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO 
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 4014db4b80f9..d33e3c179df7 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -385,11 +385,46 @@ make_const(ParseState *pstate, A_Const *aconst) { /* could be an oversize integer as well as a float ... */ + int base = 10; + char *startptr; + int sign; + char *testvalue; int64 val64; char *endptr; + startptr = aconst->val.fval.fval; + if (startptr[0] == '-') + { + sign = -1; + startptr++; + } + else + sign = +1; + if (startptr[0] == '0') + { + if (startptr[1] == 'b' || startptr[1] == 'B') + { + base = 2; + startptr += 2; + } + else if (startptr[1] == 'o' || startptr[1] == 'O') + { + base = 8; + startptr += 2; + } + else if (startptr[1] == 'x' || startptr[1] == 'X') /* chained: startptr may already have been advanced past a prefix */ + { + base = 16; + startptr += 2; + } + } + + if (sign == +1) + testvalue = startptr; + else + testvalue = psprintf("-%s", startptr); /* re-attach the sign stripped above */ errno = 0; - val64 = strtoi64(aconst->val.fval.fval, &endptr, 10); + val64 = strtoi64(testvalue, &endptr, base); if (errno == 0 && *endptr == '\0') { /* diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index db8b0fe8ebcc..9ad9e0c8ba74 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); static char *litbufdup(core_yyscan_t yyscanner); static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); -static int process_integer_literal(const char *token, YYSTYPE *lval); +static int process_integer_literal(const char *token, YYSTYPE *lval, int base); static void addunicode(pg_wchar c, yyscan_t yyscanner); #define yyerror(msg) scanner_yyerror(msg, yyscanner) @@ -385,25 +385,40 @@ operator {op_chars}+ * Unary minus is not part of a number here. Instead we pass it separately to * the parser, and there it gets coerced via doNegate(). 
* - * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. + * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * * {realfail} is added to prevent the need for scanner * backup when the {real} rule fails to match completely. */ -digit [0-9] - -integer {digit}+ -decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -decimalfail {digit}+\.\. -real ({integer}|{decimal})[Ee][-+]?{digit}+ -realfail ({integer}|{decimal})[Ee][-+] - -integer_junk {integer}{ident_start} -decimal_junk {decimal}{ident_start} +decdigit [0-9] +hexdigit [0-9A-Fa-f] +octdigit [0-7] +bindigit [0-1] + +decinteger {decdigit}+ +hexinteger 0[xX]{hexdigit}+ +octinteger 0[oO]{octdigit}+ +bininteger 0[bB]{bindigit}+ + +hexfail 0[xX] +octfail 0[oO] +binfail 0[bB] + +numeric (({decinteger}\.{decinteger}?)|(\.{decinteger})) +numericfail {decdigit}+\.\. + +real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+ +realfail ({decinteger}|{numeric})[Ee][-+] + +decinteger_junk {decinteger}{ident_start} +hexinteger_junk {hexinteger}{ident_start} +octinteger_junk {octinteger}{ident_start} +bininteger_junk {bininteger}{ident_start} +numeric_junk {numeric}{ident_start} real_junk {real}{ident_start} -param \${integer} -param_junk \${integer}{ident_start} +param \${decinteger} +param_junk \${decinteger}{ident_start} other . @@ -983,20 +998,44 @@ other . 
yyerror("trailing junk after parameter"); } -{integer} { +{decinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 10); + } +{hexinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 16); + } +{octinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 8); + } +{bininteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 2); + } +{hexfail} { + SET_YYLLOC(); + yyerror("invalid hexadecimal integer"); + } +{octfail} { SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + yyerror("invalid octal integer"); } -{decimal} { +{binfail} { + SET_YYLLOC(); + yyerror("invalid binary integer"); + } +{numeric} { SET_YYLLOC(); yylval->str = pstrdup(yytext); return FCONST; } -{decimalfail} { +{numericfail} { /* throw back the .., and treat as integer */ yyless(yyleng - 2); SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + return process_integer_literal(yytext, yylval, 10); } {real} { SET_YYLLOC(); @@ -1007,11 +1046,23 @@ other . SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } -{integer_junk} { +{decinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{hexinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{octinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{bininteger_junk} { SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } -{decimal_junk} { +{numeric_junk} { SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } @@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner) } /* - * Process {integer}. Note this will also do the right thing with {decimal}, - * ie digits and a decimal point. + * Process {decinteger}, {hexinteger}, etc. Note this will also do the right + * thing with {numeric}, ie digits and a decimal point. 
*/ static int -process_integer_literal(const char *token, YYSTYPE *lval) +process_integer_literal(const char *token, YYSTYPE *lval, int base) { int val; char *endptr; errno = 0; - val = strtoint(token, &endptr, 10); + val = strtoint(base == 10 ? token : token + 2, &endptr, base); if (*endptr != '\0' || errno == ERANGE) { /* integer too large (or contains decimal pt), treat it as a float */ diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 834ec0b5882c..2942b7c44904 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -85,6 +85,17 @@ decimalLength64(const uint64 v) return t + (v >= PowersOfTen[t]); } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + /* * Convert input string to a signed 16 bit integer. 
* @@ -120,13 +131,56 @@ pg_strtoint16(const char *s) goto invalid_syntax; /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - int8 digit = (*ptr++ - '0'); + ptr += 2; + while (*ptr && isxdigit((unsigned char) *ptr)) + { + int8 digit = hexlookup[(unsigned char) *ptr]; - if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) - goto out_of_range; + if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) || + unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + goto out_of_range; + + ptr++; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) || + unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) || + unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else + { + while (*ptr && isdigit((unsigned char) *ptr)) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) || + unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + goto out_of_range; + } } /* allow trailing whitespace, but not other trailing chars */ @@ -196,13 +250,56 @@ pg_strtoint32(const char *s) goto invalid_syntax; /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - int8 digit = (*ptr++ - '0'); + ptr += 2; + while (*ptr && isxdigit((unsigned char) *ptr)) + { + int8 digit = hexlookup[(unsigned char) *ptr]; - if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) - goto out_of_range; + if 
(unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) || + unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + goto out_of_range; + + ptr++; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) || + unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) || + unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else + { + while (*ptr && isdigit((unsigned char) *ptr)) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) || + unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + goto out_of_range; + } } /* allow trailing whitespace, but not other trailing chars */ @@ -280,13 +377,56 @@ pg_strtoint64(const char *s) goto invalid_syntax; /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - int8 digit = (*ptr++ - '0'); + ptr += 2; + while (*ptr && isxdigit((unsigned char) *ptr)) + { + int8 digit = hexlookup[(unsigned char) *ptr]; - if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) - goto out_of_range; + if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) || + unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) + goto out_of_range; + + ptr++; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) || + unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + 
{ + ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) || + unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + } + else + { + while (*ptr && isdigit((unsigned char) *ptr)) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) || + unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) + goto out_of_range; + } } /* allow trailing whitespace, but not other trailing chars */ diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index ae531ec24077..cb1fc5213844 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -323,25 +323,40 @@ operator {op_chars}+ * Unary minus is not part of a number here. Instead we pass it separately to * the parser, and there it gets coerced via doNegate(). * - * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. + * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * * {realfail} is added to prevent the need for scanner * backup when the {real} rule fails to match completely. */ -digit [0-9] - -integer {digit}+ -decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -decimalfail {digit}+\.\. -real ({integer}|{decimal})[Ee][-+]?{digit}+ -realfail ({integer}|{decimal})[Ee][-+] - -integer_junk {integer}{ident_start} -decimal_junk {decimal}{ident_start} +decdigit [0-9] +hexdigit [0-9A-Fa-f] +octdigit [0-7] +bindigit [0-1] + +decinteger {decdigit}+ +hexinteger 0[xX]{hexdigit}+ +octinteger 0[oO]{octdigit}+ +bininteger 0[bB]{bindigit}+ + +hexfail 0[xX] +octfail 0[oO] +binfail 0[bB] + +numeric (({decinteger}\.{decinteger}?)|(\.{decinteger})) +numericfail {decdigit}+\.\. 
+ +real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+ +realfail ({decinteger}|{numeric})[Ee][-+] + +decinteger_junk {decinteger}{ident_start} +hexinteger_junk {hexinteger}{ident_start} +octinteger_junk {octinteger}{ident_start} +bininteger_junk {bininteger}{ident_start} +numeric_junk {numeric}{ident_start} real_junk {real}{ident_start} -param \${integer} -param_junk \${integer}{ident_start} +param \${decinteger} +param_junk \${decinteger}{ident_start} /* psql-specific: characters allowed in variable names */ variable_char [A-Za-z\200-\377_0-9] @@ -847,13 +862,31 @@ other . ECHO; } -{integer} { +{decinteger} { + ECHO; + } +{hexinteger} { + ECHO; + } +{octinteger} { + ECHO; + } +{bininteger} { + ECHO; + } +{hexfail} { ECHO; } -{decimal} { +{octfail} { ECHO; } -{decimalfail} { +{binfail} { + ECHO; + } +{numeric} { + ECHO; + } +{numericfail} { /* throw back the .., and treat as integer */ yyless(yyleng - 2); ECHO; @@ -864,10 +897,19 @@ other . {realfail} { ECHO; } -{integer_junk} { +{decinteger_junk} { + ECHO; + } +{hexinteger_junk} { + ECHO; + } +{octinteger_junk} { + ECHO; + } +{bininteger_junk} { ECHO; } -{decimal_junk} { +{numeric_junk} { ECHO; } {real_junk} { diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index c145c9698f1a..2c09c6cb4f35 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -57,7 +57,7 @@ static bool include_next; #define startlit() (literalbuf[0] = '\0', literallen = 0) static void addlit(char *ytext, int yleng); static void addlitchar(unsigned char ychar); -static int process_integer_literal(const char *token, YYSTYPE *lval); +static int process_integer_literal(const char *token, YYSTYPE *lval, int base); static void parse_include(void); static bool ecpg_isspace(char ch); static bool isdefine(void); @@ -351,25 +351,40 @@ operator {op_chars}+ * Unary minus is not part of a number here. Instead we pass it separately to * the parser, and there it gets coerced via doNegate(). 
* - * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. + * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * * {realfail} is added to prevent the need for scanner * backup when the {real} rule fails to match completely. */ -digit [0-9] - -integer {digit}+ -decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -decimalfail {digit}+\.\. -real ({integer}|{decimal})[Ee][-+]?{digit}+ -realfail ({integer}|{decimal})[Ee][-+] - -integer_junk {integer}{ident_start} -decimal_junk {decimal}{ident_start} +decdigit [0-9] +hexdigit [0-9A-Fa-f] +octdigit [0-7] +bindigit [0-1] + +decinteger {decdigit}+ +hexinteger 0[xX]{hexdigit}+ +octinteger 0[oO]{octdigit}+ +bininteger 0[bB]{bindigit}+ + +hexfail 0[xX] +octfail 0[oO] +binfail 0[bB] + +numeric (({decinteger}\.{decinteger}?)|(\.{decinteger})) +numericfail {decdigit}+\.\. + +real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+ +realfail ({decinteger}|{numeric})[Ee][-+] + +decinteger_junk {decinteger}{ident_start} +hexinteger_junk {hexinteger}{ident_start} +octinteger_junk {octinteger}{ident_start} +bininteger_junk {bininteger}{ident_start} +numeric_junk {numeric}{ident_start} real_junk {real}{ident_start} -param \${integer} -param_junk \${integer}{ident_start} +param \${decinteger} +param_junk \${decinteger}{ident_start} /* special characters for other dbms */ /* we have to react differently in compat mode */ @@ -399,9 +414,6 @@ include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT] import [iI][mM][pP][oO][rR][tT] undef [uU][nN][dD][eE][fF] -/* C version of hex number */ -xch 0[xX][0-9A-Fa-f]* - ccomment "//".*\n if [iI][fF] @@ -414,7 +426,7 @@ endif [eE][nN][dD][iI][fF] struct [sS][tT][rR][uU][cC][tT] exec_sql {exec}{space}*{sql}{space}* -ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit}) +ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit}) ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit} /* we might want to parse all cpp include files */ @@ -932,17 
+944,20 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } /* */ { -{integer} { - return process_integer_literal(yytext, &base_yylval); +{decinteger} { + return process_integer_literal(yytext, &base_yylval, 10); } -{decimal} { +{hexinteger} { + return process_integer_literal(yytext, &base_yylval, 16); + } +{numeric} { base_yylval.str = mm_strdup(yytext); return FCONST; } -{decimalfail} { +{numericfail} { /* throw back the .., and treat as integer */ yyless(yyleng - 2); - return process_integer_literal(yytext, &base_yylval); + return process_integer_literal(yytext, &base_yylval, 10); } {real} { base_yylval.str = mm_strdup(yytext); @@ -951,22 +966,38 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ {realfail} { /* * throw back the [Ee][+-], and figure out whether what - * remains is an {integer} or {decimal}. + * remains is a {decinteger} or {numeric}. */ yyless(yyleng - 2); - return process_integer_literal(yytext, &base_yylval); + return process_integer_literal(yytext, &base_yylval, 10); } } /* */ { +{octinteger} { + return process_integer_literal(yytext, &base_yylval, 8); + } +{bininteger} { + return process_integer_literal(yytext, &base_yylval, 2); + } + /* * Note that some trailing junk is valid in C (such as 100LL), so we * contain this to SQL mode. 
*/ -{integer_junk} { +{decinteger_junk} { mmfatal(PARSE_ERROR, "trailing junk after numeric literal"); } -{decimal_junk} { +{hexinteger_junk} { + mmfatal(PARSE_ERROR, "trailing junk after numeric literal"); + } +{octinteger_junk} { + mmfatal(PARSE_ERROR, "trailing junk after numeric literal"); + } +{bininteger_junk} { + mmfatal(PARSE_ERROR, "trailing junk after numeric literal"); + } +{numeric_junk} { mmfatal(PARSE_ERROR, "trailing junk after numeric literal"); } {real_junk} { @@ -1036,19 +1067,6 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ return S_ANYTHING; } {ccomment} { ECHO; } -{xch} { - char* endptr; - - errno = 0; - base_yylval.ival = strtoul((char *) yytext, &endptr, 16); - if (*endptr != '\0' || errno == ERANGE) - { - errno = 0; - base_yylval.str = mm_strdup(yytext); - return SCONST; - } - return ICONST; - } {cppinclude} { if (system_includes) { @@ -1573,17 +1591,17 @@ addlitchar(unsigned char ychar) } /* - * Process {integer}. Note this will also do the right thing with {decimal}, - * ie digits and a decimal point. + * Process {decinteger}, {hexinteger}, etc. Note this will also do the right + * thing with {numeric}, ie digits and a decimal point. */ static int -process_integer_literal(const char *token, YYSTYPE *lval) +process_integer_literal(const char *token, YYSTYPE *lval, int base) { int val; char *endptr; errno = 0; - val = strtoint(token, &endptr, 10); + val = strtoint(base == 10 ? 
token : token + 2, &endptr, base); if (*endptr != '\0' || errno == ERANGE) { /* integer too large (or contains decimal pt), treat it as a float */ diff --git a/src/test/regress/expected/int2.out b/src/test/regress/expected/int2.out index 109cf9baaaca..37cbd419fa40 100644 --- a/src/test/regress/expected/int2.out +++ b/src/test/regress/expected/int2.out @@ -304,3 +304,83 @@ FROM (VALUES (-2.5::numeric), 2.5 | 3 (7 rows) +-- non-decimal literals +SELECT int2 '0b100101'; + int2 +------ + 37 +(1 row) + +SELECT int2 '0o273'; + int2 +------ + 187 +(1 row) + +SELECT int2 '0x42F'; + int2 +------ + 1071 +(1 row) + +-- cases near overflow +SELECT int2 '0b111111111111111'; + int2 +------- + 32767 +(1 row) + +SELECT int2 '0b1000000000000000'; +ERROR: value "0b1000000000000000" is out of range for type smallint +LINE 1: SELECT int2 '0b1000000000000000'; + ^ +SELECT int2 '0o77777'; + int2 +------- + 32767 +(1 row) + +SELECT int2 '0o100000'; +ERROR: value "0o100000" is out of range for type smallint +LINE 1: SELECT int2 '0o100000'; + ^ +SELECT int2 '0x7FFF'; + int2 +------- + 32767 +(1 row) + +SELECT int2 '0x8000'; +ERROR: value "0x8000" is out of range for type smallint +LINE 1: SELECT int2 '0x8000'; + ^ +SELECT int2 '-0b1000000000000000'; + int2 +-------- + -32768 +(1 row) + +SELECT int2 '-0b1000000000000001'; +ERROR: value "-0b1000000000000001" is out of range for type smallint +LINE 1: SELECT int2 '-0b1000000000000001'; + ^ +SELECT int2 '-0o100000'; + int2 +-------- + -32768 +(1 row) + +SELECT int2 '-0o100001'; +ERROR: value "-0o100001" is out of range for type smallint +LINE 1: SELECT int2 '-0o100001'; + ^ +SELECT int2 '-0x8000'; + int2 +-------- + -32768 +(1 row) + +SELECT int2 '-0x8001'; +ERROR: value "-0x8001" is out of range for type smallint +LINE 1: SELECT int2 '-0x8001'; + ^ diff --git a/src/test/regress/expected/int4.out b/src/test/regress/expected/int4.out index fbcc0e8d9e68..718fa3efc902 100644 --- a/src/test/regress/expected/int4.out +++ 
b/src/test/regress/expected/int4.out @@ -431,3 +431,83 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow ERROR: integer out of range SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow ERROR: integer out of range +-- non-decimal literals +SELECT int4 '0b100101'; + int4 +------ + 37 +(1 row) + +SELECT int4 '0o273'; + int4 +------ + 187 +(1 row) + +SELECT int4 '0x42F'; + int4 +------ + 1071 +(1 row) + +-- cases near overflow +SELECT int4 '0b1111111111111111111111111111111'; + int4 +------------ + 2147483647 +(1 row) + +SELECT int4 '0b10000000000000000000000000000000'; +ERROR: value "0b10000000000000000000000000000000" is out of range for type integer +LINE 1: SELECT int4 '0b10000000000000000000000000000000'; + ^ +SELECT int4 '0o17777777777'; + int4 +------------ + 2147483647 +(1 row) + +SELECT int4 '0o20000000000'; +ERROR: value "0o20000000000" is out of range for type integer +LINE 1: SELECT int4 '0o20000000000'; + ^ +SELECT int4 '0x7FFFFFFF'; + int4 +------------ + 2147483647 +(1 row) + +SELECT int4 '0x80000000'; +ERROR: value "0x80000000" is out of range for type integer +LINE 1: SELECT int4 '0x80000000'; + ^ +SELECT int4 '-0b10000000000000000000000000000000'; + int4 +------------- + -2147483648 +(1 row) + +SELECT int4 '-0b10000000000000000000000000000001'; +ERROR: value "-0b10000000000000000000000000000001" is out of range for type integer +LINE 1: SELECT int4 '-0b10000000000000000000000000000001'; + ^ +SELECT int4 '-0o20000000000'; + int4 +------------- + -2147483648 +(1 row) + +SELECT int4 '-0o20000000001'; +ERROR: value "-0o20000000001" is out of range for type integer +LINE 1: SELECT int4 '-0o20000000001'; + ^ +SELECT int4 '-0x80000000'; + int4 +------------- + -2147483648 +(1 row) + +SELECT int4 '-0x80000001'; +ERROR: value "-0x80000001" is out of range for type integer +LINE 1: SELECT int4 '-0x80000001'; + ^ diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out index 1ae23cf3f94f..ab35b53cc4bd 100644 --- 
a/src/test/regress/expected/int8.out +++ b/src/test/regress/expected/int8.out @@ -927,3 +927,83 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow ERROR: bigint out of range SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow ERROR: bigint out of range +-- non-decimal literals +SELECT int8 '0b100101'; + int8 +------ + 37 +(1 row) + +SELECT int8 '0o273'; + int8 +------ + 187 +(1 row) + +SELECT int8 '0x42F'; + int8 +------ + 1071 +(1 row) + +-- cases near overflow +SELECT int8 '0b111111111111111111111111111111111111111111111111111111111111111'; + int8 +--------------------- + 9223372036854775807 +(1 row) + +SELECT int8 '0b1000000000000000000000000000000000000000000000000000000000000000'; +ERROR: value "0b1000000000000000000000000000000000000000000000000000000000000000" is out of range for type bigint +LINE 1: SELECT int8 '0b100000000000000000000000000000000000000000000... + ^ +SELECT int8 '0o777777777777777777777'; + int8 +--------------------- + 9223372036854775807 +(1 row) + +SELECT int8 '0o1000000000000000000000'; +ERROR: value "0o1000000000000000000000" is out of range for type bigint +LINE 1: SELECT int8 '0o1000000000000000000000'; + ^ +SELECT int8 '0x7FFFFFFFFFFFFFFF'; + int8 +--------------------- + 9223372036854775807 +(1 row) + +SELECT int8 '0x8000000000000000'; +ERROR: value "0x8000000000000000" is out of range for type bigint +LINE 1: SELECT int8 '0x8000000000000000'; + ^ +SELECT int8 '-0b1000000000000000000000000000000000000000000000000000000000000000'; + int8 +---------------------- + -9223372036854775808 +(1 row) + +SELECT int8 '-0b1000000000000000000000000000000000000000000000000000000000000001'; +ERROR: value "-0b1000000000000000000000000000000000000000000000000000000000000001" is out of range for type bigint +LINE 1: SELECT int8 '-0b10000000000000000000000000000000000000000000... 
+ ^ +SELECT int8 '-0o1000000000000000000000'; + int8 +---------------------- + -9223372036854775808 +(1 row) + +SELECT int8 '-0o1000000000000000000001'; +ERROR: value "-0o1000000000000000000001" is out of range for type bigint +LINE 1: SELECT int8 '-0o1000000000000000000001'; + ^ +SELECT int8 '-0x8000000000000000'; + int8 +---------------------- + -9223372036854775808 +(1 row) + +SELECT int8 '-0x8000000000000001'; +ERROR: value "-0x8000000000000001" is out of range for type bigint +LINE 1: SELECT int8 '-0x8000000000000001'; + ^ diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out index 77d48434173b..15cd6b167236 100644 --- a/src/test/regress/expected/numerology.out +++ b/src/test/regress/expected/numerology.out @@ -3,14 +3,167 @@ -- Test various combinations of numeric types and functions. -- -- --- Trailing junk in numeric literals +-- numeric literals -- +SELECT 0b100101; + ?column? +---------- + 37 +(1 row) + +SELECT 0o273; + ?column? +---------- + 187 +(1 row) + +SELECT 0x42F; + ?column? +---------- + 1071 +(1 row) + +-- cases near int4 overflow +SELECT 0b1111111111111111111111111111111; + ?column? +------------ + 2147483647 +(1 row) + +SELECT 0b10000000000000000000000000000000; + ?column? +------------ + 2147483648 +(1 row) + +SELECT 0o17777777777; + ?column? +------------ + 2147483647 +(1 row) + +SELECT 0o20000000000; + ?column? +------------ + 2147483648 +(1 row) + +SELECT 0x7FFFFFFF; + ?column? +------------ + 2147483647 +(1 row) + +SELECT 0x80000000; + ?column? +------------ + 2147483648 +(1 row) + +SELECT -0b10000000000000000000000000000000; + ?column? +------------- + -2147483648 +(1 row) + +SELECT -0b10000000000000000000000000000001; + ?column? +------------- + -2147483649 +(1 row) + +SELECT -0o20000000000; + ?column? +------------- + -2147483648 +(1 row) + +SELECT -0o20000000001; + ?column? +------------- + -2147483649 +(1 row) + +SELECT -0x80000000; + ?column? 
+------------- + -2147483648 +(1 row) + +SELECT -0x80000001; + ?column? +------------- + -2147483649 +(1 row) + +-- cases near int8 overflow +SELECT 0b111111111111111111111111111111111111111111111111111111111111111; + ?column? +--------------------- + 9223372036854775807 +(1 row) + +SELECT 0b1000000000000000000000000000000000000000000000000000000000000000; +ERROR: invalid input syntax for type numeric: "0b1000000000000000000000000000000000000000000000000000000000000000" +LINE 1: SELECT 0b100000000000000000000000000000000000000000000000000... + ^ +SELECT 0o777777777777777777777; + ?column? +--------------------- + 9223372036854775807 +(1 row) + +SELECT 0o1000000000000000000000; +ERROR: invalid input syntax for type numeric: "0o1000000000000000000000" +LINE 1: SELECT 0o1000000000000000000000; + ^ +SELECT 0x7FFFFFFFFFFFFFFF; + ?column? +--------------------- + 9223372036854775807 +(1 row) + +SELECT 0x8000000000000000; +ERROR: invalid input syntax for type numeric: "0x8000000000000000" +LINE 1: SELECT 0x8000000000000000; + ^ +SELECT -0b1000000000000000000000000000000000000000000000000000000000000000; + ?column? +---------------------- + -9223372036854775808 +(1 row) + +SELECT -0b1000000000000000000000000000000000000000000000000000000000000001; +ERROR: invalid input syntax for type numeric: "-0b1000000000000000000000000000000000000000000000000000000000000001" +LINE 1: SELECT -0b10000000000000000000000000000000000000000000000000... + ^ +SELECT -0o1000000000000000000000; + ?column? +---------------------- + -9223372036854775808 +(1 row) + +SELECT -0o1000000000000000000001; +ERROR: invalid input syntax for type numeric: "-0o1000000000000000000001" +LINE 1: SELECT -0o1000000000000000000001; + ^ +SELECT -0x8000000000000000; + ?column? 
+---------------------- + -9223372036854775808 +(1 row) + +SELECT -0x8000000000000001; +ERROR: invalid input syntax for type numeric: "-0x8000000000000001" +LINE 1: SELECT -0x8000000000000001; + ^ +-- error cases SELECT 123abc; ERROR: trailing junk after numeric literal at or near "123a" LINE 1: SELECT 123abc; ^ SELECT 0x0o; -ERROR: trailing junk after numeric literal at or near "0x" +ERROR: trailing junk after numeric literal at or near "0x0o" LINE 1: SELECT 0x0o; ^ SELECT 1_2_3; @@ -45,6 +198,42 @@ PREPARE p1 AS SELECT $1a; ERROR: trailing junk after parameter at or near "$1a" LINE 1: PREPARE p1 AS SELECT $1a; ^ +SELECT 0b; +ERROR: invalid binary integer at or near "0b" +LINE 1: SELECT 0b; + ^ +SELECT 1b; +ERROR: trailing junk after numeric literal at or near "1b" +LINE 1: SELECT 1b; + ^ +SELECT 0b0x; +ERROR: trailing junk after numeric literal at or near "0b0x" +LINE 1: SELECT 0b0x; + ^ +SELECT 0o; +ERROR: invalid octal integer at or near "0o" +LINE 1: SELECT 0o; + ^ +SELECT 1o; +ERROR: trailing junk after numeric literal at or near "1o" +LINE 1: SELECT 1o; + ^ +SELECT 0o0x; +ERROR: trailing junk after numeric literal at or near "0o0x" +LINE 1: SELECT 0o0x; + ^ +SELECT 0x; +ERROR: invalid hexadecimal integer at or near "0x" +LINE 1: SELECT 0x; + ^ +SELECT 1x; +ERROR: trailing junk after numeric literal at or near "1x" +LINE 1: SELECT 1x; + ^ +SELECT 0x0y; +ERROR: trailing junk after numeric literal at or near "0x0y" +LINE 1: SELECT 0x0y; + ^ -- -- Test implicit type conversions -- This fails for Postgres v6.1 (and earlier?) 
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql index ea29066b78ee..9809e87d52f2 100644 --- a/src/test/regress/sql/int2.sql +++ b/src/test/regress/sql/int2.sql @@ -104,3 +104,25 @@ (0.5::numeric), (1.5::numeric), (2.5::numeric)) t(x); + + +-- non-decimal literals + +SELECT int2 '0b100101'; +SELECT int2 '0o273'; +SELECT int2 '0x42F'; + +-- cases near overflow +SELECT int2 '0b111111111111111'; +SELECT int2 '0b1000000000000000'; +SELECT int2 '0o77777'; +SELECT int2 '0o100000'; +SELECT int2 '0x7FFF'; +SELECT int2 '0x8000'; + +SELECT int2 '-0b1000000000000000'; +SELECT int2 '-0b1000000000000001'; +SELECT int2 '-0o100000'; +SELECT int2 '-0o100001'; +SELECT int2 '-0x8000'; +SELECT int2 '-0x8001'; diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql index f19077f3da21..e704dee18a2f 100644 --- a/src/test/regress/sql/int4.sql +++ b/src/test/regress/sql/int4.sql @@ -164,3 +164,25 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow + + +-- non-decimal literals + +SELECT int4 '0b100101'; +SELECT int4 '0o273'; +SELECT int4 '0x42F'; + +-- cases near overflow +SELECT int4 '0b1111111111111111111111111111111'; +SELECT int4 '0b10000000000000000000000000000000'; +SELECT int4 '0o17777777777'; +SELECT int4 '0o20000000000'; +SELECT int4 '0x7FFFFFFF'; +SELECT int4 '0x80000000'; + +SELECT int4 '-0b10000000000000000000000000000000'; +SELECT int4 '-0b10000000000000000000000000000001'; +SELECT int4 '-0o20000000000'; +SELECT int4 '-0o20000000001'; +SELECT int4 '-0x80000000'; +SELECT int4 '-0x80000001'; diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql index 38b771964d79..0a567a81c175 100644 --- a/src/test/regress/sql/int8.sql +++ b/src/test/regress/sql/int8.sql @@ -245,3 +245,25 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow + + +-- non-decimal literals + 
+SELECT int8 '0b100101'; +SELECT int8 '0o273'; +SELECT int8 '0x42F'; + +-- cases near overflow +SELECT int8 '0b111111111111111111111111111111111111111111111111111111111111111'; +SELECT int8 '0b1000000000000000000000000000000000000000000000000000000000000000'; +SELECT int8 '0o777777777777777777777'; +SELECT int8 '0o1000000000000000000000'; +SELECT int8 '0x7FFFFFFFFFFFFFFF'; +SELECT int8 '0x8000000000000000'; + +SELECT int8 '-0b1000000000000000000000000000000000000000000000000000000000000000'; +SELECT int8 '-0b1000000000000000000000000000000000000000000000000000000000000001'; +SELECT int8 '-0o1000000000000000000000'; +SELECT int8 '-0o1000000000000000000001'; +SELECT int8 '-0x8000000000000000'; +SELECT int8 '-0x8000000000000001'; diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql index be7d6dfe0c26..310d9e57663e 100644 --- a/src/test/regress/sql/numerology.sql +++ b/src/test/regress/sql/numerology.sql @@ -3,10 +3,46 @@ -- Test various combinations of numeric types and functions. 
-- + -- --- Trailing junk in numeric literals +-- numeric literals -- +SELECT 0b100101; +SELECT 0o273; +SELECT 0x42F; + +-- cases near int4 overflow +SELECT 0b1111111111111111111111111111111; +SELECT 0b10000000000000000000000000000000; +SELECT 0o17777777777; +SELECT 0o20000000000; +SELECT 0x7FFFFFFF; +SELECT 0x80000000; + +SELECT -0b10000000000000000000000000000000; +SELECT -0b10000000000000000000000000000001; +SELECT -0o20000000000; +SELECT -0o20000000001; +SELECT -0x80000000; +SELECT -0x80000001; + +-- cases near int8 overflow +SELECT 0b111111111111111111111111111111111111111111111111111111111111111; +SELECT 0b1000000000000000000000000000000000000000000000000000000000000000; +SELECT 0o777777777777777777777; +SELECT 0o1000000000000000000000; +SELECT 0x7FFFFFFFFFFFFFFF; +SELECT 0x8000000000000000; + +SELECT -0b1000000000000000000000000000000000000000000000000000000000000000; +SELECT -0b1000000000000000000000000000000000000000000000000000000000000001; +SELECT -0o1000000000000000000000; +SELECT -0o1000000000000000000001; +SELECT -0x8000000000000000; +SELECT -0x8000000000000001; + +-- error cases SELECT 123abc; SELECT 0x0o; SELECT 1_2_3; @@ -18,6 +54,19 @@ SELECT 0.0e+a; PREPARE p1 AS SELECT $1a; +SELECT 0b; +SELECT 1b; +SELECT 0b0x; + +SELECT 0o; +SELECT 1o; +SELECT 0o0x; + +SELECT 0x; +SELECT 1x; +SELECT 0x0y; + + -- -- Test implicit type conversions -- This fails for Postgres v6.1 (and earlier?) base-commit: cbe6e482d7bf851c6e466697a21dcef7b05cbb59 -- 2.38.1