From b09c06c0c1b30f7bfe87d00439277c25774ffa6b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 4 Oct 2024 16:02:58 -0400 Subject: [PATCH v5 1/9] Clean up documentation of parse.pl, and add more input checking. README.parser is the user's manual, such as it is, for parse.pl. It's rather poorly written if you ask me; so try to improve it. (More could be written here, but this at least covers the same info in a more organized fashion.) Also, the single solitary line of usage info in parse.pl itself was a lie. Replace. Add some error checks that the ecpg.addons entries meet the syntax rules set forth in README.parser. One of them didn't, but accidentally worked anyway because the logic in include_addon is such that 'block' is the default behavior. Also add a cross-check that each ecpg.addons entry is matched exactly once in the backend grammar. This exposed that there are two dead entries there --- they are dead because the %replace_types table in parse.pl causes their nonterminals to be ignored altogether. Removing them doesn't change the generated preproc.y file. (This implies that check_rules.pl is completely worthless and should be nuked: it adds build cycles and maintenance effort while failing to reliably accomplish its one job of detecting dead rules. I've not done that here, though.) Discussion: https://postgr.es/m/2011420.1713493114@sss.pgh.pa.us --- src/interfaces/ecpg/preproc/README.parser | 119 ++++++++++++++-------- src/interfaces/ecpg/preproc/ecpg.addons | 11 +- src/interfaces/ecpg/preproc/parse.pl | 58 ++++++++--- 3 files changed, 123 insertions(+), 65 deletions(-) diff --git a/src/interfaces/ecpg/preproc/README.parser b/src/interfaces/ecpg/preproc/README.parser index ddc3061d48..5698f5ab32 100644 --- a/src/interfaces/ecpg/preproc/README.parser +++ b/src/interfaces/ecpg/preproc/README.parser @@ -1,42 +1,77 @@ -ECPG modifies and extends the core grammar in a way that -1) every token in ECPG is type. New tokens are - defined in ecpg.tokens, types are defined in ecpg.type -2) most tokens from the core grammar are simply converted - to literals concatenated together to form the SQL string - passed to the server, this is done by parse.pl. -3) some rules need side-effects, actions are either added - or completely overridden (compared to the basic token - concatenation) for them, these are defined in ecpg.addons, - the rules for ecpg.addons are explained below. -4) new grammar rules are needed for ECPG metacommands. - These are in ecpg.trailer. -5) ecpg.header contains common functions, etc. used by - actions for grammar rules. - -In "ecpg.addons", every modified rule follows this pattern: - ECPG: dumpedtokens postfix -where "dumpedtokens" is simply tokens from core gram.y's -rules concatenated together. e.g. if gram.y has this: - ruleA: tokenA tokenB tokenC {...} -then "dumpedtokens" is "ruleAtokenAtokenBtokenC". -"postfix" above can be: -a) "block" - the automatic rule created by parse.pl is completely - overridden, the code block has to be written completely as - it were in a plain bison grammar -b) "rule" - the automatic rule is extended on, so new syntaxes - are accepted for "ruleA". E.g.: - ECPG: ruleAtokenAtokenBtokenC rule - | tokenD tokenE { action_code; } - ... - It will be substituted with: - ruleA: - | tokenD tokenE { action_code; } - ... -c) "addon" - the automatic action for the rule (SQL syntax constructed - from the tokens concatenated together) is prepended with a new - action code part. This code part is written as is's already inside - the { ... } - -Multiple "addon" or "block" lines may appear together with the -new code block if the code block is common for those rules. +ECPG's grammar (preproc.y) is built by parse.pl from the +backend's grammar (gram.y) plus various add-on rules. +Some notes: + +1) Most input matching core grammar productions is simply converted + to strings and concatenated together to form the SQL string + passed to the server. parse.pl can automatically build the + grammar actions needed to do this. +2) Some grammar rules need special actions that are added to or + completely override the default token-concatenation behavior. + This is controlled by ecpg.addons as explained below. +3) Additional grammar rules are needed for ECPG's own commands. + These are in ecpg.trailer, as is the "epilogue" part of preproc.y. +4) ecpg.header contains the "prologue" part of preproc.y, including + support functions, Bison options, etc. +5) Additional terminals added by ECPG must be defined in ecpg.tokens. + Additional nonterminals added by ECPG must be defined in ecpg.type. + +ecpg.header, ecpg.tokens, ecpg.type, and ecpg.trailer are just +copied verbatim into preproc.y at appropriate points. + +ecpg.addons contains entries that begin with a line like + ECPG: concattokens ruletype +and typically have one or more following lines that are the code +for a grammar action. Any line not starting with "ECPG:" is taken +to be part of the code block for the preceding "ECPG:" line. + +"concattokens" identifies which gram.y production this entry affects. +It is simply the target nonterminal and the tokens from the gram.y rule +concatenated together. For example, to modify the action for a gram.y +rule like this: + target: tokenA tokenB tokenC {...} +"concattokens" would be "targettokenAtokenBtokenC". If we want to +modify a non-first alternative for a nonterminal, we still write the +nonterminal. For example, "concattokens" should be "targettokenDtokenE" +to affect the second alternative in: + target: tokenA tokenB tokenC {...} + | tokenD tokenE {...} + +"ruletype" is one of: + +a) "block" - the automatic action that parse.pl would create is + completely overridden. Instead the entry's code block is emitted. + The code block must include the braces ({}) needed for a Bison action. + +b) "addon" - the entry's code block is inserted into the generated + action, ahead of the automatic token-concatenation code. + In this case the code block need not contain braces, since + it will be inserted within braces. + +c) "rule" - the automatic action is emitted, but then the entry's + code block is added verbatim afterwards. This typically is + used to add new alternatives to a nonterminal of the core grammar. + For example, given the entry: + ECPG: targettokenAtokenBtokenC rule + | tokenD tokenE { custom_action; } + what will be emitted is + target: tokenA tokenB tokenC { automatic_action; } + | tokenD tokenE { custom_action; } + +Multiple "ECPG:" entries can share the same code block, if the +same action is needed for all. When an "ECPG:" line is immediately +followed by another one, it is not assigned an empty code block; +rather the next nonempty code block is assumed to apply to all +immediately preceding "ECPG:" entries. + +In addition to the modifications specified by ecpg.addons, +parse.pl contains some tables that list backend grammar +productions to be ignored or modified. + +Nonterminals that construct strings (as described above) should be +given type, which is parse.pl's default assumption for +nonterminals found in gram.y. That can be overridden at need by +making an entry in parse.pl's %replace_types table. %replace_types +can also be used to suppress output of a nonterminal's rules +altogether (in which case ecpg.trailer had better provide replacement +rules, since the nonterminal will still be referred to elsewhere). diff --git a/src/interfaces/ecpg/preproc/ecpg.addons b/src/interfaces/ecpg/preproc/ecpg.addons index e7dce4e404..6a1893553b 100644 --- a/src/interfaces/ecpg/preproc/ecpg.addons +++ b/src/interfaces/ecpg/preproc/ecpg.addons @@ -497,7 +497,7 @@ ECPG: opt_array_boundsopt_array_bounds'['']' block $$.index2 = mm_strdup($3); $$.str = cat_str(4, $1.str, mm_strdup("["), $3, mm_strdup("]")); } -ECPG: opt_array_bounds +ECPG: opt_array_bounds block { $$.index1 = mm_strdup("-1"); $$.index2 = mm_strdup("-1"); @@ -510,15 +510,6 @@ ECPG: IconstICONST block ECPG: AexprConstNULL_P rule | civar { $$ = $1; } | civarind { $$ = $1; } -ECPG: ColIdcol_name_keyword rule - | ECPGKeywords { $$ = $1; } - | ECPGCKeywords { $$ = $1; } - | CHAR_P { $$ = mm_strdup("char"); } - | VALUES { $$ = mm_strdup("values"); } -ECPG: type_function_nametype_func_name_keyword rule - | ECPGKeywords { $$ = $1; } - | ECPGTypeName { $$ = $1; } - | ECPGCKeywords { $$ = $1; } ECPG: VariableShowStmtSHOWALL block { mmerror(PARSE_ERROR, ET_ERROR, "SHOW ALL is not implemented"); diff --git a/src/interfaces/ecpg/preproc/parse.pl b/src/interfaces/ecpg/preproc/parse.pl index fe8d3e5178..86d0782d45 100644 --- a/src/interfaces/ecpg/preproc/parse.pl +++ b/src/interfaces/ecpg/preproc/parse.pl @@ -1,7 +1,13 @@ #!/usr/bin/perl # src/interfaces/ecpg/preproc/parse.pl -# parser generator for ecpg version 2 -# call with backend parser as stdin +# parser generator for ecpg +# +# See README.parser for some explanation of what this does. +# +# Command-line options: +# --srcdir: where to find ecpg-provided input files (default ".") +# --parser: the backend gram.y file to read (required, no default) +# --output: where to write preproc.y (required, no default) # # Copyright (c) 2007-2024, PostgreSQL Global Development Group # @@ -148,6 +154,14 @@ dump_buffer('trailer'); close($parserfh); +# Cross-check that we don't have dead or ambiguous addon rules. +foreach (keys %addons) +{ + die "addon rule $_ was never used\n" if $addons{$_}{used} == 0; + die "addon rule $_ was matched multiple times\n" if $addons{$_}{used} > 1; +} + + sub main { line: while (<$parserfh>) @@ -487,7 +501,10 @@ sub include_addon my $rec = $addons{$block}; return 0 unless $rec; - my $rectype = (defined $rec->{type}) ? $rec->{type} : ''; + # Track usage for later cross-check + $rec->{used}++; + + my $rectype = $rec->{type}; if ($rectype eq 'rule') { dump_fields($stmt_mode, $fields, ' { '); @@ -668,10 +685,10 @@ sub dump_line } =top - load addons into cache + load ecpg.addons into %addons hash. The result is something like %addons = { - stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] }, - stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] } + stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ], 'used' => 0 }, + stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ], 'used' => 0 } } =cut @@ -681,17 +698,25 @@ sub preload_addons my $filename = $srcdir . "/ecpg.addons"; open(my $fh, '<', $filename) or die; - # there may be multiple lines starting ECPG: and then multiple lines of code. - # the code need to be add to all prior ECPG records. - my (@needsRules, @code, $record); + # There may be multiple "ECPG:" lines and then multiple lines of code. + # The block of code needs to be added to each of the consecutively- + # preceding "ECPG:" records. + my (@needsRules, @code); - # there may be comments before the first ECPG line, skip them + # there may be comments before the first "ECPG:" line, skip them my $skip = 1; while (<$fh>) { - if (/^ECPG:\s(\S+)\s?(\w+)?/) + if (/^ECPG:\s+(\S+)\s+(\w+)\s*$/) { + # Found an "ECPG:" line, so we're done skipping the header $skip = 0; + # Validate record type and target + die "invalid record type $2 in addon rule for $1\n" + unless ($2 eq 'block' or $2 eq 'addon' or $2 eq 'rule'); + die "duplicate addon rule for $1\n" if (exists $addons{$1}); + # If we had some preceding code lines, attach them to all + # as-yet-unfinished records. if (@code) { for my $x (@needsRules) @@ -701,20 +726,27 @@ sub preload_addons @code = (); @needsRules = (); } - $record = {}; + my $record = {}; $record->{type} = $2; $record->{lines} = []; - if (exists $addons{$1}) { die "Ga! there are dups!\n"; } + $record->{used} = 0; $addons{$1} = $record; push(@needsRules, $record); } + elsif (/^ECPG:/) + { + # Complain if preceding regex failed to match + die "incorrect syntax in ECPG line: $_\n"; + } else { + # Non-ECPG line: add to @code unless we're still skipping next if $skip; push(@code, $_); } } close($fh); + # Deal with final code block if (@code) { for my $x (@needsRules) -- 2.43.5