* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * Neither the name of the PHP_ParserGenerator nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @category PHP * @package PHP_ParserGenerator * @author Gregory Beaver * @copyright 2006 Gregory Beaver * @license http://www.opensource.org/licenses/bsd-license.php New BSD License * @version CVS: $Id: ParserGenerator.php 302382 2010-08-17 06:08:09Z jespino $ * @link http://pear.php.net/package/PHP_ParserGenerator * @since File available since Release 0.1.0 */ /**#@+ * Basic components of the parser generator */ require_once 'PHP/ParserGenerator/Action.php'; require_once 'PHP/ParserGenerator/ActionTable.php'; require_once 'PHP/ParserGenerator/Config.php'; require_once 'PHP/ParserGenerator/Data.php'; require_once 'PHP/ParserGenerator/Symbol.php'; require_once 'PHP/ParserGenerator/Rule.php'; require_once 'PHP/ParserGenerator/Parser.php'; require_once 'PHP/ParserGenerator/PropagationLink.php'; require_once 'PHP/ParserGenerator/State.php'; /**#@-*/ /** * The basic home class for the parser generator * * @category PHP * @package PHP_ParserGenerator * @author Gregory Beaver * @copyright 2006 Gregory Beaver * @license http://www.opensource.org/licenses/bsd-license.php New BSD License * @version Release: @package_version@ * @link http://pear.php.net/package/PHP_ParserGenerator * @since Class available since Release 0.1.0 * @example Lempar.php * @example examples/Parser.y Sample parser file format (PHP_LexerGenerator's parser) * @example examples/Parser.php Sample parser file format PHP code (PHP_LexerGenerator's parser) */ class PHP_ParserGenerator { /** * Set this to 1 to turn on debugging of Lemon's parsing of * grammar files. */ const DEBUG = 0; const MAXRHS = 1000; const OPT_FLAG = 1, OPT_INT = 2, OPT_DBL = 3, OPT_STR = 4, OPT_FFLAG = 5, OPT_FINT = 6, OPT_FDBL = 7, OPT_FSTR = 8; public $azDefine = array(); private static $_options = array( 'b' => array( 'type' => self::OPT_FLAG, 'arg' => 'basisflag', 'message' => 'Print only the basis in report.' ), 'c' => array( 'type' => self::OPT_FLAG, 'arg' => 'compress', 'message' => 'Don\'t compress the action table.' ), 'D' => array( 'type' => self::OPT_FSTR, 'arg' => 'handleDOption', 'message' => 'Define an %ifdef macro.' ), 'g' => array( 'type' => self::OPT_FLAG, 'arg' => 'rpflag', 'message' => 'Print grammar without actions.' ), 'm' => array( 'type' => self::OPT_FLAG, 'arg' => 'mhflag', 'message' => 'Output a makeheaders compatible file' ), 'q' => array( 'type' => self::OPT_FLAG, 'arg' => 'quiet', 'message' => '(Quiet) Don\'t print the report file.' ), 's' => array( 'type' => self::OPT_FLAG, 'arg' => 'statistics', 'message' => 'Print parser stats to standard output.' ), 'x' => array( 'type' => self::OPT_FLAG, 'arg' => 'version', 'message' => 'Print the version number.' ), 'T' => array( 'type' => self::OPT_STR, 'arg' => 'parser_template', 'message' => 'Use different parser template file.' ) ); private $_basisflag = 0; private $_compress = 0; private $_rpflag = 0; private $_mhflag = 0; private $_quiet = 0; private $_statistics = 0; private $_version = 0; private $_size; private $_parser_template = ""; /** * Process a flag command line argument. * * @param int $i * @param array $argv * * @return int */ function handleflags($i, $argv) { if (!isset($argv[1]) || !isset(self::$_options[$argv[$i][1]])) { throw new Exception('Command line syntax error: undefined option "' . $argv[$i] . '"'); } $v = self::$_options[$argv[$i][1]] == '-'; if (self::$_options[$argv[$i][1]]['type'] == self::OPT_FLAG) { $this->{self::$_options[$argv[$i][1]]['arg']} = 1; } elseif (self::$_options[$argv[$i][1]]['type'] == self::OPT_FFLAG) { $this->{self::$_options[$argv[$i][1]]['arg']}($v); } elseif (self::$_options[$argv[$i][1]]['type'] == self::OPT_FSTR) { $this->{self::$_options[$argv[$i][1]]['arg']}(substr($v, 2)); } else { throw new Exception('Command line syntax error: missing argument on switch: "' . $argv[$i] . '"'); } return 0; } /** * Process a command line switch which has an argument. * * @param int $i * @param array $argv * * @return int */ function handleswitch($i, $argv) { $lv = 0; $dv = 0.0; $sv = $end = $cp = ''; $j; // int $errcnt = 0; $cp = strstr($argv[$i], '='); if (!$cp) { throw new Exception('INTERNAL ERROR: handleswitch passed bad argument, no "=" in arg'); } $argv[$i] = substr($argv[$i], 0, strlen($argv[$i]) - strlen($cp)); if (!isset(self::$_options[$argv[$i]])) { throw new Exception('Command line syntax error: undefined option "' . $argv[$i] . $cp . '"'); } $cp = substr($cp, 1); switch (self::$_options[$argv[$i]]['type']) { case self::OPT_FLAG: case self::OPT_FFLAG: throw new Exception('Command line syntax error: option requires an argument "' . $argv[$i] . '=' . $cp . '"'); case self::OPT_DBL: case self::OPT_FDBL: $dv = (double) $cp; break; case self::OPT_INT: case self::OPT_FINT: $lv = (int) $cp; break; case self::OPT_STR: case self::OPT_FSTR: $sv = $cp; break; } switch(self::$_options[$argv[$i]]['type']) { case self::OPT_FLAG: case self::OPT_FFLAG: break; case self::OPT_DBL: $this->{self::$_options[$argv[$i]]['arg']} = $dv; break; case self::OPT_FDBL: $this->{self::$_options[$argv[$i]]['arg']}($dv); break; case self::OPT_INT: $this->{self::$_options[$argv[$i]]['arg']} = $lv; break; case self::OPT_FINT: $this->{self::$_options[$argv[$i]]['arg']}($lv); break; case self::OPT_STR: $this->{self::$_options[$argv[$i]]['arg']} = $sv; break; case self::OPT_FSTR: $this->{self::$_options[$argv[$i]]['arg']}($sv); break; } return 0; } /** * OptInit * * @param array $a Arguments * * @return int */ function OptInit($a) { $errcnt = 0; $argv = $a; try { if (is_array($argv) && count($argv) && self::$_options) { for ($i = 1; $i < count($argv); $i++) { if ($argv[$i][0] == '+' || $argv[$i][0] == '-') { $errcnt += $this->handleflags($i, $argv); } elseif (strstr($argv[$i], '=')) { $errcnt += $this->handleswitch($i, $argv); } } } } catch (Exception $e) { $this->OptPrint(); echo $e->getMessage()."\n"; //exit(1); } return 0; } /** * Return the index of the N-th non-switch argument. Return -1 * if N is out of range. * * @param int $n * @param int $a * * @return int */ private function _argindex($n, $a) { $dashdash = 0; if (!is_array($a) || !count($a)) { return -1; } for ($i=1; $i < count($a); $i++) { if ($dashdash || !($a[$i][0] == '-' || $a[$i][0] == '+' || strchr($a[$i], '='))) { if ($n == 0) { return $i; } $n--; } if ($_SERVER['argv'][$i] == '--') { $dashdash = 1; } } return -1; } /** * Return the value of the non-option argument as indexed by $i * * @param int $i * @param array $a the value of $argv * * @return 0|string */ private function _optArg($i, $a) { if (-1 == ($ind = $this->_argindex($i, $a))) { return 0; } return $a[$ind]; } /** * @param array $a * * @return int number of arguments */ function OptNArgs($a) { $cnt = $dashdash = 0; if (is_array($a) && count($a)) { for ($i = 1; $i < count($a); $i++) { if ($dashdash || !($a[$i][0] == '-' || $a[$i][0] == '+' || strchr($a[$i], '=')) ) { $cnt++; } if ($a[$i] == "--") { $dashdash = 1; } } } return $cnt; } /** * Print out command-line options * * @return void */ function OptPrint() { $max = 0; foreach (self::$_options as $label => $info) { $len = strlen($label) + 1; switch ($info['type']) { case self::OPT_FLAG: case self::OPT_FFLAG: break; case self::OPT_INT: case self::OPT_FINT: $len += 9; /* length of "" */ break; case self::OPT_DBL: case self::OPT_FDBL: $len += 6; /* length of "" */ break; case self::OPT_STR: case self::OPT_FSTR: $len += 8; /* length of "" */ break; } if ($len > $max) { $max = $len; } } foreach (self::$_options as $label => $info) { switch ($info['type']) { case self::OPT_FLAG: case self::OPT_FFLAG: echo " -$label"; echo str_repeat(' ', $max - strlen($label)); echo " $info[message]\n"; break; case self::OPT_INT: case self::OPT_FINT: echo " $label=" . str_repeat(' ', $max - strlen($label) - 9); echo " $info[message]\n"; break; case self::OPT_DBL: case self::OPT_FDBL: echo " $label=" . str_repeat(' ', $max - strlen($label) - 6); echo " $info[message]\n"; break; case self::OPT_STR: case self::OPT_FSTR: echo " $label=" . str_repeat(' ', $max - strlen($label) - 8); echo " $info[message]\n"; break; } } } /** * This routine is called with the argument to each -D command-line option. * Add the macro defined to the azDefine array. * * @param string $z * * @return void */ private function _handleDOption($z) { if ($a = strstr($z, '=')) { $z = substr($a, 1); // strip first = } $this->azDefine[] = $z; } /**************** From the file "main.c" ************************************/ /* ** Main program file for the LEMON parser generator. */ /** * The main program. Parse the command line and do it... * * @return int Number of error and conflicts */ function main() { $lem = new PHP_ParserGenerator_Data; $this->OptInit($_SERVER['argv']); if ($this->_version) { echo "Lemon version 1.0/PHP_ParserGenerator port version @package_version@\n"; //exit(0); return; } if ($this->OptNArgs($_SERVER['argv']) != 1) { echo "Exactly one filename argument is required.\n"; //exit(1); return; } $lem->errorcnt = 0; $lem->parser_template = $this->_parser_template; /* Initialize the machine */ $lem->argv0 = $_SERVER['argv'][0]; $lem->filename = $this->_optArg(0, $_SERVER['argv']); $a = pathinfo($lem->filename); if (isset($a['extension'])) { $ext = '.' . $a['extension']; $lem->filenosuffix = substr($lem->filename, 0, strlen($lem->filename) - strlen($ext)); } else { $lem->filenosuffix = $lem->filename; } $lem->basisflag = $this->_basisflag; $lem->has_fallback = 0; $lem->nconflict = 0; $lem->name = $lem->include_code = $lem->include_classcode = $lem->arg = $lem->tokentype = $lem->start = 0; $lem->vartype = 0; $lem->stacksize = 0; $lem->error = $lem->overflow = $lem->failure = $lem->accept = $lem->tokendest = $lem->tokenprefix = $lem->outname = $lem->extracode = 0; $lem->vardest = 0; $lem->tablesize = 0; PHP_ParserGenerator_Symbol::Symbol_new("$"); $lem->errsym = PHP_ParserGenerator_Symbol::Symbol_new("error"); /* Parse the input file */ $parser = new PHP_ParserGenerator_Parser($this); $parser->Parse($lem); if ($lem->errorcnt) { return; // exit($lem->errorcnt); } if ($lem->rule === 0) { printf("Empty grammar.\n"); return; //exit(1); } /* Count and index the symbols of the grammar */ $lem->nsymbol = PHP_ParserGenerator_Symbol::Symbol_count(); PHP_ParserGenerator_Symbol::Symbol_new("{default}"); $lem->symbols = PHP_ParserGenerator_Symbol::Symbol_arrayof(); for ($i = 0; $i <= $lem->nsymbol; $i++) { $lem->symbols[$i]->index = $i; } usort($lem->symbols, array('PHP_ParserGenerator_Symbol', 'sortSymbols')); for ($i = 0; $i <= $lem->nsymbol; $i++) { $lem->symbols[$i]->index = $i; } // find the first lower-case symbol for ($i = 1; ord($lem->symbols[$i]->name[0]) <= ord('Z'); $i++); $lem->nterminal = $i; /* Generate a reprint of the grammar, if requested on the command line */ if ($this->_rpflag) { $this->Reprint(); } else { /* Initialize the size for all follow and first sets */ $this->SetSize($lem->nterminal); /* Find the precedence for every production rule (that has one) */ $lem->FindRulePrecedences(); /* Compute the lambda-nonterminals and the first-sets for every ** nonterminal */ $lem->FindFirstSets(); /* Compute all LR(0) states. Also record follow-set propagation ** links so that the follow-set can be computed later */ $lem->nstate = 0; $lem->FindStates(); $lem->sorted = PHP_ParserGenerator_State::State_arrayof(); /* Tie up loose ends on the propagation links */ $lem->FindLinks(); /* Compute the follow set of every reducible configuration */ $lem->FindFollowSets(); /* Compute the action tables */ $lem->FindActions(); /* Compress the action tables */ if ($this->_compress===0) { $lem->CompressTables(); } /* Reorder and renumber the states so that states with fewer choices ** occur at the end. */ $lem->ResortStates(); /* Generate a report of the parser generated. (the "y.output" file) */ if (!$this->_quiet) { $lem->ReportOutput(); } /* Generate the source code for the parser */ $lem->ReportTable($this->_mhflag); /* Produce a header file for use by the scanner. (This step is ** omitted if the "-m" option is used because makeheaders will ** generate the file for us.) */ //if (!$this->_mhflag) { // $this->ReportHeader(); //} } if ($this->_statistics) { printf( "Parser statistics: %d terminals, %d nonterminals, %d rules\n", $lem->nterminal, $lem->nsymbol - $lem->nterminal, $lem->nrule ); printf( " %d states, %d parser table entries, %d conflicts\n", $lem->nstate, $lem->tablesize, $lem->nconflict ); } if ($lem->nconflict) { printf("%d parsing conflicts.\n", $lem->nconflict); } //exit($lem->errorcnt + $lem->nconflict); echo $lem->errorcnt + $lem->nconflict; return ($lem->errorcnt + $lem->nconflict); } /** * SetSize * * @param int $n * * @access public * @return void */ function SetSize($n) { $this->_size = $n + 1; } /** * Merge in a merge sort for a linked list * * Side effects: * The "next" pointers for elements in the lists a and b are * changed. * * @param mixed $a A sorted, null-terminated linked list. (May be null). * @param mixed $b A sorted, null-terminated linked list. (May be null). * @param function $cmp A pointer to the comparison function. * @param integer $offset Offset in the structure to the "next" field. * * @return mixed A pointer to the head of a sorted list containing the * elements of both a and b. */ static function merge($a, $b, $cmp, $offset) { if ($a === 0) { $head = $b; } elseif ($b === 0) { $head = $a; } else { if (call_user_func($cmp, $a, $b) < 0) { $ptr = $a; $a = $a->$offset; } else { $ptr = $b; $b = $b->$offset; } $head = $ptr; while ($a && $b) { if (call_user_func($cmp, $a, $b) < 0) { $ptr->$offset = $a; $ptr = $a; $a = $a->$offset; } else { $ptr->$offset = $b; $ptr = $b; $b = $b->$offset; } } if ($a !== 0) { $ptr->$offset = $a; } else { $ptr->$offset = $b; } } return $head; } #define LISTSIZE 30 /** * Side effects: * The "next" pointers for elements in list are changed. * * @param mixed $list Pointer to a singly-linked list of structures. * @param mixed $next Pointer to pointer to the second element of the list. * @param function $cmp A comparison function. * * @return mixed A pointer to the head of a sorted list containing the * elements orginally in list. */ static function msort($list, $next, $cmp) { if ($list === 0) { return $list; } if ($list->$next === 0) { return $list; } $set = array_fill(0, 30, 0); while ($list) { $ep = $list; $list = $list->$next; $ep->$next = 0; for ($i = 0; $i < 29 && $set[$i] !== 0; $i++) { $ep = self::merge($ep, $set[$i], $cmp, $next); $set[$i] = 0; } $set[$i] = $ep; } $ep = 0; for ($i = 0; $i < 30; $i++) { if ($set[$i] !== 0) { $ep = self::merge($ep, $set[$i], $cmp, $next); } } return $ep; } /* Find a good place to break "msg" so that its length is at least "min" ** but no more than "max". Make the point as close to max as possible. */ static function findbreak($msg, $min, $max) { if ($min >= strlen($msg)) { return strlen($msg); } for ($i = $spot = $min; $i <= $max && $i < strlen($msg); $i++) { $c = $msg[$i]; if ($c == '-' && $i < $max - 1) { $spot = $i + 1; } if ($c == ' ') { $spot = $i; } } return $spot; } static function ErrorMsg($filename, $lineno, $format) { /* Prepare a prefix to be prepended to every output line */ if ($lineno > 0) { $prefix = sprintf("%20s:%d: ", $filename, $lineno); } else { $prefix = sprintf("%20s: ", $filename); } $prefixsize = strlen($prefix); $availablewidth = 79 - $prefixsize; /* Generate the error message */ $ap = func_get_args(); array_shift($ap); // $filename array_shift($ap); // $lineno array_shift($ap); // $format $errmsg = vsprintf($format, $ap); $linewidth = strlen($errmsg); /* Remove trailing "\n"s from the error message. */ while ($linewidth > 0 && in_array($errmsg[$linewidth-1], array("\n", "\r"), true) ) { --$linewidth; $errmsg = substr($errmsg, 0, strlen($errmsg) - 1); } /* Print the error message */ $base = 0; $errmsg = str_replace( array("\r", "\n", "\t"), array(' ', ' ', ' '), $errmsg ); while (strlen($errmsg)) { $end = $restart = self::findbreak($errmsg, 0, $availablewidth); if (strlen($errmsg) <= 79 && $end < strlen($errmsg) && $end <= 79) { $end = $restart = strlen($errmsg); } while (isset($errmsg[$restart]) && $errmsg[$restart] == ' ') { $restart++; } printf("%s%.${end}s\n", $prefix, $errmsg); $errmsg = substr($errmsg, $restart); } } /** * Duplicate the input file without comments and without actions * on rules * * @return void */ function Reprint() { printf("// Reprint of input file \"%s\".\n// Symbols:\n", $this->filename); $maxlen = 10; for ($i = 0; $i < $this->nsymbol; $i++) { $sp = $this->symbols[$i]; $len = strlen($sp->name); if ($len > $maxlen ) { $maxlen = $len; } } $ncolumns = 76 / ($maxlen + 5); if ($ncolumns < 1) { $ncolumns = 1; } $skip = ($this->nsymbol + $ncolumns - 1) / $ncolumns; for ($i = 0; $i < $skip; $i++) { print "//"; for ($j = $i; $j < $this->nsymbol; $j += $skip) { $sp = $this->symbols[$j]; //assert( sp->index==j ); printf(" %3d %-${maxlen}.${maxlen}s", $j, $sp->name); } print "\n"; } for ($rp = $this->rule; $rp; $rp = $rp->next) { printf("%s", $rp->lhs->name); /*if ($rp->lhsalias) { printf("(%s)", $rp->lhsalias); }*/ print " ::="; for ($i = 0; $i < $rp->nrhs; $i++) { $sp = $rp->rhs[$i]; printf(" %s", $sp->name); if ($sp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) { for ($j = 1; $j < $sp->nsubsym; $j++) { printf("|%s", $sp->subsym[$j]->name); } } /*if ($rp->rhsalias[$i]) { printf("(%s)", $rp->rhsalias[$i]); }*/ } print "."; if ($rp->precsym) { printf(" [%s]", $rp->precsym->name); } /*if ($rp->code) { print "\n " . $rp->code); }*/ print "\n"; } } }