Source for file Main.php
Documentation is available at Main.php
define('NO_OFFSET', -2147483647 );
* The state of the yy_action table under construction is an instance of
* the following structure
public $nAction = 0; /* Number of used slots in aAction[] */
public $aAction = /* The yy_action[] table under construction */
'lookahead' => -1 , /* Value of the lookahead token */
'action' => -1 /* Action to take on the given lookahead */
public $aLookahead = /* A single new transaction set */
'lookahead' => 0 , /* Value of the lookahead token */
'action' => 0 /* Action to take on the given lookahead */
public $mnLookahead = 0; /* Minimum aLookahead[].lookahead */
public $mnAction = 0; /* Action associated with mnLookahead */
public $mxLookahead = 0; /* Maximum aLookahead[].lookahead */
public $nLookahead = 0; /* Used slots in aLookahead[] */
* Add a new action to the current transaction set
function acttab_action ($lookahead, $action)
if ($this->nLookahead === 0 ) {
$this->aLookahead = array ();
$this->mxLookahead = $lookahead;
$this->mnLookahead = $lookahead;
$this->mnAction = $action;
if ($this->mxLookahead < $lookahead) {
$this->mxLookahead = $lookahead;
if ($this->mnLookahead > $lookahead) {
$this->mnLookahead = $lookahead;
$this->mnAction = $action;
$this->aLookahead[$this->nLookahead] = array (
'lookahead' => $lookahead,
* Add the transaction set built up with prior calls to acttab_action()
* into the current action table. Then reset the transaction set back
* to an empty set in preparation for a new round of acttab_action() calls.
* Return the offset into the action table of the new transaction.
if ($this->nLookahead <= 0 ) {
throw new Exception ('nLookahead is not set up?');
/* Scan the existing action table looking for an offset where we can
** insert the current transaction set. Fall out of the loop when that
** offset is found. In the worst case, we fall out of the loop when
** i reaches $this->nAction, which means we append the new transaction set.
** i is the index in $this->aAction[] where $this->mnLookahead is inserted.
for ($i = 0; $i < $this->nAction + $this->mnLookahead; $i++ ) {
if (!isset ($this->aAction[$i])) {
$this->aAction[$i] = array (
if ($this->aAction[$i]['lookahead'] < 0 ) {
for ($j = 0; $j < $this->nLookahead; $j++ ) {
if (!isset ($this->aLookahead[$j])) {
$this->aLookahead[$j] = array (
$k = $this->aLookahead[$j]['lookahead'] -
if (!isset ($this->aAction[$k])) {
$this->aAction[$k] = array (
if ($this->aAction[$k]['lookahead'] >= 0 ) {
if ($j < $this->nLookahead ) {
for ($j = 0; $j < $this->nAction; $j++ ) {
if (!isset ($this->aAction[$j])) {
$this->aAction[$j] = array (
if ($this->aAction[$j]['lookahead'] == $j +
$this->mnLookahead - $i) {
if ($j == $this->nAction) {
break; /* Fits in empty slots */
} elseif ($this->aAction[$i]['lookahead'] == $this->mnLookahead) {
if ($this->aAction[$i]['action'] != $this->mnAction) {
for ($j = 0; $j < $this->nLookahead; $j++ ) {
$k = $this->aLookahead[$j]['lookahead'] -
if ($k < 0 || $k >= $this->nAction) {
if (!isset ($this->aAction[$k])) {
$this->aAction[$k] = array (
if ($this->aLookahead[$j]['lookahead'] !=
$this->aAction[$k]['lookahead']) {
if ($this->aLookahead[$j]['action'] !=
$this->aAction[$k]['action']) {
if ($j < $this->nLookahead) {
for ($j = 0; $j < $this->nAction; $j++ ) {
if (!isset ($this->aAction[$j])) {
$this->aAction[$j] = array (
if ($this->aAction[$j]['lookahead'] < 0 ) {
if ($this->aAction[$j]['lookahead'] == $j +
$this->mnLookahead - $i) {
if ($n == $this->nLookahead) {
break; /* Same as a prior transaction set */
/* Insert transaction set at index i. */
for ($j = 0; $j < $this->nLookahead; $j++ ) {
if (!isset ($this->aLookahead[$j])) {
$this->aLookahead[$j] = array (
$k = $this->aLookahead[$j]['lookahead'] - $this->mnLookahead + $i;
$this->aAction[$k] = $this->aLookahead[$j];
if ($k >= $this->nAction) {
$this->aLookahead = array ();
/* Return the offset that is added to the lookahead in order to get the
** index into yy_action of the action */
return $i - $this->mnLookahead;
/* Symbols (terminals and nonterminals) of the grammar are stored
public $name; /* Name of the symbol */
public $index; /* Index number for this symbol */
public $type; /* Symbols are all either TERMINALS or NTs */
public $rule; /* Linked list of rules of this (if an NT) */
public $fallback; /* fallback token in case this token doesn't parse */
public $prec; /* Precedence if defined (-1 otherwise) */
public $assoc; /* Associativity if predecence is defined */
public $firstset; /* First-set for all rules of this symbol */
public $lambda; /* True if NT and can generate an empty string */
public $destructor = 0; /* Code which executes whenever this symbol is
** popped from the stack during error processing */
public $destructorln; /* Line number of destructor code */
public $datatype; /* The data type of information held by this
** object. Only used if type==NONTERMINAL */
public $dtnum; /* The data type number. In the parser, the value
** stack is a union. The .yy%d element of this
** union is the correct data type for this object */
/* The following fields are used by MULTITERMINALs only */
public $nsubsym; /* Number of constituent symbols in the MULTI */
* @var array an array of {@link LemonSymbol} objects
public $subsym = array (); /* Array of constituent symbols */
private static $symbol_table = array ();
* Return a pointer to the (terminal or nonterminal) symbol "x".
* Create a new symbol if this is the first time "x" has been seen.
public static function Symbol_new ($x)
if (isset (self ::$symbol_table[$x])) {
return self ::$symbol_table[$x];
$sp->type = preg_match('/[A-Z]/', $x[0 ]) ? self ::TERMINAL : self ::NONTERMINAL;
self ::$symbol_table[$sp->name ] = $sp;
* Return the number of unique symbols
public static function Symbol_count ()
return count (self ::$symbol_table);
public static function Symbol_arrayof ()
return array_values (self ::$symbol_table);
public static function Symbol_find ($x)
if (isset (self ::$symbol_table[$x])) {
return self ::$symbol_table[$x];
* Sort function helper for symbols
* Symbols that begin with upper case letters (terminals or tokens)
* must sort before symbols that begin with lower case letters
* (non-terminals). Other than that, the order does not matter.
* We find experimentally that leaving the symbols in their original
* order (the order they appeared in the grammar file) gives the
* smallest parser tables in SQLite.
public static function sortSymbols ($a, $b)
$i1 = $a->index + 10000000* (ord ($a->name [0 ]) > ord('Z'));
$i2 = $b->index + 10000000* (ord($b->name [0 ]) > ord('Z'));
* Return true if two symbols are the same.
public static function same_symbol (LemonSymbol $a, LemonSymbol $b)
if ($a->type != self ::MULTITERMINAL ) return 0;
if ($b->type != self ::MULTITERMINAL ) return 0;
if ($a->nsubsym != $b->nsubsym ) return 0;
for ($i = 0; $i < $a->nsubsym; $i++ ) {
if ($a->subsym [$i] != $b->subsym [$i]) return 0;
/* Each production rule in the grammar is stored in the following
* @var array an array of {@link LemonSymbol} objects
public $lhs; /* Left-hand side of the rule */
public $lhsalias = array (); /* Alias for the LHS (NULL if none) */
public $ruleline; /* Line number for the rule */
public $nrhs; /* Number of RHS symbols */
* @var array an array of {@link LemonSymbol} objects
public $rhs; /* The RHS symbols */
public $rhsalias = array (); /* An alias for each RHS symbol (NULL if none) */
public $line; /* Line number at which code begins */
public $code; /* The code executed when this rule is reduced */
public $precsym; /* Precedence symbol for this rule */
public $index; /* An index number for this rule */
public $canReduce; /* True if this rule is ever reduced */
public $nextlhs; /* Next rule with the same LHS */
public $next; /* Next rule in the global list */
/* A configuration is a production rule of the grammar together with
** a mark (dot) showing how much of that rule has been processed so far.
** Configurations also contain a follow-set which is a list of terminal
** symbols which are allowed to immediately follow the end of the rule.
** Every configuration is recorded as an instance of the following: */
public $rp; /* The rule upon which the configuration is based */
public $dot; /* The parse point */
public $fws; /* Follow-set for this configuration only */
public $fplp; /* Follow-set forward propagation links */
public $bplp; /* Follow-set backwards propagation links */
public $stp; /* Pointer to state which contains this */
COMPLETE, /* The status is used during followset and
INCOMPLETE /* shift computations
* Index of next LemonConfig object
public $next; /* Next configuration in the state */
* Index of the next basis configuration LemonConfig object
public $bp; /* The next basis configuration */
static public $current; /* Top of list of configs */
static public $currentend; /* Last on list of configs */
static public $basis; /* Top of list of basis configs */
static public $basisend; /* Last on list of basis configs */
static public $x4a = array ();
* Return a pointer to a new configuration
private static function newconfig ()
static function Configshow (LemonConfig $cfp)
$fp = fopen('php://output', 'w');
if ($cfp->dot == $cfp->rp ->nrhs ) {
$buf = sprintf('(%d)', $cfp->rp ->index );
//SetPrint(fp,cfp->fws,$this);
//PlinkPrint(fp,cfp->fplp,"To ");
//PlinkPrint(fp,cfp->bplp,"From");
* Initialized the configuration list builder
static function Configlist_init ()
self ::$currentend = &self ::$current;
self ::$basisend = &self ::$basis;
* Remove all data from the table. Pass each data to the function "f"
* as it is removed. ("f" may be null to avoid this step.)
static function Configtable_reset ($f)
self ::$currentend = &self ::$current;
self ::$basisend = &self ::$basis;
self ::Configtable_clear (0 );
* Remove all data from the table. Pass each data to the function "f"
* as it is removed. ("f" may be null to avoid this step.)
static function Configtable_clear ($f)
if (!count(self ::$x4a)) {
for ($i = 0; $i < count (self ::$x4a); $i++ ) {
call_user_func ($f, self ::$x4a[$i]->data );
* Initialized the configuration list builder
static function Configlist_reset ()
self ::Configtable_clear (0 );
* Add another configuration to the configuration list
* @param LemonRule the rule
* @param int Index into the RHS of the rule where the dot goes
static function Configlist_add ($rp, $dot)
$model = new LemonConfig;
$cfp = self ::Configtable_find ($model);
$cfp = self ::newconfig ();
$cfp->fplp = $cfp->bplp = 0;
self ::$currentend = $cfp;
self ::$currentend = &$cfp->next;
self ::Configtable_insert ($cfp);
* Add a basis configuration to the configuration list
static function Configlist_addbasis ($rp, $dot)
$model = new LemonConfig;
$cfp = self ::Configtable_find ($model);
$cfp = self ::newconfig ();
$cfp->fplp = $cfp->bplp = 0;
self ::$currentend = $cfp;
self ::$currentend = &$cfp->next;
self ::$basisend = &$cfp->bp;
self ::Configtable_insert ($cfp);
/* Compute the closure of the configuration list */
static function Configlist_closure (LemonData $lemp)
for ($cfp = self ::$current; $cfp; $cfp = $cfp->next ) {
if ($sp->type == LemonSymbol ::NONTERMINAL ) {
if ($sp->rule === 0 && $sp !== $lemp->errsym ) {
Lemon ::ErrorMsg ($lemp->filename , $rp->line ,
"Nonterminal \"%s\" has no rules.", $sp->name );
for ($newrp = $sp->rule; $newrp; $newrp = $newrp->nextlhs ) {
$newcfp = self ::Configlist_add ($newrp, 0 );
for ($i = $dot + 1; $i < $rp->nrhs; $i++ ) {
if ($xsp->type == LemonSymbol ::TERMINAL ) {
$newcfp->fws [$xsp->index ] = 1;
} elseif ($xsp->type == LemonSymbol ::MULTITERMINAL ) {
for ($k = 0; $k < $xsp->nsubsym; $k++ ) {
$newcfp->fws [$xsp->subsym [k ]->index ] = 1;
if ($xsp->lambda === false ) {
LemonPlink ::Plink_add ($cfp->fplp , $newcfp);
* Sort the configuration list
static function Configlist_sort ()
//self::Configshow(self::$current);
self ::$current = Lemon ::msort (self ::$current,'next', array ('LemonConfig', 'Configcmp'));
//self::Configshow(self::$current);
* Sort the configuration list
static function Configlist_sortbasis ()
self ::$basis = Lemon ::msort (self ::$current,'bp', array ('LemonConfig', 'Configcmp'));
/** Return a pointer to the head of the configuration list and
static function Configlist_return ()
self ::$currentend = &self ::$current;
/** Return a pointer to the head of the basis list and
static function Configlist_basis ()
self ::$basisend = &self ::$basis;
* Free all elements of the given configuration list
static function Configlist_eat ($cfp)
for(; $cfp; $cfp = $nextcfp){
throw new Exception ('fplp of configuration non-zero?');
throw new Exception ('bplp of configuration non-zero?');
static function Configcmp ($a, $b)
$x = $a->rp ->index - $b->rp ->index;
function ConfigPrint ($fp)
fprintf($fp, "%s ::=", $rp->lhs ->name );
for ($i = 0; $i <= $rp->nrhs; $i++ ) {
if ($sp->type == LemonSymbol ::MULTITERMINAL ) {
for ($j = 1; $j < $sp->nsubsym; $j++ ) {
fprintf($fp, '|%s', $sp->subsym [$j]->name );
private static function confighash (LemonConfig $a)
$h = $h * 571 + $a->rp ->index * 37 + $a->dot;
* Insert a new record into the array. Return TRUE if successful.
* Prior data with the same key is NOT overwritten
static function Configtable_insert (LemonConfig $data)
//typedef struct s_x4node {
// struct config *data; /* The data */
// struct s_x4node *next; /* Next entry with the same hash */
// struct s_x4node **from; /* Previous link */
$h = self ::confighash ($data);
if (isset (self ::$x4a[$h])) {
if (self ::Configcmp ($np->data , $data) == 0 ) {
/* An existing entry with the same key is found. */
/* Fail because overwrite is not allows. */
/* Insert the new data */
$np = array ('data' => $data, 'next' => 0 , 'from' => 0 );
$np = new LemonStateNode;
// as you might notice, "from" always points to itself.
// this bug is in the original lemon parser, but from is never actually accessed
// so it don't much matter now, do it?
if (isset (self ::$x4a[$h])) {
self ::$x4a[$h]->from = $np->next;
$np->next = self ::$x4a[$h];
* Return a pointer to data assigned to the given key. Return NULL
static function Configtable_find (LemonConfig $key)
$h = self ::confighash ($key);
if (!isset (self ::$x4a[$h])) {
if (self ::Configcmp ($np->data , $key) == 0 ) {
return $np ? $np->data : 0;
/* Every shift or reduce operation is stored as one of the following */
const SHIFT = 1 , ACCEPT = 2 , REDUCE = 3 , ERROR = 4 , CONFLICT = 5 , SH_RESOLVED = 6 ,
RD_RESOLVED = 7 , NOT_USED = 8;
public $sp; /* The look-ahead symbol */
CONFLICT, /* Was a reduce, but part of a conflict
SH_RESOLVED, /* Was a shift. Precedence resolved conflict
RD_RESOLVED, /* Was reduce. Precedence resolved conflict
NOT_USED /* Deleted by compression
struct state *stp; /* The new state, if a shift
struct rule *rp; /* The rule, if a reduce
public $x = array ('stp' => null , 'rp' => null );
public $next; /* Next action for this state */
public $collide; /* Next action with the same hash */
/* Compare two actions */
static function actioncmp (LemonAction $ap1, LemonAction $ap2)
$rc = $ap1->sp ->index - $ap2->sp ->index;
$rc = $ap1->type - $ap2->type;
if ($ap1->type != LemonAction ::REDUCE &&
$ap1->type != LemonAction ::RD_RESOLVED &&
$ap1->type != LemonAction ::CONFLICT ) {
throw new Exception ('action has not been processed: ' .
if ($ap2->type != LemonAction ::REDUCE &&
$ap2->type != LemonAction ::RD_RESOLVED &&
$ap2->type != LemonAction ::CONFLICT ) {
throw new Exception ('action has not been processed: ' .
$rc = $ap1->x ->index - $ap2->x ->index;
* create linked list of LemonActions
* @param LemonAction|null
* @param int one of the constants from LemonAction
* @param LemonSymbol|LemonRule
static function Action_add (&$app, $type, LemonSymbol $sp, $arg)
/* Sort parser actions */
static function Action_sort (LemonAction $ap)
$ap = Lemon ::msort ($ap, 'next', array ('LemonAction', 'actioncmp'));
* Print an action to the given file descriptor. Return FALSE if
* nothing was actually printed.
function PrintAction ($fp, $indent)
fprintf($fp, " %${indent}s shift %d" , $this->sp->name , $this->x->statenum );
fprintf($fp, " %${indent}s reduce %d" , $this->sp->name , $this->x->index );
fprintf($fp, " %${indent}s accept" , $this->sp->name );
fprintf($fp, " %${indent}s error" , $this->sp->name );
fprintf($fp, " %${indent}s reduce %-3d ** Parsing conflict **" , $this->sp->name , $this->x->index );
/* A followset propagation link indicates that the contents of one
** configuration followset should be propagated to another whenever
public $cfp; /* The configuration to which linked */
public $next = 0; /* The next propagate link */
* Add a plink to a plink list
static function Plink_add (&$plpp, LemonConfig $cfp)
/* Transfer every plink on the list "from" to the list "to" */
static function Plink_copy (LemonPlink &$to, LemonPlink $from)
* Delete every plink on the list
static function Plink_delete ($plp)
/* Each state of the generated parser's finite state machine
** is encoded as an instance of the following structure. */
public $bp; /* The basis configurations for this state */
public $cfp; /* All configurations in this set */
public $statenum; /* Sequencial number for this state */
public $ap; /* Array of actions for this state */
public $nTknAct, $nNtAct; /* Number of actions on terminals and nonterminals */
public $iTknOfst, $iNtOfst; /* yy_action[] offset for terminals and nonterms */
public $iDflt; /* Default action */
public static $x3a = array ();
public static $states = array ();
* Compare two states for sorting purposes. The smaller state is the
* one with the most non-terminal actions. If they have the same number
* of non-terminal actions, then the smaller is the one with the most
static function stateResortCompare ($a, $b)
$n = $b->nNtAct - $a->nNtAct;
$n = $b->nTknAct - $a->nTknAct;
static function statecmp ($a, $b)
for ($rc = 0; $rc == 0 && $a && $b; $a = $a->bp , $b = $b->bp ) {
$rc = $a->rp ->index - $b->rp ->index;
private static function statehash (LemonConfig $a)
$h = $h * 571 + $a->rp ->index * 37 + $a->dot;
* Return a pointer to data assigned to the given key. Return NULL
* @return null|LemonState
static function State_find (LemonConfig $key)
if (!count(self ::$x3a)) {
$h = self ::statehash ($key);
if (!isset (self ::$x3a[$h])) {
if (self ::statecmp ($np->key , $key) == 0 ) {
return $np ? $np->data : 0;
* Insert a new record into the array. Return TRUE if successful.
* Prior data with the same key is NOT overwritten
* @param LemonState $state
* @param LemonConfig $key
static function State_insert (LemonState $state, LemonConfig $key)
$h = self ::statehash ($key);
if (isset (self ::$x3a[$h])) {
if (self ::statecmp ($np->key , $key) == 0 ) {
/* An existing entry with the same key is found. */
/* Fail because overwrite is not allows. */
/* Insert the new data */
$np = new LemonStateNode;
// the original lemon code sets the from link always to itself
// setting up a faulty double-linked list
// however, the from links are never used, so I suspect a copy/paste
// error from a standard algorithm that was never caught
if (isset (self ::$x3a[$h])) {
self ::$x3a[$h]->from = $np; // lemon has $np->next here
self ::$x3a[$h] = 0; // dummy to avoid notice
$np->next = self ::$x3a[$h];
$np->from = self ::$x3a[$h];
static function State_arrayof ()
/* The state vector for the entire parser generator is recorded as
** follows. (LEMON uses no global variables and makes little use of
** static variables. Fields in the following structure can be thought
** of as begin global variables in the program.) */
* @var array array of {@link LemonState} objects
public $sorted; /* Table of states sorted by state number */
public $rule; /* List of all rules */
public $nstate; /* Number of states */
public $nrule; /* Number of rules */
public $nsymbol; /* Number of terminal and nonterminal symbols */
public $nterminal; /* Number of terminal symbols */
* @var array array of {@link LemonSymbol} objects
public $symbols = array (); /* Sorted array of pointers to symbols */
public $errorcnt; /* Number of errors */
public $errsym; /* The error symbol */
public $name; /* Name of the generated parser */
public $arg; /* Declaration of the 3th argument to parser */
public $tokentype; /* Type of terminal symbols in the parser stack */
public $vartype; /* The default type of non-terminal symbols */
public $start; /* Name of the start symbol for the grammar */
public $stacksize; /* Size of the parser stack */
public $include_code; /* Code to put at the start of the parser file */
public $includeln; /* Line number for start of include code */
public $include_classcode; /* Code to put in the parser class */
public $include_classln; /* Line number for start of include code */
public $declare_classcode; /* any extends/implements code */
public $declare_classln; /* Line number for start of class declaration code */
public $error; /* Code to execute when an error is seen */
public $errorln; /* Line number for start of error code */
public $overflow; /* Code to execute on a stack overflow */
public $overflowln; /* Line number for start of overflow code */
public $failure; /* Code to execute on parser failure */
public $failureln; /* Line number for start of failure code */
public $accept; /* Code to execute when the parser excepts */
public $acceptln; /* Line number for the start of accept code */
public $extracode; /* Code appended to the generated file */
public $extracodeln; /* Line number for the start of the extra code */
public $tokendest; /* Code to execute to destroy token data */
public $tokendestln; /* Line number for token destroyer code */
public $vardest; /* Code for the default non-terminal destructor */
public $vardestln; /* Line number for default non-term destructor code*/
public $filename; /* Name of the input file */
public $filenosuffix; /* Name of the input file without its extension */
public $outname; /* Name of the current output file */
public $tokenprefix; /* A prefix added to token names in the .h file */
public $nconflict; /* Number of parsing conflicts */
public $tablesize; /* Size of the parse tables */
public $basisflag; /* Prpublic $only basis configurations */
public $has_fallback; /* True if any %fallback is seen in the grammer */
public $argv0; /* Name of the program */
/* Find a precedence symbol of every rule in the grammar.
* Those rules which have a precedence symbol coded in the input
* grammar using the "[symbol]" construct will already have the
* rp->precsym field filled. Other rules take as their precedence
* symbol the first RHS symbol with a defined precedence. If there
* are not RHS symbols with a defined precedence, the precedence
* symbol field is left blank.
function FindRulePrecedences ()
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
if ($rp->precsym === 0 ) {
for ($i = 0; $i < $rp->nrhs && $rp->precsym === 0; $i++ ) {
if ($sp->type == LemonSymbol ::MULTITERMINAL ) {
for ($j = 0; $j < $sp->nsubsym; $j++ ) {
if ($sp->subsym [$j]->prec >= 0 ) {
$rp->precsym = $sp->subsym [$j];
} elseif ($sp->prec >= 0 ) {
$rp->precsym = $rp->rhs [$i];
/* Find all nonterminals which will generate the empty string.
* Then go back and compute the first sets of every nonterminal.
* The first set is the set of all terminal symbols which can begin
* a string generated by that nonterminal.
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$this->symbols[$i]->lambda = false;
for($i = $this->nterminal; $i < $this->nsymbol; $i++ ) {
$this->symbols[$i]->firstset = array ();
/* First compute all lambdas */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
for ($i = 0; $i < $rp->nrhs; $i++ ) {
if ($sp->type != LemonSymbol ::TERMINAL || $sp->lambda === false ) {
/* Now compute all first sets */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
for ($i = 0; $i < $rp->nrhs; $i++ ) {
if ($s2->type == LemonSymbol ::TERMINAL ) {
//progress += SetAdd(s1->firstset,s2->index);
$progress += isset ($s1->firstset [$s2->index ]) ? 0 : 1;
$s1->firstset [$s2->index ] = 1;
} elseif ($s2->type == LemonSymbol ::MULTITERMINAL ) {
for ($j = 0; $j < $s2->nsubsym; $j++ ) {
//progress += SetAdd(s1->firstset,s2->subsym[j]->index);
$progress += isset ($s1->firstset [$s2->subsym [$j]->index ]) ? 0 : 1;
$s1->firstset [$s2->subsym [$j]->index ] = 1;
if ($s1->lambda === false ) {
//progress += SetUnion(s1->firstset,s2->firstset);
if ($s2->lambda === false ) {
/** Compute all LR(0) states for the grammar. Links
* are added to between some states so that the LR(1) follow sets
LemonConfig ::Configlist_init ();
/* Find the start symbol */
$sp = LemonSymbol ::Symbol_find ($this->start);
Lemon ::ErrorMsg ($this->filename, 0 ,
"The specified start symbol \"%s\" is not " .
"in a nonterminal of the grammar. \"%s\" will be used as the start " .
"symbol instead.", $this->start, $this->rule->lhs ->name );
/* Make sure the start symbol doesn't occur on the right-hand side of
** any rule. Report an error if it does. (YACC would generate a new
** start symbol in this case.) */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
for ($i = 0; $i < $rp->nrhs; $i++ ) {
if ($rp->rhs [$i]->type == LemonSymbol ::MULTITERMINAL ) {
foreach ($rp->rhs [$i]->subsym as $subsp) {
Lemon ::ErrorMsg ($this->filename, 0 ,
"The start symbol \"%s\" occurs on the " .
"right-hand side of a rule. This will result in a parser which " .
"does not work properly.", $sp->name );
} elseif ($rp->rhs [$i] === $sp) {
Lemon ::ErrorMsg ($this->filename, 0 ,
"The start symbol \"%s\" occurs on the " .
"right-hand side of a rule. This will result in a parser which " .
"does not work properly.", $sp->name );
/* The basis configuration set for the first state
** is all rules which have the start symbol as their
for ($rp = $sp->rule; $rp; $rp = $rp->nextlhs ) {
$newcfp = LemonConfig ::Configlist_addbasis ($rp, 0 );
/* Compute the first state. All other states will be
** computed automatically during the computation of the first one.
** The returned pointer to the first state is not used. */
$newstp = $this->getstate ();
$this->buildshifts ($newstp[0 ]); /* Recursively compute successor states */
private function getstate ()
/* Extract the sorted basis of the new state. The basis was constructed
** by prior calls to "Configlist_addbasis()". */
LemonConfig ::Configlist_sortbasis ();
$bp = LemonConfig ::Configlist_basis ();
/* Get a state with the same basis */
$stp = LemonState ::State_find ($bp);
/* A state with the same basis already exists! Copy all the follow-set
** propagation links from the state under construction into the
** preexisting state, then return a pointer to the preexisting state */
for($x = $bp, $y = $stp->bp; $x && $y; $x = $x->bp , $y = $y->bp ) {
LemonPlink ::Plink_copy ($y->bplp , $x->bplp );
LemonPlink ::Plink_delete ($x->fplp );
$cfp = LemonConfig ::Configlist_return ();
LemonConfig ::Configlist_eat ($cfp);
/* This really is a new state. Construct all the details */
LemonConfig ::Configlist_closure ($this); /* Compute the configuration closure */
LemonConfig ::Configlist_sort (); /* Sort the configuration closure */
$cfp = LemonConfig ::Configlist_return (); /* Get a pointer to the config list */
$stp = new LemonState; /* A new state structure */
$stp->bp = $bp; /* Remember the configuration basis */
$stp->cfp = $cfp; /* Remember the configuration closure */
$stp->statenum = $this->nstate++; /* Every state gets a sequence number */
$stp->ap = 0; /* No actions, yet. */
LemonState ::State_insert ($stp, $stp->bp ); /* Add to the state table */
// this can't work, recursion is too deep, move it into FindStates()
//$this->buildshifts($stp); /* Recursively compute successor states */
* Construct all successor states to the given state. A "successor"
* state is any state which can be reached by a shift action.
* @param LemonState The state from which successors are computed
private function buildshifts (LemonState $stp)
// struct config *cfp; /* For looping thru the config closure of "stp" */
// struct config *bcfp; /* For the inner loop on config closure of "stp" */
// struct config *new; /* */
// struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
// struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
// struct state *newstp; /* A pointer to a successor state */
/* Each configuration becomes complete after it contibutes to a successor
** state. Initially, all configurations are incomplete */
for ($cfp = $stp->cfp; $cfp; $cfp = $cfp->next ) {
$cfp->status = LemonConfig ::INCOMPLETE;
/* Loop through all configurations of the state "stp" */
for ($cfp = $stp->cfp; $cfp; $cfp = $cfp->next ) {
if ($cfp->status == LemonConfig ::COMPLETE ) {
continue; /* Already used by inner loop */
if ($cfp->dot >= $cfp->rp ->nrhs ) {
continue; /* Can't shift this config */
LemonConfig ::Configlist_reset (); /* Reset the new config set */
$sp = $cfp->rp ->rhs [$cfp->dot ]; /* Symbol after the dot */
/* For every configuration in the state "stp" which has the symbol "sp"
** following its dot, add the same configuration to the basis set under
** construction but with the dot shifted one symbol to the right. */
for ($bcfp = $cfp; $bcfp; $bcfp = $bcfp->next ) {
if ($bcfp->status == LemonConfig ::COMPLETE ) {
continue; /* Already used */
if ($bcfp->dot >= $bcfp->rp ->nrhs ) {
continue; /* Can't shift this one */
$bsp = $bcfp->rp ->rhs [$bcfp->dot ]; /* Get symbol after dot */
if (!LemonSymbol ::same_symbol ($bsp, $sp)) {
continue; /* Must be same as for "cfp" */
$bcfp->status = LemonConfig ::COMPLETE; /* Mark this config as used */
$new = LemonConfig ::Configlist_addbasis ($bcfp->rp , $bcfp->dot + 1 );
LemonPlink ::Plink_add ($new->bplp , $bcfp);
/* Get a pointer to the state described by the basis configuration set
** constructed in the preceding loop */
$newstp = $this->getstate ();
$this->buildshifts ($newstp[0 ]); /* Recursively compute successor states */
/* The state "newstp" is reached from the state "stp" by a shift action
if ($sp->type == LemonSymbol ::MULTITERMINAL ) {
for($i = 0; $i < $sp->nsubsym; $i++ ) {
LemonAction ::Action_add ($stp->ap , LemonAction ::SHIFT , $sp->subsym [$i],
LemonAction ::Action_add ($stp->ap , LemonAction ::SHIFT , $sp, $newstp);
* Construct the propagation links
** Add to every propagate link a pointer back to the state to
** which the link is attached. */
foreach ($this->sorted as $info) {
$info->key ->stp = $info->data;
/* Convert all backlinks into forward links. Only the forward
** links are used in the follow-set computation. */
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i];
for ($cfp = $stp->data ->cfp; $cfp; $cfp = $cfp->next ) {
for ($plp = $cfp->bplp; $plp; $plp = $plp->next ) {
LemonPlink ::Plink_add ($other->fplp , $cfp);
* Compute the reduce actions, and resolve conflicts.
/* Add all of the reduce actions
** A reduce action is added for each element of the followset of
** a configuration which has its dot at the extreme right.
for ($i = 0; $i < $this->nstate; $i++ ) { /* Loop over all states */
$stp = $this->sorted[$i]->data;
for ($cfp = $stp->cfp; $cfp; $cfp = $cfp->next ) {
/* Loop over all configurations */
if ($cfp->rp ->nrhs == $cfp->dot ) { /* Is dot at extreme right? */
for ($j = 0; $j < $this->nterminal; $j++ ) {
if (isset ($cfp->fws [$j])) {
/* Add a reduce action to the state "stp" which will reduce by the
** rule "cfp->rp" if the lookahead symbol is "$this->symbols[j]" */
LemonAction ::Action_add ($stp->ap , LemonAction ::REDUCE ,
$this->symbols[$j], $cfp->rp );
/* Add the accepting token */
if ($this->start instanceof LemonSymbol ) {
$sp = LemonSymbol ::Symbol_find ($this->start);
/* Add to the first state (which is always the starting state of the
** finite state machine) an action to ACCEPT if the lookahead is the
LemonAction ::Action_add ($this->sorted[0 ]->data ->ap , LemonAction ::ACCEPT , $sp, 0 );
for ($i = 0; $i < $this->nstate; $i++ ) {
// struct action *ap, *nap;
$stp = $this->sorted[$i]->data;
throw new Exception ('state has no actions associated');
$stp->ap = LemonAction ::Action_sort ($stp->ap );
for ($ap = $stp->ap; $ap !== 0 && $ap->next !== 0; $ap = $ap->next ) {
for ($nap = $ap->next; $nap !== 0 && $nap->sp === $ap->sp ; $nap = $nap->next ) {
/* The two actions "ap" and "nap" have the same lookahead.
** Figure out which one should be used */
$this->nconflict += $this->resolve_conflict ($ap, $nap, $this->errsym);
/* Report an error for each rule that can never be reduced. */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
for ($i = 0; $i < $this->nstate; $i++ ) {
for ($ap = $this->sorted[$i]->data ->ap; $ap !== 0; $ap = $ap->next ) {
if ($ap->type == LemonAction ::REDUCE ) {
$ap->x ->canReduce = true;
for ($rp = $this->rule; $rp !== 0; $rp = $rp->next ) {
Lemon ::ErrorMsg ($this->filename, $rp->ruleline , "This rule can not be reduced.\n");
/** Resolve a conflict between the two given actions. If the
* conflict can't be resolve, return non-zero.
* To resolve a conflict, first look to see if either action
* is on an error rule. In that case, take the action which
* is not associated with the error rule. If neither or both
* actions are associated with an error rule, then try to
* use precedence to resolve the conflict.
* If either action is a SHIFT, then it must be apx. This
* function won't work if apx->type==REDUCE and apy->type==SHIFT.
* @param LemonSymbol|nullThe error symbol (if defined. NULL otherwise)
function resolve_conflict ($apx, $apy, $errsym)
if ($apx->sp !== $apy->sp ) {
throw new Exception ('no conflict but resolve_conflict called');
if ($apx->type == LemonAction ::SHIFT && $apy->type == LemonAction ::REDUCE ) {
if ($spy === 0 || $spx->prec < 0 || $spy->prec < 0 ) {
/* Not enough precedence information. */
$apy->type = LemonAction ::CONFLICT;
} elseif ($spx->prec > $spy->prec ) { /* Lower precedence wins */
$apy->type = LemonAction ::RD_RESOLVED;
} elseif ($spx->prec < $spy->prec ) {
$apx->type = LemonAction ::SH_RESOLVED;
} elseif ($spx->prec === $spy->prec && $spx->assoc == LemonSymbol ::RIGHT ) {
$apy->type = LemonAction ::RD_RESOLVED; /* associativity */
} elseif ($spx->prec === $spy->prec && $spx->assoc == LemonSymbol ::LEFT ) {
$apx->type = LemonAction ::SH_RESOLVED;
if ($spx->prec !== $spy->prec || $spx->assoc !== LemonSymbol ::NONE ) {
throw new Exception ('$spx->prec !== $spy->prec || $spx->assoc !== LemonSymbol::NONE');
$apy->type = LemonAction ::CONFLICT;
} elseif ($apx->type == LemonAction ::REDUCE && $apy->type == LemonAction ::REDUCE ) {
if ($spx === 0 || $spy === 0 || $spx->prec < 0 ||
$spy->prec < 0 || $spx->prec === $spy->prec ) {
$apy->type = LemonAction ::CONFLICT;
} elseif ($spx->prec > $spy->prec ) {
$apy->type = LemonAction ::RD_RESOLVED;
} elseif ($spx->prec < $spy->prec ) {
$apx->type = LemonAction ::RD_RESOLVED;
if ($apx->type!== LemonAction ::SH_RESOLVED &&
$apx->type!== LemonAction ::RD_RESOLVED &&
$apx->type!== LemonAction ::CONFLICT &&
$apy->type!== LemonAction ::SH_RESOLVED &&
$apy->type!== LemonAction ::RD_RESOLVED &&
$apy->type!== LemonAction ::CONFLICT ) {
throw new Exception ('$apx->type!== LemonAction::SH_RESOLVED &&
$apx->type!== LemonAction::RD_RESOLVED &&
$apx->type!== LemonAction::CONFLICT &&
$apy->type!== LemonAction::SH_RESOLVED &&
$apy->type!== LemonAction::RD_RESOLVED &&
$apy->type!== LemonAction::CONFLICT');
/* The REDUCE/SHIFT case cannot happen because SHIFTs come before
** REDUCEs on the list. If we reach this point it must be because
** the parser conflict had already been resolved. */
* Reduce the size of the action tables, if possible, by making use
* In this version, we take the most frequent REDUCE action and make
function CompressTables ()
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i]->data;
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->type != LemonAction ::REDUCE ) {
for ($ap2 = $ap->next; $ap2; $ap2 = $ap2->next ) {
if ($ap2->type != LemonAction ::REDUCE ) {
/* Do not make a default if the number of rules to default
/* Combine matching REDUCE actions into a single default */
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->type == LemonAction ::REDUCE && $ap->x === $rbest) {
throw new Exception ('$ap is not an object');
$ap->sp = LemonSymbol ::Symbol_new ("{default}");
for ($ap = $ap->next; $ap; $ap = $ap->next ) {
if ($ap->type == LemonAction ::REDUCE && $ap->x === $rbest) {
$ap->type = LemonAction ::NOT_USED;
$stp->ap = LemonAction ::Action_sort ($stp->ap );
* Renumber and resort states so that states with fewer choices
* occur at the end. Except, keep state 0 as the first state.
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i]->data;
$stp->nTknAct = $stp->nNtAct = 0;
$stp->iDflt = $this->nstate + $this->nrule;
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($this->compute_action ($ap) >= 0 ) {
if ($ap->sp ->index < $this->nterminal) {
} elseif ($ap->sp ->index < $this->nsymbol) {
$stp->iDflt = $this->compute_action ($ap);
$this->sorted[$i] = $stp;
$save = $this->sorted[0 ];
usort($this->sorted, array ('LemonState', 'stateResortCompare'));
for($i = 0; $i < $this->nstate; $i++ ) {
$this->sorted[$i]->statenum = $i;
* Given an action, compute the integer value for that action
* which is to be put in the action table of the generated machine.
* Return negative if no action should be generated.
function compute_action ($ap)
case LemonAction ::REDUCE:
$act = $ap->x ->index + $this->nstate;
$act = $this->nstate + $this->nrule;
case LemonAction ::ACCEPT:
$act = $this->nstate + $this->nrule + 1;
* Generate the "y.output" log file
$fp = fopen($this->filenosuffix . ".out", "wb");
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i];
fprintf($fp, "State %d:\n", $stp->statenum );
if ($cfp->dot == $cfp->rp ->nrhs ) {
$buf = sprintf('(%d)', $cfp->rp ->index );
//SetPrint(fp,cfp->fws,$this);
//PlinkPrint(fp,cfp->fplp,"To ");
//PlinkPrint(fp,cfp->bplp,"From");
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->PrintAction ($fp, 30 )) {
/* The next function finds the template file and opens it, returning
** a pointer to the opened file. */
private function tplt_open ()
$templatename = dirname(__FILE__ ) . "Lempar.php";
$buf = $this->filenosuffix . '.lt';
$tpltname = $templatename;
} elseif ($fp = @fopen($templatename, 'rb', true )) {
echo "Can't find the parser driver template file \"%s\".\n",
$in = @fopen($tpltname,"rb");
printf("Can't open the template file \"%s\".\n", $tpltname);
* The next cluster of routines are for reading the template file
* and writing the results to the generated parser
* The first function transfers data from "in" to "out" until
* a line is seen which begins with "%%". The line number is
* if name!=0, then any word that begin with "Parse" is changed to
* begin with *name instead.
private function tplt_xfer ($name, $in, $out, &$lineno)
while (($line = fgets($in, 1024 )) && ($line[0 ] != '%' || $line[1 ] != '%')) {
for ($i = 0; $i < strlen($line); $i++ ) {
if ($line[$i] == 'P' && substr($line, $i, 5 ) == "Parse"
&& ($i === 0 || preg_match('/[^a-zA-Z]/', $line[$i - 1 ]))) {
* Print a #line directive line to the output file.
private function tplt_linedir ($out, $lineno, $filename)
fwrite($out, '#line ' . $lineno . ' "' . $filename . "\"\n");
* Print a string to the file and keep the linenumber up to date
private function tplt_print ($out, $str, $strln, &$lineno)
$this->tplt_linedir ($out, $strln, $this->filename);
$this->tplt_linedir ($out, $lineno + 2 , $this->outname);
* Compute all followsets.
* A followset is the set of all symbols which can come immediately
function FindFollowSets ()
for ($i = 0; $i < $this->nstate; $i++ ) {
for ($cfp = $this->sorted[$i]->data ->cfp; $cfp; $cfp = $cfp->next ) {
$cfp->status = LemonConfig ::INCOMPLETE;
for ($i = 0; $i < $this->nstate; $i++ ) {
for ($cfp = $this->sorted[$i]->data ->cfp; $cfp; $cfp = $cfp->next ) {
if ($cfp->status == LemonConfig ::COMPLETE ) {
for ($plp = $cfp->fplp; $plp; $plp = $plp->next ) {
$plp->cfp ->status = LemonConfig ::INCOMPLETE;
$cfp->status = LemonConfig ::COMPLETE;
* Generate C source code for the parser
* @param int Output in makeheaders format if true
function ReportTable ($mhflag)
// struct acttab *pActtab;
// int mnTknOfst, mxTknOfst;
// int mnNtOfst, mxNtOfst;
$in = $this->tplt_open ();
$out = fopen($this->filenosuffix . ".php", "wb");
$this->outname = $this->filenosuffix . ".php";
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the include code, if any */
$this->tplt_print ($out, $this->include_code, $this->includeln, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the class declaration code */
$this->tplt_print ($out, $this->declare_classcode, $this->declare_classln,
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the internal parser class include code, if any */
$this->tplt_print ($out, $this->include_classcode, $this->include_classln,
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate #defines for all tokens */
//fprintf($out, "#if INTERFACE\n");
if ($this->tokenprefix) {
$prefix = $this->tokenprefix;
for ($i = 1; $i < $this->nterminal; $i++ ) {
fprintf($out, " const %s%-30s = %2d;\n", $prefix, $this->symbols[$i]->name , $i);
//fwrite($out, "#endif\n");
fwrite($out, " const YY_NO_ACTION = " .
($this->nstate + $this->nrule + 2 ) . ";\n");
fwrite($out, " const YY_ACCEPT_ACTION = " .
($this->nstate + $this->nrule + 1 ) . ";\n");
fwrite($out, " const YY_ERROR_ACTION = " .
($this->nstate + $this->nrule) . ";\n");
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the action table and its associates:
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
/* Compute the actions on all states and count them up */
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i];
$ax[$i * 2 ]['stp'] = $stp;
$ax[$i * 2 ]['isTkn'] = 1;
$ax[$i * 2 ]['nAction'] = $stp->nTknAct;
$ax[$i * 2 + 1 ] = array ();
$ax[$i * 2 + 1 ]['stp'] = $stp;
$ax[$i * 2 + 1 ]['isTkn'] = 0;
$ax[$i * 2 + 1 ]['nAction'] = $stp->nNtAct;
$mxTknOfst = $mnTknOfst = 0;
$mxNtOfst = $mnNtOfst = 0;
/* Compute the action table. In order to try to keep the size of the
** action table to a minimum, the heuristic of placing the largest action
usort($ax, array ('LemonData', 'axset_compare'));
$pActtab = new LemonActtab;
for ($i = 0; $i < $this->nstate * 2 && $ax[$i]['nAction'] > 0; $i++ ) {
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->sp ->index >= $this->nterminal) {
$action = $this->compute_action ($ap);
$pActtab->acttab_action ($ap->sp ->index , $action);
$stp->iTknOfst = $pActtab->acttab_insert ();
if ($stp->iTknOfst < $mnTknOfst) {
$mnTknOfst = $stp->iTknOfst;
if ($stp->iTknOfst > $mxTknOfst) {
$mxTknOfst = $stp->iTknOfst;
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->sp ->index < $this->nterminal) {
if ($ap->sp ->index == $this->nsymbol) {
$action = $this->compute_action ($ap);
$pActtab->acttab_action ($ap->sp ->index , $action);
$stp->iNtOfst = $pActtab->acttab_insert ();
if ($stp->iNtOfst < $mnNtOfst) {
$mnNtOfst = $stp->iNtOfst;
if ($stp->iNtOfst > $mxNtOfst) {
$mxNtOfst = $stp->iNtOfst;
/* Output the yy_action table */
fprintf($out, " const YY_SZ_ACTTAB = %d;\n", $pActtab->nAction );
fwrite($out, "static public \$yy_action = array(\n");
for($i = $j = 0; $i < $n; $i++ ) {
$action = $pActtab->aAction [$i]['action'];
$action = $this->nsymbol + $this->nrule + 2;
if ($j == 9 || $i == $n - 1 ) {
fwrite($out, " );\n"); $lineno++;
/* Output the yy_lookahead table */
fwrite($out, " static public \$yy_lookahead = array(\n");
for ($i = $j = 0; $i < $n; $i++ ) {
$la = $pActtab->aAction [$i]['lookahead'];
if ($j == 9 || $i == $n - 1 ) {
/* Output the yy_shift_ofst[] table */
fprintf($out, " const YY_SHIFT_USE_DFLT = %d;\n", $mnTknOfst - 1 );
while ($n > 0 && $this->sorted[$n - 1 ]->iTknOfst == NO_OFFSET) {
fprintf($out, " const YY_SHIFT_MAX = %d;\n", $n - 1 );
fwrite($out, " static public \$yy_shift_ofst = array(\n");
for ($i = $j = 0; $i < $n; $i++ ) {
$stp = $this->sorted[$i];
if ($j == 9 || $i == $n - 1 ) {
/* Output the yy_reduce_ofst[] table */
fprintf($out, " const YY_REDUCE_USE_DFLT = %d;\n", $mnNtOfst - 1 );
while ($n > 0 && $this->sorted[$n - 1 ]->iNtOfst == NO_OFFSET) {
fprintf($out, " const YY_REDUCE_MAX = %d;\n", $n - 1 );
fwrite($out, " static public \$yy_reduce_ofst = array(\n");
for ($i = $j = 0; $i < $n; $i++ ) {
$stp = $this->sorted[$i];
if ($j == 9 || $i == $n - 1 ) {
/* Output the expected tokens table */
fwrite($out, " static public \$yyExpectedTokens = array(\n");
for ($i = 0; $i < $this->nstate; $i++ ) {
$stp = $this->sorted[$i];
fwrite($out, " /* $i */ array(" );
for ($ap = $stp->ap; $ap; $ap = $ap->next ) {
if ($ap->sp ->index < $this->nterminal) {
if ($ap->type == LemonAction ::SHIFT ||
$ap->type == LemonAction ::REDUCE ) {
fwrite($out, $ap->sp ->index . ', ');
/* Output the default action table */
fwrite($out, " static public \$yy_default = array(\n");
for ($i = $j = 0; $i < $n; $i++ ) {
$stp = $this->sorted[$i];
fprintf($out, " %4d,", $stp->iDflt );
if ($j == 9 || $i == $n - 1 ) {
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the defines */
fprintf($out, " const YYNOCODE = %d;\n", $this->nsymbol + 1 );
if($this->stacksize <= 0 ) {
Lemon ::ErrorMsg ($this->filename, 0 ,
"Illegal stack size: [%s]. The stack size should be an integer constant.",
$this->stacksize = "100";
fprintf($out, " const YYSTACKDEPTH = %s;\n", $this->stacksize);
fwrite($out," const YYSTACKDEPTH = 100;\n");
$name = $this->name ? $this->name : "Parse";
if (isset ($this->arg) && strlen($this->arg)) {
$this->arg = str_replace('$', '', $this->arg); // remove $ from $var
fprintf($out, " const %sARG_DECL = '%s';\n", $name, $this->arg);
fprintf($out, " const %sARG_DECL = false;\n", $name);
fprintf($out, " const YYNSTATE = %d;\n", $this->nstate);
fprintf($out, " const YYNRULE = %d;\n", $this->nrule);
fprintf($out, " const YYERRORSYMBOL = %d;\n", $this->errsym->index );
fprintf($out, " const YYERRSYMDT = 'yy%d';\n", $this->errsym->dtnum );
if ($this->has_fallback) {
fwrite($out, " const YYFALLBACK = 1;\n");
fwrite($out, " const YYFALLBACK = 0;\n");
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the table of fallback tokens.
if ($this->has_fallback) {
for ($i = 0; $i < $this->nterminal; $i++ ) {
if ($p->fallback === 0 ) {
fprintf($out, " 0, /* %10s => nothing */\n", $p->name );
fprintf($out, " %3d, /* %10s => %s */\n",
$p->fallback ->index , $p->name , $p->fallback ->name );
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate a table containing the symbolic name of every symbol
for ($i = 0; $i < $this->nsymbol; $i++ ) {
fprintf($out," %-15s", "'" . $this->symbols[$i]->name . "',");
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate a table containing a text string that describes every
** rule in the rule set of the grammer. This information is used
** when tracing REDUCE actions.
for ($i = 0 , $rp = $this->rule; $rp; $rp = $rp->next , $i++ ) {
throw new Exception ('rp->index != i and should be');
fprintf($out, " /* %3d */ \"%s ::=", $i, $rp->lhs ->name );
for ($j = 0; $j < $rp->nrhs; $j++ ) {
if ($sp->type == lemonSymbol ::MULTITERMINAL ) {
for($k = 1; $k < $sp->nsubsym; $k++ ) {
fwrite($out, '|' . $sp->subsym [$k]->name );
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes every time a symbol is popped from
** the stack while processing errors or while destroying the parser.
** (In other words, generate the %destructor actions)
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$sp = $this->symbols[$i];
if ($sp === 0 || $sp->type != LemonSymbol ::TERMINAL ) {
fprintf($out, " case %d:\n", $sp->index );
for ($i = 0; $i < $this->nsymbol &&
$this->symbols[$i]->type != LemonSymbol ::TERMINAL; $i++ );
if ($i < $this->nsymbol) {
$this->emit_destructor_code ($out, $this->symbols[$i], $lineno);
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$sp = $this->symbols[$i];
if ($sp === 0 || $sp->type == LemonSymbol ::TERMINAL ||
$sp->index <= 0 || $sp->destructor != 0 ) {
fprintf($out, " case %d:\n", $sp->index );
$this->emit_destructor_code ($out, $dflt_sp, $lineno);
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$sp = $this->symbols[$i];
if ($sp === 0 || $sp->type == LemonSymbol ::TERMINAL ||
fprintf($out, " case %d:\n", $sp->index );
/* Combine duplicate destructors into a single case */
for ($j = $i + 1; $j < $this->nsymbol; $j++ ) {
$sp2 = $this->symbols[$j];
if ($sp2 && $sp2->type != LemonSymbol ::TERMINAL && $sp2->destructor
&& $sp2->dtnum == $sp->dtnum
&& $sp->destructor == $sp2->destructor ) {
fprintf($out, " case %d:\n", $sp2->index );
$this->emit_destructor_code ($out, $this->symbols[$i], $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes whenever the parser stack overflows */
$this->tplt_print ($out, $this->overflow, $this->overflowln, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate the table of rule information
** Note: This code depends on the fact that rules are number
** sequentually beginning with 0.
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
fprintf($out, " array( 'lhs' => %d, 'rhs' => %d ),\n",
$rp->lhs ->index , $rp->nrhs );
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes during each REDUCE action */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
$this->translate_code ($rp);
/* Generate the method map for each REDUCE action */
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
fwrite($out, ' ' . $rp->index . ' => ' . $rp->index . ",\n");
for ($rp2 = $rp->next; $rp2; $rp2 = $rp2->next ) {
if ($rp2->code === $rp->code ) {
fwrite($out, ' ' . $rp2->index . ' => ' .
$this->tplt_xfer ($this->name, $in, $out, $lineno);
for ($rp = $this->rule; $rp; $rp = $rp->next ) {
$this->emit_code ($out, $rp, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes if a parse fails */
$this->tplt_print ($out, $this->failure, $this->failureln, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes when a syntax error occurs */
$this->tplt_print ($out, $this->error, $this->errorln, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Generate code which executes when the parser accepts its input */
$this->tplt_print ($out, $this->accept, $this->acceptln, $lineno);
$this->tplt_xfer ($this->name, $in, $out, $lineno);
/* Append any addition code the user desires */
$this->tplt_print ($out, $this->extracode, $this->extracodeln, $lineno);
* Generate code which executes when the rule "rp" is reduced. Write
* the code to "out". Make sure lineno stays up-to-date.
function emit_code ($out, LemonRule $rp, &$lineno)
/* Generate code to do the reduce action */
$this->tplt_linedir ($out, $rp->line , $this->filename);
fwrite($out, " function yy_r$rp->index(){" . $rp->code );
$this->tplt_linedir ($out, $lineno, $this->outname);
} /* End if( rp->code ) */
* Append text to a dynamically allocated string. If zText is 0 then
* reset the string to be empty again. Always return the complete text
* of the string (which is overwritten with each call).
* n bytes of zText are stored. If n==0 then all of zText is stored.
* If n==-1, then the previous character is overwritten.
function append_str ($zText, $n)
throw new Exception ('z is zero-length');
* zCode is a string that is the action associated with a rule. Expand
* the symbols in this string so that the refer to elements of the parser
function translate_code (LemonRule $rp)
$lhsused = 0; /* True if the LHS element has been used */
$used = array (); /* True for each RHS element which is used */
for($i = 0; $i < $rp->nrhs; $i++ ) {
$this->append_str ('', 0 );
for ($i = 0; $i < strlen($rp->code ); $i++ ) {
($i === 0 || (!preg_match('/[A-Za-z0-9_]/', $rp->code [$i - 1 ])))) {
// previous line is in essence a temporary substr, so
$test = substr($rp->code , $i);
if ($rp->lhsalias && $tempcp == $rp->lhsalias ) {
$this->append_str ("\$this->_retvalue", 0 );
for ($ii = 0; $ii < $rp->nrhs; $ii++ ) {
if ($rp->rhsalias [$ii] && $tempcp == $rp->rhsalias [$ii]) {
if ($ii !== 0 && $rp->code [$ii - 1 ] == '@') {
/* If the argument is of the form @X then substitute
** the token number of X, not the value of X */
$this->append_str ("\$this->yystack[\$this->yyidx + " .
($ii - $rp->nrhs + 1 ) . "]->major", -1 );
if ($sp->type == LemonSymbol ::MULTITERMINAL ) {
$dtnum = $sp->subsym [0 ]->dtnum;
$this->append_str ("\$this->yystack[\$this->yyidx + " .
($ii - $rp->nrhs + 1 ) . "]->minor", 0 );
$this->append_str ($cp, 1 );
/* Check to make sure the LHS has been used */
if ($rp->lhsalias && !$lhsused) {
Lemon ::ErrorMsg ($this->filename , $rp->ruleline ,
"Label \"%s\" for \"%s(%s)\" is never used.",
$rp->lhsalias , $rp->lhs ->name , $rp->lhsalias );
/* Generate destructor code for RHS symbols which are not used in the
for($i = 0; $i < $rp->nrhs; $i++ ) {
if ($rp->rhsalias [$i] && !isset ($used[$i])) {
Lemon ::ErrorMsg ($this->filename , $rp->ruleline ,
"Label %s for \"%s(%s)\" is never used.",
$rp->rhsalias [$i], $rp->rhs [$i]->name , $rp->rhsalias [$i]);
} elseif ($rp->rhsalias [$i] == 0 ) {
if ($rp->rhs [$i]->type == LemonSymbol ::TERMINAL ) {
$hasdestructor = $this->tokendest != 0;
$hasdestructor = $this->vardest !== 0 || $rp->rhs [$i]->destructor !== 0;
$this->append_str (" \$this->yy_destructor(" .
($rp->rhs [$i]->index ) . ", \$this->yystack[\$this->yyidx + " .
($i - $rp->nrhs + 1 ) . "]->minor);\n", 0 );
/* No destructor defined for this term */
$cp = $this->append_str ('', 0 );
* The following routine emits code for the destructor for the
function emit_destructor_code ($out, LemonSymbol $sp, &$lineno)
if ($sp->type == LemonSymbol ::TERMINAL ) {
$this->tplt_linedir ($out, $this->tokendestln , $this->filename );
} elseif ($sp->destructor ) {
$this->tplt_linedir ($out, $sp->destructorln , $this->filename );
} elseif ($this->vardest ) {
$this->tplt_linedir ($out, $this->vardestln , $this->filename );
throw new Exception ('emit_destructor'); /* Cannot happen */
for ($i = 0; $i < strlen($cp); $i++ ) {
if ($cp[$i]== '$' && $cp[$i + 1 ]== '$' ) {
fprintf($out, "(yypminor->yy%d)", $sp->dtnum );
$this->tplt_linedir ($out, $lineno, $this->outname );
* Compare to axset structures for sorting purposes
static function axset_compare ($a, $b)
return $b['nAction'] - $a['nAction'];
const OPT_FLAG = 1 , OPT_INT = 2 , OPT_DBL = 3 , OPT_STR = 4 ,
OPT_FFLAG = 5 , OPT_FINT = 6 , OPT_FDBL = 7 , OPT_FSTR = 8;
public $azDefine = array ();
private static $options = array (
'type' => self ::OPT_FLAG ,
'message' => 'Print only the basis in report.'
'type' => self ::OPT_FLAG ,
'message' => 'Don\'t compress the action table.'
'type' => self ::OPT_FSTR ,
'arg' => 'handle_D_option',
'message' => 'Define an %ifdef macro.'
'type' => self ::OPT_FLAG ,
'message' => 'Print grammar without actions.'
'type' => self ::OPT_FLAG ,
'message' => 'Output a makeheaders compatible file'
'type' => self ::OPT_FLAG ,
'message' => '(Quiet) Don\'t print the report file.'
'type' => self ::OPT_FLAG ,
'message' => 'Print parser stats to standard output.'
'type' => self ::OPT_FLAG ,
'message' => 'Print the version number.'
* Process a flag command line argument.
function handleflags ($i, $argv)
if (!isset ($argv[1 ]) || !isset (self ::$options[$argv[$i][1 ]])) {
throw new Exception ('Command line syntax error: undefined option "' . $argv[$i] . '"');
$v = self ::$options[$argv[$i][1 ]] == '-';
if (self ::$options[$argv[$i][1 ]]['type'] == self ::OPT_FLAG ) {
$this->{self ::$options[$argv[$i][1 ]]['arg']} = (int) $v;
} elseif (self ::$options[$argv[$i][1 ]]['type'] == self ::OPT_FFLAG ) {
$this->{self ::$options[$argv[$i][1 ]]['arg']}($v);
} elseif (self ::$options[$argv[$i][1 ]]['type'] == self ::OPT_FSTR ) {
$this->{self ::$options[$argv[$i][1 ]]['arg']}(substr ($v, 2 ));
throw new Exception ('Command line syntax error: missing argument on switch: "' . $argv[$i] . '"');
* Process a command line switch which has an argument.
function handleswitch ($i, $argv)
throw new Exception ('INTERNAL ERROR: handleswitch passed bad argument, no "=" in arg');
if (!isset (self ::$options[$argv[$i]])) {
throw new Exception ('Command line syntax error: undefined option "' . $argv[$i] .
switch (self ::$options[$argv[$i]]['type']) {
throw new Exception ('Command line syntax error: option requires an argument "' .
$argv[$i] . '=' . $cp . '"');
switch(self ::$options[$argv[$i]]['type']) {
$this->$ {self ::$options[$argv[$i]]['arg']} = $dv;
$this->$ {self ::$options[$argv[$i]]['arg']}($dv);
$this->$ {self ::$options[$argv[$i]]['arg']} = $lv;
$this->$ {self ::$options[$argv[$i]]['arg']}($lv);
$this->$ {self ::$options[$argv[$i]]['arg']} = $sv;
$this->$ {self ::$options[$argv[$i]]['arg']}($sv);
* @param array valid options
for($i = 1; $i < count ($argv); $i++ ) {
if ($argv[$i][0 ] == '+' || $argv[$i][0 ] == '-') {
$errcnt += $this->handleflags ($i, $argv);
} elseif (strstr($argv[$i],'=')) {
$errcnt += $this->handleswitch (i , $argv);
* Return the index of the N-th non-switch argument. Return -1
private function argindex ($n, $a)
for ($i=1; $i < count($a); $i++ ) {
if ($dashdash || !($a[$i][0 ] == '-' || $a[$i][0 ] == '+' ||
if ($_SERVER['argv'][$i] == '--') {
* Return the value of the non-option argument as indexed by $i
* @param array the value of $argv
private function OptArg ($i, $a)
if (-1 == ($ind = $this->argindex ($i, $a))) {
* @return int number of arguments
for($i = 1; $i < count($a); $i++ ) {
if ($dashdash || !($a[$i][0 ] == '-' || $a[$i][0 ] == '+' ||
* Print out command-line options
foreach (self ::$options as $label => $info) {
$len = strlen ($label) + 1;
$len += 9; /* length of "<integer>" */
$len += 6; /* length of "<real>" */
$len += 8; /* length of "<string>" */
foreach (self ::$options as $label => $info) {
printf(" -%-*s %s\n", $max, $label, $info['message']);
printf(" %s=<integer>%*s %s\n", $label, $max - strlen($label) - 9 ,
printf(" %s=<real>%*s %s\n", $label, $max - strlen($label) - 6 ,
printf(" %s=<string>%*s %s\n", $label, $max - strlen($label) - 8 ,
* This routine is called with the argument to each -D command-line option.
* Add the macro defined to the azDefine array.
private function handle_D_option ($z)
$z = substr($a, 1 ); // strip first =
/**************** From the file "main.c" ************************************/
** Main program file for the LEMON parser generator.
/* The main program. Parse the command line and do it... */
$this->OptInit ($_SERVER['argv']);
echo "Lemon version 1.0/PHP port version 1.0\n";
if ($this->OptNArgs ($_SERVER['argv']) != 1 ) {
echo "Exactly one filename argument is required.\n";
/* Initialize the machine */
$lem->argv0 = $_SERVER['argv'][0 ];
$lem->filename = $this->OptArg (0 , $_SERVER['argv']);
if (isset ($a['extension'])) {
$ext = '.' . $a['extension'];
$lem->filenosuffix = $lem->filename;
$lem->basisflag = $this->basisflag;
$lem->name = $lem->include_code = $lem->include_classcode = $lem->arg =
$lem->tokentype = $lem->start = 0;
$lem->error = $lem->overflow = $lem->failure = $lem->accept = $lem->tokendest =
$lem->tokenprefix = $lem->outname = $lem->extracode = 0;
LemonSymbol ::Symbol_new ("$");
$lem->errsym = LemonSymbol ::Symbol_new ("error");
/* Parse the input file */
$parser = new LemonParser ($this);
/* Count and index the symbols of the grammar */
$lem->nsymbol = LemonSymbol ::Symbol_count ();
LemonSymbol ::Symbol_new ("{default}");
$lem->symbols = LemonSymbol ::Symbol_arrayof ();
for ($i = 0; $i <= $lem->nsymbol; $i++ ) {
$lem->symbols [$i]->index = $i;
usort($lem->symbols , array ('LemonSymbol', 'sortSymbols'));
for ($i = 0; $i <= $lem->nsymbol; $i++ ) {
$lem->symbols [$i]->index = $i;
// find the first lower-case symbol
for($i = 1; ord($lem->symbols [$i]->name [0 ]) < ord ('Z'); $i++ );
/* Generate a reprint of the grammar, if requested on the command line */
/* Initialize the size for all follow and first sets */
$this->SetSize ($lem->nterminal );
/* Find the precedence for every production rule (that has one) */
$lem->FindRulePrecedences ();
/* Compute the lambda-nonterminals and the first-sets for every
/* Compute all LR(0) states. Also record follow-set propagation
** links so that the follow-set can be computed later */
$lem->sorted = LemonState ::State_arrayof ();
/* Tie up loose ends on the propagation links */
/* Compute the follow set of every reducible configuration */
/* Compute the action tables */
/* Compress the action tables */
if ($this->compress===0 ) {
/* Reorder and renumber the states so that states with fewer choices
/* Generate a report of the parser generated. (the "y.output" file) */
/* Generate the source code for the parser */
$lem->ReportTable ($this->mhflag );
/* Produce a header file for use by the scanner. (This step is
** omitted if the "-m" option is used because makeheaders will
** generate the file for us.) */
// $this->ReportHeader();
printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
$lem->nterminal , $lem->nsymbol - $lem->nterminal , $lem->nrule );
printf(" %d states, %d parser table entries, %d conflicts\n",
$lem->nstate , $lem->tablesize , $lem->nconflict );
printf("%d parsing conflicts.\n", $lem->nconflict );
exit ($lem->errorcnt + $lem->nconflict );
return ($lem->errorcnt + $lem->nconflict );
* Merge in a merge sort for a linked list
* - a: A sorted, null-terminated linked list. (May be null).
* - b: A sorted, null-terminated linked list. (May be null).
* - cmp: A pointer to the comparison function.
* - offset: Offset in the structure to the "next" field.
* A pointer to the head of a sorted list containing the elements
* The "next" pointers for elements in the lists a and b are
static function merge ($a, $b, $cmp, $offset)
** list: Pointer to a singly-linked list of structures.
** next: Pointer to pointer to the second element of the list.
** cmp: A comparison function.
** A pointer to the head of a sorted list containing the elements
** The "next" pointers for elements in list are changed.
static function msort ($list, $next, $cmp)
if ($list->$next === 0 ) {
for ($i = 0; $i < 29 && $set[$i] !== 0; $i++ ) {
$ep = self ::merge ($ep, $set[$i], $cmp, $next);
for ($i = 0; $i < 30; $i++ ) {
$ep = self ::merge ($ep, $set[$i], $cmp, $next);
/* Find a good place to break "msg" so that its length is at least "min"
** but no more than "max". Make the point as close to max as possible.
static function findbreak ($msg, $min, $max)
for ($i = $spot = $min; $i <= $max && $i < strlen($msg); $i++ ) {
if ($c == '-' && $i < $max - 1 ) {
static function ErrorMsg ($filename, $lineno, $format)
/* Prepare a prefix to be prepended to every output line */
$prefix = sprintf("%20s:%d: ", $filename, $lineno);
$prefix = sprintf("%20s: ", $filename);
$prefixsize = strlen($prefix);
$availablewidth = 79 - $prefixsize;
/* Generate the error message */
/* Remove trailing "\n"s from the error message. */
while ($linewidth > 0 && in_array($errmsg[$linewidth-1 ], array ("\n", "\r"), true )) {
/* Print the error message */
$errmsg = str_replace(array ("\r", "\n", "\t"), array (' ', ' ', ' '), $errmsg);
$end = $restart = self ::findbreak ($errmsg, 0 , $availablewidth);
if (strlen($errmsg) <= 79 && $end < strlen($errmsg) && $end <= 79 ) {
$end = $restart = strlen($errmsg);
while (isset ($errmsg[$restart]) && $errmsg[$restart] == ' ') {
printf(" %s%.${end}s\n" , $prefix, $errmsg);
$errmsg = substr($errmsg, $restart);
* Duplicate the input file without comments and without actions
printf("// Reprint of input file \"%s\".\n// Symbols:\n", $this->filename );
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$sp = $this->symbols [$i];
$ncolumns = 76 / ($maxlen + 5 );
$skip = ($this->nsymbol + $ncolumns - 1 ) / $ncolumns;
for ($i = 0; $i < $skip; $i++ ) {
for ($j = $i; $j < $this->nsymbol; $j += $skip) {
$sp = $this->symbols [$j];
//assert( sp->index==j );
printf(" %3d %-${maxlen}.${maxlen}s" , $j, $sp->name );
for ($rp = $this->rule; $rp; $rp = $rp->next) {
printf("(%s)", $rp->lhsalias);
for ($i = 0; $i < $rp->nrhs; $i++ ) {
if ($sp->type == LemonSymbol ::MULTITERMINAL ) {
for ($j = 1; $j < $sp->nsubsym; $j++ ) {
printf("|%s", $sp->subsym [$j]->name );
/* if ($rp->rhsalias[$i]) {
printf("(%s)", $rp->rhsalias[$i]);
printf(" [%s]", $rp->precsym ->name );
print "\n " . $rp->code);
const WAITING_FOR_DECL_OR_RULE = 2;
const WAITING_FOR_DECL_KEYWORD = 3;
const WAITING_FOR_DECL_ARG = 4;
const WAITING_FOR_PRECEDENCE_SYMBOL = 5;
const WAITING_FOR_ARROW = 6;
const PRECEDENCE_MARK_1 = 13;
const PRECEDENCE_MARK_2 = 14;
const RESYNC_AFTER_RULE_ERROR = 15;
const RESYNC_AFTER_DECL_ERROR = 16;
const WAITING_FOR_DESTRUCTOR_SYMBOL = 17;
const WAITING_FOR_DATATYPE_SYMBOL = 18;
const WAITING_FOR_FALLBACK_ID = 19;
public $filename; /* Name of the input file */
public $tokenlineno; /* Linenumber at which current token starts */
public $errorcnt; /* Number of errors so far */
public $tokenstart; /* Text of current token */
public $gp; /* Global state vector */
WAITING_FOR_DECL_OR_RULE,
WAITING_FOR_DECL_KEYWORD,
WAITING_FOR_PRECEDENCE_SYMBOL,
WAITING_FOR_DESTRUCTOR_SYMBOL,
WAITING_FOR_DATATYPE_SYMBOL,
public $state; /* The state of the parser */
public $fallback; /* The fallback token */
public $lhs; /* Left-hand side of current rule */
public $lhsalias; /* Alias for the LHS */
public $nrhs; /* Number of right-hand side symbols seen */
* @var array array of {@link LemonSymbol} objects
public $rhs = array (); /* RHS symbols */
public $alias = array (); /* Aliases for each RHS symbol name (or NULL) */
public $prevrule; /* Previous rule parsed */
public $declkeyword; /* Keyword of a declaration */
* @var array array of strings
public $declargslot = array (); /* Where the declaration argument should be put */
public $decllnslot; /* Where the declaration linenumber is put */
public $declassoc; /* Assign this association to decl arguments */
public $preccounter; /* Assign this precedence to decl arguments */
public $firstrule; /* Pointer to first rule in the grammar */
public $lastrule; /* Pointer to the most recently parsed rule */
function __construct ($lem)
* Run the proprocessor over the input file text. The Lemon variable
* $azDefine contains the names of all defined
* macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
* comments them out. Text in between is also commented out as appropriate.
private function preprocess_input (&$z)
for ($i=0; $i < strlen($z); $i++ ) {
if ($z[$i] != '%' || ($i > 0 && $z[$i-1 ] != "\n")) {
if (substr($z, $i, 6 ) === "%endif" && trim($z[$i+6 ]) === '') {
for ($j = $start; $j < $i; $j++ ) {
if ($z[$j] != "\n") $z[$j] = ' ';
for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++ ) {
} elseif (substr($z, $i, 6 ) === "%ifdef" && trim($z[$i+6 ]) === '' ||
substr($z, $i, 7 ) === "%ifndef" && trim($z[$i+7 ]) === '') {
if (isset ($this->lemon->azDefine [$n])) {
// this is a rather obtuse way of checking whether this is %ifndef
//for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++) $z[$j] = ' ';
$z = substr($z, 0 , $i); // remove instead of adding ' '
$z = substr($z, 0 , $i) . substr($z, $i + $j); // remove instead of adding ' '
throw new Exception (" unterminated %ifdef starting on line $start_lineno\n" );
* In spite of its name, this function is really a scanner.
* It reads in the entire input file (all at once) then tokenizes it.
* Each token is passed to the function "parseonetoken" which builds all
* the appropriate data structures in the global state vector "gp".
$this->filename = $gp->filename;
$this->state = self ::INITIALIZE;
/* Begin by reading the input file */
Lemon ::ErrorMsg ($this->filename, 0 , "Can't open this file for reading.");
ErrorMsg ($this->filename, 0 , "Can't read in all %d bytes of this file.",
/* Make an initial pass through the file to handle %ifdef and %ifndef */
$this->preprocess_input ($filebuf);
/* Now scan the text of the input file */
for ($cp = 0 , $c = $filebuf[0 ]; $cp < strlen($filebuf); $cp++ ) {
if ($c == "\n") $lineno++; /* Keep track of the line number */
} /* Skip all white space */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1 ] == '/') {
/* Skip C++ style comments */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1 ] == '*') {
/* Skip C style comments */
$this->tokenstart = $cp; /* Mark the beginning of the token */
$this->tokenlineno = $lineno; /* Linenumber on which token begins */
if ($filebuf[$cp] == '"') { /* String literals */
Lemon ::ErrorMsg ($this->filename, $startline,
"String starting on this line is not terminated before the end of the file.");
$nextcp = $cp = strlen($filebuf);
} elseif ($filebuf[$cp] == '{') { /* A block of C code */
for ($level = 1; $cp < strlen($filebuf) && ($level > 1 || $filebuf[$cp] != '}'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
} elseif ($filebuf[$cp] == '{') {
} elseif ($filebuf[$cp] == '}') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1 ] == '*') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1 ] == '/') {
/* Skip C++ style comments too */
} elseif ($filebuf[$cp] == "'" || $filebuf[$cp] == '"') {
/* String a character literals */
$startchar = $filebuf[$cp];
for ($cp++; $cp < strlen($filebuf) && ($filebuf[$cp] != $startchar || $prevc === '\\'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
if ($cp >= strlen($filebuf)) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"PHP code starting on this line is not terminated before the end of the file.");
} elseif (preg_match('/[a-zA-Z0-9]/', $filebuf[$cp])) {
$cp += strlen($preg_results[0 ]);
} elseif ($filebuf[$cp] == ':' && $filebuf[$cp + 1 ] == ':' &&
$filebuf[$cp + 2 ] == '=') {
} elseif (($filebuf[$cp] == '/' || $filebuf[$cp] == '|') &&
$cp += strlen($preg_results[0 ]);
/* All other (one character) operators */
$this->parseonetoken (substr($filebuf, $this->tokenstart,
$cp - $this->tokenstart)); /* Parse the token */
$gp->rule = $this->firstrule;
$gp->errorcnt = $this->errorcnt;
function parseonetoken ($token)
$this->a = 0; // for referencing in WAITING_FOR_DECL_KEYWORD
printf("%s:%d: Token=[%s] state=%d\n",
$this->filename, $this->tokenlineno, $token, $this->state);
$this->firstrule = $this->lastrule = 0;
/* Fall thru to next case */
case self ::WAITING_FOR_DECL_OR_RULE:
$this->state = self ::WAITING_FOR_DECL_KEYWORD;
$this->lhs = LemonSymbol ::Symbol_new ($x);
$this->state = self ::WAITING_FOR_ARROW;
} elseif ($x[0 ] == '{') {
if ($this->prevrule === 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"There is no prior rule opon which to attach the code
fragment which begins on this line.");
} elseif ($this->prevrule->code != 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Code fragment beginning on this line is not the first \
to follow the previous rule.");
$this->prevrule->line = $this->tokenlineno;
$this->prevrule->code = substr($x, 1 );
} elseif ($x[0 ] == '[') {
$this->state = self ::PRECEDENCE_MARK_1;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Token \"%s\" should be either \"%%\" or a nonterminal name.",
case self ::PRECEDENCE_MARK_1:
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"The precedence symbol must be a terminal.");
} elseif ($this->prevrule === 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"There is no prior rule to assign precedence \"[%s]\".", $x);
} elseif ($this->prevrule->precsym != 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Precedence mark on this line is not the first to follow the previous rule.");
$this->prevrule->precsym = LemonSymbol ::Symbol_new ($x);
$this->state = self ::PRECEDENCE_MARK_2;
case self ::PRECEDENCE_MARK_2:
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Missing \"]\" on precedence mark.");
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
case self ::WAITING_FOR_ARROW:
if ($x[0 ] == ':' && $x[1 ] == ':' && $x[2 ] == '=') {
$this->state = self ::IN_RHS;
} elseif ($x[0 ] == '(') {
$this->state = self ::LHS_ALIAS_1;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Expected to see a \":\" following the LHS symbol \"%s\".",
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$this->state = self ::LHS_ALIAS_2;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"\"%s\" is not a valid alias for the LHS \"%s\"\n",
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$this->state = self ::LHS_ALIAS_3;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Missing \")\" following LHS alias name \"%s\".",$this->lhsalias);
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$this->state = self ::IN_RHS;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Missing \"->\" following: \"%s(%s)\".",
$this->lhs->name , $this->lhsalias);
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$rp->ruleline = $this->tokenlineno;
for ($i = 0; $i < $this->nrhs; $i++ ) {
$rp->rhs [$i] = $this->rhs[$i];
$rp->rhsalias [$i] = $this->alias[$i];
$rp->lhsalias = $this->lhsalias;
$rp->index = $this->gp->nrule++;
$rp->nextlhs = $rp->lhs ->rule;
if ($this->firstrule === 0 ) {
$this->firstrule = $this->lastrule = $rp;
$this->lastrule->next = $rp;
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
if ($this->nrhs >= Lemon ::MAXRHS ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Too many symbols on RHS or rule beginning at \"%s\".",
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
if (isset ($this->rhs[$this->nrhs - 1 ])) {
$msp = $this->rhs[$this->nrhs - 1 ];
if ($msp->type == LemonSymbol ::MULTITERMINAL ) {
array ($this, '_printmulti'), '');
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
'WARNING: symbol ' . $x . ' will not' .
' be part of previous multiterminal %s',
$this->rhs[$this->nrhs] = LemonSymbol ::Symbol_new ($x);
$this->alias[$this->nrhs] = 0;
} elseif (($x[0 ] == '|' || $x[0 ] == '/') && $this->nrhs > 0 ) {
$msp = $this->rhs[$this->nrhs - 1 ];
if ($msp->type != LemonSymbol ::MULTITERMINAL ) {
$msp->type = LemonSymbol ::MULTITERMINAL;
$msp->subsym = array ($origsp);
$msp->name = $origsp->name;
$this->rhs[$this->nrhs - 1 ] = $msp;
$msp->subsym [$msp->nsubsym - 1 ] = LemonSymbol ::Symbol_new (substr($x, 1 ));
preg_match('/[a-z]/', $msp->subsym [0 ]->name [0 ])) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Cannot form a compound containing a non-terminal");
} elseif ($x[0 ] == '(' && $this->nrhs > 0 ) {
$this->state = self ::RHS_ALIAS_1;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Illegal character on RHS of rule: \"%s\".", $x);
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$this->alias[$this->nrhs - 1 ] = $x;
$this->state = self ::RHS_ALIAS_2;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
$x, $this->rhs[$this->nrhs - 1 ]->name );
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
$this->state = self ::IN_RHS;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Missing \")\" following LHS alias name \"%s\".", $this->lhsalias);
$this->state = self ::RESYNC_AFTER_RULE_ERROR;
case self ::WAITING_FOR_DECL_KEYWORD:
$this->declargslot = &$this->a;
$this->decllnslot = &$this->a;
$this->state = self ::WAITING_FOR_DECL_ARG;
$this->declargslot = &$this->gp->name;
} elseif ('include' == $x) {
$this->declargslot = &$this->gp->include_code;
$this->decllnslot = &$this->gp->includeln;
} elseif ('include_class' == $x) {
$this->declargslot = &$this->gp->include_classcode;
$this->decllnslot = &$this->gp->include_classln;
} elseif ('declare_class' == $x) {
$this->declargslot = &$this->gp->declare_classcode;
$this->decllnslot = &$this->gp->declare_classln;
} elseif ('code' == $x) {
$this->declargslot = &$this->gp->extracode;
$this->decllnslot = &$this->gp->extracodeln;
} elseif ('token_destructor' == $x) {
$this->declargslot = &$this->gp->tokendest;
$this->decllnslot = &$this->gp->tokendestln;
} elseif ('default_destructor' == $x) {
$this->declargslot = &$this->gp->vardest;
$this->decllnslot = &$this->gp->vardestln;
} elseif ('token_prefix' == $x) {
$this->declargslot = &$this->gp->tokenprefix;
} elseif ('syntax_error' == $x) {
$this->declargslot = &$this->gp->error;
$this->decllnslot = &$this->gp->errorln;
} elseif ('parse_accept' == $x) {
$this->declargslot = &$this->gp->accept;
$this->decllnslot = &$this->gp->acceptln;
} elseif ('parse_failure' == $x) {
$this->declargslot = &$this->gp->failure;
$this->decllnslot = &$this->gp->failureln;
} elseif ('stack_overflow' == $x) {
$this->declargslot = &$this->gp->overflow;
$this->decllnslot = &$this->gp->overflowln;
} else if('extra_argument' == $x) {
$this->declargslot = &$this->gp->arg;
} elseif ('token_type' == $x) {
$this->declargslot = &$this->gp->tokentype;
} elseif ('default_type' == $x) {
$this->declargslot = &$this->gp->vartype;
} elseif ('stack_size' == $x) {
$this->declargslot = &$this->gp->stacksize;
} elseif ('start_symbol' == $x) {
$this->declargslot = &$this->gp->start;
} elseif ('left' == $x) {
$this->declassoc = LemonSymbol ::LEFT;
$this->state = self ::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('right' == $x) {
$this->declassoc = LemonSymbol ::RIGHT;
$this->state = self ::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('nonassoc' == $x) {
$this->declassoc = LemonSymbol ::NONE;
$this->state = self ::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('destructor' == $x) {
$this->state = self ::WAITING_FOR_DESTRUCTOR_SYMBOL;
} elseif ('type' == $x) {
$this->state = self ::WAITING_FOR_DATATYPE_SYMBOL;
} elseif ('fallback' == $x) {
$this->state = self ::WAITING_FOR_FALLBACK_ID;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Unknown declaration keyword: \"%%%s\".", $x);
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Illegal declaration keyword: \"%s\".", $x);
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
case self ::WAITING_FOR_DESTRUCTOR_SYMBOL:
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Symbol name missing after %destructor keyword");
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
$sp = LemonSymbol ::Symbol_new ($x);
$this->declargslot = &$sp->destructor;
$this->decllnslot = &$sp->destructorln;
$this->state = self ::WAITING_FOR_DECL_ARG;
case self ::WAITING_FOR_DATATYPE_SYMBOL:
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Symbol name missing after %destructor keyword");
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
$sp = LemonSymbol ::Symbol_new ($x);
$this->declargslot = &$sp->datatype;
$this->state = self ::WAITING_FOR_DECL_ARG;
case self ::WAITING_FOR_PRECEDENCE_SYMBOL:
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
$sp = LemonSymbol ::Symbol_new ($x);
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Symbol \"%s\" has already been given a precedence.", $x);
$sp->prec = $this->preccounter;
$sp->assoc = $this->declassoc;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Can't assign a precedence to \"%s\".", $x);
case self ::WAITING_FOR_DECL_ARG:
if ($this->declargslot != 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"The argument \"%s\" to declaration \"%%%s\" is not the first.",
$x[0 ] == '"' ? substr($x, 1 ) : $x, $this->declkeyword);
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
$this->declargslot = ($x[0 ] == '"' || $x[0 ] == '{') ? substr($x, 1 ) : $x;
if (!$this->decllnslot) {
$this->decllnslot = $this->tokenlineno;
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"Illegal argument to %%%s: %s",$this->declkeyword, $x);
$this->state = self ::RESYNC_AFTER_DECL_ERROR;
case self ::WAITING_FOR_FALLBACK_ID:
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"%%fallback argument \"%s\" should be a token", $x);
$sp = LemonSymbol ::Symbol_new ($x);
if ($this->fallback === 0 ) {
Lemon ::ErrorMsg ($this->filename, $this->tokenlineno,
"More than one fallback assigned to token %s", $x);
$sp->fallback = $this->fallback;
$this->gp->has_fallback = 1;
case self ::RESYNC_AFTER_RULE_ERROR:
/* if ($x[0] == '.') $this->state = self::WAITING_FOR_DECL_OR_RULE;
case self ::RESYNC_AFTER_DECL_ERROR:
$this->state = self ::WAITING_FOR_DECL_OR_RULE;
$this->state = self ::WAITING_FOR_DECL_KEYWORD;
function _printmulti ($a, $b)
$_SERVER['argv'] = array ('lemon', '-s', '/development/lemon/PHP_Parser.y');
//$_SERVER['argv'] = array('lemon', '-s', '/development/File_ChessPGN/ChessPGN/Parser.y');
Documentation generated on Mon, 11 Mar 2019 15:40:59 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|