Source for file Lexer.php
Documentation is available at Lexer.php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// | Copyright (c) 2002-2004 Brent Cook |
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or |
// | modify it under the terms of the GNU Lesser General Public |
// | License as published by the Free Software Foundation; either |
// | version 2.1 of the License, or (at your option) any later version. |
// | This library is distributed in the hope that it will be useful, |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | Lesser General Public License for more details. |
// | You should have received a copy of the GNU Lesser General Public |
// | License along with this library; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
// +----------------------------------------------------------------------+
// | Authors: Brent Cook <busterbcook@yahoo.com> |
// | Jason Pell <jasonpell@hotmail.com> |
// +----------------------------------------------------------------------+
// $Id: Lexer.php 263419 2008-07-24 15:22:46Z cybot $
include dirname (__FILE__ ) . '/ctype.php';
// variables: 'ident', 'sys_var'
// values: 'real_val', 'text_val', 'int_val', null
* A lexical analyser inspired by the msql lexer
* @author Brent Cook <busterbcook@yahoo.com>
// array of valid tokens for the lexer to recognize
// format is 'token literal'=>TOKEN_VALUE
// {{{ instance variables
// Will not be altered by skip()
// Provide lookahead capability.
// Specify how many tokens to save in tokenStack, so the
// token stream can be pushed back.
// {{{ incidental functions
$this->allowIdentFirstDigit = $lexeropts['allowIdentFirstDigit'];
return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
* Push back a token, so the very next call to lex() will return that token.
* Calls to this function will be ignored if no lookahead was specified
* to the constructor, or if pushBack() has already been called as many
* times as the maximum number of tokens that can be looked ahead.
// The stackPtr should always be the same as the count of
// elements in the tokenStack. The stackPtr can be thought
// of as pointing to the next token to be added. If, however,
// a pushBack() call is made, the stackPtr will be less than the
// count, to indicate that we should take that token from the
// stack instead of calling nextToken for a new token.
// We have read the token, so now iterate again.
// If $tokenStack is full (equal to lookahead), pop the oldest
// element off, to make room for the new one.
// For some reason array_shift and
// array_pop screw up the indexing, so we do it manually.
// Indicate that we should put the element in
// at the stackPtr position.
//echo 'last token: ' . $this->tokText . "\n";
//echo "State: $state, Char: $c\n";
// {{{ State 0 : Start of token
while (($c == ' ') || ($c == "\t")
|| ($c == "\n") || ($c == "\r")
if ($c == "\n" || $c == "\r") {
// Handle Mac/Unix/Windows line endings.
// Escape quotes and backslashes
if ($t == '\'' || $t == '\\' || $t == '"') {
// Unknown token. Revert to single char
if (isset ($this->quotes [$c])) {
if ($c == '_') { // system variable
if ($t == '.') { // ellipsis
if ($this->get() == '.') {
foreach ($this->comments as $comment_start => $comment_end) {
// negative number, or operator '-', finally checked in case 6
if ($this->isCompop($c)) { // comparison operator
// Unknown token. Revert to single char
// {{{ State 1 : Incomplete keyword or ident
/* {{{ State 2 : Complete keyword or ident */
if (isset ($this->symbols[$testToken])) {
// {{{ State 5: Incomplete real or int number
if($t == '.') { // ellipsis
// Do we allow idents to begin with a digit?
if ($this->allowIdentFirstDigit ) {
} else { // a number must end with non-alpha character
// complete number, or '-'
// {{{ State 6: Complete integer number
// '-' or negative number
// {{{ State 7: Incomplete real number
if ($c == 'e' || $c == 'E') {
// {{{ State 8: Complete real number
// {{{ State 10: Incomplete comparison operator
// {{{ State 11: Complete comparison operator
// {{{ State 12: Incomplete quoted string or ident
if ($quote != $this->get()) {
// {{{ State 13: Complete quoted string or ident
switch ($this->quotes [$quote]) {
|| ($comment_end == "\n" && ($c == "\n" || $c == "\r"))
if ($c == "\n" || $c == "\r") {
// Handle Mac/Unix/Windows line endings.
// {{{ State 15: Exponent Sign in Scientific Notation
if($c == '-' || $c == '+') {
// {{{ state 16: Exponent Value-first digit in Scientific Notation
$state = 999; // if no digit, then token is unknown
// {{{ State 17: Exponent Value in Scientific Notation
$state = 8; // At least 1 exponent digit was required
// {{{ State 18 : Incomplete System Variable
// {{{ State 19: Complete Sys Var
// {{{ State 999 : Unknown token. Revert to single char
// {{{ State 1000 : End Of Input
Documentation generated on Mon, 11 Mar 2019 15:39:48 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|