SQL_Parser
[ class tree: SQL_Parser ] [ index: SQL_Parser ] [ all elements ]

Source for file Lexer.php

Documentation is available at Lexer.php

  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  3. // +----------------------------------------------------------------------+
  4. // | Copyright (c) 2002-2004 Brent Cook                                        |
  5. // +----------------------------------------------------------------------+
  6. // | This library is free software; you can redistribute it and/or        |
  7. // | modify it under the terms of the GNU Lesser General Public           |
  8. // | License as published by the Free Software Foundation; either         |
  9. // | version 2.1 of the License, or (at your option) any later version.   |
  10. // |                                                                      |
  11. // | This library is distributed in the hope that it will be useful,      |
  12. // | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
  13. // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
  14. // | Lesser General Public License for more details.                      |
  15. // |                                                                      |
  16. // | You should have received a copy of the GNU Lesser General Public     |
  17. // | License along with this library; if not, write to the Free Software  |
  18. // | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
  19. // +----------------------------------------------------------------------+
  20. // | Authors: Brent Cook <busterbcook@yahoo.com>                          |
  21. // |          Jason Pell <jasonpell@hotmail.com>                          |
  22. // +----------------------------------------------------------------------+
  23. //
  24. // $Id: Lexer.php 263419 2008-07-24 15:22:46Z cybot $
  25. //
  26.  
  27. include dirname(__FILE__'/ctype.php';
  28.  
  29. // {{{ token definitions
  30. // variables: 'ident', 'sys_var'
  31. // values:    'real_val', 'text_val', 'int_val', null
  32. // }}}
  33.  
  34. /**
  35.  * A lexigraphical analyser inspired by the msql lexer
  36.  *
  37.  * @author  Brent Cook <busterbcook@yahoo.com>
  38.  * @version 0.5
  39.  * @access  public
  40.  * @package SQL_Parser
  41.  */
  42. {
  43.     // array of valid tokens for the lexer to recognize
  44.     // format is 'token literal'=>TOKEN_VALUE
  45.     var $symbols = array();
  46.  
  47.     // {{{ instance variables
  48.     var $tokPtr = 0;
  49.     var $tokStart = 0;
  50.     var $tokLen = 0;
  51.     var $tokText = '';
  52.     var $lineNo = 0;
  53.     var $lineBegin = 0;
  54.     var $string = '';
  55.     var $stringLen = 0;
  56.  
  57.     // Will not be altered by skip()
  58.     var $tokAbsStart = 0;
  59.     var $skipText = '';
  60.  
  61.     // Provide lookahead capability.
  62.     var $lookahead = 0;
  63.     // Specify how many tokens to save in tokenStack, so the
  64.     // token stream can be pushed back.
  65.     var $tokenStack = array();
  66.     var $stackPtr = 0;
  67.     // }}}
  68.  
  69.     // {{{ incidental functions
  70.     function SQL_Parser_Lexer($string ''$lookahead = 0$lexeropts)
  71.     {
  72.         $this->string = $string;
  73.         $this->stringLen = strlen($string);
  74.         $this->lookahead = $lookahead;
  75.         $this->allowIdentFirstDigit $lexeropts['allowIdentFirstDigit'];
  76.     }
  77.  
  78.     function get()
  79.     {
  80.         ++$this->tokPtr;
  81.         ++$this->tokLen;
  82.         return ($this->tokPtr <= $this->stringLen$this->string{$this->tokPtr - 1: null;
  83.     }
  84.  
  85.     function unget()
  86.     {
  87.         --$this->tokPtr;
  88.         --$this->tokLen;
  89.     }
  90.  
  91.     function skip()
  92.     {
  93.         ++$this->tokStart;
  94.         return ($this->tokPtr != $this->stringLen$this->string{$this->tokPtr++: null;
  95.     }
  96.  
  97.     function revert()
  98.     {
  99.         $this->tokPtr = $this->tokStart;
  100.         $this->tokLen = 0;
  101.     }
  102.  
  103.     function isCompop($c)
  104.     {
  105.         return (($c == '<'|| ($c == '>'|| ($c == '='|| ($c == '!'));
  106.     }
  107.     // }}}
  108.  
  109.     // {{{ pushBack()
  110.     /*
  111.      * Push back a token, so the very next call to lex() will return that token.
  112.      * Calls to this function will be ignored if there is no lookahead specified
  113.      * to the constructor, or the pushBack() function has already been called the
  114.      * maximum number of token's that can be looked ahead.
  115.      */
  116.     function pushBack()
  117.     {
  118.         if ($this->lookahead > 0 && count($this->tokenStack> 0 && $this->stackPtr > 0{
  119.             $this->stackPtr--;
  120.         }
  121.     }
  122.     // }}}
  123.  
  124.     // {{{ lex()
  125.     function lex()
  126.     {
  127.         if ($this->lookahead > 0{
  128.             // The stackPtr, should always be the same as the count of
  129.             // elements in the tokenStack.  The stackPtr, can be thought
  130.             // of as pointing to the next token to be added.  If however
  131.             // a pushBack() call is made, the stackPtr, will be less than the
  132.             // count, to indicate that we should take that token from the
  133.             // stack, instead of calling nextToken for a new token.
  134.  
  135.             if ($this->stackPtr < count($this->tokenStack)) {
  136.  
  137.                 $this->tokText  = $this->tokenStack[$this->stackPtr]['tokText'];
  138.                 $this->skipText = $this->tokenStack[$this->stackPtr]['skipText'];
  139.                 $token $this->tokenStack[$this->stackPtr]['token'];
  140.  
  141.                 // We have read the token, so now iterate again.
  142.                 $this->stackPtr++;
  143.                 return $token;
  144.  
  145.             else {
  146.  
  147.                 // If $tokenStack is full (equal to lookahead), pop the oldest
  148.                 // element off, to make room for the new one.
  149.  
  150.                 if ($this->stackPtr == $this->lookahead{
  151.                     // For some reason array_shift and
  152.                     // array_pop screw up the indexing, so we do it manually.
  153.                     for ($i = 0; $i (count($this->tokenStack- 1)$i++{
  154.                         $this->tokenStack[$i$this->tokenStack[$i + 1];
  155.                     }
  156.  
  157.                     // Indicate that we should put the element in
  158.                     // at the stackPtr position.
  159.                     $this->stackPtr--;
  160.                 }
  161.  
  162.                 $token $this->nextToken();
  163.                 $this->tokenStack[$this->stackPtr=
  164.                     array('token'=>$token,
  165.                             'tokText'=>$this->tokText,
  166.                             'skipText'=>$this->skipText);
  167.                 $this->stackPtr++;
  168.                 return $token;
  169.             }
  170.         else {
  171.             return $this->nextToken();
  172.         }
  173.     }
  174.     // }}}
  175.  
  176.     // {{{ nextToken()
  177.     function nextToken()
  178.     {
  179.         //echo 'last token: ' . $this->tokText . "\n";
  180.         if ($this->string == ''{
  181.             return;
  182.         }
  183.         $state = 0;
  184.         $this->tokAbsStart = $this->tokStart;
  185.  
  186.         while (true{
  187.             //echo "State: $state, Char: $c\n";
  188.             switch ($state{
  189.                 // {{{ State 0 : Start of token
  190.                 case 0:
  191.                     $this->tokPtr = $this->tokStart;
  192.                     $this->tokText = '';
  193.                     $this->tokLen = 0;
  194.                     $c $this->get();
  195.  
  196.                     if (is_null($c)) // End Of Input
  197.                         $state = 1000;
  198.                         break;
  199.                     }
  200.  
  201.                     while (($c == ' '|| ($c == "\t")
  202.                             || ($c == "\n"|| ($c == "\r")
  203.                     {
  204.                         if ($c == "\n" || $c == "\r"{
  205.                             // Handle MAC/Unix/Windows line endings.
  206.                             if ($c == "\r"{
  207.                                 $c $this->skip();
  208.  
  209.                                 // If not DOS newline
  210.                                 if ($c != "\n"{
  211.                                     $this->unget();
  212.                                 }
  213.                             }
  214.                             ++$this->lineNo;
  215.                             $this->lineBegin = $this->tokPtr;
  216.                         }
  217.  
  218.                         $c $this->skip();
  219.                         $this->tokLen = 1;
  220.                     }
  221.  
  222.                     // Escape quotes and backslashes
  223.                     if ($c == '\\'{
  224.                         $t $this->get();
  225.                         if ($t == '\'' || $t == '\\' || $t == '"'{
  226.                             $this->tokText = $t;
  227.                             $this->tokStart = $this->tokPtr;
  228.                             return $this->tokText;
  229.                         else {
  230.                             $this->unget();
  231.  
  232.                             // Unknown token.  Revert to single char
  233.                             $state = 999;
  234.                             break;
  235.                         }
  236.                     }
  237.  
  238.                     if (isset($this->quotes[$c])) {
  239.                         $quote $c;
  240.                         $state = 12;
  241.                         break;
  242.                     }
  243.  
  244.                     if ($c == '_'// system variable
  245.                         $state = 18;
  246.                         break;
  247.                     }
  248.  
  249.                     if (ctype_alpha(ord($c))) // keyword or ident
  250.                         $state = 1;
  251.                         break;
  252.                     }
  253.  
  254.                     if (ctype_digit(ord($c))) // real or int number
  255.                         $state = 5;
  256.                         break;
  257.                     }
  258.  
  259.                     if ($c == '.'{
  260.                         $t $this->get();
  261.                         if ($t == '.'// ellipsis
  262.                             if ($this->get(== '.'{
  263.                                 $this->tokText = '...';
  264.                                 $this->tokStart = $this->tokPtr;
  265.                                 return $this->tokText;
  266.                             else {
  267.                                 $state = 999;
  268.                                 break;
  269.                             }
  270.                         else if (ctype_digit(ord($t))) // real number
  271.                             $this->unget();
  272.                             $state = 7;
  273.                             break;
  274.                         else // period
  275.                             $this->unget();
  276.                         }
  277.                     }
  278.  
  279.  
  280.                     // comments
  281.                     foreach ($this->comments as $comment_start => $comment_end{
  282.                         if (substr($this->string$this->tokPtr - 1strlen($comment_start)) === $comment_start{
  283.                             $state = 14;
  284.                             break 2;
  285.                         }
  286.                     }
  287.  
  288.                     if ($c == '-'{
  289.                         // negative number, or operator '-', finally checked in case 6
  290.                         $state = 5;
  291.                         break;
  292.                     }
  293.  
  294.                     if ($this->isCompop($c)) // comparison operator
  295.                         $state = 10;
  296.                         break;
  297.                     }
  298.                     // Unknown token.  Revert to single char
  299.                     $state = 999;
  300.                     break;
  301.                     // }}}
  302.  
  303.                     // {{{ State 1 : Incomplete keyword or ident
  304.                 case 1:
  305.                     $c $this->get();
  306.                     if (ctype_alnum(ord($c)) || ($c == '_')) {
  307.                         $state = 1;
  308.                         break;
  309.                     }
  310.                     $state = 2;
  311.                     break;
  312.                     // }}}
  313.  
  314.                     /* {{{ State 2 : Complete keyword or ident */
  315.                 case 2:
  316.                     $this->unget();
  317.                     $this->tokText = substr($this->string$this->tokStart,
  318.                             $this->tokLen);
  319.  
  320.                     $testToken strtolower($this->tokText);
  321.                     if (isset($this->symbols[$testToken])) {
  322.  
  323.                         $this->skipText = substr($this->string$this->tokAbsStart,
  324.                                 $this->tokStart-$this->tokAbsStart);
  325.                         $this->tokStart = $this->tokPtr;
  326.                         return $testToken;
  327.                     else {
  328.                         $this->skipText = substr($this->string$this->tokAbsStart,
  329.                                 $this->tokStart-$this->tokAbsStart);
  330.                         $this->tokStart = $this->tokPtr;
  331.                         return 'ident';
  332.                     }
  333.                     break;
  334.                     // }}}
  335.  
  336.                     // {{{ State 5: Incomplete real or int number
  337.                 case 5:
  338.                     $c $this->get();
  339.                     if (ctype_digit(ord($c))) {
  340.                         $state = 5;
  341.                         break;
  342.                     else if ($c == '.'{
  343.                         $t $this->get();
  344.                         if($t == '.'// ellipsis
  345.                             $this->unget();
  346.                         else // real number
  347.                             $state = 7;
  348.                             break;
  349.                         }
  350.                     else if(ctype_alpha(ord($c))) {
  351.                         // Do we allow idents to begin with a digit?
  352.                         if ($this->allowIdentFirstDigit{
  353.                             $state = 1;
  354.                         else // a number must end with non-alpha character
  355.                             $state = 999;
  356.                         }
  357.                         break;
  358.                     else {
  359.                         // complete number, or '-'
  360.                         $state = 6;
  361.                         break;
  362.                     }
  363.                     // }}}
  364.  
  365.                     // {{{ State 6: Complete integer number
  366.                 case 6:
  367.                     $this->unget();
  368.                     // '-' or negative number
  369.                     $val substr($this->string$this->tokStart$this->tokLen);
  370.                     if ($val === '-'{
  371.                         $this->tokText = $val;
  372.                     else {
  373.                         $this->tokText = intval($val);
  374.                     }
  375.                     $this->skipText = substr($this->string$this->tokAbsStart,
  376.                             $this->tokStart-$this->tokAbsStart);
  377.                     $this->tokStart = $this->tokPtr;
  378.                     if ($this->tokText == '-'{
  379.                         return $this->tokText;
  380.                     else {
  381.                         return 'int_val';
  382.                     }
  383.                     break;
  384.                     // }}}
  385.  
  386.                     // {{{ State 7: Incomplete real number
  387.                 case 7:
  388.                     $c $this->get();
  389.  
  390.                     if ($c == 'e' || $c == 'E'{
  391.                         $state = 15;
  392.                         break;
  393.                     }
  394.  
  395.                     if (ctype_digit(ord($c))) {
  396.                         $state = 7;
  397.                         break;
  398.                     }
  399.                     $state = 8;
  400.                     break;
  401.                     // }}}
  402.  
  403.                     // {{{ State 8: Complete real number
  404.                 case 8:
  405.                     $this->unget();
  406.                     $this->tokText = floatval(substr($this->string$this->tokStart,
  407.                                 $this->tokLen));
  408.                     $this->skipText = substr($this->string$this->tokAbsStart,
  409.                             $this->tokStart-$this->tokAbsStart);
  410.                     $this->tokStart = $this->tokPtr;
  411.                     return 'real_val';
  412.                     // }}}
  413.  
  414.                     // {{{ State 10: Incomplete comparison operator
  415.                 case 10:
  416.                     $c $this->get();
  417.                     if ($this->isCompop($c)) {
  418.                         $state = 10;
  419.                         break;
  420.                     }
  421.                     $state = 11;
  422.                     break;
  423.                     // }}}
  424.  
  425.                     // {{{ State 11: Complete comparison operator
  426.                 case 11:
  427.                     $this->unget();
  428.                     $this->tokText = substr($this->string$this->tokStart,
  429.                             $this->tokLen);
  430.                     if ($this->tokText{
  431.                         $this->skipText = substr($this->string$this->tokAbsStart,
  432.                                 $this->tokStart-$this->tokAbsStart);
  433.                         $this->tokStart = $this->tokPtr;
  434.                         return $this->tokText;
  435.                     }
  436.                     $state = 999;
  437.                     break;
  438.                     // }}}
  439.  
  440.                     // {{{ State 12: Incomplete quoted string or ident
  441.                 case 12:
  442.                     $bail = false;
  443.                     while ($bail{
  444.                         switch ($this->get()) {
  445.                             case '':
  446.                                 $this->tokText = null;
  447.                                 $bail = true;
  448.                                 break;
  449.                             case "\\":
  450.                                 if ($this->get()) {
  451.                                     $this->tokText = null;
  452.                                     $bail = true;
  453.                                 }
  454.                                 //$bail = true;
  455.                                 break;
  456.                             case $quote:
  457.                                 if ($quote != $this->get()) {
  458.                                     $this->unget();
  459.                                     $this->tokText = stripslashes(
  460.                                         substr($this->string$this->tokStart + 1,
  461.                                             $this->tokLen - 2));
  462.                                     $bail = true;
  463.                                     break;
  464.                                 }
  465.                         }
  466.                     }
  467.                     if (is_null($this->tokText)) {
  468.                         $state = 13;
  469.                         break;
  470.                     }
  471.                     $state = 999;
  472.                     break;
  473.                     // }}}
  474.  
  475.                     // {{{ State 13: Complete quoted string or ident
  476.                 case 13:
  477.                     $this->skipText = substr($this->string$this->tokAbsStart,
  478.                             $this->tokStart - $this->tokAbsStart);
  479.                     $this->tokStart = $this->tokPtr;
  480.                     switch ($this->quotes[$quote]{
  481.                         case 'ident' :
  482.                             return 'ident';
  483.                             break;
  484.                         case 'string' :
  485.                         default :
  486.                             return 'text_val';
  487.                             break;
  488.                     }
  489.                     break;
  490.                     // }}}
  491.  
  492.                     // {{{ State 14: Comment
  493.                 case 14:
  494.                     $c $this->skip();
  495.                     if (null === $c
  496.                      || ($comment_end == "\n" && ($c == "\n" || $c == "\r"))
  497.                      || substr($this->string$this->tokPtrstrlen($comment_end)) === $comment_end{
  498.                         $this->tokPtr += strlen($comment_end);
  499.  
  500.                         if ($c == "\n" || $c == "\r"{
  501.                             // Handle MAC/Unix/Windows line endings.
  502.                             if ($c == "\r"{
  503.                                 $c $this->skip();
  504.  
  505.                                 // If not DOS newline
  506.                                 if ($c != "\n"{
  507.                                     $this->unget();
  508.                                 else {
  509.                                     ++$this->tokPtr;
  510.                                 }
  511.                             }
  512.                             ++$this->lineNo;
  513.                             $this->lineBegin = $this->tokPtr;
  514.                         }
  515.  
  516.                         $this->tokStart = $this->tokPtr;
  517.                         $this->tokLen = 0;
  518.                         $state = 0;
  519.  
  520.                     else {
  521.                         $state = 14;
  522.                     }
  523.                     break;
  524.                     // }}}
  525.  
  526.                     // {{{ State 15: Exponent Sign in Scientific Notation
  527.                 case 15:
  528.                     $c $this->get();
  529.                     if($c == '-' || $c == '+'{
  530.                         $state = 16;
  531.                         break;
  532.                     }
  533.                     $state = 999;
  534.                     break;
  535.                     // }}}
  536.  
  537.                     // {{{ state 16: Exponent Value-first digit in Scientific Notation
  538.                 case 16:
  539.                     $c $this->get();
  540.                     if (ctype_digit(ord($c))) {
  541.                         $state = 17;
  542.                         break;
  543.                     }
  544.                     $state = 999;  // if no digit, then token is unknown
  545.                     break;
  546.                     // }}}
  547.  
  548.                     // {{{ State 17: Exponent Value in Scientific Notation
  549.                 case 17:
  550.                     $c $this->get();
  551.                     if (ctype_digit(ord($c))) {
  552.                         $state = 17;
  553.                         break;
  554.                     }
  555.                     $state = 8;  // At least 1 exponent digit was required
  556.                     break;
  557.                     // }}}
  558.  
  559.                     // {{{ State 18 : Incomplete System Variable
  560.                 case 18:
  561.                     $c $this->get();
  562.                     if (ctype_alnum(ord($c)) || $c == '_'{
  563.                         $state = 18;
  564.                         break;
  565.                     }
  566.                     $state = 19;
  567.                     break;
  568.                     // }}}
  569.  
  570.                     // {{{ State 19: Complete Sys Var
  571.                 case 19:
  572.                     $this->unget();
  573.                     $this->tokText = substr($this->string$this->tokStart,
  574.                             $this->tokLen);
  575.                     $this->skipText = substr($this->string$this->tokAbsStart,
  576.                             $this->tokStart-$this->tokAbsStart);
  577.                     $this->tokStart = $this->tokPtr;
  578.                     return 'sys_var';
  579.                     // }}}
  580.  
  581.                     // {{{ State 999 : Unknown token.  Revert to single char
  582.                 case 999:
  583.                     $this->revert();
  584.                     $this->tokText = $this->get();
  585.                     $this->skipText = substr($this->string$this->tokAbsStart,
  586.                             $this->tokStart-$this->tokAbsStart);
  587.                     $this->tokStart = $this->tokPtr;
  588.                     return $this->tokText;
  589.                     // }}}
  590.  
  591.                     // {{{ State 1000 : End Of Input
  592.                 case 1000:
  593.                     $this->tokText = '*end of input*';
  594.                     $this->skipText = substr($this->string$this->tokAbsStart,
  595.                             $this->tokStart-$this->tokAbsStart);
  596.                     $this->tokStart = $this->tokPtr;
  597.                     return null;
  598.                     // }}}
  599.             }
  600.         }
  601.     }
  602.     // }}}
  603. }

Documentation generated on Mon, 11 Mar 2019 15:39:48 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.