Source for file Parser.php
Documentation is available at Parser.php
// ----------------------------------------------------------------------------------
// Class: RDF_RDQL_Parser
// ----------------------------------------------------------------------------------
* This class contains methods for parsing an RDQL query string into PHP variables.
* The output of the RDQLParser is an array with variables and constraints
* of each query clause (Select, From, Where, And, Using).
* To perform an RDQL query this array has to be passed to the RDQLEngine.
* @author Radoslaw Oldakowski <radol@gmx.de>
* Parsed query variables and constraints.
* Entries enclosed in { } are used only inside the parser class and are not part of the returned parsed query.
* ( [] stands for an integer index - 0..N )
* @var array ['selectVars'][] = ?VARNAME
* ['sources'][]{['value']} = URI | QName
* {['is_qname'] = boolean}
* ['patterns'][]['subject']['value'] = VARorURI
* {['is_qname'] = boolean}
* ['predicate']['value'] = VARorURI
* {['is_qname'] = boolean}
* ['object']['value'] = VARorURIorLiteral
* {['is_qname'] = boolean}
* ['is_literal'] = boolean
* {['l_dtype_is_qname'] = boolean}
* ['filters'][]['string'] = string
* ['evalFilterStr'] = string
* ['regexEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (~~ | =~ | !~)
* ['regex'] = string
* ['strEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (eq | ne)
* ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
* ['value_lang'] = string
* ['value_dtype'] = string
* {['value_dtype_is_qname'] = boolean}
* ['numExprVars'][] = ?VARNAME
* {['ns'][PREFIX] = NAMESPACE}
* Query string divided into a sequence of tokens.
* A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
* or a string containing any characters except the above.
* Parse the given RDQL query string and return an array with query variables and constraints.
* @param string $queryString
* @return array $this->parsedQuery
if (PEAR ::isError ($cleanQueryString)) {
return $cleanQueryString;
if (PEAR ::isError ($result)) {
* Remove comments from the passed query string.
for ($i = 0; $i <= $last; $i++ ) {
// don't search for comments inside a 'literal'@lang^^dtype or "literal"@lang^^dtype
if ($query{$i} == "'" || $query{$i} == '"') {
} while ($i < $last && $query{$i} != $quotMark);
if ($query{$i+1 } == '@') {
if ($query{$i+1 } == '^' && $query{$i+2 } == '^') {
} while ($i < $last && $query{$i} != ' ' && $query{$i} != "\t"
&& $query{$i} != "\n" && $query{$i} != "\r");
if ($query{$i+1 } == '^' && $query{$i+2 } == '^') {
} while ($i < $last && $query{$i} != ' ' && $query{$i} != "\t"
&& $query{$i} != "\n" && $query{$i} != "\r");
// don't search for comments inside an <URI> either
} elseif ($query{$i} == '<') {
} while ($i < $last && $query{$i} != '>');
} elseif ($query{$i} == '/') {
if ($i < $last && $query{$i+1 } == '/') {
while ($i < $last && $query{$i} != "\n" && $query{$i} != "\r") {
} elseif ($i < $last-2 && $query{$i+1 } == '*') {
while ($i < $last && ($query{$i} != '*' || $query{$i+1 } != '/')) {
if ($i >= $last && ($query{$last-1 } != '*' || $query{$last} != '/')) {
$errmsg = "unterminated comment - '*/' missing";
* Divide the query string into tokens.
* A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
* or a string containing any characters except the above.
* @param string $queryString
$queryString = trim($queryString, " \r\n\t");
$specialChars = array (" ", "\t", "\r", "\n", ",", "(", ")");
for ($i = 0; $i < $len; ++ $i) {
if (!in_array($queryString{$i}, $specialChars)) {
$this->tokens[$n] .= $queryString{$i};
if ($this->tokens[$n] != '') {
$this->tokens[$n] = $queryString{$i};
* Start parsing of the tokenized query string.
* Parse the SELECT clause of an RDQL query.
* When the parsing of the SELECT clause is finished, this method will call
* a suitable method to parse the subsequent clause.
// Check if the queryString contains a "SELECT" token
$errmsg = current($this->tokens) . "' - SELECT keyword expected";
' - SOURCE or WHERE clause expected';
// Parse SELECT ?Var (, ?Var)*
$errmsg = 'unexpected comma';
$errmsg = " '$token' - illegal input";
$errmsg = 'unexpected comma';
} elseif (!strcasecmp('WHERE', $token) && !$comma) {
$errmsg = 'unexpected comma';
if (PEAR ::isError ($result)) {
$errmsg = " '$token' - '?' missing";
$errmsg = 'WHERE clause missing';
* Parse the FROM/SOURCES clause of an RDQL query
* When the parsing of this clause is finished, parseWhere() will be called.
$errmsg = 'unexpected comma';
$errmsg = 'unecpected comma';
if (PEAR ::isError ($result)) {
$this->parsedQuery['sources'][++ $i]['value'] = $result;
$errmsg = 'WHERE clause missing';
* Parse the WHERE clause of an RDQL query.
* When the parsing of the WHERE clause is finished, this method will call
* a suitable method to parse the subsequent clause if provided.
$errmsg = 'unexpected comma';
$errmsg = 'unexpected comma';
if (PEAR ::isError ($result)) {
if (PEAR ::isError ($result)) {
if (PEAR ::isError ($result)) {
if (PEAR ::isError ($result)) {
$this->parsedQuery['patterns'][$i++ ]['object'] = $result;
$errmsg = 'unexpected comma';
* Parse the AND clause of an RDQL query
if (PEAR ::isError ($result)) {
} elseif ($token == ',') {
if (PEAR ::isError ($result)) {
* Parse the USING clause of an RDQL query
if (PEAR ::isError ($result)) {
if (PEAR ::isError ($prefix)) {
$errmsg = "keyword 'FOR' missing in the namespace declaration";
if (PEAR ::isError ($result)) {
$errmsg = 'unexpected comma';
* Check if a filter from the AND clause contains an equal number of '(' and ')'
* and parse filter expressions.
$errmsg = 'unexpected comma';
$errmsg = " '{htmlspecialchars($filter)}' - ')' missing ";
$errmsg = " '{htmlspecialchars($filter)}' - too many ')' ";
* Parse expressions inside the passed filter:
* 1) regex equality expressions: ?var [~~ | =~ | !~ ] REG_EX
* 2a) string equality expressions: ?var [eq | ne] "literal"@lang^^dtype.
* 2b) string equality expressions: ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
* 3) numerical expressions: e.g. (?var1 - ?var2)*4 >= 20
* In cases 1-2 parse each expression of the given filter into an array of variables.
* For each parsed expression put a place holder (e.g. ##RegEx_1##) into the filterStr.
* The RDQLEngine will then replace each place holder with the resulting boolean value
* of the corresponding expression.
* The remaining filterStr contains only numerical expressions and place holders.
* @param string $filterStr
* @return array ['string'] = string
* ['evalFilterStr'] = string
* ['regexEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (~~ | =~ | !~)
* ['regex'] = string
* ['strEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (eq | ne)
* ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
* ['value_lang'] = string
* ['value_dtype'] = string
* ['value_dtype_is_qname'] = boolean
* ['numExprVars'][] = ?VARNAME
$parsedFilter['string'] = $filterStr;
$parsedFilter['regexEqExprs'] = array ();
$parsedFilter['strEqExprs'] = array ();
$parsedFilter['numExprVars'] = array ();
// parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
$reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
foreach ($eqExprs[0 ] as $i => $eqExpr) {
if (PEAR ::isError ($result)) {
$parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1 ][$i]);
if (PEAR ::isError ($parsedFilter['regexEqExprs'][$i]['var'])) {
return $parsedFilter['regexEqExprs'][$i]['var'];
$parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2 ][$i];
$parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4 ][$i];
$filterStr = str_replace($eqExpr, " ##RegEx_$i## " , $filterStr);
// parse ?var [eq | ne] "literal"@lang^^dtype
$reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
$reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
foreach ($eqExprs[0 ] as $i => $eqExpr) {
$parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1 ][$i]);
if (PEAR ::isError ($parsedFilter['strEqExprs'][$i]['var'])) {
return $parsedFilter['strEqExprs'][$i]['var'];
$parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2 ][$i]);
$parsedFilter['strEqExprs'][$i]['value'] = trim($eqExprs[3 ][$i],"'\"");
$parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
$parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4 ][$i], 1 );
$dtype = substr($eqExprs[5 ][$i], 2 );
if (PEAR ::isError ($result)) {
$parsedFilter['strEqExprs'][$i]['value_dtype'] = $result;
$parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = true;
$parsedFilter['strEqExprs'][$i]['value_dtype'] = '';
$filterStr = str_replace($eqExprs[0 ][$i], " ##strEqExpr_$i## " , $filterStr);
// parse ?var [eq | ne] ?var
$ii = count($parsedFilter['strEqExprs']);
$reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
foreach ($eqExprs[0 ] as $i => $eqExpr) {
$parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1 ][$i]);
if (PEAR ::isError ($parsedFilter['strEqExprs'][$ii]['var'])) {
return $parsedFilter['strEqExprs'][$ii]['var'];
$parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2 ][$i]);
$parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3 ][$i]);
if (PEAR ::isError ($parsedFilter['strEqExprs'][$ii]['value'])) {
return $parsedFilter['strEqExprs'][$ii]['value'];
$parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';
$filterStr = str_replace($eqExprs[0 ][$i], " ##strEqExpr_$ii## " , $filterStr);
// parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
$reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
foreach ($eqExprs[0 ] as $i => $eqExpr) {
$parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1 ][$i]);
if (PEAR ::isError ($parsedFilter['strEqExprs'][$ii]['var'])) {
return $parsedFilter['strEqExprs'][$ii]['var'];
$parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2 ][$i]);
$parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4 ][$i], "<>");
$parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
} else if($eqExprs[5 ][$i]) {
if (PEAR ::isError ($result)) {
$parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5 ][$i];
$parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
$filterStr = str_replace($eqExprs[0 ][$i], " ##strEqExpr_$ii## " , $filterStr);
$parsedFilter['evalFilterStr'] = $filterStr;
// all that is left are numerical expressions and place holders for the above expressions
foreach ($vars[0 ] as $var) {
if (PEAR ::isError ($result)) {
$parsedFilter['numExprVars'][] = $result;
* Find all query variables used in the WHERE clause.
* @return array [] = ?VARNAME
foreach ($pattern as $v) {
if ($v['value'] && $v['value']{0 } == '?') {
$errmsg = 'pattern contains no variables';
* Replace all namespace prefixes in the pattern and constraint clause of an RDQL query
* with the namespaces declared in the USING clause and default namespaces.
// add default namespaces
// if in an RDQL query a reserved prefix (e.g. rdf: rdfs:) is used
// it will be overridden by the default namespace defined in constants.php
// replace namespace prefixes in the FROM clause
foreach ($this->parsedQuery['sources'] as $n => $source) {
if (isset ($source['is_qname'])) {
if (PEAR ::isError ($result)) {
foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
$source['value'] = preg_replace('/'. $prefix. ':/i', $uri, $source['value']);
// replace namespace prefixes in the where clause
foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
foreach ($pattern as $key => $v) {
if ($v['value'] && $v['value']{0 } != '?') {
if (isset ($v['is_qname'])) {
if (PEAR ::isError ($result)) {
$this->parsedQuery['patterns'][$n][$key]['value'] = $result;
unset ($this->parsedQuery['patterns'][$n][$key]['is_qname']);
} else { // is quoted URI (== <URI>) or Literal
if (isset ($this->parsedQuery['patterns'][$n][$key]['is_literal'])) {
if (isset ($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname'])) {
if (PEAR ::isError ($result)) {
$this->parsedQuery['patterns'][$n][$key]['l_dtype'] = $result;
unset ($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
// replace prefixes in the constraint clause
foreach ($this->parsedQuery['filters'] as $n => $filter) {
foreach ($filter['strEqExprs'] as $i => $expr) {
if ($expr['value_type'] == 'QName') {
if (PEAR ::isError ($result)) {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value'] = $result;
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';
if ($expr['value_type'] == 'URI') {
foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
} elseif ($expr['value_type'] == 'Literal') {
if (isset ($expr['value_dtype_is_qname'])) {
if (PEAR ::isError ($result)) {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype'] = $result;
unset ($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
// =============================================================================
// *************************** helper functions ********************************
// =============================================================================
* Remove whitespace-tokens from the array $this->tokens
* Check if the query string of the given clause contains an undesired ','.
* If a comma was correctly placed then remove it and clear all whitespaces.
* @param string $commaExpected
* @param string $clause_error
$errmsg = 'unexpected comma';
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
if (PEAR ::isError ($result)) {
* Check if the given token is either a variable (?var) or the first token of a URI (<URI>).
* In case of a URI this function returns the whole URI string.
* @return array ['value'] = string
if (PEAR ::isError ($token_res['value'])) {
return $token_res['value'];
$token_res['is_qname'] = true;
* Check if the given token is either a variable (?var) or the first token
* of either a URI (<URI>) or a literal ("Literal").
* In case of a literal return an array with literal properties (value, language, datatype).
* In case of a variable or a URI return only ['value'] = string.
* @return array ['value'] = string
* ['is_literal'] = boolean
if (PEAR ::isError ($result)) {
$statement_object['value'] = $result;
} elseif ($token{0 } == "'" || $token{0 } == '"') {
if (PEAR ::isError ($statement_object)) {
return $statement_object;
} elseif ($token{0 } == '<') {
if (PEAR ::isError ($statement_object['value'])) {
return $statement_object['value'];
} elseif (strpos($token, ':') !== false ) {
if (PEAR ::isError ($statement_object['value'])) {
return $statement_object['value'];
$statement_object['is_qname'] = true;
$errmsg = " '$token' - ?Variable, <URI>, QName, or \"LITERAL\" expected";
return RDF_RDQL ::raiseError (RDF_RDQL_ERROR_WHR , null , null , $errmsg);
return $statement_object;
* Check if the given token is a valid variable name (?var).
if (!isset ($match[0 ]) || $match[0 ] != $token) {
$errmsg = htmlspecialchars($token) . "' - variable name contains illegal characters";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
* Check if $token is the first token of a valid URI (<URI>) and return the whole URI string
* @param string $clause_error
if (PEAR ::isError ($result)) {
return rtrim($token, ':');
. "' - ?Variable or <URI> or QName expected";
. "' - <URI> or QName expected";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
while ($token{strlen($token)-1 } != '>' && $token != null ) {
if ($token == '(' || $token == ')' || $token == ','
|| $token == ' ' || $token == "\n" || $token == "\r"
. " ' - illegal input: '$token' - '>' missing";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
return trim($token_res, '<>');
* Check if $token is the first token of a valid literal ("LITERAL") and
* return an array with literal properties (value, language, datatype).
* @return array ['value'] = string
* ['is_literal'] = boolean
* ['l_dtype_is_qname'] = boolean
$quotation_mark = $token{0 };
$statement_object = array (
foreach ($this->tokens as $k => $token) {
if ($token != null && $token{strlen($token)-1 } == $quotation_mark) {
$token = rtrim($token, $quotation_mark);
// parse @language (^^datatype)?
} elseif (strpos($token, $quotation_mark . '@')
|| substr($token, 0 , 2 ) == $quotation_mark . '@'
$lang = substr($token, strpos($token, $quotation_mark . '@') + 2 );
$errmsg = $quotation_mark . $statement_object['value']
. $token . " - datatype expected";
if (PEAR ::isError ($statement_object['l_dtype'])) {
return $statement_object['l_dtype'];
$statement_object['l_dtype_is_qname'] = true;
$errmsg = $quotation_mark . $statement_object['value']
. $token . " - language expected";
$statement_object['l_lang'] = $lang;
$token = substr($token, 0 , strpos($token, $quotation_mark . '@'));
} elseif (strpos($token, $quotation_mark . '^^') || substr($token, 0 , 3 ) == $quotation_mark . '^^') {
$dtype = substr($token, strpos($token, $quotation_mark . '^^') + 3 );
$errmsg = $quotation_mark . $statement_object['value']
. $token . " - datatype expected";
if (PEAR ::isError ($statement_object['l_dtype'])) {
return $statement_object['l_dtype'];
$statement_object['l_dtype_is_qname'] = true;
$token = substr($token, 0 , strpos($token, $quotation_mark . '^^'));
} elseif (strpos($token, $quotation_mark)) {
$errmsg = " '$token' - illegal input";
$statement_object['value'] .= $token;
return $statement_object;
$errmsg = " quotation end mark: $quotation_mark missing";
* Check if the given token is a valid QName.
* @param string $clause_error
$errmsg = " illegal QName: '$token'";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
$errmsg = " illegal prefix in QName: '$token'";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
$errmsg = " illegal local part in QName: '$token'";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
* Check if the given token is a valid NCName.
preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $match);
if (isset ($match[0 ]) && $match[0 ] == $token) {
* Check if the given token is a valid namespace prefix.
. "' - illegal input, namespace prefix expected";
* Replace a prefix in a given QName and return a full URI.
* @param string $clause_error
$qName_parts = explode(':', $qName);
$errmsg = "undefined prefix: '" . $qName_parts[0 ] . " ' in: '$qName'";
return RDF_RDQL ::raiseError ($clause_error, null , null , $errmsg);
return $this->parsedQuery['ns'][$qName_parts[0 ]] . $qName_parts[1 ];
* Check if all variables from the SELECT clause are defined in the WHERE clause
if (PEAR ::isError ($result)) {
* Check if the given variable is defined in the WHERE clause.
if (PEAR ::isError ($allQueryVars)) {
$errmsg = " '$var' - variable must be defined in the WHERE clause";
* Throw an error if the regular expression from the AND clause is not quoted.
* @param string $filterString
* @param string $lQuotMark
* @param string $rQuotMark
$errmsg = " '$filterString' - regular expressions must be quoted";
if ($lQuotMark != $rQuotMark) {
$errmsg = " '$filterString' - quotation end mark in the regular expression missing";
} // end: Class RDQLParser
Documentation generated on Mon, 11 Mar 2019 15:39:50 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|