Source for file Parser.php
Documentation is available at Parser.php
// ----------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------
* This class reads RDF data from files or URIs and generates models out of it. All valid
* RDF XML syntaxes defined by the W3C in RDF/XML Syntax Specification (Revised)
* - W3C Working Draft 10 October 2003
* (http://www.w3.org/TR/2003/WD-rdf-syntax-grammar-20031010/) are supported.
* The parser is based on the PHP version of repat
* (http://phpxmlclasses.sourceforge.net/show_doc.php?class=class_rdf_parser.html)
* by Luis Argerich (lrargerich@yahoo.com).
* @author Luis Argerich <lrargerich@yahoo.com>,
* Chris Bizer <chris@bizer.de>,
* Radoslaw Oldakowski <radol@gmx.de>
* Daniel Westphal <mail@d-westphal.de>
$e['parent'] = array (); // Parent is a blank Array
$e['has_property_atributes'] = 0;
$e['has_member_attributes'] = 0;
$e['element_base_uri'] = '';
* @param string &$destination
$destination['parent'] = $source;
$destination['state'] = $source['state'];
$destination['xml_lang'] = $source['xml_lang'];
$destination['element_base_uri'] = $source['element_base_uri'];
if (isset ($e['parent'])) {
if ($e['parent']['xml_lang'] != $e['xml_lang']) {
$e['has_property_attributes'] = 0;
$e['has_member_attributes'] = 0;
$e['element_base_uri'] = '';
if (isset ($e['parent'])) {
* @param string $local_name
* @param string $local_name
* @param string $local_name
if ($local_name{0 } == '_') {
$ordinal = substr($local_name, 1 ) + 1 ;
return ($ordinal > 0 ) ? $ordinal : 0;
* @param string $local_name
function _is_forbidden_rdf_property_attribute ($local_name)
return ($local_name == RDF)
* @param string $local_name
|| ($local_name{0 } == '_'
* @param string $local_name
return ($local_name == RDF)
* @param string $local_name
|| ($local_name{0 } == '_'
* @param string $local_name
return ($local_name == RDF)
if ($uri && $this->_is_alpha($uri{$uri_p})) {
|| ($uri{$uri_p} == '.'))
$result = ($uri{$uri_p} == ':');
* This function returns an associative array containing any of the various components of the URL that are present. This includes the
* query - after the question mark ?
* fragment - after the hashmark #
* @param string &$authority
* @param string &$fragment
function _parse_uri ($uri, $buffer, &$scheme, &$authority, &$path, &$query, &$fragment)
if (isset ($parsed['scheme'])) {
$scheme = $parsed['scheme'];
if (isset ($parsed['host'])) {
if (isset ($parsed['host'])) {
$authority = $parsed['host'];
if (isset ($parsed['path'])) {
if (isset ($parsed['query'])) {
$query = $parsed['query'];
if (isset ($parsed['fragment'])) {
$fragment = $parsed['fragment'];
* @param string $base_uri
* @param string $reference_uri
if ($reference_uri == '') {
return ($buffer = preg_replace("/\#[^\/\\\]*$/", "", $base_uri));
$this->_parse_uri ($reference_uri,
$this->_parse_uri ($base_uri,
if ($reference_scheme == '' && $reference_authority == ''
&& $reference_path == '' && $reference_query == '') {
if ($reference_fragment != '') {
if ($base_path == '' || $base_path == '/' || $base_path == '\\') {
if (isset ($this->rdf_parser["document_base_uri"])) {
// CB: Changed for base URI
if (!($c == '#' || $c == ':' || $c == '/' || $c == '\\')) {
$buffer .= $reference_fragment;
} else if ($reference_scheme != '') {
$buffer = $reference_uri;
$result_scheme = $base_scheme;
if ($reference_authority != '') {
$result_authority = $reference_authority;
$result_authority = $base_authority;
if ($reference_path != '') {
if ($reference_path{0 } == '/' || $reference_path{0 } == '\\') {
if ($reference_path{1 } == '/' || $reference_path{1 } == '\\') {
$result_path = $reference_path;
$result_path = $reference_path;
} elseif (substr($reference_path, 0 , 3 ) == '../'
|| substr($reference_path, 0 , 3 ) == '..\\'
$slash = $reference_path{2 };
&& (substr($reference_path, 0 , 3 ) == '../'
|| substr($reference_path, 0 , 3 ) == '..\\')
$base_path = preg_replace("/((\/)|(\\\))[^\/\\\]*$/", '', $base_path);
$base_path = preg_replace("/((\/)|(\\\))[^\/\\\]*$/", '', $base_path);
$reference_path = substr($reference_path, 3 );
$result_path = $base_path . $slash . $reference_path;
$result_path = preg_replace("/[^\/\\\]*$/", $reference_path, $base_path, 1 );
$result_path = '/' . $reference_path;
if ($result_scheme != '') {
$buffer = $result_scheme;
if ($result_authority != '') {
$buffer .= $result_authority;
if ($result_path != '') {
if ($reference_query != '') {
$buffer .= $reference_query;
if ($reference_fragment != '') {
$buffer .= $reference_fragment;
* IDs which contain CombiningChars or Extenders
* (see http://www.w3.org/TR/REC-xml-names/#NT-NCName) are assumed to be invalid.
* If you want to use IDs containing these characters, you can turn off
* validation by setting the constant VALIDATE_IDS to FALSE (see constants.php).
if ($this->_is_alpha($id{0 }) || $id{0 } == '_') {
while ($result != false && ++ $i < $len) {
if (!($this->_is_alnum($id{$i}) || $id{$i} == '.'
|| $id{$i} == '-' || $id{$i} == '_')
'illegal ID, nodeID or bagID attribute value');
if (PEAR ::isError ($result)) {
* @param string &$namespace_uri
* @param string &$local_name
function _split_name($name, &$buffer, &$namespace_uri, &$local_name)
if (strstr ($buffer, RDF_NAMESPACE_SEPARATOR_CHAR )) {
$cosas = explode (RDF_NAMESPACE_SEPARATOR_CHAR , $buffer);
$namespace_uri = $cosas[0 ];
if (($buffer{ 0 } == 'x') && ($buffer{ 1 } == 'm')
&& ($buffer{ 2 } == 'l') && ($buffer{ 3 } == ':')
$local_name = substr($buffer, 4 );
* @param string $subject_type
* @param string $predicate
* @param string $object_type
* @param string $xml_lang
* @param string $statements
* @param string $statement_id
$object_type, $object, $xml_lang, $bag_id, $statements, $statement_id, $datatype)
$statement_id_buffer = '';
$xml_lang = $this->rdf_parser['document_xml_lang'];
if (PEAR ::isError ($result)) {
$statement_id = $statement_id_buffer;
$predicate_buffer = 'RDF_NAMESPACE_URI_' . $statements;
// rdf:type = rdf:Statement
* @param string $subject_type
* @param string $attributes
* @param string $xml_lang
* @param string $statements
$xml_lang, $bag_id, $statements)
$attribute_namespace_uri = '';
$attribute_local_name = '';
for ($i = 0; isset ($attributes[ $i ]); $i += 2 ) {
$attribute_namespace_uri,
$attribute_value = $attributes[ $i + 1 ];
$predicate = $attribute_namespace_uri;
$predicate .= $attribute_local_name;
} else if (($ordinal = $this->_is_rdf_ordinal($attribute_local_name)) != 0 ) {
} else if (($attribute_local_name != RDF_ABOUT)
&& ($attribute_local_name != RDF)
&& ($attribute_local_name != RDF_ID)
&& ($attribute_local_name != RDF_LI)
&& ($attribute_local_name != RDF_NIL)
if ($attribute_local_name == 'base') {
$this->rdf_parser['top']['element_base_uri'] = $attribute_value;
} else if ($attribute_namespace_uri) {
// is it required that property attributes be in an explicit namespace?
* @param string $namespace_uri
* @param string $local_name
* @param string $attributes
foreach($attributes as $atkey => $atvalue) {
$attribute_namespace_uri = '';
$attribute_local_name = '';
$this->rdf_parser['top']['has_property_attributes'] = false;
$this->rdf_parser['top']['has_member_attributes'] = false;
$errmsg = 'unknown or out of context rdf node element: ' . $local_name;
$this->_report_warning ($errmsg);
// examine each attribute for the standard RDF "keywords"
for ($i = 0; isset ($attributes[$i]); $i += 2 ) {
$attribute_namespace_uri,
$attribute_value = $attributes[ $i + 1 ];
// if the attribute is not in any namespace
// or the attribute is in the RDF namespace
if (($attribute_namespace_uri == '')
if ($attribute_local_name == RDF_ID) {
} else if ($attribute_local_name == RDF_ABOUT) {
$about = '_' . $attribute_value;
$node_id = $attribute_value;
$errmsg = 'aboutEach has been removed from the RDF specifications';
$errmsg = 'aboutEachPrefix has been removed from the RDF specifications';
$bag_id = $attribute_value;
$datatype = $attribute_value;
$this->rdf_parser['top']['has_property_attributes'] = true;
$this->rdf_parser['top']['has_property_attributes'] = true;
$this->rdf_parser['top']['has_member_attributes'] = true;
$this->rdf_parser['top']['has_property_attributes'] = true;
$errmsg = 'unknown or out of context rdf attribute: ' . $attribute_local_name;
if ($this->_is_forbidden_rdf_property_attribute ($attribute_local_name)) {
$this->_report_warning ($errmsg);
$this->rdf_parser['top']['xml_lang'] = $attribute_value;
} elseif ($attribute_local_name == 'base') {
$this->rdf_parser['top']['element_base_uri'] = $attribute_value;
} else if ($attribute_namespace_uri) {
$this->rdf_parser['top']['has_property_attributes'] = true;
// if no subjects were found, generate one.
if ($subjects_found == 0 ) {
} else if ($subjects_found > 1 ) {
return RDF::raiseError (RDF_ERROR, null , null , 'ID, about and nodeID are mutually exclusive');
if (PEAR ::isError ($result)) {
// if the subject is empty, assign it the document uri
// only report the type for non-rdf:Description elements.
// if this element is the child of some property,
// report the appropriate statement.
$parent['parent']['subject_type'],
$parent['parent']['subject'],
$parent['parent']['bag_id'],
$parent['parent']['statements'],
if ($this->rdf_parser['top']['has_property_attributes']) {
* @param string &$namespace_uri
* @param string &$local_name
* @param string &$attributes
foreach($attributes as $atkey => $atvalue) {
$attribute_namespace_uri = '';
$attribute_local_name = '';
$errmsg = 'unknown or out of context rdf property element: ' . $local_name;
$this->_report_warning ($errmsg);
$buffer = $namespace_uri;
$buffer .= '_' . $this->rdf_parser['top']['ordinal'];
$this->rdf_parser['top']['has_property_attributes'] = false;
$this->rdf_parser['top']['has_member_attributes'] = false;
for ($i = 0; isset ($attributes[$i]); $i += 2 ) {
$attribute_namespace_uri,
$attribute_value = $attributes[$i + 1 ];
// if the attribute is not in any namespace
// or the attribute is in the RDF namespace
if (($attribute_namespace_uri == '')
if (($attribute_local_name == RDF_ID)) {
$statement_id = $attribute_value;
$parse_type = $attribute_value;
$resource = $attribute_value;
$node_id = $attribute_value;
$bag_id = $attribute_value;
$datatype = $attribute_value;
$this->rdf_parser['top']['datatype'] = $attribute_value;
$this->rdf_parser['top']['has_property_attributes'] = true;
$this->_report_warning ('unknown rdf attribute: ' . $attribute_local_name);
$this->rdf_parser['top']['xml_lang'] = $attribute_value;
} elseif ($attribute_local_name == 'base') {
$this->rdf_parser['top']['element_base_uri'] = $attribute_value;
} else if ($attribute_namespace_uri) {
$this->rdf_parser['top']['has_property_attributes'] = true;
$this->rdf_parser['top']['statement_id'] = $buffer;
if (PEAR ::isError ($result)) {
'nodeID and resource are mutually exclusive');
$this->rdf_parser['top']['parent']['subject_type'],
$this->rdf_parser['top']['parent']['subject_type'],
'property elements with rdf:parseType do not allow rdf:resource');
$this->_report_warning ('property elements with rdf:parseType do not allow rdf:bagID');
if ($this->rdf_parser['top']['has_property_attributes']) {
'property elements with rdf:parseType do not allow property attributes');
// since we are sure that this is now a resource property we can report it
$this->rdf_parser['top']['parent']['subject_type'],
$this->rdf_parser['top']['parent']['subject_type'],
$this->rdf_parser['top']['collection']['first_blank_node_id'] = $buffer;
$this->rdf_parser['top']['parent']['subject_type'],
} else if ($resource || $bag_id
|| $this->rdf_parser['top']['has_property_attributes']
// since we are sure that this is now a resource property we can report it.
$this->rdf_parser['top']['parent']['subject_type'],
); // should we allow IDs?
if ($this->rdf_parser['top']['has_property_attributes']) {
* @param string &$namespace_uri
* @param string &$local_name
* @param string &$attributes
foreach($attributes as $atkey => $atvalue) {
for( $i = 0; isset ($attributes[$i]); $i += 2 ) {
$attribute_namespace_uri,
$attribute_value = $attributes[ $i + 1 ];
if( $attribute_namespace_uri == '' || $attribute_namespace_uri == RDF_NAMESPACE_URI) {
} elseif ( $attribute_local_name == RDF_NODEID ) {
$this->rdf_parser['top']['parent']['collection']['object_label'][] = $id_buffer;
$namespace_uri. $local_name,
* @param string &$namespace_uri
* @param string &$local_name
* @param string &$attributes
foreach($attributes as $atkey => $atvalue) {
for ($i = 0; isset ($attributes[$i]); $i += 2 ) {
$attribute_namespace_uri,
$attribute_value = $attributes[ $i + 1 ];
$element .= '=\"' . $attribute_value . '\"';
$this->rdf_parser['xml_literal']['buffer'] .= $element;
if ($namespace_uri && isset ($this->rdf_parser['default_namespace'])
&& $namespace_uri != $this->rdf_parser['default_namespace']
$element .= $this->rdf_parser['namespaces'][$namespace_uri] . ':';
$element .= $local_name . '>';
$this->rdf_parser['xml_literal']['buffer'] .= $element;
$depth = $this->rdf_parser['xml_literal']['depth']--;
if (isset ($this->rdf_parser['xml_literal']['declared_ns'])) {
foreach ($this->rdf_parser['xml_literal']['declared_ns'] as $prefix => $_depth) {
unset ($this->rdf_parser['xml_literal']['declared_ns'][$prefix]);
* @param string $namespace_uri
* @param string $local_name
&& $namespace_uri == $this->rdf_parser['default_namespace']
if (!isset ($this->rdf_parser['xml_literal']['declared_ns']['_DEFAULT_'])
$name .= ' xmlns=' . '\"' . $namespace_uri . '\"';
$this->rdf_parser['xml_literal']['declared_ns']['_DEFAULT_'] =
$ns_prefix = $this->rdf_parser['namespaces'][$namespace_uri];
$name .= $ns_prefix . ':' . $local_name;
if (!isset ($this->rdf_parser['xml_literal']['declared_ns'][$ns_prefix])
$name .= " xmlns:$ns_prefix=" . '\"' . $namespace_uri . '\"';
$this->rdf_parser['xml_literal']['declared_ns'][$ns_prefix] =
if (isset ($this->rdf_parser['top']['collection'])) {
$subject = $this->rdf_parser['top']['collection']['first_blank_node_id'];
for ($i = 0; isset ($this->rdf_parser['top']['collection']['object_label'][$i]); $i++ ) {
$this->rdf_parser['top']['collection']['object_type'][$i],
$this->rdf_parser['top']['collection']['object_label'][$i],
if (!isset ($this->rdf_parser['top']['collection']['object_label'][$i + 1 ])) {
* @param string $attributes
// set base_uri, if possible
foreach ($attributes as $key => $value)
if (!($c == '#' || $c == ':' || $c == '/' || $c == '\\')) {
$this->rdf_parser['normalized_base_uri'] = $value . '#';
$this->rdf_parser['normalized_base_uri'] = $value;
} elseif ($key == XML_NAMESPACE_URI . NAMESPACE_SEPARATOR_CHAR . 'lang') {
if (PEAR ::isError ($result)) {
if (PEAR ::isError ($result)) {
/* if we're in a property with an unknown object type and we encounter
an element, the object must be a resource. */
if (PEAR ::isError ($result)) {
$this->_report_warning ('no markup allowed in literals');
$this->_report_warning ('only one element allowed inside a property element');
$this->_report_warning ('no content allowed in property with rdf:resource, rdf:bagID, or property attributes');
* property elements with text only as content set the state to
* RDF_IN_PROPERTY_LITERAL. as character data is received from expat,
* it is saved in a buffer and reported when the end tag is
if (!isset ($this->rdf_parser['top']['statement_id'])) {
if (!isset ($this->rdf_parser['top']['parent']['subject_type'])) {
$this->rdf_parser['top']['parent']['subject_type'] = '';
if (!isset ($this->rdf_parser['top']['parent']['subject'])) {
$this->rdf_parser['top']['parent']['subject'] = '';
if (!isset ($this->rdf_parser['top']['parent']['bag_id'])) {
$this->rdf_parser['top']['parent']['bag_id'] = '';
if (!isset ($this->rdf_parser['top']['parent']['statements'])) {
$this->rdf_parser['top']['parent']['statements'] = 0;
if (!isset ($this->rdf_parser["top"]["predicate"])) {
if (!isset ($this->rdf_parser['top']['datatype'])) {
if (!isset ($this->rdf_parser['top']['ordinal'])) {
$this->rdf_parser['top']['parent']['subject_type'],
// $search = array((0) => chr(10), (1) => chr(13), (2) => chr(9));
// $replace = array((0) => '\n' , (1) => '\r' , (2) => '\t');
// $this->rdf_parser["xml_literal"]["buffer"]
// = str_replace($search, $replace, $this->rdf_parser["xml_literal"]["buffer"]);
/* look for non-whitespace */
for ($i = 0; (($i < $len) && (preg_match("/[ \n\t]/", $s{ $i }))); $i++ );
/* if we found non-whitespace, this is a literal */
* Adds a new statement to the model
* This method is called by generateModel().
* @param string &$user_data
* @param string $subject_type
* @param string $predicate
* @param string $object_type
* @param string $xml_lang
* @return object Model_Memory
$predicate, $ordinal, $object_type, $object, $xml_lang, $datatype)
if (PEAR ::isError ($objsub)) {
if (PEAR ::isError ($objpred)) {
if (PEAR ::isError ($objobj)) {
if (PEAR ::isError ($objobj)) {
$objobj->setDatatype ($datatype);
} elseif ($xml_lang != "") {
$objobj->setLanguage ($xml_lang);
if (PEAR ::isError ($statement)) {
// add statement to model
return $this->model->add ($statement);
* Generates a new Model_Memory from a URI, a file or from memory.
* If you want to parse an RDF document, pass the URI or location in the filesystem
* of the RDF document. You can also pass RDF code directly to the function. If you pass
* RDF code directly to the parser and there is no xml:base included, you should set
* the base URI manually using the optional second parameter $rdfBaseURI.
* Make sure that there are proper namespace declarations in your input document.
* @param boolean $rdfBaseURI
* @return object Model_Memory
// Check if $base is a URI or filename or a string containing RDF code.
// $base is URL or filename
$input = @fopen($base, 'r');
$errmsg = " RDF Parser: Could not open File: $base. Stopped parsing.";
$buf = fread($input, 512 );
$errmsg = 'XML-parser-error ' . $err_code . ' in Line ' . $line . ' of input document.';
if ($rdfBaseURI!==false ) {
$errmsg = '(class: parser; method: generateModel): XML-parser-error ' .
$err_code . ' in Line ' . $line . ' of input document.';
// base_uri could have changed while parsing
* @param string $encoding
'_start_element_handler',
'_character_data_handler'
'_start_ns_declaration_handler'
* @param resource &$parser
* @param string $ns_prefix
$this->rdf_parser['namespaces'][$ns_uri] = $ns_prefix;
* @param string $is_final
if (!($c == '#' || $c == ':' || $c == '/' || $c == '\\')) {
$this->rdf_parser['normalized_base_uri'] = $base . '#';
if ($this->rdf_parser['top']['element_base_uri']) {
return $this->rdf_parser['top']['element_base_uri'];
Documentation generated on Mon, 11 Mar 2019 15:39:29 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|