Source for file Type.php
Documentation is available at Type.php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
require_once 'XML/Feed/Parser/Sanitizer.php';
* Abstract class providing common methods for XML_Feed_Parser feeds.
* LICENSE: This source file is subject to version 3.0 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_0.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
* @package XML_Feed_Parser
* @author James Stewart <james@jystewart.net>
* @copyright 2005 James Stewart <james@jystewart.net>
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
* @link http://pear.php.net/package/XML_Feed_Parser/
* This abstract class provides some general methods that are likely to be
* implemented exactly the same way for all feed types.
* @package XML_Feed_Parser
* @author James Stewart <james@jystewart.net>
* @version Release: @package_version@
* Where we store our DOM object for this feed
* For iteration we'll want a count of the number of entries
* Where we store our entry objects once instantiated
* Store mappings between entry IDs and their position in the feed
* Proxy to allow use of element names as method names
* We are not going to provide methods for every entry type so this
* function will allow for a lot of mapping. We rely pretty heavily
* on this to handle our mappings between other feed types and atom.
* @param string $call - the method attempted
* @param array $arguments - arguments to that method
function __call($call, $arguments = array ())
if (isset ($this->compatMap[$call])) {
$tempMap = $this->compatMap;
/* To be helpful, we allow a case-insensitive search for this method */
if (! isset ($this->map[$call])) {
if (empty ($this->map[$call])) {
$method = 'get' . $this->map[$call][0 ];
if ($method == 'getLink') {
$offset = empty ($arguments[0 ]) ? 0 : $arguments[0 ];
$attribute = empty ($arguments[1 ]) ? 'href' : $arguments[1 ];
$params = isset ($arguments[2 ]) ? $arguments[2 ] : array ();
return $this->getLink ($offset, $attribute, $params);
return $this->$method($call, $arguments);
/* Hand the sanitizer on to every entry object held in $this->entries */
public function setSanitizer(XML_Feed_Parser_Sanitizer $sanitizer) {
foreach ($this->entries as $entry) {
$entry->setSanitizer ($sanitizer);
* Proxy to allow use of element names as attribute names
* For many elements variable-style access will be desirable. This function
* provides for that by forwarding the request to __call().
* @param string $value - the variable required
return $this->__call($value, array ());
* Utility function to help us resolve xml:base values
* We have other methods which will traverse the DOM and work out the different
* xml:base declarations we need to be aware of. We then need to combine them.
* If a declaration starts with a protocol then we restart the string. If it
* starts with a / then we add on to the domain name. Otherwise we simply tag
* it on to the end.
* @param string $base - the base to add the link to
/* Extract domain and suffix link to that */
preg_match('/^([A-Za-z]+:\/\/.*)?\/*/', $base, $results);
$firstLayer = $results[0 ];
return $firstLayer . "/" . $link;
/* Step up link to find place to be */
for ($i = 0; $i <= $count; $i++ ) {
return implode("/", $url) . "/" . $suffix;
/* Just stick it on the end */
* Determine whether we need to apply our xml:base rules
* Gets us the xml:base data and then processes that with regard
* to the link being resolved.
* Get an entry by its position in the feed, starting from zero
* As well as allowing the items to be iterated over we want to allow
* users to be able to access a specific entry. This is one of two ways of
* doing that, the other being by ID.
* @return XML_Feed_Parser_RSS1Element
if (! isset ($this->entries[$offset])) {
$entries = $this->model->getElementsByTagName ($this->itemElement);
if ($entries->length > $offset) {
$xmlBase = $entries->item ($offset)->baseURI;
/** @todo Remove this behaviour - each driver should control this better */
/** @todo Try to avoid new here */
$this->entries[$offset] = new $this->itemClass (
$entries->item ($offset), $this, $xmlBase);
if ($id = $this->entries[$offset]->id ) {
* Return a date in seconds since epoch.
* Get a date construct. We use PHP's strtotime to return it as a unix datetime, which
* is the number of seconds since 1970-01-01 00:00:00.
* @link http://php.net/strtotime
* @param string $method The name of the date construct we want
* @param array $arguments Included for compatibility with our __call usage
* @return int|false datetime
protected function getDate($method, $arguments)
$time = $this->model->getElementsByTagName ($method);
if ($time->length == 0 || empty ($time->item (0 )->nodeValue )) {
* @param string $method The name of the text construct we want
* @param array $arguments Included for compatibility with our __call usage
protected function getText($method, $arguments = array ())
$tags = $this->model->getElementsByTagName ($method);
$value = $tags->item (0 )->nodeValue;
* Apply various rules to retrieve category data.
* There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
* and Atom. Instead the usual approach is to use the dublin core namespace to
* declare categories. For example delicious use both:
* <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
* <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
* to declare a categorisation of 'PEAR'.
* We need to be sensitive to this where possible.
* @param string $call for compatibility with our overloading
* @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array
* @return string|array|false
$categories = $this->model->getElementsByTagName ('subject');
$offset = empty ($arguments[0 ]) ? 0 : $arguments[0 ];
$array = empty ($arguments[1 ]) ? false : true;
if ($categories->length <= $offset) {
foreach ($categories as $category) {
return $this->sanitizer->sanitize ($categories->item ($offset)->nodeValue );
* Count occurrences of an element
* This function will tell us how many times the element $type
* appears at this level of the feed.
* @param string $type the element we want to get a count of
protected function count($type)
if ($tags = $this->model->getElementsByTagName ($type)) {
* Part of our xml:base processing code
* We need a couple of methods to access XHTML content stored in feeds.
* This is because we dereference all xml:base references before returning
* the element. This method handles the attributes.
* @param DOMElement $node The DOM node we are iterating over
foreach ($node->attributes as $attribute) {
if ($attribute->name == 'src' or $attribute->name == 'href') {
$attribute->value = $this->addBase(htmlentities($attribute->value , NULL , 'utf-8'), $attribute);
if ($attribute->name == 'base') {
$return .= $attribute->name . '="' . htmlentities($attribute->value , NULL , 'utf-8') . '" ';
return ' ' . trim($return);
* Convert HTML entities based on the current character set.
$current_encoding = $node->ownerDocument ->encoding;
// Charset left blank to trigger autodetection
} else if (strtoupper($current_encoding) == 'ISO-8859-1') {
* Part of our xml:base processing code
* We need a couple of methods to access XHTML content stored in feeds.
* This is because we dereference all xml:base references before returning
* the element. This method recurses through the tree descending from the node
* and builds up the content string.
* @param DOMElement $node The DOM node we are processing
/* Add the opening of this node to the content */
if ($node instanceof DOMElement ) {
$content .= '<' . $node->tagName .
if ($node->hasChildNodes ()) {
foreach ($node->childNodes as $child) {
if ($node instanceof DOMText ) {
/* Add the closing of this node to the content */
if ($node instanceof DOMElement ) {
$content .= '</' . $node->tagName . '>';
* Get content from RSS feeds (atom has its own implementation)
* The official way to include full content in an RSS1 entry is to use
* the content module's element 'encoded', and RSS2 feeds often duplicate that.
* Often, however, the 'description' element is used instead. We will offer that
* as a fallback. Atom uses its own approach and overrides this method.
$options = array ('encoded', 'description');
foreach ($options as $element) {
$test = $this->model->getElementsByTagName ($element);
if ($test->length == 0 ) {
if ($test->item (0 )->hasChildNodes ()) {
foreach ($test->item (0 )->childNodes as $child) {
if ($child instanceof DOMText ) {
$value .= $this->sanitizer->sanitize ($child->nodeValue );
$value .= $simple->asXML ();
} else if ($test->length > 0 ) {
return $this->sanitizer->sanitize ($test->item (0 )->nodeValue );
* Checks if this element has a particular child element.
function hasKey($name, $offset = 0 )
$search = $this->model->getElementsByTagName ($name);
return $search->length > $offset;
* Return an XML serialization of the feed, should it be required. Most
* users however, will already have a serialization that they used when
* instantiating the object.
* @return string XML serialization of element
* Get directory holding RNG schemas. Method is based on that
* found in Contact_AddressBook.
* @return string PEAR data directory.
require_once 'PEAR/Config.php';
$config = new PEAR_Config;
return $config->get ('data_dir') . '/XML_Feed_Parser/schemas';
$dir = self ::getSchemaDir ();
$path = $dir . '/' . $this->relax;
return $this->model->relaxNGValidate ($path);
Documentation generated on Mon, 11 Mar 2019 15:47:22 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|