Source for file BBCodeParser2.php
Documentation is available at BBCodeParser2.php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2003 The PHP Group |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.02 of the PHP license, |
// | that is bundled with this package in the file LICENSE, and is |
// | available at through the world-wide-web at |
// | http://www.php.net/license/2_02.txt. |
// | If you did not receive a copy of the PHP license and are unable to |
// | obtain it through the world-wide-web, please send a note to |
// | license@php.net so we can mail you a copy immediately. |
// +----------------------------------------------------------------------+
// | Author: Stijn de Reede <sjr@gmx.co.uk> |
// +----------------------------------------------------------------------+
* @package HTML_BBCodeParser2
* @author Stijn de Reede <sjr@gmx.co.uk>
* This is a parser to replace UBB style tags with their html equivalents. It
* does not simply do some regex calls, but is a complete stack-based
* parse engine. This ensures that all tags are properly nested; if not,
* extra tags are added to maintain the nesting. This parser should only produce
* xhtml 1.0 compliant code. All tags are validated and so are all their attributes.
* It should be easy to extend this parser with your own tags, see the _definedTags
* format description below.
* $parser = new HTML_BBCodeParser2($options = array(...));
* $parser->setText('normal [b]bold[/b] and normal again');
* echo $parser->getParsed();
* $parser = new HTML_BBCodeParser2($options = array(...));
* echo $parser->qparse('normal [b]bold[/b] and normal again');
* echo HTML_BBCodeParser2::staticQparse('normal [b]bold[/b] and normal again');
* Setting the options from the ini file:
* $config = parse_ini_file('BBCodeParser.ini', true);
* $options = $config['HTML_BBCodeParser2'];
* The _definedTags variables should be in this format:
* array('tag' // the actual tag used
* => array('htmlopen' => 'open', // the opening tag in html
* 'htmlclose' => 'close', // the closing tag in html,
* can be set to an empty string
* if no closing tag is present
* 'allowed' => 'allow', // tags that are allowed inside
* this tag. Values can be all
* or none, or either of these
* two, followed by a ^ and then
* followed by a comma separated
* list of exceptions on this
* 'attributes' => array() // an associative array containing
* the tag attributes and their
* printf() html equivalents, to
* which the first argument is
* the value, and the second is
* the quote. Default would be
* 'attr' => 'attr=%2$s%1$s%2$s'
* An array of tags parsed by the engine, should be overwritten by filters
var $_definedTags = array ();
* A string containing the input
* A string containing the preparsed input
* An array of tags and texts built from the input text
var $_tagArray = array ();
* A string containing the parsed version of the text
* An array of options, filled by an ini file or through the constructor
'quotestyle' => 'double',
* An array of filters used for parsing
* Constructor, initialises the options and filters
* Sets options to properly escape the tag
* characters in preg_replace() etc.
* All the filters in the options are initialised and their defined tags
* are copied into the private variable _definedTags.
* @param array options to use, can be left out
* @author Stijn de Reede <sjr@gmx.co.uk>
// set the options passed as an argument
foreach ($options as $k => $v ) {
$this->_options[$k] = $v;
// Derive regex-safe versions of the open/close tag delimiters. They are
// stored as 'open_esc' / 'close_esc' and interpolated into the preg
// patterns used when tags are built.
$preg_escape = '\^$.[]|()?*+{}';
// strpos() returns the match offset, which is 0 (falsy) when the delimiter
// is the first character of $preg_escape (the backslash). Compare against
// false explicitly so that delimiter is escaped as well.
if ($this->_options['open'] != '' && strpos($preg_escape, $this->_options['open']) !== false) {
    $this->_options['open_esc'] = "\\". $this->_options['open'];
} else {
    // Not a regex metacharacter (or empty): use the delimiter verbatim.
    $this->_options['open_esc'] = $this->_options['open'];
}
if ($this->_options['close'] != '' && strpos($preg_escape, $this->_options['close']) !== false) {
    $this->_options['close_esc'] = "\\". $this->_options['close'];
} else {
    // Not a regex metacharacter (or empty): use the delimiter verbatim.
    $this->_options['close_esc'] = $this->_options['close'];
}
// set the options back so that child classes can use them */
$baseoptions = $this->_options;
// return if this is a subclass
// extract the definedTags from subclasses */
* @param string option name
* @param mixed option value
* @author Lorenzo Alberton <l.alberton@quipo.it>
$this->_options[$name] = $value;
* @author Lorenzo Alberton <l.alberton@quipo.it>
$class = 'HTML_BBCodeParser2_Filter_'. $filter;
@include_once 'HTML/BBCodeParser2/Filter/'. $filter. '.php';
throw new InvalidArgumentException (" Failed to load filter $filter" );
$this->_filters[$filter] = new $class;
$this->_filters[$filter]->_definedTags
* Remove an existing filter
* @author Lorenzo Alberton <l.alberton@quipo.it>
unset ($this->_filters[$filter]);
// also remove the related $this->_definedTags for this filter,
$this->_definedTags = array ();
foreach (array_keys($this->_filters) as $filter) {
$this->_filters[$filter]->_definedTags
* @param mixed (array or string)
* @return boolean true if all ok, false if not.
* @author Lorenzo Alberton <l.alberton@quipo.it>
if (strpos($filters, ',') !== false ) {
$filters = array ($filters);
foreach ($filters as $filter) {
* Executes statements before the actual array building starts
* This method should be overwritten in a filter if you want to do
* something before the parsing process starts. This can be useful to
* allow certain short alternative tags which then can be converted into
* proper tags with preg_replace() calls.
* The main class walks through all the filters and calls this
* method. The filters should modify their private $_preparsed
* variable, with input from $_text.
* @author Stijn de Reede <sjr@gmx.co.uk>
// default: assign _text to _preparsed, to be overwritten by filters
$this->_preparsed = $this->_text;
// return if this is a subclass
// walk through the filters and execute _preparse
foreach ($this->_filters as $filter) {
$filter->setText ($this->_preparsed);
$this->_preparsed = $filter->getPreparsed ();
* Builds the tag array from the preparsed input string $_preparsed
* An array consisting of tag and text elements is constructed from the
* $_preparsed variable. The method uses _buildTag() to check if a tag is
* valid and to build the actual tag to be added to the tag array.
* TODO: - rewrite whole method, as this one is old and probably slow
* - see if a recursive method would be better than an iterative one
* @author Stijn de Reede <sjr@gmx.co.uk>
function _buildTagArray ()
$this->_tagArray = array ();
$str = $this->_preparsed;
while (($strPos < $strLength)) {
$openPos = strpos($str, $this->_options['open'], $strPos);
if ($openPos === false ) {
$nextOpenPos = $strLength;
if ($openPos + 1 > $strLength) {
$nextOpenPos = $strLength;
$nextOpenPos = strpos($str, $this->_options['open'], $openPos + 1 );
if ($nextOpenPos === false ) {
$nextOpenPos = $strLength;
$closePos = strpos($str, $this->_options['close'], $strPos);
if ($closePos === false ) {
$closePos = $strLength + 1;
if ($openPos == $strPos) {
if (($nextOpenPos < $closePos)) {
// new open tag before closing tag: treat as text
$tag['text'] = substr($str, $strPos, $nextOpenPos - $strPos);
$newTag = $this->_buildTag (substr($str, $strPos, $closePos - $strPos + 1 ));
if (($newTag !== false )) {
// no valid tag after all
$tag['text'] = substr($str, $strPos, $closePos - $strPos + 1 );
$tag['text'] = substr($str, $strPos, $openPos - $strPos);
// join 2 following text elements
if ($tag['type'] === 0 && isset ($prev) && $prev['type'] === 0 ) {
$tag['text'] = $prev['text']. $tag['text'];
$this->_tagArray[] = $tag;
* Builds a tag from the input string
* This method builds a tag array based on the string it got as an
* argument. If the tag is invalid, <false> is returned. The tag
* attributes are extracted from the string and stored in the tag
* array as an associative array.
* @param string string to build tag from
* @return array|false tag in array format, or false if the tag is invalid
* @author Stijn de Reede <sjr@gmx.co.uk>
$tag = array ('text' => $str, 'attributes' => array ());
if (substr($str, 1 , 1 ) == '/') { // closing tag
return false; // nope, it's not valid
return false; // nope, it's not valid
// tnx to Onno for the regex
// split the tag with arguments and all
$oe = $this->_options['open_esc'];
$ce = $this->_options['close_esc'];
if (preg_match(" !$oe([a-z0-9]+)[^$ce]*$ce!i" , $str, $tagArray) == 0 ) {
return false; // nope, it's not valid
// tnx to Onno for the regex
// validate the arguments
$attributeArray = array ();
$regex = " ![\s$oe]([a-z0-9]+)=(\"[^\s$ce]+\"|[^\s$ce]";
if ($tag['tag'] != 'url') {
$regex .= " +)(?=[\s$ce])!i";
foreach ($attributeArray as $attribute) {
if ($attribute[2 ][0 ] == '"' && $attribute[2 ][strlen($attribute[2 ])-1 ] == '"') {
$tag['attributes'][$attNam] = substr($attribute[2 ], 1 , -1 );
$tag['attributes'][$attNam] = $attribute[2 ];
* Validates the tag array, regarding the allowed tags
* While looping through the tag array, two following text tags are
* joined, and it is checked that the tag is allowed inside the last opened tag.
* By remembering what tags have been opened it is checked that
* there is correct (xml compliant) nesting.
* In the end all still opened tags are closed.
* @author Stijn de Reede <sjr@gmx.co.uk>, Seth Price <seth@pricepages.org>
function _validateTagArray ()
foreach ($this->_tagArray as $tag) {
$prevTag = end($newTagArray);
if (($child = $this->_childNeeded (end($openTags), 'text')) &&
* No idea what to do in this case: A child is needed, but
* no valid one is returned. We'll ignore it here and live
* with it until someone reports a valid bug.
if (trim($tag['text']) == '') {
//just an empty indentation or newline without value?
$openTags[] = $child['tag'];
if ($prevTag['type'] === 0 ) {
$tag['text'] = $prevTag['text']. $tag['text'];
if (!$this->_isAllowed (end($openTags), $tag['tag']) ||
($parent = $this->_parentNeeded (end($openTags), $tag['tag'])) === true ||
($child = $this->_childNeeded (end($openTags), $tag['tag'])) === true ) {
if ($prevTag['type'] === 0 ) {
$tag['text'] = $prevTag['text']. $tag['text'];
* Avoid use of parent if we can help it. If we are
* trying to insert a new parent, but the current tag is
* the same as the previous tag, then assume that the
* previous tag structure is valid, and add this tag as
* a sibling. To add as a sibling, we need to close the current tag.
if ($tag['tag'] == end($openTags)){
$newTagArray[] = $this->_buildTag ('[/'. $tag['tag']. ']');
$newTagArray[] = $parent;
$openTags[] = $parent['tag'];
$openTags[] = $child['tag'];
$openTags[] = $tag['tag'];
if (($tag['tag'] == end($openTags) || $this->_isAllowed (end($openTags), $tag['tag']))) {
while (end($openTags) != $tag['tag']) {
$newTagArray[] = $this->_buildTag ('[/'. end($openTags). ']');
$tmpOpenTags[] = end($openTags);
/* why is this here? it just seems to break things
* (nested lists where closing tags need to be
while (end($tmpOpenTags)) {
$tmpTag = $this->_buildTag('['.end($tmpOpenTags).']');
$newTagArray[] = $tmpTag;
$openTags[] = $tmpTag['tag'];
if ($prevTag['type'] === 0 ) {
$tag['text'] = $prevTag['text']. $tag['text'];
$newTagArray[] = $this->_buildTag ('[/'. end($openTags). ']');
$this->_tagArray = $newTagArray;
* Checks to see if a parent is needed
* Checks to see if the current $in tag has an appropriate parent. If it
* does, then it returns false. If a parent is needed, then it returns the
* first tag in the list to add to the stack.
* @param string name of the tag that is on the outside
* @param string name of the tag that is on the inside
* @return boolean false if not needed, tag if needed, true if out
* @see _validateTagArray()
* @author Seth Price <seth@pricepages.org>
function _parentNeeded ($out, $in)
if (!isset ($this->_definedTags[$in]['parent']) ||
($this->_definedTags[$in]['parent'] == 'all')
$ar = explode('^', $this->_definedTags[$in]['parent']);
//Create a tag from the first one on the list
return $this->_buildTag ('['. $tags[0 ]. ']');
if ($ar[0 ] == 'all' && $out && !in_array($out, $tags)) {
// Tag is needed, we don't know which one. We could make something up,
// but it would be so random, I think that it would be worthless.
* Checks to see if a child is needed
* Checks to see if the current $out tag has an appropriate child. If it
* does, then it returns false. If a child is needed, then it returns the
* first tag in the list to add to the stack.
* @param string name of the tag that is on the outside
* @param string name of the tag that is on the inside
* @return boolean false if not needed, tag if needed, true if out
* @see _validateTagArray()
* @author Seth Price <seth@pricepages.org>
function _childNeeded ($out, $in)
if (!isset ($this->_definedTags[$out]['child']) ||
($this->_definedTags[$out]['child'] == 'all')
$ar = explode('^', $this->_definedTags[$out]['child']);
//Create a tag from the first one on the list
return $this->_buildTag ('['. $tags[0 ]. ']');
if ($ar[0 ] == 'all' && $in && !in_array($in, $tags)) {
// Tag is needed, we don't know which one. We could make something up,
// but it would be so random, I think that it would be worthless.
* Checks to see if a tag is allowed inside another tag
* The allowed tags are extracted from the private _definedTags array.
* @param string name of the tag that is on the outside
* @param string name of the tag that is on the inside
* @return boolean true if the tag is allowed, false otherwise
* @see _validateTagArray()
* @author Stijn de Reede <sjr@gmx.co.uk>
function _isAllowed ($out, $in)
if (!$out || ($this->_definedTags[$out]['allowed'] == 'all')) {
if ($this->_definedTags[$out]['allowed'] == 'none') {
$ar = explode('^', $this->_definedTags[$out]['allowed']);
if ($ar[0 ] == 'none' && in_array($in, $tags)) {
if ($ar[0 ] == 'all' && in_array($in, $tags)) {
* Builds a parsed string based on the tag array
* The correct html and attribute values are extracted from the private _definedTags array.
* @author Stijn de Reede <sjr@gmx.co.uk>
function _buildParsedString ()
foreach ($this->_tagArray as $tag) {
$this->_parsed .= $tag['text'];
$this->_parsed .= '<'. $this->_definedTags[$tag['tag']]['htmlopen'];
if ($this->_options['quotestyle'] == 'single') $q = "'";
if ($this->_options['quotestyle'] == 'double') $q = '"';
foreach ($tag['attributes'] as $a => $v) {
//prevent XSS attacks. IMHO this is not enough, though...
//@see http://pear.php.net/bugs/bug.php?id=5609
$v = preg_replace('#(script|about|applet|activex|chrome):#is', "\\1:", $v);
if (($this->_options['quotewhat'] == 'nothing') ||
(($this->_options['quotewhat'] == 'strings') && is_numeric($v))
$this->_parsed .= ' '. sprintf($this->_definedTags[$tag['tag']]['attributes'][$a], $v, '');
$this->_parsed .= ' '. sprintf($this->_definedTags[$tag['tag']]['attributes'][$a], $v, $q);
if ($this->_definedTags[$tag['tag']]['htmlclose'] == '' && $this->_options['xmlclose']) {
if ($this->_definedTags[$tag['tag']]['htmlclose'] != '') {
$this->_parsed .= '</'. $this->_definedTags[$tag['tag']]['htmlclose']. '>';
* Sets text in the object to be parsed
* @param string the text to set in the object
* @author Stijn de Reede <sjr@gmx.co.uk>
* Gets the unparsed text from the object
* @return string the text set in the object
* @author Stijn de Reede <sjr@gmx.co.uk>
* Gets the preparsed text from the object
* @return string the preparsed text
* @author Stijn de Reede <sjr@gmx.co.uk>
return $this->_preparsed;
* Gets the parsed text from the object
* @return string the parsed text set in the object
* @author Stijn de Reede <sjr@gmx.co.uk>
* Parses the text set in the object
* @see _validateTagArray()
* @see _buildParsedString()
* @author Stijn de Reede <sjr@gmx.co.uk>
$this->_validateTagArray ();
$this->_buildParsedString ();
* Quick method to do setText(), parse() and getParsed at once
* @author Stijn de Reede <sjr@gmx.co.uk>
* Quick static method to do setText(), parse() and getParsed at once
* @author Stijn de Reede <sjr@gmx.co.uk>
Documentation generated on Mon, 11 Mar 2019 15:51:51 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|