Source for file mimeDecode.php
Documentation is available at mimeDecode.php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
// +-----------------------------------------------------------------------+
// | Copyright (c) 2002-2003 Richard Heyes |
// | Copyright (c) 2003-2005 The PHP Group |
// | All rights reserved. |
// | Redistribution and use in source and binary forms, with or without |
// | modification, are permitted provided that the following conditions |
// | o Redistributions of source code must retain the above copyright |
// | notice, this list of conditions and the following disclaimer. |
// | o Redistributions in binary form must reproduce the above copyright |
// | notice, this list of conditions and the following disclaimer in the |
// | documentation and/or other materials provided with the distribution.|
// | o The names of the authors may not be used to endorse or promote |
// | products derived from this software without specific prior written |
// | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
// | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
// | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
// | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
// | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
// | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
// | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
// | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
// | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
// | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// +-----------------------------------------------------------------------+
// | Author: Richard Heyes <richard@phpguru.org> |
// +-----------------------------------------------------------------------+
* +----------------------------- IMPORTANT ------------------------------+
* | Usage of this class compared to native php extensions such as |
* | mailparse or imap, is slow and may be feature deficient. If available|
* | you are STRONGLY recommended to use the php extensions. |
* +----------------------------------------------------------------------+
* This class will parse a raw mime email and return
* the structure. Returned structure is similar to
* that returned by imap_fetchstructure().
* USAGE: (assume $input is your raw email)
* $decode = new Mail_mimeDecode($input, "\r\n");
* $structure = $decode->decode();
* $params['input'] = $input;
* $structure = Mail_mimeDecode::decode($params);
* o Implement multipart/appledouble
> 4. We have also found a solution for decoding the UTF-8
> headers. Therefore I made the following function:
> function decode_utf8($txt) {
> $txt=strtr($txt,$trans);
> return(utf8_decode($txt));
> And I have inserted the following line to the class:
> if (strtolower($charset)=="utf-8") $text=decode_utf8($text);
> ... before the following one in the "_decodeHeader" function:
> $input = str_replace($encoded, $text, $input);
> This way from now on it can easily decode the UTF-8 headers too.
* @author Richard Heyes <richard@phpguru.org>
* @version $Revision: 1.46 $
* The raw email to decode
* The header part of the input
* The body part of the input
* If an error occurs, this is used to store the message
* Flag to determine whether to include bodies in the returned object
* Flag to determine whether to decode bodies
* Flag to determine whether to decode headers
* Sets up the object, initialise the variables, and splits and
* stores the header and body of the input.
* @param string The input to decode
// Split the raw message into its header and body sections.
// NOTE(review): list() is applied unconditionally; confirm what
// _splitBodyHeader() returns when it cannot find a blank line.
list ($header, $body) = $this->_splitBodyHeader ($input);
$this->_header = $header;
// Defaults until decode() overrides them: bodies are kept in the result
// but are not transfer-decoded.
$this->_decode_bodies = false;
$this->_include_bodies = true;
* Begins the decoding process. If called statically
* it will create an object and call the decode() method
* @param array An array of various parameters that determine
* include_bodies - Whether to include the body in the returned
* decode_bodies - Whether to decode the bodies
* of the parts. (Transfer encoding)
* decode_headers - Whether to decode headers
* input - If called statically, this will be treated
* @return object Decoded results
function decode($params = null )
// Public entry point: applies the option flags found in $params, then decodes
// $this->_header / $this->_body via _decode().
// NOTE(review): the static-call path below depends on invoking a non-static
// method statically, which PHP 8 rejects - confirm the supported PHP range.
// determine if this method has been called statically
$isStatic = !(isset ($this) && get_class($this) == __CLASS__ );
// Have we been called statically?
// If so, create an object and pass details to that.
if ($isStatic AND isset ($params['input'])) {
$structure = $obj->decode ($params);
// Called statically but no input
return PEAR ::raiseError ('Called statically and no input given');
// Every option that is absent from $params is switched off here. This
// includes include_bodies, which the constructor initialises to true.
$this->_include_bodies = isset ($params['include_bodies']) ?
$params['include_bodies'] : false;
$this->_decode_bodies = isset ($params['decode_bodies']) ?
$params['decode_bodies'] : false;
$this->_decode_headers = isset ($params['decode_headers']) ?
$params['decode_headers'] : false;
$structure = $this->_decode ($this->_header, $this->_body);
// _decode() signals failure with false; turn the stored message into an error object.
if ($structure === false ) {
$structure = $this->raiseError ($this->_error);
* Performs the decoding. Decodes the body string passed to it
* If it finds certain content-types it will call itself recursively.
* @param string Header section
* @param string Body section
* @return object Results of decoding process
function _decode ($headers, $body, $default_ctype = 'text/plain')
// Builds the result object for one MIME part: collects its headers, interprets
// the content-* headers, then fills in ->body or ->parts depending on the type.
$return->headers = array ();
$headers = $this->_parseHeaders ($headers);
// Index header values by lower-cased name. The branches below either append
// with [] or assign the value directly.
foreach ($headers as $value) {
$return->headers [strtolower($value['name'])][] = $value['value'];
} elseif (isset ($return->headers [strtolower($value['name'])])) {
$return->headers [strtolower($value['name'])][] = $value['value'];
$return->headers [strtolower($value['name'])] = $value['value'];
// NOTE(review): each() was deprecated in PHP 7.2 and removed in PHP 8.0; the
// while/list/each loops in this method need to become foreach on those versions.
while (list ($key, $value) = each($headers)) {
$headers[$key]['name'] = strtolower($headers[$key]['name']);
switch ($headers[$key]['name']) {
// Content type: split "primary/secondary" and copy any parameters.
$content_type = $this->_parseHeaderValue ($headers[$key]['value']);
if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
$return->ctype_primary = $regs[1 ];
$return->ctype_secondary = $regs[2 ];
if (isset ($content_type['other'])) {
while (list ($p_name, $p_value) = each($content_type['other'])) {
$return->ctype_parameters [$p_name] = $p_value;
// Disposition value plus its parameters (stored in ->d_parameters).
case 'content-disposition':
$content_disposition = $this->_parseHeaderValue ($headers[$key]['value']);
$return->disposition = $content_disposition['value'];
if (isset ($content_disposition['other'])) {
while (list ($p_name, $p_value) = each($content_disposition['other'])) {
$return->d_parameters [$p_name] = $p_value;
case 'content-transfer-encoding':
$content_transfer_encoding = $this->_parseHeaderValue ($headers[$key]['value']);
if (isset ($content_type)) {
// The body is stored only when _include_bodies is set, and passed through
// _decodeBody() only when _decode_bodies is set. The encoding falls back to '7bit'.
// The same pair of statements appears twice in this listing; the case labels
// that separate them are not visible here.
$encoding = isset ($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
$this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody ($body, $encoding) : $body) : null;
$encoding = isset ($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
$this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody ($body, $encoding) : $body) : null;
case 'multipart/parallel':
case 'multipart/report': // RFC1892
case 'multipart/signed': // PGP
case 'multipart/alternative':
case 'multipart/related':
// A multipart body cannot be split without a boundary parameter.
if(!isset ($content_type['other']['boundary'])){
$this->_error = 'No boundary found for ' . $content_type['value'] . ' part';
// Children of multipart/digest default to message/rfc822, all others to text/plain.
$default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';
$parts = $this->_boundarySplit ($body, $content_type['other']['boundary']);
// Decode each sub-part recursively with the default type chosen above.
for ($i = 0; $i < count($parts); $i++ ) {
list ($part_header, $part_body) = $this->_splitBodyHeader ($parts[$i]);
$part = $this->_decode ($part_header, $part_body, $default_ctype);
$part = $this->raiseError ($this->_error);
$return->parts [] = $part;
// NOTE(review): $obj is created outside the visible lines; the current option
// flags are forwarded to its decode() call.
$return->parts [] = $obj->decode (array ('include_bodies' => $this->_include_bodies,
'decode_bodies' => $this->_decode_bodies,
'decode_headers' => $this->_decode_headers));
if(!isset ($content_transfer_encoding['value']))
$content_transfer_encoding['value'] = '7bit';
$this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody ($body, $content_transfer_encoding['value']) : $body) : null;
// Type taken from $default_ctype instead of a parsed header; _decodeBody() is
// called without an encoding, so its '7bit' default applies.
$ctype = explode('/', $default_ctype);
$return->ctype_primary = $ctype[0 ];
$return->ctype_secondary = $ctype[1 ];
$this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody ($body) : $body) : null;
* Given the output of the above function, this will return an
* array of references to the parts, indexed by mime number.
* @param object $structure The structure to go through
* @param string $mime_number Internal use only.
* @return array Mime numbers
function &getMimeNumbers(&$structure, $no_refs = false , $mime_number = '', $prepend = '')
// Walks $structure and fills $return, keyed by dotted MIME numbers ("1", "1.2", ...).
// Each visited node also gets its number written to ->mime_id. Values are
// references into the structure, or '' placeholders when $no_refs is true.
if (!empty ($structure->parts )) {
if ($mime_number != '') {
$structure->mime_id = $prepend . $mime_number;
$return[$prepend . $mime_number] = &$structure;
for ($i = 0; $i < count($structure->parts ); $i++ ) {
// Below a message/* part the numbering restarts, with the parent number kept as a prefix.
// NOTE(review): strtolower() assumes the content-type header is a single string.
if (!empty ($structure->headers ['content-type']) AND substr(strtolower($structure->headers ['content-type']), 0 , 8 ) == 'message/') {
$prepend = $prepend . $mime_number . '.';
// Child number: 1-based index, appended to the parent number when there is one.
$_mime_number = ($mime_number == '' ? $i + 1 : sprintf('%s.%s', $mime_number, $i + 1 ));
// Merge the entries gathered in $arr into this level's result ($arr is filled outside the visible lines).
foreach ($arr as $key => $val) {
$no_refs ? $return[$key] = '' : $return[$key] = &$arr[$key];
if ($mime_number == '') {
$structure->mime_id = $prepend . $mime_number;
$no_refs ? $return[$prepend . $mime_number] = '' : $return[$prepend . $mime_number] = &$structure;
* Given a string containing a header and body
* section, this function will split them (at the first
* blank line) and return them.
* @param string Input to split apart
* @return array Contains header and body section
function _splitBodyHeader ($input)
// The header is everything before the first blank line (LF or CRLF endings), captured
// non-greedily; the body is everything after it. The /s flag lets '.' match newlines.
if (preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match)) {
return array ($match[1 ], $match[2 ]);
// No blank line found: record the failure text for the caller to report.
$this->_error = 'Could not split header and body';
* Parse headers given in $input and return them as an array.
* @param string Headers to parse
* @return array Contains parsed headers
function _parseHeaders ($input)
// Turns the header section into a list of name/value entries.
// NOTE(review): $headers and $pos are computed outside the visible lines.
foreach ($headers as $value) {
// The value is whatever follows position $pos (presumably the ':' separator - confirm).
$hdr_value = substr($value, $pos+1 );
// Drops one more leading character; the condition guarding this is not visible here.
$hdr_value = substr($hdr_value, 1 );
// Header decoding is applied only when the _decode_headers flag is set.
'value' => $this->_decode_headers ? $this->_decodeHeader ($hdr_value) : $hdr_value
* Function to parse a header value,
* extract first part, and any secondary
* parts (after ;) This function is not as
* robust as it could be. Eg. header comments
* in the wrong place will probably break it.
* @param string Header value to parse
* @return array Contains parsed result
function _parseHeaderValue ($input)
// Splits a structured header value into its main value ($return['value']) and
// its ';'-separated parameters ($return['other']).
if (($pos = strpos($input, ';')) !== false ) {
// This splits on a semi-colon, if there's no preceding backslash
// Now works with quoted values; had to glue the \; breaks in PHP
// the regex is already bordering on incomprehensible
$splitRegex = '/([^;\'"]*[\'"]([^\'"]*([^\'"]*)*)[\'"][^;\'"]*|([^;]+))(;|$)/';
for ($i=0; $i< count($matches[0 ]); $i++ ) {
$param = $matches[0 ][$i];
// A piece ending in "\;" was split on an escaped semicolon: glue the next piece back on.
while (substr($param, -2 ) == '\;') {
$param .= $matches[0 ][++ $i];
for ($i = 0; $i < count($parameters); $i++ ) {
// Parameter name: the text before '=', trimmed of quotes, ';', tabs, backslashes and spaces.
$param_name = trim(substr($parameters[$i], 0 , $pos = strpos($parameters[$i], '=')), "'\";\t\\ ");
// Strip the surrounding double quotes from a quoted value.
// NOTE(review): $param_value[0] assumes a non-empty value - confirm.
if ($param_value[0 ] == '"') {
$param_value = substr($param_value, 1 , -1 );
// Stored under the name as written and again under its lower-cased form.
$return['other'][$param_name] = $param_value;
$return['other'][strtolower($param_name)] = $param_value;
// Here the whole trimmed input is used as the value.
$return['value'] = trim($input);
* This function splits the input based on the given boundary.
* @param string Input to parse
* @return array Contains array of resulting mime parts
function _boundarySplit ($input, $boundary)
// Tolerate a boundary that is still wrapped in backslash-escaped quotes (\"...\"):
// if removing two characters from each end and re-wrapping reproduces the
// original string, use the inner text as the boundary.
$bs_possible = substr($boundary, 2 , -2 );
$bs_check = '\"' . $bs_possible . '\"';
if ($boundary == $bs_check) {
$boundary = $bs_possible;
// Each delimiter line starts with "--" followed by the boundary.
$tmp = explode('--' . $boundary, $input);
// The first and last pieces are skipped (presumably the text before the first
// delimiter and after the closing one - confirm).
for ($i = 1; $i < count($tmp) - 1; $i++ ) {
* Given a header, this function will decode it
* according to RFC2047. Probably not *exactly*
* conformant, but it does pass all the given
* @param string Input header value to decode
* @return string Decoded header value
function _decodeHeader ($input)
// Rewrites encoded-words of the form =?charset?q|b?text?= found in $input.
// Remove white space between encoded-words
$input = preg_replace('/(=\?[^?]+\?(q|b)\?[^?]*\?=)(\s)+=\?/i', '\1=?', $input);
// For each encoded-word...
// Captures: 1 = whole encoded-word, 2 = charset, 3 = encoding letter (q or b), 4 = encoded text.
while (preg_match('/(=\?([^?]+)\?(q|b)\?([^?]*)\?=)/i', $input, $matches)) {
// NOTE(review): the statements around this foreach are not visible in this listing.
foreach($matches[1 ] as $value)
* Given a body string and an encoding type,
* this function will decode and return it.
* @param string Input body to decode
* @param string Encoding type to use.
* @return string Decoded body
function _decodeBody ($input, $encoding = '7bit')
// Only the quoted-printable call is visible in this listing; how other
// $encoding values are handled cannot be confirmed from here.
return $this->_quotedPrintableDecode ($input);
* Given a quoted-printable string, this
* function will decode and return it.
* @param string Input body to decode
* @return string Decoded body
/**
 * Decode a quoted-printable string.
 *
 * Soft line breaks ("=" directly followed by LF or CRLF) are removed, then
 * every "=XX" escape (two hex digits, either case) is replaced by the byte
 * with that value. Any other text, including a stray "=", is left untouched.
 *
 * The previous implementation relied on preg_replace() with the /e modifier,
 * which evaluates the replacement as PHP code and was removed in PHP 7.0.
 * preg_replace_callback() yields the same result without code evaluation.
 *
 * @param  string Input body to decode
 * @return string Decoded body
 */
function _quotedPrintableDecode($input)
{
    // Remove soft line breaks
    $input = preg_replace("/=\r?\n/", '', $input);

    // Replace encoded characters
    $input = preg_replace_callback(
        '/=([a-f0-9]{2})/i',
        function ($match) {
            return chr(hexdec($match[1]));
        },
        $input
    );

    return $input;
}
* Checks the input for uuencoded files and returns
* an array of them. Can be called statically, eg:
* $files =& Mail_mimeDecode::uudecode($some_text);
* It will check for the begin 666 ... end syntax
* however and won't just blindly decode whatever you
* @param string Input body to look for attachments in
* @return array Decoded bodies, filenames and permissions
// Find all uuencoded sections
// Captures: 1 = three-digit octal permission, 2 = file name, 3 = encoded payload.
// The U flag makes the quantifiers ungreedy; the s flag lets '.' span lines.
preg_match_all("/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us", $input, $matches);
for ($j = 0; $j < count($matches[3 ]); $j++ ) {
$filename = $matches[2 ][$j];
$fileperm = $matches[1 ][$j];
// $str (the payload lines) and $strlen are prepared outside the visible lines.
for ($i = 0; $i < $strlen; $i++ ) {
// The first character of a line carries the decoded byte count for that line.
// NOTE(review): "- ' '" subtracts a non-numeric string from an integer; newer
// PHP versions reject arithmetic on non-numeric strings - verify on the target version.
$len=(int) (((ord(substr($str[$i],0 ,1 )) -32 ) - ' ') & 077 );
// Full groups: four encoded characters yield three output bytes.
// ($c0 is assigned outside the visible lines.)
while (($d + 3 <= $len) AND ($pos + 4 <= strlen($str[$i]))) {
$c1 = (ord(substr($str[$i],$pos+1 ,1 )) ^ 0x20 );
$c2 = (ord(substr($str[$i],$pos+2 ,1 )) ^ 0x20 );
$c3 = (ord(substr($str[$i],$pos+3 ,1 )) ^ 0x20 );
$file .= chr(((($c0 - ' ') & 077 ) << 2 ) | ((($c1 - ' ') & 077 ) >> 4 ));
$file .= chr(((($c1 - ' ') & 077 ) << 4 ) | ((($c2 - ' ') & 077 ) >> 2 ));
$file .= chr(((($c2 - ' ') & 077 ) << 6 ) | (($c3 - ' ') & 077 ));
// Trailing partial group that still holds two output bytes.
if (($d + 2 <= $len) && ($pos + 3 <= strlen($str[$i]))) {
$c1 = (ord(substr($str[$i],$pos+1 ,1 )) ^ 0x20 );
$c2 = (ord(substr($str[$i],$pos+2 ,1 )) ^ 0x20 );
$file .= chr(((($c0 - ' ') & 077 ) << 2 ) | ((($c1 - ' ') & 077 ) >> 4 ));
$file .= chr(((($c1 - ' ') & 077 ) << 4 ) | ((($c2 - ' ') & 077 ) >> 2 ));
// Trailing partial group that holds a single output byte.
if (($d + 1 <= $len) && ($pos + 2 <= strlen($str[$i]))) {
$c1 = (ord(substr($str[$i],$pos+1 ,1 )) ^ 0x20 );
$file .= chr(((($c0 - ' ') & 077 ) << 2 ) | ((($c1 - ' ') & 077 ) >> 4 ));
// One result entry per uuencoded section found.
$files[] = array ('filename' => $filename, 'fileperm' => $fileperm, 'filedata' => $file);
* getSendArray() returns the arguments required for Mail::send()
* used to build the arguments for a mail::send() call
* $mailtext = Full email (for example generated by a template)
* $decoder = new Mail_mimeDecode($mailtext);
* $parts = $decoder->getSendArray();
* if (!PEAR::isError($parts)) {
* list($recipients,$headers,$body) = $parts;
* $mail = Mail::factory('smtp');
* $mail->send($recipients,$headers,$body);
* @return mixed array of recipient, headers, body or PEAR_Error
* @author Alan Knowles <alan@akbkhome.com>
// prevent warning if this is not set
$this->_decode_headers = FALSE;
// Headers are parsed with decoding switched off, so values are passed on as they appear in the message.
$headerlist = $this->_parseHeaders ($this->_header);
return $this->raiseError ("Message did not contain headers");
foreach($headerlist as $item) {
// A later header with the same name overwrites the earlier one.
$header[$item['name']] = $item['value'];
// NOTE(review): this assigns rather than appends, so only the most recently
// processed recipient header ends up in $to - confirm whether ".=" was intended.
$to = ",". $item['value'];
return $this->raiseError ("Message did not contain any recipents");
// Returned in the order: recipients, headers, body.
return array ($to,$header,$this->_body);
* Returns a xml copy of the output of
* Mail_mimeDecode::decode. Pass the output in as the
* argument. This function can be called statically. Eg:
* $output = $obj->decode();
* $xml = Mail_mimeDecode::getXML($output);
* The DTD used for this should have been in the package. Or
* alternatively you can get it from cvs, or here:
* http://www.phpguru.org/xmail/xmail.dtd.
* @param object Input to convert to xml. This should be the
* output of the Mail_mimeDecode::decode function
* @return string XML version of input
// The document opens with the XML declaration and the xmail DOCTYPE, each
// terminated by $crlf (defined outside the visible lines).
$output = '<?xml version=\'1.0\'?>' . $crlf .
'<!DOCTYPE email SYSTEM "http://www.phpguru.org/xmail/xmail.dtd">' . $crlf .
* Function that does the actual conversion to xml. Does a single
* @param object Input to convert to xml. This is a mimepart object.
* It may or may not contain subparts.
* @param integer Number of tabs to indent
* @return string XML version of input
function _getXML ($input, $indent = 1 )
// Serialises one decoded part: its headers first, then either its sub-parts
// (each wrapped in <mimepart>) or its body.
// The @ suppresses the notice raised when $input has no headers property.
$headers = @(array) $input->headers;
foreach ($headers as $hdr_name => $hdr_value) {
// Multiple headers with this name
for ($i = 0; $i < count($hdr_value); $i++ ) {
$output .= Mail_mimeDecode::_getXML_helper ($hdr_name, $hdr_value[$i], $indent);
// Only one header of this sort
$output .= Mail_mimeDecode::_getXML_helper ($hdr_name, $hdr_value, $indent);
if (!empty ($input->parts )) {
for ($i = 0; $i < count($input->parts ); $i++ ) {
$output .= $crlf . str_repeat($htab, $indent) . '<mimepart>' . $crlf .
str_repeat($htab, $indent) . '</mimepart>' . $crlf;
} elseif (isset ($input->body )) {
// NOTE(review): the body is inserted verbatim, so a body containing "]]>"
// would end the CDATA section early - confirm whether callers guard against this.
$output .= $crlf . str_repeat($htab, $indent) . '<body><![CDATA[' .
$input->body . ']]></body>' . $crlf;
* Helper function to _getXML(). Returns xml of a header.
* @param string Name of header
* @param string Value of header
* @param integer Number of tabs to indent
* @return string XML version of input
function _getXML_helper ($hdr_name, $hdr_value, $indent)
// Renders one header as a <header> element, with a <parameter> child for each
// parsed parameter.
// 'received' headers are kept whole; every other header is split into value and parameters.
// NOTE(review): _parseHeaderValue() is declared without `static` but is called statically here.
$new_hdr_value = ($hdr_name != 'received') ? Mail_mimeDecode::_parseHeaderValue ($hdr_value) : array ('value' => $hdr_value);
// Sort out any parameters
if (!empty ($new_hdr_value['other'])) {
foreach ($new_hdr_value['other'] as $paramname => $paramvalue) {
$params[] = str_repeat($htab, $indent) . $htab . '<parameter>' . $crlf .
str_repeat($htab, $indent) . $htab . '</parameter>' . $crlf;
$return = str_repeat($htab, $indent) . '<header>' . $crlf .
Documentation generated on Mon, 11 Mar 2019 14:18:34 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|