Source for file RFC822.php
Documentation is available at RFC822.php
// +-----------------------------------------------------------------------+
// | Copyright (c) 2001-2002, Richard Heyes |
// | All rights reserved. |
// | Redistribution and use in source and binary forms, with or without |
// | modification, are permitted provided that the following conditions |
// | o Redistributions of source code must retain the above copyright |
// | notice, this list of conditions and the following disclaimer. |
// | o Redistributions in binary form must reproduce the above copyright |
// | notice, this list of conditions and the following disclaimer in the |
// | documentation and/or other materials provided with the distribution.|
// | o The names of the authors may not be used to endorse or promote |
// | products derived from this software without specific prior written |
// | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
// | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
// | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
// | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
// | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
// | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
// | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
// | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
// | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
// | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// +-----------------------------------------------------------------------+
// | Authors: Richard Heyes <richard@phpguru.org> |
// | Chuck Hagenbuch <chuck@horde.org> |
// +-----------------------------------------------------------------------+
* RFC 822 Email address list validation Utility
* This class will take an address string, and parse it into its constituent
* parts, be that either addresses, groups, or combinations. Nested groups
* are not supported. The structure it returns is pretty straightforward,
* and is similar to that provided by the imap_rfc822_parse_adrlist(). Use
* print_r() to view the structure.
* $address_string = 'My Group: "Richard" <richard@localhost> (A comment), ted@example.com (Ted Bloggs), Barney;';
* $structure = Mail_RFC822::parseAddressList($address_string, 'example.com', true);
* @author Richard Heyes <richard@phpguru.org>
* @author Chuck Hagenbuch <chuck@horde.org>
* @version $Revision: 1.23 $
* The address being parsed by the RFC822 object.
* The default domain to use for unqualified addresses.
* @var string $default_domain
* Should we return a nested array showing groups, or flatten everything?
* @var boolean $nestGroups
* Whether or not to validate atoms for non-ascii characters.
* The array of raw addresses built up as we parse.
* The final array of parsed address information that we build up.
* The current error message, if any.
* An internal counter/pointer.
* The number of groups that have been found in the address list.
* @var integer $num_groups
* A variable so that we can tell whether or not we're inside a Mail_RFC822 object.
* @var boolean $mailRFC822
* A limit after which processing stops
* Sets up the object. The address must either be set here or when
* calling parseAddressList(). One or the other.
* @param string $address The address(es) to validate.
* @param string $default_domain Default domain/host etc. If not supplied, will be set to localhost.
* @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing.
* @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
* @return object Mail_RFC822 A new Mail_RFC822 object.
function Mail_RFC822($address = null , $default_domain = null , $nest_groups = null , $validate = null , $limit = null )
if (isset ($address)) $this->address = $address;
if (isset ($nest_groups)) $this->nestGroups = $nest_groups;
if (isset ($validate)) $this->validate = $validate;
if (isset ($limit)) $this->limit = $limit;
* Starts the whole process. The address must either be set here
* or when creating the object. One or the other.
* @param string $address The address(es) to validate.
* @param string $default_domain Default domain/host etc.
* @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing.
* @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
* @return array A structured array of addresses.
function parseAddressList($address = null , $default_domain = null , $nest_groups = null , $validate = null , $limit = null )
$obj = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit);
return $obj->parseAddressList ();
if (isset ($address)) $this->address = $address;
if (isset ($nest_groups)) $this->nestGroups = $nest_groups;
if (isset ($validate)) $this->validate = $validate;
if (isset ($limit)) $this->limit = $limit;
// Unfold any long lines in $this->address.
return PEAR ::raiseError ($this->error);
// Validate each address individually. If we encounter an invalid
// address, stop iterating and return an error immediately.
$valid = $this->_validateAddress ($address);
if ($valid === false || isset ($this->error)) {
return PEAR ::raiseError ($this->error);
* Splits an address into separate addresses.
* @param string $address The addresses to split.
* @return boolean Success or failure.
function _splitAddresses ($address)
if ($this->_isGroup ($address) && !isset ($this->error)) {
} elseif (!isset ($this->error)) {
} elseif (isset ($this->error)) {
// Split the string based on the above ten or so lines.
$parts = explode($split_char, $address);
$string = $this->_splitCheck ($parts, $split_char);
// If $string does not contain a colon outside of
// brackets/quotes etc then something's fubar.
// First check there's a colon at all:
if (strpos($string, ':') === false ) {
$this->error = 'Invalid address: ' . $string;
// Now check it's outside of brackets/quotes:
if (!$this->_splitCheck (explode(':', $string), ':')) {
// We must have a group at this point, so increase the counter:
// $string now contains the first full address/group.
// Add to the addresses array.
'address' => trim($string),
// Remove the now stored address from the initial line, the +1
// is to account for the explode character.
// If the next char is a comma and this was a group, then
// there are more addresses, otherwise, if there are any more
// chars, then there is another address.
if ($is_group && substr($address, 0 , 1 ) == ','){
} elseif (strlen($address) > 0 ) {
// If you got here then something's off
* Checks for a group at the start of the string.
* @param string $address The address to check.
* @return boolean Whether or not there is a group at the start of the string.
function _isGroup ($address)
// First comma not in quotes, angles or escaped:
$string = $this->_splitCheck ($parts, ',');
// Now we have the first address, we can reliably check for a
// group by searching for a colon that's not escaped or in
// quotes or angle brackets.
$string2 = $this->_splitCheck ($parts, ':');
return ($string2 !== $string);
* A common function that will check an exploded string.
* @param array $parts The exploded string.
* @param string $char The char that was exploded on.
* @return mixed False if the string contains unclosed quotes/brackets, or the string on success.
function _splitCheck ($parts, $char)
for ($i = 0; $i < count($parts); $i++ ) {
if ($this->_hasUnclosedQuotes ($string)
|| $this->_hasUnclosedBrackets ($string, '<>')
|| $this->_hasUnclosedBrackets ($string, '[]')
|| $this->_hasUnclosedBrackets ($string, '()')
|| substr($string, -1 ) == '\\') {
if (isset ($parts[$i + 1 ])) {
$string = $string . $char . $parts[$i + 1 ];
$this->error = 'Invalid address spec. Unclosed bracket or quotes';
* Checks if a string has unclosed quotes or not.
* @param string $string The string to check.
* @return boolean True if there are unclosed quotes inside the string, false otherwise.
function _hasUnclosedQuotes ($string)
$string_cnt = count($string);
for ($i = 0; $i < (count($string) - 1 ); $i++ )
if (substr($string[$i], -1 ) == '\\')
return ($string_cnt % 2 === 0 );
* Checks if a string has unclosed brackets or not. IMPORTANT:
* This function handles angle brackets, square brackets and parentheses;
* the pair to check is passed in $chars (e.g. '<>', '[]' or '()').
* @param string $string The string to check.
* @param string $chars The characters to check for.
* @return boolean True if there are unclosed brackets inside the string, false otherwise.
function _hasUnclosedBrackets ($string, $chars)
$this->_hasUnclosedBracketsSub ($string, $num_angle_start, $chars[0 ]);
$this->_hasUnclosedBracketsSub ($string, $num_angle_end, $chars[1 ]);
if ($num_angle_start < $num_angle_end) {
$this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')';
return ($num_angle_start > $num_angle_end);
* Sub function that is used only by _hasUnclosedBrackets().
* @param string $string The string to check.
* @param integer &$num The number of occurrences.
* @param string $char The character to count.
* @return integer The number of occurrences of $char in $string, adjusted for backslashes.
function _hasUnclosedBracketsSub ($string, &$num, $char)
for ($i = 0; $i < count($parts); $i++ ){
if (substr($parts[$i], -1 ) == '\\' || $this->_hasUnclosedQuotes ($parts[$i]))
if (isset ($parts[$i + 1 ]))
$parts[$i + 1 ] = $parts[$i] . $char . $parts[$i + 1 ];
* Function to begin checking the address.
* @param string $address The address to validate.
* @return mixed False on failure, or a structured array of address information on success.
function _validateAddress ($address)
// Get the group part of the name
$parts = explode(':', $address['address']);
$groupname = $this->_splitCheck ($parts, ':');
// And validate the group part of the name.
if (!$this->_validatePhrase ($groupname)){
$this->error = 'Group name did not validate.';
// Don't include groups if we are not nesting
// them. This avoids returning invalid addresses.
$structure = new stdClass;
$structure->groupname = $groupname;
// If a group then split on comma and put into an array.
// Otherwise, Just put the whole address in an array.
while (strlen($address['address']) > 0 ) {
$parts = explode(',', $address['address']);
$addresses[] = $this->_splitCheck ($parts, ',');
$addresses[] = $address['address'];
// Check that $addresses is set. If the address is an empty group
// (a group name followed by no mailboxes), errors were appearing.
$this->error = 'Empty group.';
// Trim the whitespace from all of the address strings.
// Validate each mailbox.
// Format could be one of: name <geezer@domain.com>
// ... or any other format valid by RFC 822.
for ($i = 0; $i < count($addresses); $i++ ) {
if (empty ($this->error)) {
$this->error = 'Validation failed for: ' . $addresses[$i];
$structure->addresses = $addresses;
$structure = $addresses[0 ];
* Function to validate a phrase.
* @param string $phrase The phrase to check.
* @return boolean Success or failure.
function _validatePhrase ($phrase)
// Splits on one or more Tab or space.
$parts = preg_split('/[ \\x09]+/', $phrase, -1 , PREG_SPLIT_NO_EMPTY );
while (count($parts) > 0 ){
$phrase_parts[] = $this->_splitCheck ($parts, ' ');
for ($i = 0; $i < $this->index + 1; $i++ )
foreach ($phrase_parts as $part) {
if (substr($part, 0 , 1 ) == '"') {
if (!$this->_validateQuotedString ($part)) {
// Otherwise it's an atom:
if (!$this->_validateAtom ($part)) return false;
* Function to validate an atom which from rfc822 is:
* atom = 1*<any CHAR except specials, SPACE and CTLs>
* If validation ($this->validate) has been turned off, then
* validateAtom() doesn't actually check anything. This is so that you
* can split a list of addresses up before encoding personal names
* (umlauts, etc.), for example.
* @param string $atom The string to check.
* @return boolean Success or failure.
function _validateAtom ($atom)
// Validation has been turned off; assume the atom is okay.
// Check for any char from ASCII 0 - ASCII 127
if (!preg_match('/^[\\x00-\\x7E]+$/i', $atom, $matches)) {
// Check for control characters (ASCII 0-31):
* Function to validate quoted string, which is:
* quoted-string = <"> *(qtext/quoted-pair) <">
* @param string $qstring The string to check
* @return boolean Success or failure.
function _validateQuotedString ($qstring)
// Leading and trailing "
$qstring = substr($qstring, 1 , -1 );
// Perform check, removing quoted characters first.
* Function to validate a mailbox, which is:
* mailbox = addr-spec ; simple address
* / phrase route-addr ; name and route-addr
* @param string &$mailbox The string to check.
* @return boolean Success or failure.
// Catch any RFC822 comments and store them separately.
$before_comment = $this->_splitCheck ($parts, '(');
if ($before_comment != $_mailbox) {
// First char should be a (.
$comment = $this->_splitCheck ($parts, ')');
// +1 is for the trailing )
foreach ($comments as $comment) {
$mailbox = trim($mailbox);
// Check for name + route-addr
if (substr($mailbox, -1 ) == '>' && substr($mailbox, 0 , 1 ) != '<') {
$name = $this->_splitCheck ($parts, '<');
if ($this->_validatePhrase ($phrase) === false || ($route_addr = $this->_validateRouteAddr ($route_addr)) === false ) {
// First snip angle brackets if present.
if (substr($mailbox, 0 , 1 ) == '<' && substr($mailbox, -1 ) == '>') {
$addr_spec = substr($mailbox, 1 , -1 );
if (($addr_spec = $this->_validateAddrSpec ($addr_spec)) === false ) {
// Construct the object that will be returned.
// Add the phrase (even if empty) and comments
$mbox->personal = $phrase;
$mbox->comment = isset ($comments) ? $comments : array ();
if (isset ($route_addr)) {
$mbox->mailbox = $route_addr['local_part'];
$mbox->host = $route_addr['domain'];
$route_addr['adl'] !== '' ? $mbox->adl = $route_addr['adl'] : '';
$mbox->mailbox = $addr_spec['local_part'];
$mbox->host = $addr_spec['domain'];
* This function validates a route-addr which is:
* route-addr = "<" [route] addr-spec ">"
* Angle brackets have already been removed at the point of
* getting to this function.
* @param string $route_addr The string to check.
* @return mixed False on failure, or an array containing validated address/route information on success.
function _validateRouteAddr ($route_addr)
if (strpos($route_addr, ':') !== false ) {
$parts = explode(':', $route_addr);
$route = $this->_splitCheck ($parts, ':');
// If $route is same as $route_addr then the colon was in
// quotes or brackets or, of course, non existent.
if ($route === $route_addr){
$addr_spec = $route_addr;
if (($addr_spec = $this->_validateAddrSpec ($addr_spec)) === false ) {
if (($route = $this->_validateRoute ($route)) === false ) {
// Validate addr-spec part.
if (($addr_spec = $this->_validateAddrSpec ($addr_spec)) === false ) {
* Function to validate a route, which is:
* route = 1#("@" domain) ":"
* @param string $route The string to check.
* @return mixed False on failure, or the validated $route on success.
function _validateRoute ($route)
foreach ($domains as $domain) {
if (!$this->_validateDomain ($domain)) return false;
* Function to validate a domain, though this is not quite what
* you expect of a strict internet domain.
* domain = sub-domain *("." sub-domain)
* @param string $domain The string to check.
* @return mixed False on failure, or the validated domain on success.
function _validateDomain ($domain)
// Note the different use of $subdomains and $sub_domains
$subdomains = explode('.', $domain);
while (count($subdomains) > 0 ) {
$sub_domains[] = $this->_splitCheck ($subdomains, '.');
for ($i = 0; $i < $this->index + 1; $i++ )
foreach ($sub_domains as $sub_domain) {
if (!$this->_validateSubdomain (trim($sub_domain)))
// Managed to get here, so return input.
* Function to validate a subdomain:
* subdomain = domain-ref / domain-literal
* @param string $subdomain The string to check.
* @return boolean Success or failure.
function _validateSubdomain ($subdomain)
if (!$this->_validateDliteral ($arr[1 ])) return false;
if (!$this->_validateAtom ($subdomain)) return false;
// Got here, so return successful.
/**
 * Function to validate a domain literal:
 *   domain-literal = "[" *(dtext / quoted-pair) "]"
 *
 * The literal is rejected as soon as any character in it is followed by
 * "[", "]", a carriage return or a backslash.
 *
 * @param string $dliteral The string to check.
 * @return boolean Success or failure.
 */
function _validateDliteral ($dliteral)
{
    // preg_match() leaves $matches empty when nothing matched, so the
    // captured character may only be inspected once we know it exists;
    // reading $matches[1] unconditionally raised an undefined-index
    // notice/warning for every valid literal.
    return !preg_match('/(.)[][\x0D\\\\]/', $dliteral, $matches)
        && (!isset($matches[1]) || $matches[1] != '\\');
}
* Function to validate an addr-spec.
* addr-spec = local-part "@" domain
* @param string $addr_spec The string to check.
* @return mixed False on failure, or the validated addr-spec on success.
function _validateAddrSpec ($addr_spec)
$addr_spec = trim($addr_spec);
// Split on @ sign if there is one.
if (strpos($addr_spec, '@') !== false ) {
$local_part = $this->_splitCheck ($parts, '@');
// No @ sign so assume the default domain.
$local_part = $addr_spec;
if (($local_part = $this->_validateLocalPart ($local_part)) === false ) return false;
if (($domain = $this->_validateDomain ($domain)) === false ) return false;
// Got here so return successful.
return array ('local_part' => $local_part, 'domain' => $domain);
* Function to validate the local part of an address:
* local-part = word *("." word)
* @param string $local_part
* @return mixed False on failure, or the validated local part on success.
function _validateLocalPart ($local_part)
$parts = explode('.', $local_part);
// Split the local_part into words.
while (count($parts) > 0 ){
$words[] = $this->_splitCheck ($parts, '.');
for ($i = 0; $i < $this->index + 1; $i++ ) {
foreach ($words as $word) {
// If this word contains an unquoted space, it is invalid. (6.2.4)
if (strpos($word, ' ') && $word[0 ] !== '"')
if ($this->_validatePhrase (trim($word)) === false ) return false;
// Managed to get here, so return the input.
* Returns an approximate count of how many addresses are in the
* given string. This is APPROXIMATE as it only splits based on a
* comma which has no preceding backslash. Could be useful as
* large numbers of addresses will end up producing *large*
* structures when used with parseAddressList().
* @param string $data Addresses to count
* @return int Approximate count
* This is an email validating function separate to the rest of the
* class. It simply validates whether an email is of the common
* internet form: <user>@<domain>. This can be sufficient for most
* people. Optional stricter mode can be utilised which restricts
* mailbox characters allowed to alphanumeric, full stop, hyphen,
* underscore and plus sign.
* @param string $data Address to check
* @param boolean $strict Optional stricter mode
* @return mixed False if it fails, an indexed array
* username/domain if it matches
$regex = $strict ? '/^([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i' : '/^([*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
return array ($matches[1 ], $matches[2 ]);
Documentation generated on Mon, 11 Mar 2019 14:46:47 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|