Package home | Report new bug | New search | Development Roadmap Status: Open | Feedback | All | Closed Since Version 1.8.8

Request #12411 Support RFC 2047 standard for non-ascii characters as attachment filename.
Submitted: 2007-11-09 02:58 UTC Modified: 2010-01-01 14:11 UTC
From: bmdevelopment Assigned: alec
Status: Closed Package: Mail_Mime (version 1.5.2)
PHP Version: 5.2.4 OS: FreeBSD 6.2-RELEASE #0
Roadmaps: 1.6.0RC1    
Subscription  



Patch mime_bulk.patch Revisions
Revision 2009-06-17 10:54 UTC
Developer alec
 
Download patch

This patch is obsolete

Obsoleted by patches:

--- mimePart.old	2009-06-17 11:15:44.833573598 +0200
+++ mimePart.php	2009-06-17 09:43:36.137446221 +0200
@@ -182,15 +182,19 @@
 
             }
         }
+	
         if (isset($contentType['type'])) {
             $headers['Content-Type'] = $contentType['type'];
+	    if (isset($contentType['charset'])) {
+                $headers['Content-Type'] .= " charset={$contentType['charset']};";
+            }
             if (isset($contentType['name'])) {
                 $headers['Content-Type'] .= ';' . MAIL_MIMEPART_CRLF;
-                $headers['Content-Type'] .= $this->_buildHeaderParam('name', $contentType['name'], 
-                                                isset($contentType['charset']) ? $contentType['charset'] : 'US-ASCII', 
-                                                isset($contentType['language']) ? $contentType['language'] : NULL);
-            } elseif (isset($contentType['charset'])) {
-                $headers['Content-Type'] .= "; charset=\"{$contentType['charset']}\"";
+                $headers['Content-Type'] .=
+		    $this->_buildHeaderParam('name', $contentType['name'], 
+                        isset($contentType['charset']) ? $contentType['charset'] : 'US-ASCII', 
+                        isset($contentType['language']) ? $contentType['language'] : NULL,
+			isset($params['name-encoding']) ?  $params['name-encoding'] : NULL);
             }
         }
 
@@ -199,15 +203,14 @@
             $headers['Content-Disposition'] = $contentDisp['disp'];
             if (isset($contentDisp['filename'])) {
                 $headers['Content-Disposition'] .= ';' . MAIL_MIMEPART_CRLF;
-                $headers['Content-Disposition'] .= $this->_buildHeaderParam('filename', $contentDisp['filename'], 
-                                                isset($contentDisp['charset']) ? $contentDisp['charset'] : 'US-ASCII', 
-                                                isset($contentDisp['language']) ? $contentDisp['language'] : NULL);
+                $headers['Content-Disposition'] .=
+		    $this->_buildHeaderParam('filename', $contentDisp['filename'], 
+                        isset($contentDisp['charset']) ? $contentDisp['charset'] : 'US-ASCII', 
+                        isset($contentDisp['language']) ? $contentDisp['language'] : NULL,
+			isset($params['filename-encoding']) ? $params['filename-encoding'] : NULL);
             }
         }
-        
-        
-        
-        
+
         // Default content-type
         if (!isset($headers['Content-Type'])) {
             $headers['Content-Type'] = 'text/plain';
@@ -254,8 +257,8 @@
             }
 
             $encoded['body'] = '--' . $boundary . MAIL_MIMEPART_CRLF . 
-                               rtrim(implode('--' . $boundary . MAIL_MIMEPART_CRLF , $subparts), MAIL_MIMEPART_CRLF) . MAIL_MIMEPART_CRLF . 
-                               '--' . $boundary.'--' . MAIL_MIMEPART_CRLF;
+				implode('--' . $boundary . MAIL_MIMEPART_CRLF , $subparts) .
+                        	'--' . $boundary.'--' . MAIL_MIMEPART_CRLF;
 
         } else {
             $encoded['body'] = $this->_getEncodedData($this->_body, $this->_encoding);
@@ -387,39 +390,43 @@
      * @param $value        The value of the paramter
      * @param $charset      The characterset of $value
      * @param $language     The language used in $value
-     * @param $maxLength    The maximum length of a line. Defauls to 75
+     * @param $paramEnc     Parameter encoding type
+     * @param $maxLength    The maximum length of a line. Defaults to 78
      *
      * @access private
      */
-    function _buildHeaderParam($name, $value, $charset=NULL, $language=NULL, $maxLength=75)
+    function _buildHeaderParam($name, $value, $charset=NULL, $language=NULL, $paramEnc=NULL, $maxLength=78)
     {
-        //If we find chars to encode, or if charset or language
-        //is not any of the defaults, we need to encode the value.
-        $shouldEncode = 0;
-        $secondAsterisk = '';
-        if (preg_match('#([\x80-\xFF]){1}#', $value)) {
-            $shouldEncode = 1;
-        } elseif ($charset && (strtolower($charset) != 'us-ascii')) {
-            $shouldEncode = 1;
-        } elseif ($language && ($language != 'en' && $language != 'en-us')) {
-            $shouldEncode = 1;
-        }
-        if ($shouldEncode) {
-            $search  = array('%',   ' ',   "\t");
-            $replace = array('%25', '%20', '%09');
-            $encValue = str_replace($search, $replace, $value);
-            $encValue = preg_replace('#([\x80-\xFF])#e', '"%" . strtoupper(dechex(ord("\1")))', $encValue);
-            $value = "$charset'$language'$encValue";
-            $secondAsterisk = '*';
-        }
-        $header = " {$name}{$secondAsterisk}=\"{$value}\"; ";
+        // RFC 2045: 
+	// value needs encoding if contains non-ASCII chars or is longer than 78 chars
+        if (!preg_match('#[^\x20-\x7E]#', $value)) { // ASCII
+	    // token
+    	    if (!preg_match('#([^\x21,\x23-\x27,\x2A,\x2B,\x2D,\x2E,\x30-\x39,\x41-\x5A,\x5E-\x7E])#', $value)) {
+		if (strlen($name) + strlen($value) + 3 <= $maxLength)
+		    return " {$name}={$value};";
+	    } else { // quoted-string
+		$quoted = addcslashes($value, '\\"');
+		if (strlen($name) + strlen($quoted) + 5 <= $maxLength)
+		    return " {$name}=\"{$quoted}\";";
+	    }
+	}
+
+	// RFC2047: use quoted-printable/base64 encoding
+	if ($paramEnc == 'quoted-printable' || $paramEnc == 'base64')
+	    return $this->_buildRFC2047Param($name, $value, $charset, $paramEnc);
+
+	// RFC2231:
+        $encValue = preg_replace('#([^\x21,\x23,\x24,\x26,\x2B,\x2D,\x2E,\x30-\x39,\x41-\x5A,\x5E-\x7E])#e',
+			'"%" . strtoupper(dechex(ord("\1")))', $value);
+        $value = "$charset'$language'$encValue";
+
+        $header = " {$name}*={$value};";
         if (strlen($header) <= $maxLength) {
             return $header;
         }
 
-        $preLength = strlen(" {$name}*0{$secondAsterisk}=\"");
-        $sufLength = strlen("\";");
-        $maxLength = MAX(16, $maxLength - $preLength - $sufLength - 2);
+        $preLength = strlen(" {$name}*0*=");
+        $maxLength = max(16, $maxLength - $preLength - 3);
         $maxLengthReg = "|(.{0,$maxLength}[^\%][^\%])|";
 
         $headers = array();
@@ -428,15 +435,95 @@
             $matches = array();
             $found = preg_match($maxLengthReg, $value, $matches);
             if ($found) {
-                $headers[] = " {$name}*{$headCount}{$secondAsterisk}=\"{$matches[0]}\"";
+                $headers[] = " {$name}*{$headCount}*={$matches[0]}";
                 $value = substr($value, strlen($matches[0]));
             } else {
-                $headers[] = " {$name}*{$headCount}{$secondAsterisk}=\"{$value}\"";
-                $value = "";
+                $headers[] = " {$name}*{$headCount}*={$value}";
+                $value = '';
             }
             $headCount++;
         }
-        $headers = implode(MAIL_MIMEPART_CRLF, $headers) . ';';
+        $headers = implode(';' . MAIL_MIMEPART_CRLF, $headers) . ';';
         return $headers;
     }
+
+    /**
+     * Encodes a header parameter as per RFC 2047. Printable-ASCII values are only
+     * quoted; values that are too long are truncated, keeping the file extension.
+     *
+     * @param string $name      The parameter name
+     * @param string $value     The parameter value
+     * @param string $charset   The parameter charset
+     * @param string $encoding  Encoding type (quoted-printable or base64)
+     * @param int    $maxLength Encoded parameter max length (75 is the value specified in the RFC)
+     * @return string Parameter line in the form ' name="value"; '
+     * @access private
+     */
+    function _buildRFC2047Param($name, $value, $charset, $encoding='quoted-printable', $maxLength=75)
+    {
+        if (!preg_match('#([^\x20-\x7E]){1}#', $value))
+        {
+            $quoted = addcslashes($value, '\\"');
+            $maxLength = $maxLength - 6;
+            if (strlen($quoted) > $maxLength)
+            {
+                // truncate filename leaving extension
+                $ext = strrchr($quoted, '.');
+                $quoted = substr($quoted, 0, $maxLength - strlen($ext));
+                // the cut may have split an escape pair; drop trailing backslashes
+                $quoted = preg_replace('/[\\\\]+$/', '', $quoted);
+                $quoted .= $ext;
+            }
+        }
+        else if ($encoding == 'base64')
+        {
+            $ext = strrchr($value, '.');
+            $value = substr($value, 0, strlen($value) - strlen($ext));
+
+            $ext = base64_encode($ext);
+            $value = base64_encode($value);
+
+            $prefix = '=?' . $charset . '?B?';
+            $suffix = '?=';
+            $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;
+
+            //We can cut base64 every 4 characters, so the real max
+            //we can get must be rounded down.
+            $maxLength = $maxLength - ($maxLength % 4);
+            $quoted = $prefix . substr($value, 0, $maxLength) . $ext . $suffix;
+        }
+        else // quoted-printable
+        {
+            $ext = strrchr($value, '.');
+            $value = substr($value, 0, strlen($value) - strlen($ext));
+
+            // Replace all special characters used by the encoder.
+            $search  = array('=',   '_',   '?',   ' ');
+            $replace = array('=3D', '=5F', '=3F', '_');
+            $ext = str_replace($search, $replace, $ext);
+            $value = str_replace($search, $replace, $value);
+
+            // Replace all extended characters (\x80-\xFF) with their
+            // "=XX" hexadecimal escapes.
+            $ext = preg_replace('/([\x80-\xFF])/e',
+                '"=" . strtoupper(dechex(ord("\1")))', $ext);
+            $value = preg_replace('/([\x80-\xFF])/e',
+                '"=" . strtoupper(dechex(ord("\1")))', $value);
+
+            $prefix = '=?' . $charset . '?Q?';
+            $suffix = '?=';
+
+            $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;
+
+            // Truncate QP-encoded text at $maxLength
+            // but not break any encoded letters.
+            if(preg_match("/^(.{0,$maxLength}[^\=][^\=])/", $value, $matches))
+                $value = $matches[1];
+
+            $quoted = $prefix . $value . $ext . $suffix;
+        }
+
+        return " {$name}=\"{$quoted}\"; ";
+    }
+
 } // End of class
--- mime.old	2009-06-17 11:45:03.805026259 +0200
+++ mime.php	2009-06-15 08:17:23.000000000 +0200
@@ -323,6 +323,8 @@
      *                             of this attachment.
      * @param string $language    The language of the attachment
      * @param string $location    The RFC 2557.4 location of the attachment
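+     * @param string $n_encoding  RFC 2047 encoding ('quoted-printable' or 'base64') for the attachment name (Content-Type); NULL falls back to RFC 2231
+     * @param string $f_encoding  RFC 2047 encoding ('quoted-printable' or 'base64') for the attachment filename (Content-Disposition); NULL falls back to RFC 2231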
      *
      * @return mixed true on success or PEAR_Error object
      * @access public
@@ -335,7 +337,9 @@
                            $disposition = 'attachment',
                            $charset     = '',
                             $language   = '',
-                           $location    = '')
+                           $location    = '',
+			   $n_encoding	= NULL,
+			   $f_encoding   = NULL)
     {
         $filedata = ($isfile === true) ? $this->_file2str($file)
                                            : $file;
@@ -350,7 +354,7 @@
             $err = PEAR::raiseError($msg);
             return $err;
         }
-        $filename = basename($filename);
+        $filename = $this->_basename($filename);
         if (PEAR::isError($filedata)) {
             return $filedata;
         }
@@ -363,7 +367,9 @@
                                 'charset'     => $charset,
                                 'language'    => $language,
                                 'location'    => $location,
-                                'disposition' => $disposition
+                                'disposition' => $disposition,
+				'name-encoding'    => $n_encoding,
+				'filename-encoding'=> $f_encoding
                                );
         return true;
     }
@@ -532,6 +538,12 @@
         $params['disposition']  = 'inline';
         $params['dfilename']    = $value['name'];
         $params['cid']          = $value['cid'];
+	if ($value['name-encoding']) {
+	    $params['name-encoding'] = $value['name-encoding'];
+	}
+	if ($value['filename-encoding']) {
+	    $params['filename-encoding'] = $value['filename-encoding'];
+	}
         
         $ret = $obj->addSubpart($value['body'], $params);
         return $ret;
@@ -561,6 +573,12 @@
         if ($value['location']) {
             $params['location'] = $value['location'];
         }
+	if ($value['name-encoding']) {
+	    $params['name-encoding'] = $value['name-encoding'];
+	}
+	if ($value['filename-encoding']) {
+	    $params['filename-encoding'] = $value['filename-encoding'];
+	}
         $params['content_type'] = $value['c_type'];
         $params['disposition']  = isset($value['disposition']) ? 
                                   $value['disposition'] : 'attachment';
@@ -667,7 +685,7 @@
 
                 $this->_htmlbody = preg_replace($regex, $rep, $this->_htmlbody);
                 $this->_html_images[$key]['name'] = 
-                    basename($this->_html_images[$key]['name']);
+                    $this->_basename($this->_html_images[$key]['name']);
             }
         }
 
@@ -1114,6 +1130,21 @@
         }
     }
 
-    
+    /**
+     * Get file's basename (locale independent)
+     *
+     * @param string $filename Path or file name
+     *
+     * @return string Basename (the text after the last directory separator)
+     * @access private
+     */
+    function _basename($filename)
+    {
+        // basename() is not unicode safe and is locale dependent.
+        // Test for the "WIN" prefix only: a substring search for "win"
+        // also matches "Darwin" and would treat "\" as a separator there.
+        $isDosLike = strtoupper(substr(PHP_OS, 0, 3)) == 'WIN' || stristr(PHP_OS, 'netware');
+        return preg_replace($isDosLike ? '/^.*[\\\\\\/]/' : '/^.*[\/]/', '', $filename);
+    }
 
 } // End of class