//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// *WITHOUT ANY WARRANTY*; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
// Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program.  If not, see <http://www.gnu.org/licenses/>.

// Private Group: Private Functions

// Private Function: email_parseHeaderAddr
//
// Splits a header line (without the introductory "Name: ") into an array of
// names and addresses.  No syntax checking is done, just basic parsing.
// Similar to PHP's imap_rfc822_parse_adrlist(); I just didn't want to depend
// on the IMAP extension just for this one function.
//
// Names and addresses are returned with surrounding whitespace removed, and
// empty entries in a naked address list are skipped.
//
// TODO:
//
// Currently, the full "Name" <address> mode and plain mode cannot be
// mixed.  (i.e. a list of naked addresses is okay, a list of complete named
// addresses is okay, but mixing the two types doesn't work.)  I don't know for
// sure whether naked addresses are legal to begin with, but I thought
// supporting them couldn't hurt.  Point being this could sometimes fail with
// odd formatting.
//
// Parameters:
//   head - The header string.
//
// Returns:
//   An indexed array listing addresses found, each being an associative array
//   with "name" and "email" keys defined.
//
function email_parseHeaderAddr($head)
{
    $result = array();
    $matches = array();

    // Try the "Name" <address>[, ...] format
    preg_match_all('/[ \t]*"?([^"<]*)"?[ \t]*<([^>]+)>,?/u', $head, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        // The name capture runs up to the '<', so an unquoted name carries
        // the separating blank with it; trim both fields.
        $result[] = array(
            'name'  => trim($match[1]),
            'email' => trim($match[2]),
        );
    }

    if (count($result) == 0) {
        // Fall back to address[, ...] format
        preg_match_all('/[ \t]*([^,]+),?/u', $head, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            $email = trim($match[1]);
            if ($email === '') {
                // Whitespace between two commas, or a blank header.
                continue;
            }
            $result[] = array(
                'name'  => '',
                'email' => $email,
            );
        }
    }

    return $result;
}

// Private Function: email_parseHeaderIds
//
// Splits a header line (without the introductory "Name: ") into an array of
// message IDs.  No syntax checking is done, just basic parsing.
//
// Parameters:
//   head - The header string.
//
// Returns:
//   An indexed array listing the message IDs found as strings (without the
//   enclosing angle brackets).
//
function email_parseHeaderIds($head)
{
    $result = array();
    $matches = array();

    preg_match_all('/[ \t]*<([^>]+)>,?/u', $head, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        $result[] = $match[1];
    }

    return $result;
}

// Private Function: email_parseHeaderContent
//
// Splits a header line (without the introductory "Name: ") into its
// MIME constituents.  Suitable for Content-Type and Content-Disposition.
//
// Parameters:
//   head   - The header string.
//   name   - Name to use as key for the first part in the resulting array.
//            (Optional.)
//   result - Pointer to an existing array to avoid creating one. (Optional.)
//
// Returns:
//   An associative array of the constituents of the header.
//
// Notes:
//   - The given array is received by value: callers must use the returned
//     array, which is the given one with the header's constituents merged in.
//   - Parameter values may be quoted or bare; quoted values may contain ';'.
//   - Parameter names are case-insensitive in MIME and are stored lowercased.
//   - A parameter whose name equals the primary key (e.g. the 'type'
//     parameter of multipart/related) is ignored so it cannot overwrite the
//     primary value.
//   - When name is 'type', "major"/"minor" (and "minorLeft"/"minorRight" for
//     plus-style subtypes) are derived from the value.
//
function email_parseHeaderContent($head, $name = 'type', $result = null)
{
    if (!is_array($result)) {
        $result = array();
    }
    $head = (string) $head;

    $semi = strpos($head, ';');
    if ($semi === false) {
        // No parameters at all: the whole header is the primary value.
        $result[$name] = trim($head);
    } else {
        $result[$name] = trim(substr($head, 0, $semi));

        // Each parameter is ";name=value" where value is either a quoted
        // string (backslash escapes kept verbatim) or a bare token.
        $params = array();
        preg_match_all(
            '/;\s*([^=;\s]+)\s*=\s*(?:"((?:[^"\\\\]|\\\\.)*)"|([^;]*))/',
            substr($head, $semi),
            $params,
            PREG_SET_ORDER
        );
        foreach ($params as $param) {
            $key = strtolower($param[1]);
            if ($key === $name) {
                continue;
            }
            // Group 3 only exists when the bare-token alternative matched.
            $result[$key] = isset($param[3]) ? trim($param[3]) : $param[2];
        }
    }

    if ($name == 'type') {
        $slash = strpos($result[$name], '/');
        if ($slash !== false && $slash > 0) {
            $result['major'] = substr($result[$name], 0, $slash);
            $result['minor'] = (string) substr($result[$name], $slash + 1);

            $plus = strpos($result['minor'], '+');
            if ($plus !== false && $plus > 0) {
                $result['minorLeft'] = substr($result['minor'], 0, $plus);
                $result['minorRight'] = (string) substr($result['minor'], $plus + 1);
            }
        }
    }

    return $result;
}

// Private Function: email_getHeaders
//
// Get RFC822 headers from an input stream.  Stops processing when an empty
// line is encountered (or at EOF).  Multi-line headers are merged and any
// inline encoding is decoded, including on the last header read.  For
// consistency, header names (what is left of ':') are converted to
// lowercase.  Whitespace following the ':' is optional and is stripped.
//
// Parameters:
//   fp - Input stream resource.
//
// Caveats:
//   Because headers are stored in an arrayhash, if one header name is
//   encountered multiple times (i.e. "Received:"), only the last occurrence
//   is ultimately stored, the others being lost.
//
//   Continuation or colon-less lines found before any header are dropped.
//
// Returns:
//   Arrayhash of headers found.
function email_getHeaders($fp)
{
    $result = array();

    // WARNING: Repeated headers overwrite previous occurrences.
    $curHead = null;
    while (($raw = fgets($fp)) !== false) {
        $line = rtrim($raw);
        if ($line === '') {
            // Blank line: end of the header section.
            break;
        }

        if ($line[0] == ' ' || $line[0] == "\t") {
            // Folded continuation of the current header.
            if ($curHead !== null) {
                $result[$curHead] .= $line;
            }
            continue;
        }

        $colon = strpos($line, ':');
        if ($colon !== false && $colon > 0) {
            // New header, decode previous one before moving on.
            if ($curHead !== null) {
                $result[$curHead] = mb_decode_mimeheader($result[$curHead]);
            }
            $curHead = strtolower(rtrim(substr($line, 0, $colon)));
            $result[$curHead] = ltrim((string) substr($line, $colon + 1));
        } elseif ($curHead !== null) {
            // WARNING: This bad line is likely _not_ desirable.
            $result[$curHead] .= $line;
        }
    }

    // The final header has no successor to trigger its decoding.
    if ($curHead !== null) {
        $result[$curHead] = mb_decode_mimeheader($result[$curHead]);
    }

    return $result;
}

// Private Function: email_decodeTransfer
//
// Decode a body string according to a content transfer encoding.
//
// Parameters:
//   data     - Raw body string.
//   encoding - Content transfer encoding name (case-insensitive).  Only
//              'quoted-printable' and 'base64' are decoded; anything else
//              returns the data untouched.
//
// Returns:
//   The decoded string.
//
function email_decodeTransfer($data, $encoding)
{
    switch (strtolower(trim((string) $encoding))) {
        case 'quoted-printable':
            return quoted_printable_decode($data);
        case 'base64':
            return base64_decode($data);
        default:
            return $data;
    }
}

// Private Function: email_getData
//
// Read a body of data from an input stream.  If a MIME boundary is specified,
// contents prior to the boundary is ignored, then parts are split at each of
// the boundaries found, up to a closing boundary or EOF.
//
// As per MIME standards, after each boundary is found, email_getHeaders() is
// invoked to fetch the part's headers, then if a new boundary was found,
// email_getData() recurses for that new multipart section.  For parts of
// other types, a string is constructed with the input.
//
// Each part receives an "encoding" key holding its lowercased
// Content-Transfer-Encoding (default '8bit').  When a part's data is decoded,
// its "content-transfer-encoding" key is rewritten to '8bit'.
//
// Parameters:
//   fp       - Input stream resource.
//   boundary - MIME Boundary (without preceding '--') to look for.  Set to
//              /null/ if there is none.
//   encoding - Content transfer encoding to decode.  Only used when there is
//              no boundary.
//
// Returns:
//   String or array of arrayhashes (one for each part) depending on whether
//   the part is a single body or a multipart group, respectively.
//
function email_getData($fp, $boundary, $encoding)
{
    if (!$boundary) {
        // Single body: slurp the rest of the stream and decode it.
        $body = '';
        while (($line = fgets($fp)) !== false) {
            $body .= $line;
        }
        return email_decodeTransfer($body, $encoding);
    }

    $parts = array();
    $i = -1;
    $marker = '--' . $boundary;
    $mLen = strlen($marker);

    while (($line = fgets($fp)) !== false) {
        if (strncmp($line, $marker, $mLen) == 0) {
            if (substr($line, $mLen, 2) == '--') {
                // Closing boundary.
                break;
            }

            $i++;
            $part = email_getHeaders($fp);
            if (isset($part['content-type'])) {
                $part = email_parseHeaderContent($part['content-type'], 'type', $part);
            }
            if (isset($part['content-disposition'])) {
                $part = email_parseHeaderContent($part['content-disposition'], 'disposition', $part);
            }
            $part['encoding'] = isset($part['content-transfer-encoding'])
                ? strtolower(trim($part['content-transfer-encoding']))
                : '8bit';

            if (isset($part['boundary'])) {
                $part['data'] = email_getData($fp, $part['boundary'], $part['encoding']);
            } else {
                $part['data'] = '';
            }
            $parts[$i] = $part;
        } elseif ($i >= 0 && is_string($parts[$i]['data'])) {
            $parts[$i]['data'] .= $line;
        }
        // Anything else is preamble, or trailing text after a nested
        // multipart, and is ignored.
    }

    // Decode leaf parts; nested groups were decoded by their own recursion.
    for ($j = 0; $j <= $i; $j++) {
        if (!is_string($parts[$j]['data'])) {
            continue;
        }
        $enc = $parts[$j]['encoding'];
        if ($enc == 'quoted-printable' || $enc == 'base64') {
            $parts[$j]['data'] = email_decodeTransfer($parts[$j]['data'], $enc);
            $parts[$j]['content-transfer-encoding'] = '8bit';
        }
    }

    return $parts;
}

// Group: Functions

// Function: email_parse
//
// Parse a MIME message into an array of constituents.
//
// Parameters:
//   from - String containing the entire message, or input stream (i.e. stdin).
//
// Returns:
//   Arrayhash containing "envelope", "addresses", "ids" and "body" keys, or
//   an empty array if no input stream could be obtained.
//
// The "envelope" key contains an arrayhash of the message's headers, with
// keys lowercased.  Due to the nature of this hash, for headers encountered
// multiple times (i.e. "Received:") only the last occurrence is stored.
//
// The "addresses" key contains an arrayhash: for each address-type header
// (From, To, Reply-To, Cc, Bcc, Sender, Return-Path) found, a key in its name
// contains an indexed array, itself an arrayhash with "name" and "email"
// keys.
//
// The "ids" key contains an arrayhash: for each ID-type header (Message-ID,
// References, In-Reply-To) found, a key in its name contains an indexed array
// listing the message IDs found in the line.  (Typically, Message-Id and
// In-Reply-To will contain only one, and References may contain several.)
// It is an empty array when no such header is present.
//
// The "body" key contains an arrayhash with part-related header keys and a
// 'data' key holding the decoded data or an indexed array of constituent
// parts, themselves structured like this part.  Keys are:
//
//   type        - MIME type (i.e. 'text/plain')
//   major       - MIME type, major portion only (i.e. 'text')
//   minor       - MIME type, minor portion only (i.e. 'plain')
//   minorLeft   - MIME type, minor portion, first part of plus-style only
//                 (i.e. for 'text/xml+xhtml', would be 'xml') (Optional.)
//   minorRight  - MIME type, minor portion, second part of plus-style only
//                 (i.e. for 'text/xml+xhtml', would be 'xhtml') (Optional,
//                 provided if minorLeft is.)
//   charset     - Specified character set. (Only supplied if found.)
//   name        - Suggested file name from Content-Type header. (Only
//                 supplied if found.)
//   disposition - MIME content disposition. (Only supplied if found.)
//   boundary    - MIME boundary string. (Only supplied if found.)
//   encoding    - MIME content transfer encoding. (Defaults to '8bit'.)
//   filename    - Suggested file name from Content-Disposition header. (Only
//                 supplied if found.)
//   data        - String of decoded data body or Array of body arrayhashes if
//                 multipart.
function email_parse($from)
{
    $fp = $from;
    if (is_string($from)) {
        // Wrap the string in a temporary stream so both input kinds share
        // the same line-oriented reader.
        $fp = fopen('php://temp', 'w+');
        if ($fp) {
            fwrite($fp, $from);
            rewind($fp);
        }
    }

    $result = array();
    if (!$fp) {
        return $result;  // Bail on fopen() failure or unusable input.
    }

    $addressHeaders = array('to', 'from', 'reply-to', 'cc', 'bcc', 'return-path', 'sender');
    $idHeaders = array('message-id', 'references', 'in-reply-to');

    $result['envelope'] = email_getHeaders($fp);
    $result['addresses'] = array();
    $result['ids'] = array();
    foreach ($result['envelope'] as $key => $value) {
        // Strict matching: numeric header names become integer array keys
        // and must not loosely compare equal to a header name.
        $header = (string) $key;
        if (in_array($header, $addressHeaders, true)) {
            $result['addresses'][$key] = email_parseHeaderAddr($value);
        } elseif (in_array($header, $idHeaders, true)) {
            $result['ids'][$key] = email_parseHeaderIds($value);
        }
    }

    // Rest of file is a single part
    // Discover type, charset if supplied, load body
    if (isset($result['envelope']['content-type'])) {
        $result['body'] = email_parseHeaderContent($result['envelope']['content-type'], 'type');
    } else {
        $result['body'] = array(
            'type'    => 'text/plain',
            'major'   => 'text',
            'minor'   => 'plain',
            'charset' => 'us-ascii',
        );
    }

    if (isset($result['envelope']['content-transfer-encoding'])) {
        $result['body']['encoding'] = strtolower(trim($result['envelope']['content-transfer-encoding']));
    } else {
        $result['body']['encoding'] = '8bit';
    }

    // Media types are case-insensitive, so "Multipart/Mixed" must split too.
    $isMultipart = isset($result['body']['boundary'], $result['body']['major'])
        && strcasecmp($result['body']['major'], 'multipart') == 0;

    $result['body']['data'] = email_getData(
        $fp,
        $isMultipart ? $result['body']['boundary'] : false,
        $result['body']['encoding']
    );

    if (is_string($from)) {
        fclose($fp);
    }

    return $result;
}