//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// *WITHOUT ANY WARRANTY*; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
// Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. If not, see <http://www.gnu.org/licenses/>.
// Private Group: Private Functions
// Private Function: email_parseHeaderAddr
//
// Splits a header line (without the introductory "Name: ") into an array of
// names and addresses. No syntax checking is done, just basic parsing.
// Similar to PHP's imap_rfc822_parse_adrlist(); implemented here to avoid
// depending on the IMAP extension just for this one function.
//
// Both the full "Name" <address> form and the naked address form are
// accepted, and the two may be mixed freely within one list. Commas found
// inside a quoted name or inside angle brackets do not split entries.
//
// Caveats:
//   RFC 822 comments in parentheses and group syntax are not interpreted.
//   Whatever follows the closing '>' of an entry is discarded.
//
// Parameters:
//   head - The header string.
//
// Returns:
//   An indexed array listing addresses found, each being an associative array
//   with "name" and "email" keys defined. "name" is an empty string for naked
//   addresses.
//
function email_parseHeaderAddr($head) {
    $result = Array();
    $head = (string)$head;
    $len = strlen($head);

    $raw = '';          // Entry text as written; used for naked addresses.
    $name = '';         // Display name with the surrounding quotes removed.
    $addr = null;       // Text between '<' and '>'; null until '<' is seen.
    $inQuote = false;
    $inAngle = false;
    $addrDone = false;  // True once the closing '>' has been consumed.

    // Scanning bytes is safe for UTF-8 input: every delimiter is ASCII and
    // ASCII byte values never occur inside a multi-byte sequence.
    // The loop runs one position past the end so the last entry is flushed.
    for ($i = 0; $i <= $len; $i++) {
        $ch = ($i < $len) ? $head[$i] : null;

        if ($ch === null || ($ch === ',' && !$inQuote && !$inAngle)) {
            $email = ($addr === null) ? '' : trim($addr);
            if ($email !== '') {
                $result[] = Array( 'name' => trim($name), 'email' => $email );
            } else {
                // No usable <address>: treat the whole entry as a naked one.
                $naked = trim($raw);
                if ($naked !== '') {
                    $result[] = Array( 'name' => '', 'email' => $naked );
                }
            }
            $raw = '';
            $name = '';
            $addr = null;
            $inQuote = false;
            $inAngle = false;
            $addrDone = false;
            continue;
        }

        $raw .= $ch;
        if ($inQuote) {
            if ($ch === '\\' && ($i + 1) < $len) {
                // Quoted pair: take the next character literally.
                $i++;
                $raw .= $head[$i];
                $name .= $head[$i];
            } elseif ($ch === '"') {
                $inQuote = false;
            } else {
                $name .= $ch;
            }
        } elseif ($inAngle) {
            if ($ch === '>') {
                $inAngle = false;
                $addrDone = true;
            } else {
                $addr .= $ch;
            }
        } elseif ($addrDone) {
            // Trailing text after the address is ignored.
        } elseif ($ch === '"') {
            $inQuote = true;
        } elseif ($ch === '<') {
            $inAngle = true;
            $addr = '';
        } else {
            $name .= $ch;
        }
    }
    return $result;
}
// Private Function: email_parseHeaderIds
//
// Extracts every angle-bracketed message ID from a header value (the part
// after the introductory "Name: "). The scan is purely lexical; the IDs
// themselves are not validated.
//
// Parameters:
//   head - The header string.
//
// Returns:
//   An indexed array of the message IDs found, as strings without their
//   surrounding angle brackets.
//
function email_parseHeaderIds($head) {
    $hits = Array();
    preg_match_all('/[ \t]*<([^>]+)>,?/u', $head, $hits, PREG_SET_ORDER);
    // Each hit is Array(whole match, captured ID); keep only the ID column.
    return array_column($hits, 1);
}
// Private Function: email_parseHeaderContent
//
// Splits a header line (without the introductory "Name: ") into its
// MIME constituents. Suitable for Content-Type and Content-Disposition.
//
// The text before the first ';' is stored under the key given by 'name'.
// Every following "attribute=value" parameter is stored under its attribute
// name, lowercased (attribute names are case-insensitive per RFC 2045).
// Values may be quoted or bare; one pair of surrounding double quotes is
// removed and a ';' inside quotes does not end the parameter. Segments
// without an '=' are skipped. A parameter whose name equals 'name' is also
// skipped so that it cannot overwrite the primary value (e.g. the "type"
// parameter of multipart/related).
//
// When 'name' is 'type' and the primary value contains a '/', the keys
// "major" and "minor" are added, plus "minorLeft" and "minorRight" when the
// minor part contains a '+'.
//
// Parameters:
//   head   - The header string.
//   name   - Name to use as key for the first part in the resulting array.
//            (Optional.)
//   result - Existing array to add the keys to. It is received by value; the
//            merged array is only available as the return value. (Optional.)
//
// Returns:
//   An associative array of the constituents of the header.
//
function email_parseHeaderContent($head, $name = 'type', $result = null) {
    if (!$result) $result = Array();
    $head = (string)$head;
    $len = strlen($head);

    $sc = strpos($head, ';');
    if ($sc === false) {
        // No parameters at all: the whole line is the primary value.
        $result[$name] = trim($head);
    } else {
        $result[$name] = trim((string)substr($head, 0, $sc));

        // Cut the remainder at each ';' that is not inside double quotes.
        $segments = Array();
        $current = '';
        $inQuote = false;
        for ($i = $sc + 1; $i < $len; $i++) {
            $ch = $head[$i];
            if ($ch === '"') {
                $inQuote = !$inQuote;
            } elseif ($ch === ';' && !$inQuote) {
                $segments[] = $current;
                $current = '';
                continue;
            }
            $current .= $ch;
        }
        $segments[] = $current;

        foreach ($segments as $segment) {
            $eq = strpos($segment, '=');
            if ($eq === false) continue;
            $key = strtolower(trim((string)substr($segment, 0, $eq)));
            if ($key === '' || $key === (string)$name) continue;
            $value = trim((string)substr($segment, $eq + 1));
            if (strlen($value) >= 2 && $value[0] === '"' && substr($value, -1) === '"') {
                $value = (string)substr($value, 1, -1);
            }
            $result[$key] = $value;
        }
    }

    if ($name == 'type') {
        $full = $result[$name];
        $slash = strpos($full, '/');
        if ($slash) {
            $result['major'] = substr($full, 0, $slash);
            $result['minor'] = (string)substr($full, $slash + 1);
            $plus = strpos($result['minor'], '+');
            if ($plus) {
                $result['minorLeft'] = substr($result['minor'], 0, $plus);
                $result['minorRight'] = (string)substr($result['minor'], $plus + 1);
            }
        }
    }
    return $result;
}
// Private Function: email_getHeaders
//
// Get RFC822 headers from an input stream. Stops processing when an empty
// (or whitespace-only) line or the end of the stream is encountered.
// Multi-line headers are merged and every stored value is passed through
// mb_decode_mimeheader() to decode inline encoding. For consistency, header
// names (what is left of ':') are converted to lowercase.
//
// Parameters:
//   fp - Input stream resource.
//
// Caveats:
//   Because headers are stored in an arrayhash, if one header name is
//   encountered multiple times (i.e. "Received:"), only the last occurrence
//   is ultimately stored, the others being lost.
//
//   A line that neither starts with whitespace nor contains a ':' after its
//   first character is appended to the current header. Such lines, and
//   continuation lines, are dropped when no header has been seen yet.
//
// Returns:
//   Arrayhash of headers found.
function email_getHeaders($fp) {
    $result = Array();
    $curHead = null;

    // Compare against false explicitly so that a line such as "0" does not
    // end the loop.
    while (($raw = fgets($fp)) !== false) {
        $line = rtrim($raw);
        if ($line === '') break;

        $isContinuation = ($line[0] == ' ' || $line[0] == "\t");
        $colon = $isContinuation ? false : strpos($line, ':');

        if ($colon !== false && $colon > 0) {
            // New header. The space after the colon is optional, so strip
            // leading whitespace rather than skipping a fixed two characters.
            $curHead = strtolower(rtrim(substr($line, 0, $colon)));
            $result[$curHead] = ltrim((string)substr($line, $colon + 1));
        } elseif ($curHead !== null) {
            // Folded continuation, or a malformed line kept as-is.
            $result[$curHead] .= $line;
        }
    }

    // Decode once everything is merged, so the final header is decoded too.
    foreach ($result as $key => $value) {
        $result[$key] = mb_decode_mimeheader($value);
    }
    return $result;
}
// Private Function: email_decodeTransferEncoding
//
// Decode a body string according to a content transfer encoding name.
//
// Parameters:
//   data     - The raw body string.
//   encoding - Encoding name, compared case-insensitively. Only
//              'quoted-printable' and 'base64' are decoded; for any other
//              value the data is returned unchanged.
//
// Returns:
//   The decoded string.
//
function email_decodeTransferEncoding($data, $encoding) {
    switch (strtolower(trim((string)$encoding))) {
        case 'quoted-printable' :
            return quoted_printable_decode($data);
        case 'base64' :
            return base64_decode($data);
        default :
            return $data;
    }
}
// Private Function: email_getData
//
// Read a body of data from an input stream. If a MIME boundary is specified,
// contents prior to the first boundary is ignored, then parts are split at
// each of the boundaries found, up to a closing boundary or EOF.
//
// After each boundary is found, email_getHeaders() is invoked to fetch the
// part's headers and email_parseHeaderContent() merges the Content-Type and
// Content-Disposition constituents into them. If the part declares a boundary
// of its own, email_getData() recurses for that new multipart section. For
// parts of other types, a string is constructed with the input and decoded
// according to the part's Content-Transfer-Encoding.
//
// A line counts as a boundary only when nothing but whitespace (or the
// closing "--") follows the boundary string, so a longer boundary sharing the
// same prefix is treated as ordinary data.
//
// Each part receives an "encoding" key holding its declared transfer encoding
// in lowercase ('8bit' when the header is absent). After a quoted-printable
// or base64 part has been decoded, its "content-transfer-encoding" key is set
// to '8bit'.
//
// Parameters:
//   fp       - Input stream resource.
//   boundary - MIME Boundary (without preceding '--') to look for. Set to
//              /null/ if there is none.
//   encoding - Content transfer encoding to decode. Only used when there is
//              no boundary.
//
// Returns:
//   String or array of arrayhashes (one for each part) depending on whether the
//   part is a single body or a multipart group, respectively.
//
function email_getData($fp, $boundary, $encoding) {
    if (!$boundary) {
        // Single body: slurp the rest of the stream. Comparing with false
        // keeps a final "0" line from ending the loop early.
        $body = '';
        while (($line = fgets($fp)) !== false) {
            $body .= $line;
        }
        return email_decodeTransferEncoding($body, $encoding);
    }

    $parts = Array();
    $i = -1;
    $delimiter = '--' . $boundary;
    $dLen = strlen($delimiter);

    while (($line = fgets($fp)) !== false) {
        if (strncmp($line, $delimiter, $dLen) == 0) {
            $tail = rtrim((string)substr($line, $dLen));
            if ($tail === '--') {
                break; // Closing boundary.
            }
            if ($tail === '') {
                $i++;
                $part = email_getHeaders($fp);
                // The parser takes the array by value, so use what it returns.
                if (isset($part['content-type'])) {
                    $part = email_parseHeaderContent($part['content-type'], 'type', $part);
                }
                if (isset($part['content-disposition'])) {
                    $part = email_parseHeaderContent($part['content-disposition'], 'disposition', $part);
                }
                $part['encoding'] = isset($part['content-transfer-encoding'])
                    ? strtolower(trim($part['content-transfer-encoding']))
                    : '8bit';
                if (isset($part['boundary'])) {
                    $part['data'] = email_getData($fp, $part['boundary'], $part['encoding']);
                } else {
                    $part['data'] = '';
                }
                $parts[$i] = $part;
                continue;
            }
            // Otherwise this is a different, longer boundary: plain data.
        }
        // Lines before the first part, and lines following a nested
        // multipart section, are ignored.
        if (($i >= 0) && is_string($parts[$i]['data'])) {
            $parts[$i]['data'] .= $line;
        }
    }

    foreach ($parts as $k => $part) {
        if (!is_string($part['data'])) continue;
        if ($part['encoding'] == 'quoted-printable' || $part['encoding'] == 'base64') {
            $parts[$k]['data'] = email_decodeTransferEncoding($part['data'], $part['encoding']);
            $parts[$k]['content-transfer-encoding'] = '8bit';
        }
    }
    return $parts;
}
// Group: Functions
// Function: email_parse
//
// Parse a MIME message into an array of constituents.
//
// Parameters:
//   from - String containing the entire message, or input stream (i.e. stdin).
//          A stream passed in is left open; a temporary stream created for a
//          string is closed before returning.
//
// Returns:
//   Arrayhash containing "envelope", "addresses", "ids" and "body" keys, or an
//   empty array if no input stream is available.
//
//   The "envelope" key contains an arrayhash of the message's headers, with
//   keys lowercased. Due to the nature of this hash, for headers encountered
//   multiple times (i.e. "Received:") only the last occurrence is stored.
//
//   The "addresses" key contains an arrayhash: for each address-type header
//   (From, To, Reply-To, Cc, Bcc, Sender, Return-Path) found, a key in its name
//   contains an indexed array, itself an arrayhash with "name" and "email"
//   keys.
//
//   The "ids" key contains an arrayhash: for each ID-type header (Message-ID,
//   References, In-Reply-To) found, a key in its name contains an indexed array
//   listing the message IDs found in the line. (Typically, Message-Id and
//   In-Reply-To will contain only one, and References may contain several.)
//   It is an empty array when no such header is present.
//
//   The "body" key contains an arrayhash with part-related header keys and a
//   'data' key holding the decoded data or an indexed array of constituent
//   parts, themselves structured like this part. Keys are:
//
//   type        - MIME type (i.e. 'text/plain')
//   major       - MIME type, major portion only (i.e. 'text')
//   minor       - MIME type, minor portion only (i.e. 'plain')
//   minorLeft   - MIME type, minor portion, first part of plus-style only (i.e.
//                 for 'text/xml+xhtml', would be 'xml') (Optional.)
//   minorRight  - MIME type, minor portion, second part of plus-style only (i.e.
//                 for 'text/xml+xhtml', would be 'xhtml') (Optional, provided if
//                 minorLeft is.)
//   charset     - Specified character set. (Only supplied if found.)
//   name        - Suggested file name from Content-Type header. (Only supplied
//                 if found.)
//   disposition - MIME content disposition. (Only supplied if found.)
//   boundary    - MIME boundary string. (Only supplied if found.)
//   encoding    - MIME content transfer encoding. (Defaults to '8bit'.)
//   filename    - Suggested file name from Content-Disposition header. (Only
//                 supplied if found.)
//   data        - String of decoded data body or Array of body arrayhashes if
//                 multipart.
function email_parse($from) {
    $ownsStream = is_string($from);
    $fp = $from;
    if ($ownsStream) {
        $fp = fopen('php://temp', 'w+');
        if ($fp) {
            fwrite($fp, $from);
            rewind($fp);
        }
    }

    $result = Array();
    if (!$fp) return $result; // Bail when there is no stream to read.

    $addressHeaders = Array('to', 'from', 'reply-to', 'cc', 'bcc', 'return-path', 'sender');
    $idHeaders = Array('message-id', 'references', 'in-reply-to');

    $result['envelope'] = email_getHeaders($fp);
    $result['addresses'] = Array();
    $result['ids'] = Array();
    foreach ($result['envelope'] as $key => $value) {
        // Numeric header names become integer keys; compare as strings.
        $header = (string)$key;
        if (in_array($header, $addressHeaders, true)) {
            $result['addresses'][$key] = email_parseHeaderAddr($value);
        } elseif (in_array($header, $idHeaders, true)) {
            $result['ids'][$key] = email_parseHeaderIds($value);
        }
    }

    // Rest of file is a single part.
    // Discover type, charset if supplied, load body.
    if (isset($result['envelope']['content-type'])) {
        $body = email_parseHeaderContent($result['envelope']['content-type'], 'type');
    } else {
        $body = Array( 'type' => 'text/plain', 'major' => 'text', 'minor' => 'plain', 'charset' => 'us-ascii' );
    }
    if (isset($result['envelope']['content-transfer-encoding'])) {
        $body['encoding'] = strtolower(trim($result['envelope']['content-transfer-encoding']));
    } else {
        $body['encoding'] = '8bit';
    }

    // Media types are case-insensitive, and "major" is absent when the
    // Content-Type value contained no '/'.
    $isMultipart = isset($body['boundary'], $body['major'])
        && strtolower($body['major']) == 'multipart';
    $body['data'] = email_getData($fp, ($isMultipart ? $body['boundary'] : false), $body['encoding']);
    $result['body'] = $body;

    if ($ownsStream) {
        fclose($fp);
    }
    return $result;
}
?>