//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// *WITHOUT ANY WARRANTY*; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

	// Private Group: Private Functions

	// Private Method: _msgSplitQuote
	//
	// Separate the leading e-mail quote markers ('>' optionally followed by
	// one whitespace character, repeated) from the rest of a line.
	//
	// Only markers at the very start of the line count.  A '>' appearing
	// later in the text (e.g. "a > b") is ordinary content and must not
	// raise the quote depth.
	//
	// Parameters:
	// $line - A single line of text, without its EOL.
	//
	// Returns:
	// array(depth, text) where depth is the number of leading '>' markers
	// and text is the line with those markers removed.
	//
	private static function _msgSplitQuote($line) {
		if (preg_match('/^(?:>\s?)+/', $line, $found)) {
			$prefix = $found[0];
			// substr() may return false on old PHP when nothing is left.
			return array(substr_count($prefix, '>'), (string) substr($line, strlen($prefix)));
		}
		return array(0, $line);
	}

	// Private Method: _msgToXHTML
	//
	// Format a text message into friendly XHTML.  Takes care of nested
	// quotes and of Hotmail's lack of EOLs in quote parchments.
	//
	// Each run of lines sharing a quote depth becomes a paragraph; each
	// quote level is wrapped in a DIV of class "quote"; consecutive lines
	// starting with SPACE, TAB, '+', '-', '*', '|', '\' or '$' are kept in a
	// PRE block so that ASCII art and lists survive.
	//
	// Called from <msgFormat>.
	//
	// Parameters:
	// $body - Message to process.
	//
	// Note:
	// Formerly named "message_tohtml()" in rtphp.
	//
	// NOTE(review): the markup literals used here (p, pre, div.quote) follow
	// the contract documented on <msgFormat>; confirm against the templates
	// and stylesheets which consume this output.
	//
	// Returns:
	// The XHTML version.
	//
	public static function _msgToXHTML($body) {
		// Leading characters which flag a line as "custom formatted".
		$premask = " \t+-*|\\\$";

		// Heavily copied from msgWrap().
		$lines = preg_split('/\r?\n/', (string) $body);
		$line_depths = array();
		$out = '';

		// Work around Hotmail's total lack of EOLs in quote parchments.
		// Most of them are '&gt;' but double quoted quotes get ' > ' to this
		// point.  We just avoid the worst of the worst, for now: a long line
		// holding more than five mid-text '>' is truncated at its first '>'.
		// (Ideally the lost lines would be recreated and inserted in $lines.)
		foreach ($lines as $key => $line) {
			if ((strlen($line) > 100) && (preg_match_all('/(\S)\s?\>/', $line, $ignore) > 5)) {
				$parts = explode('>', $line);
				$lines[$key] = $parts[0];
			}
		}

		// Identify quote depth for each line and strip the quote markers.
		foreach ($lines as $key => $line) {
			$split = self::_msgSplitQuote($line);
			$line_depths[$key] = $split[0];
			$lines[$key] = $split[1];
		}

		$level = 0;      // Quote depth of the paragraph being accumulated.
		$depth = 0;      // Number of quote DIVs currently open in $out.
		$inpre = false;  // Whether $current has an unclosed PRE block.
		$current = '';

		// Sentinel: a final line of impossible depth forces the last
		// paragraph to be flushed by the "different level" branch below.
		$line_depths[] = -1;
		$lines[] = '';

		foreach ($lines as $key => $line) {
			if ($line_depths[$key] == $level) {
				// Text within our current quote level: keep appending to
				// $current.  The sentinel never reaches this branch, so
				// $lines[$key+1] always exists here.
				$is_pre = (strspn($line, $premask) > 0);
				$next_is_pre = (strspn($lines[$key+1], $premask) > 0);

				if (trim($line) == '') {
					if ($inpre) {
						// Keep blank lines inside PRE only if it goes on.
						if ($next_is_pre) $current .= "\n";
					} else {
						$current .= "</p>\n\t<p>";
					}
				} elseif ($inpre && $is_pre) {
					$current .= "$line\n";
				} elseif (!$inpre && $is_pre && $next_is_pre) {
					// Two formatted lines in a row: start a PRE block.
					$inpre = true;
					$current .= "<pre>$line\n";
				} else {
					if ($inpre) {
						$inpre = false;
						$current .= "</pre>";
					}
					$current .= "$line ";
				}
			} else {
				// Current line is of a different quote level.
				// So, close previous, open next, output, then restart $current.
				$current = trim($current);
				$diff = $line_depths[$key] - $level;

				if ($current !== '') {
					$out .= "\t<p>$current";
					if ($inpre) {
						$inpre = false;
						$out .= "</pre></p>\n";
					} else {
						$out .= "</p>\n";
					}
				}

				if ($diff < 0) {
					// Never close more DIVs than were opened: the sentinel
					// (depth -1) asks for one level more than really exists.
					for ($d = 0; ($d > $diff) && ($depth > 0); $d--) {
						$out .= "</div>\n";
						$depth--;
					}
				} else {
					for ($d = 0; $d < $diff; $d++) {
						$out .= "<div class=\"quote\">\n";
						$depth++;
					}
				}

				$level = $line_depths[$key];
				$current = "$line ";
			}
		}

		// Safety net: the sentinel normally leaves no DIV open.
		while ($depth > 0) {
			$out .= "</div>";
			$depth--;
		}

		return($out);
	}

	// Private Method: _msgFormatXHTML
	//
	// Transform common text/plain tricks into XHTML.  Specifically, skip empty
	// paragraphs, make URLs into hyperlinks, recognize '*' and '_' surrounding
	// words to mean bold and underline, respectively, and restore any angle
	// bracket which may have been mangled by <msgFormat> for us (it leaves
	// the placeholder "]=]" where a closing bracket used to be).
	//
	// Called from <msgFormat>.
	//
	// Parameters:
	// $body - Message to process.
	//
	// Note:
	// Formerly named "smartize_text()" in rtphp.
	//
	// Returns:
	// Processed message.
	//
	public static function _msgFormatXHTML($body) {
		$needles = array(
			'/\<p\>\<\/p\>/',  // Empty paragraphs
			'{(&lt;)?((https?://|www\.)\S+\.[a-zA-Z0-9.,_:;/%#=+@()?&\-]+)(\]=\])?}',  // URLs
			'/(\s*)\*(\S+)\*/',  // Bold
			'/(\s)_(\S+)_/',  // Underline
			'/\]=\]/',  // Trapped angle brackets.
		);
		$replacements = array(
			'',  // Empty paragraphs
			'<a href="$2">$2</a>',  // URLs
			'$1<strong>$2</strong>',  // Bold
			'$1<u>$2</u>',  // Underline
			'&gt;',  // Trapped angle brackets.
		);
		return(trim(preg_replace($needles, $replacements, $body)));
	}

	// Group: Functions

	// Method: msgWrap
	//
	// Perform word-wrap on a text message.  Takes care of quote indentations.
	//
	// Usually called from <msgFormat>, but useful when saving messages coming
	// from forms as well.  (Messages coming from an e-mail gateway should be
	// stored as-is in your database and formatted for displaying only, to
	// avoid data loss if any reformatting were to have a bug.)
	//
	// Lines starting with SPACE, TAB, '+', '-', '|', '/', '\' or '$' keep
	// their own line break; empty lines separate paragraphs; everything
	// else is joined and re-wrapped.  Each output line is prefixed with one
	// "> " per quote level, and the wrapping width shrinks accordingly.
	//
	// Parameters:
	// $body - String containing the possibly multiline text.
	// $width - Number of columns to use. (Optional.)
	//
	// Note:
	// Formerly named "wrap_message()" in rtphp.
	//
	// Returns:
	// The reformatted text string.
	//
	public static function msgWrap($body, $width = 76) {
		$lines = preg_split('/\r?\n/', (string) $body);
		$line_depths = array();
		$out = '';

		// Identify quote depth for each line and strip the quote markers.
		foreach ($lines as $key => $line) {
			$split = self::_msgSplitQuote($line);
			$line_depths[$key] = $split[0];
			$lines[$key] = $split[1];
		}

		$level = 0;
		$current = '';

		// Sentinel of impossible depth, so the last block gets flushed.
		$line_depths[] = -1;
		$lines[] = '';

		foreach ($lines as $key => $line) {
			if ($level == $line_depths[$key]) {
				if ($line == '') {
					$current .= "\n\n";  // Preserve paragraphs.
				} elseif (strspn($line, " \t+-|/\\\$") > 0) {
					$current .= "$line\n";  // Preserve custom formatting.
				} else {
					$current .= "$line ";
				}
			} else {
				// Quote level changes: wrap and emit what was accumulated.
				$current = trim($current);
				if (strlen($current) > 0) {
					$prefix = str_repeat('> ', $level);
					$wrapped = wordwrap($current, $width - ($level * 2));
					foreach (preg_split('/\r?\n/', $wrapped) as $linen) {
						$out .= $prefix . $linen . "\n";
					}
					// Separator line, quoted at the shallower of both levels.
					$min = min($level, $line_depths[$key]);
					$out .= str_repeat('> ', max(0, $min)) . "\n";
				}
				$level = $line_depths[$key];
				$current = "$line ";
			}
		}

		return($out);
	}

	// Method: msgFormat
	//
	// Reformat text/plain messages for user-friendly display.  A lot of
	// cleaning up is performed:
	//
	// * Signatures, Hotmail signatures, and Outlook quotes are removed.
	// (Sadly, Outlook makes it impossible for us to distinguish legitimate
	// signatures from useless quotes.)  Similarly, if the message ends with a
	// quote, that last quote is removed.  This strict policy does wonders
	// for threaded (Gmail-like) views.  If an Outlook user decided to use
	// rich text colours to insert replies in a full quote (a horrible
	// practice), this will miss it, unfortunately.  It is thus always a good
	// idea to offer the possibility of viewing message sources unmodified as
	// an alternative, or perhaps to display text/html parts although that
	// opens a whole different can of worms, namely MS Office mark-up
	// pollution and viewer exploit vulnerabilities.
	//
	// * Removes angle brackets, noticing those used to surround URLs or of
	// course those which start quote lines.
	//
	// * If XHTML quotes were requested, each quote level is wrapped in a div
	// with class "quote".  (I use this to darken the background with
	// increasing quote depths.)  Also, lines beginning with any of SPACE,
	// TAB, '+', '-', '*', '|' or '$', go in 'pre' blocks to preserve their
	// line separations, to help with ASCII art and lists.
	//
	// * If XHTML quotes were not requested, then the message is rewrapped to
	// a fixed width, taking care of quotes, with <msgWrap>.
	//
	// * If XHTML markup was requested, then words surrounded by '*' are
	// cleaned up in a 'strong' block, those with '_' with 'u' block, and
	// URLs are made into hyperlinks.
	//
	// Parameters:
	// $body - Raw body to process.
	// $width - Rewrapping number of columns, useful in text output. (Optional.)
	// $html_markup - Set true to request XHTML formatting. (Optional.)
	// $html_quotes - Set true to request DIV blocks for quotes. (Optional.)
	//
	// Note:
	// Formerly named "prettify()" in rtphp.
	//
	// Returns:
	// Reformatted content.
	//
	public static function msgFormat($body, $width = 76, $html_markup = false, $html_quotes = false) {
		// First phase: remove excess contents.
		// Patterns are applied in order; each entry of $needles pairs with
		// the entry of $replacements at the same position.
		if ($html_markup) {
			$needles = array(
				"/(\S+)\s*[\r\n]+((--)|(__)).*\z/s",  // Signatures, HotMail signatures, and Outlook quotes
				'/\<([^>\r\n]+)\>/',  // Escape angle brackets for _msgFormatXHTML(). Needed for URLs.
				'/([a-zA-Z0-9!@#$%^&*()+=\[\]{}\-][ \t]*)\>\>/',  // Escape non-quote GT brackets.
				'/([a-zA-Z0-9!@#$%^&*()+=\[\]{}\-][ \t]*)\>/',  // Escape non-quote GT brackets.
				'/\</',  // Escape LT brackets.
				"/([\r\n]+\s*\>[^\r\n]*)+[\r\n]*\z/m",  // Last quote of a message.
				'/(\n\s*){4,}/',  // Multiple empty lines: max 1
				'/"/',  // Quotes
			);
			$replacements = array(
				'$1',  // Drop signatures and Outlook quotes completely. (Often parchments.)
				'&lt;$1]=]',  // Escape angle brackets for _msgFormatXHTML().
				'$1&gt;&gt;',  // Escape GT angle brackets for _msgFormatXHTML().
				'$1&gt;',  // Escape GT angle brackets for _msgFormatXHTML().
				'&lt;',  // Escape LT brackets.
				"\n",  // Drop last quote of a message completely. (Often parchments.)
				"\n\n",  // Multiple empty lines: max 1
				'&quot;',  // Quotes
			);
		} else {
			// FIXME: This looks out of date compared to XHTML above (Hotmail)
			$needles = array(
				"/(\S+)\s*[\r\n]+--.*\z/s",  // Signatures and Outlook quotes
				'/\<(\S+)\>/',  // Don't risk angle brackets. (Would use &gt; but screws up wordwrap().)
				"/([\r\n]+\s*\>[^\r\n]*)+[\r\n]*\z/m",  // Last quote of a message.
				'/(\n\s*){4,}/',  // Multiple empty lines: max 2
			);
			$replacements = array(
				'$1',  // Drop signatures and Outlook quotes completely. (Often parchments.)
				'$1',  // Don't risk angle brackets.
				"\n",  // Drop last quote of a message completely. (Often parchments.)
				"\n\n\n",  // Multiple empty lines: max 2
			);
		}
		$body = preg_replace($needles, $replacements, $body);

		// Second phase: rewrap
		if ($html_quotes) {
			$body = self::_msgToXHTML($body);
		} else {
			$body = self::msgWrap($body, $width);
		}

		// Third phase: add mark-up
		if ($html_markup) {
			$body = self::_msgFormatXHTML($body);
		}

		return($body);
	}

	// Method: msgAbstract
	//
	// Extract most relevant start of a message body.  If you've used Gmail
	// before, you've seen the usefulness of this kind of logic for displaying
	// alongside message subjects in lists.  This is more intelligent than Gmail
	// (as of 2008/03/26) and skips quotes and quote introductions.  Often,
	// message threads can be read just with one-line abstracts this way,
	// saving a lot of time to the user.
	//
	// Parameters:
	// $body - The raw message body to extract an abstract from.
	// $max - Maximum number of characters. (Optional.)  Counted in bytes,
	//        since substr() is used.
	//
	// Returns:
	// The abstract string.
	//
	public static function msgAbstract($body, $max = 500) {
		$needles = array(
			"/(\S+)\s*[\r\n]+--.*\z/s",  // 1
			'/^\s*\>.*$/m',  // 2
			'/^.*((wrote)|(said the following on .*)):$/m',  // 3
			'/[\<\>]/',  // 4
			'/\s+/',  // 5
		);
		$replacements = array(
			'$1',  // 1. Drop signatures and Outlook quotes completely. (Often parchments.)
			'',    // 2. Remove all quotes: we want author-only.
			'',    // 3. Try to remove quote introductions.
			'',    // 4. Don't risk angle brackets.
			" ",   // 5. All whitespace merged to one, including newlines.
		);
		return(substr(preg_replace($needles, $replacements, $body), 0, $max));
	}

// Closes the enclosing class, whose declaration precedes this section.
}

?>