.
//
// Parameters:
// $body - Message to process.
//
// Note:
// Formerly named "smartize_text()" in rtphp.
//
// Returns:
// Processed message.
//
function _msgFormatXHTML($body) {
    // Adds inline XHTML mark-up to an already-escaped message body: URLs
    // become hyperlinks, *word* becomes a 'strong' block and _word_ a 'u'
    // block. The result is trimmed.
    //
    // The body is expected to have had its angle brackets escaped beforehand:
    // a bracketed span arrives as "&lt;...]=]", where "]=]" is a placeholder
    // for the closing bracket so that it cannot be mistaken for a quote
    // marker. A raw "<" in front of a URL is tolerated as well.
    $needles = array(
        '/<p><\/p>/', // Empty paragraphs
        // URLs, with the optional escaped brackets around them swallowed so
        // that they end up neither in the link target nor in the link text.
        '{(?:&lt;|<)?((?:https?://|www\.)\S+\.[a-zA-Z0-9.,_:;/%#=+@()?&\-]+)(?:\]=\])?}',
        '/(\s*)\*(\S+)\*/', // Bold
        '/(\s)_(\S+)_/', // Underline (needs leading whitespace, so snake_case words are left alone)
        '/\]=\]/', // Trapped angle brackets.
    );
    // NOTE(review): a scheme-less "www." match is used verbatim as the href,
    // which browsers resolve as a relative link -- confirm whether a scheme
    // should be prepended.
    $replacements = array(
        '', // Empty paragraphs
        '<a href="$1">$1</a>', // URLs
        '$1<strong>$2</strong>', // Bold
        '$1<u>$2</u>', // Underline
        '&gt;', // Trapped angle brackets.
    );
    // The patterns are applied in order, each one on the result of the previous.
    return(trim(preg_replace($needles, $replacements, $body)));
}
// Group: Functions
// Method: msgWrap
//
// Perform word-wrap on a text message. Takes care of quote indentations.
//
// Usually called from <msgFormat>, but useful when saving messages coming
// from forms as well. (Messages coming from an e-mail gateway should be
// stored as-is in your database and formatted for displaying only, to
// avoid data loss if any reformatting were to have a bug.)
//
// Parameters:
// $body - String containing the possibly multiline text.
// $width - Number of columns to use. (Optional.)
//
// Note:
// Formerly named "wrap_message()" in rtphp.
//
// Returns:
// The reformatted text string.
//
function msgWrap($body, $width = 76) {
    // Re-flows $body to at most $width columns. Consecutive lines sharing the
    // same quote depth are merged into one block, wrapped, and re-emitted
    // with one "> " prefix per quote level; every block is followed by a
    // separator line.
    $lines = preg_split('/\r?\n/', $body);
    $line_depths = array();
    $out = '';

    // Identify the quote depth of each line and strip its quote prefix.
    // Only the leading run of '>' markers counts: a '>' further along the
    // line (as in "a > b") is ordinary text, not a quote level.
    foreach ($lines as $key => $line) {
        $depth = 0;
        if (preg_match('/^(?:>\s?)+/', $line, $prefix)) {
            $depth = substr_count($prefix[0], '>');
            $lines[$key] = preg_replace('/^(?:>\s?)+/', '', $line);
        }
        $line_depths[] = $depth;
    }

    $level = 0;
    $current = '';
    // Sentinel entry: a depth no real line can have, so that the last block
    // is flushed by the same code path as every other one.
    $line_depths[] = -1;
    $lines[] = '';

    foreach ($lines as $key => $line) {
        if ($level == $line_depths[$key]) {
            if ($line == '') {
                // Preserve paragraphs (dropping the joining space left by the
                // previous line).
                $current = rtrim($current, ' ') . "\n\n";
            } elseif (strspn($line, " \t+-|/\\\$") > 0) {
                // Preserve custom formatting: a line starting with one of
                // these characters keeps a line of its own, so any pending
                // text is terminated first instead of being joined to it.
                $current = rtrim($current, ' ');
                if ($current !== '' && substr($current, -1) !== "\n") {
                    $current .= "\n";
                }
                $current .= "$line\n";
            } else {
                // Ordinary text: joined with a space, to be re-wrapped later.
                $current .= "$line ";
            }
        } else {
            // Quote depth changed: flush the block collected so far.
            $current = trim($current);
            if (strlen($current) > 0) {
                $quote_prefix = str_repeat('> ', $level);
                // Each quote level costs two columns; never go below one so
                // that very deep quotes cannot yield a non-positive width.
                $wrapped = wordwrap($current, max(1, $width - ($level * 2)));
                foreach (preg_split('/\r?\n/', $wrapped) as $wrapped_line) {
                    $out .= $quote_prefix . $wrapped_line . "\n";
                }
                // Separator line, quoted at the shallower of the two depths
                // (the sentinel's -1 is clamped to no prefix at all).
                $out .= str_repeat('> ', max(0, min($level, $line_depths[$key]))) . "\n";
            }
            $level = $line_depths[$key];
            $current = "$line ";
        }
    }
    return($out);
}
// Method: msgFormat
//
// Reformat text/plain messages for user-friendly display. A lot of
// cleaning up is performed:
//
// * Signatures, Hotmail signatures, and Outlook quotes are removed.
// (Sadly, Outlook makes it impossible for us to distinguish legitimate
// signatures from useless quotes.) Similarly, if the message ends with a
// quote, that last quote is removed. This strict policy does wonders
// for threaded (Gmail-like) views. If an Outlook user decided to use
// rich text colours to insert replies in a full quote (a horrible
// practice), this will miss it, unfortunately. It is thus always a good
// idea to offer the possibility of viewing message sources unmodified as
// an alternative, or perhaps to display text/html parts although that
// opens a whole different can of worms, namely MS Office mark-up
// pollution and viewer exploit vulnerabilities.
//
// * Removes angle brackets, noticing those used to surround URLs or of
// course those which start quote lines.
//
// * If XHTML quotes were requested, each quote level is wrapped in a div
// with class "quote". (I use this to darken the background with
// increasing quote depths.) Also, lines beginning with any of SPACE,
// TAB, '+', '-', '*', '|' or '$', go in 'pre' blocks to preserve their
// line separations, to help with ASCII art and lists.
//
// * If XHTML quotes were not requested, then the message is rewrapped to
// a fixed width, taking care of quotes, with <msgWrap>.
//
// * If XHTML markup was requested, then words surrounded by '*' are
// cleaned up in a 'strong' block, those with '_' with 'u' block, and
// URLs are made into hyperlinks.
//
// Parameters:
// $body - Raw body to process.
// $width - Rewrapping number of columns, useful in text output. (Optional.)
// $html_markup - Set true to request XHTML formatting. (Optional.)
// $html_quotes - Set true to request DIV blocks for quotes. (Optional.)
//
// Note:
// Formerly named "prettify()" in rtphp.
//
// Returns:
// Reformatted content.
//
function msgFormat($body, $width = 76, $html_markup = false, $html_quotes = false) {
    // Cleans up a raw text/plain body in three phases: strip excess content,
    // re-flow it (as XHTML quote blocks or as wrapped text), then optionally
    // add inline XHTML mark-up.

    // Everything from the first line starting with "--" (signatures, Outlook
    // quotes) or "__" (Hotmail signatures) up to the end of the message.
    // Shared by both branches so that they cannot drift apart.
    $signature = "/(\S+)\s*[\r\n]+((--)|(__)).*\z/s";
    // A run of quoted lines that ends the message.
    $last_quote = "/([\r\n]+\s*\>[^\r\n]*)+[\r\n]*\z/m";

    // First phase: remove excess contents.
    if ($html_markup) {
        // Order matters: each pattern runs on the result of the previous one.
        // Bracketed spans are escaped first, with "]=]" standing in for the
        // closing bracket so that the GT patterns below leave it alone;
        // _msgFormatXHTML() turns the placeholder into an entity afterwards.
        // A '>' that starts a line is deliberately left untouched: it is a
        // quote marker needed by the second phase.
        // NOTE(review): '&' is not escaped here -- confirm whether the output
        // is escaped further before being sent to a browser.
        $needles = array(
            $signature, // Signatures, HotMail signatures, and Outlook quotes
            '/\<([^>\r\n]+)\>/', // Escape angle brackets for _msgFormatXHTML(). Needed for URLs.
            '/([a-zA-Z0-9!@#$%^&*()+=\[\]{}\-][ \t]*)\>\>/', // Escape non-quote GT brackets (doubled).
            '/([a-zA-Z0-9!@#$%^&*()+=\[\]{}\-][ \t]*)\>/', // Escape non-quote GT brackets.
            '/</', // Escape LT brackets.
            $last_quote, // Last quote of a message.
            '/(\n\s*){3,}/', // Multiple empty lines: max 1
            '/"/', // Quotes
        );
        $replacements = array(
            '$1', // Drop signatures and Outlook quotes completely. (Often parchments.)
            '&lt;$1]=]', // Escape angle brackets for _msgFormatXHTML().
            '$1&gt;&gt;', // Escape both GT brackets; the single-bracket pattern cannot see the second one.
            '$1&gt;', // Escape GT angle brackets for _msgFormatXHTML().
            '&lt;', // Escape LT brackets.
            "\n", // Drop last quote of a message completely. (Often parchments.)
            "\n\n", // Multiple empty lines: max 1
            '&quot;', // Quotes
        );
    } else {
        $needles = array(
            $signature, // Signatures, HotMail signatures, and Outlook quotes
            '/\<(\S+)\>/', // Don't risk angle brackets. (Would use &gt; but screws up wordwrap().)
            $last_quote, // Last quote of a message.
            '/(\n\s*){4,}/', // Multiple empty lines: max 2
        );
        $replacements = array(
            '$1', // Drop signatures and Outlook quotes completely. (Often parchments.)
            '$1', // Don't risk angle brackets.
            "\n", // Drop last quote of a message completely. (Often parchments.)
            "\n\n\n", // Multiple empty lines: max 2
        );
    }
    $body = preg_replace($needles, $replacements, $body);

    // Second phase: rewrap
    // NOTE(review): these are static-style calls to methods that are not
    // declared static; recent PHP versions reject that -- confirm the
    // targeted PHP version.
    if ($html_quotes) {
        $body = FText::_msgToXHTML($body);
    } else {
        $body = FText::msgWrap($body, $width);
    }

    // Third phase: add mark-up
    if ($html_markup) {
        $body = FText::_msgFormatXHTML($body);
    }
    return($body);
}
// Method: msgAbstract
//
// Extract most relevant start of a message body. If you've used Gmail
// before, you've seen the usefulness of this kind of logic for displaying
// alongside message subjects in lists. This is more intelligent than Gmail
// (as of 2008/03/26) and skips quotes and quote introductions. Often,
// message threads can be read just with one-line abstracts this way,
// saving a lot of time to the user.
//
// Parameters:
// $body - The raw message body to extract an abstract from.
// $max - Maximum number of characters. (Optional.)
//
// Returns:
// The abstract string.
//
function msgAbstract($body, $max = 500) {
    // Builds a one-line abstract of $body: signature, quoted lines, quote
    // introductions and angle brackets are removed, all whitespace is
    // collapsed to single spaces, and the result is cut to $max bytes.

    // Normalise CRLF / CR line endings first. The line-anchored patterns
    // below only see "\n" as a line break, so "... wrote:\r\n" would
    // otherwise never match. Newlines do not survive step 5 anyway.
    $body = preg_replace('/\r\n?/', "\n", $body);

    $needles = array(
        "/(\S+)\s*[\r\n]+--.*\z/s", // 1
        '/^\s*\>.*$/m', // 2
        '/^.*((wrote)|(said the following on .*)):[ \t]*$/m', // 3
        '/[\<\>]/', // 4
        '/\s+/', // 5
    );
    $replacements = array(
        '$1', // 1. Drop signatures and Outlook quotes completely. (Often parchments.)
        '', // 2. Remove all quotes: we want author-only.
        '', // 3. Try to remove quote introductions (trailing blanks tolerated).
        '', // 4. Don't risk angle brackets.
        " ", // 5. All whitespace merged to one, including newlines.
    );

    // Removed quotes and introductions leave whitespace behind; trim it so
    // that the abstract starts with actual content.
    $abstract = trim(preg_replace($needles, $replacements, $body));

    // NOTE(review): substr() counts bytes, so a multi-byte character can be
    // split at the cut -- confirm the encoding of the bodies passed in.
    return(rtrim(substr($abstract, 0, $max)));
}
};
?>