<?php
// NOTE(review): the title/copyright lines that normally precede the licence
// notice below are missing from this copy of the file; restore them from the
// upstream distribution.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// *WITHOUT ANY WARRANTY*; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
// Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program.  If not, see <http://www.gnu.org/licenses/>.

// Topic: Description
//
// Useful for public and private API documentation, but also for plain text
// files with minimal formatting.  Implements a subset of the excellent
// NaturalDocs philosophy.
//
// Example:
//
// (begin example)
// $doc = new NDlite();
// $doc->parseFile($path_to_my_source_file);
// echo "<h1>Developer documentation for: " . $doc->guessTitle() . "</h1>\n";
// echo $doc->toXHTML(array('private' => true));
// (end example)
//
// TODO:
// * Document exactly what is supported...
//
// Known Bugs:
//
// * I use glob() with a prefix path, part of the PATH_INFO and ".*".  In
//   unsafe installations of PHP, I'll bet this could allow malicious users
//   to form a PATH_INFO which could make NDlite read unintended files.  For
//   small regular files, this is mostly harmless because the chances of
//   NDlite recognizing displayable comments is rather low.  I see a
//   possible issue with large files using all of the PHP process' allowed
//   memory on each call.  I also see an issue with devices and pipes, which
//   could stall NDlite processes waiting for input without reaching their
//   CPU clock limit if no wall clock limit is in force.
//
// Future Developments:
//
// * I'd like to see a "totroff()" for creating troff output for man pages.
//   I then could switch from POD to NDlite in my shell scripts.
//
// * Improve plurals and possessives to match NaturalDoc's versatility.
//
// * Add language parameter so that I can add French quote substitutions.
//
// * Further parse function prototypes, like NaturalDocs.
//

// Private Group: Constants

// Private Constant: NDLITE_K_GROUP
define('NDLITE_K_GROUP', 1);

// Private Constant: NDLITE_K_CHILD
define('NDLITE_K_CHILD', 2);

// Private Constant: NDLITE_K_GENERIC
define('NDLITE_K_GENERIC', 3);

class NDlite
{
    // Private Group: Properties

    // Private Property: grabber
    //
    // Our super-duper comment+code extracting PCRE.
    //
    // 1. Comment has to be one of:
    //    Consecutive lines with initial non-whitespace '//' or '#'
    //    Block starting with '/*' ending ungreedy with '*/'
    //    Block starting with "=begin nd|NaturalDocs|Natural Docs",
    //    ending ungreedy with "=cut" or "=end"
    //
    // 2. First line of LINES-BASED comment must contain ':' and exclude '$'
    //    before the ':' to avoid CVS keywords.
    //    Not enforced at this level for block comments.
    //
    // 3. Following is considered code until '=;({/#'.
    //    If '(' was encountered first, extend until ')'.
    //
    public $grabber = "/((\/\*.*?\*\/)|(=begin ((nd)|(natural ?docs))\r?\n.*?=((end)|(cut))[^\r\n]*\r?\n)|((((\/\/)|#)[^\r\n:$]*:[^\r\n:]*\r?\n)(([ \t]*((\/\/)|#)[^\r\n]*)\r?\n)*))([^=;\(\{\/#]+(\([^\)]*\))?)/si";

    // Private Property: grabberCommentIndex
    // Identify the PCRE match index for comment body.
    public $grabberCommentIndex = 1;

    // Private Property: grabberCodeIndex
    // Identify the PCRE match index for code line.
    // (Capture group 18 is the trailing "code" group of <grabber>.)
    public $grabberCodeIndex = 18;

    // Private Property: source
    // This instance's source array.
    public $source = array();

    // Private Property: intro
    // First block extracted from source array.
    public $intro = null;

    // Private Property: path
    // Path to the document tree on disk, or false.
    public $path = false;

    // Private Property: cwd
    // Directory of the current document on disk, or false.
    public $cwd = false;

    // Private Property: URL
    // Base URL to reach this NDlite instance, or false.
    public $URL = false;

    // Group: Methods

    // Constructor: NDlite
    //
    // Creates an instance of the NDlite class.
    //
    // If you supply the optional path and url arguments, references to
    // unknown symbols will cause lookups in the parsed source's current
    // directory, then the root of the document path, for a matching
    // basename.
    //
    // Example:
    //
    // > $doc = new NDlite("/usr/local/src/", "/cgi-bin/ndlite.cgi?q=");
    //
    // As you can see, it becomes easy to use a standard query if you'd
    // like, although I personally prefer the use of PATH_INFO to make
    // NDlite less apparent to the end-user on a web site.  If you're using
    // NDlite for pre-processing, your URL might be simply the base of your
    // site's output directory, like "/api-docs/" for example.
    //
    // Parameters:
    // path - Path to your documentation tree on disk.  (Optional.)
    // URL  - Base URL to reach this NDlite instance.  (Optional.)
    //
    function __construct($path = false, $URL = false)
    {
        $this->path = $path;
        $this->URL  = $URL;
    }

    // Private Method: NDlite
    //
    // PHP 4-style constructor name, kept so that existing code calling
    // NDlite() explicitly (e.g. parent::NDlite() from a subclass) still
    // works.  PHP 8 no longer treats a method named after its class as a
    // constructor, hence the real work lives in __construct().
    //
    // Parameters:
    // path - Path to your documentation tree on disk.  (Optional.)
    // URL  - Base URL to reach this NDlite instance.  (Optional.)
    //
    function NDlite($path = false, $URL = false)
    {
        $this->__construct($path, $URL);
    }

    // Private Method: _stringToLines
    //
    // Convert a string containing a block of comments into an array of
    // lines stripped of their comment indicators.
    //
    // Parameters:
    // comment - The input string.
    //
    // Returns:
    // The array of strings, or false if the block should be discarded
    // (nothing left after stripping, or no ':' after the first character
    // of the first line).
    //
    function _stringToLines($comment)
    {
        $result = array();

        // Case-insensitive, like the /i flag of <grabber>.
        if (strncasecmp($comment, '=begin ', 7) == 0) {
            $tmp = preg_split("/\r?\n/", $comment);
            $len = count($tmp);
            // Skip first line, assuredly '=begin'
            for ($i = 1; $i < $len; $i++) {
                if (!preg_match("/^=((end)|(cut))/i", $tmp[$i])) {
                    $result[] = $tmp[$i];
                }
            }
        } else {
            preg_match_all("/[ \t]*[\*#\/]+[ \t]?([^\r\n]*)\r?\n/", $comment, $lines, PREG_SET_ORDER);
            foreach ($lines as $line) {
                $result[] = $line[1];
            }
        }

        while ((count($result) > 0) && ($result[0] == '')) {
            array_shift($result);
        }

        // Nothing but blank lines: discard instead of reading $result[0].
        if (count($result) == 0) {
            return false;
        }

        // Enforce ':' in first line rule (and not as the very first char).
        $colon = strpos($result[0], ':');
        if (($colon === false) || ($colon < 1)) {
            return false;
        }

        while ((count($result) > 0) && ($result[count($result) - 1] == '')) {
            array_pop($result);
        }

        return $result;
    }

    // Private Method: _anchor
    //
    // Build the in-page anchor name shared by headings, the summary and
    // references.
    //
    // Parameters:
    // parent - The parent id (may be empty).
    // id     - The topic id.
    //
    // Returns:
    // The string "parent_id".
    //
    function _anchor($parent, $id)
    {
        return $parent . '_' . $id;
    }

    // Private Method: _externalLink
    //
    // Look on disk for a file whose basename matches a symbol, first in the
    // current document's directory, then at the root of the document path.
    //
    // Parameters:
    // basename - The file basename (without extension) to look for.
    //
    // Returns:
    // The attribute-escaped URL prefix of the matching document (without
    // the "-doc" suffix), or false if nothing matched.
    //
    function _externalLink($basename)
    {
        // Refuse path separators and glob metacharacters: the basename
        // comes from document text and must not widen the search.
        if (($basename == '') || preg_match('/[\/\\\\*?\[\]{}]/', $basename)) {
            return false;
        }
        foreach (array($this->cwd, $this->path) as $dir) {
            $hits = glob($dir . $basename . '.*', GLOB_NOSORT);
            // glob() returns false on error; count(false) is fatal in PHP 8.
            if (is_array($hits) && (count($hits) > 0)) {
                // NOTE(review): the original expression was damaged in this
                // copy; base URL + path relative to the searched directory
                // is reconstructed from the surviving fragment -- confirm.
                return htmlspecialchars($this->URL . substr($hits[0], strlen((string) $dir)), ENT_QUOTES);
            }
        }
        return false;
    }

    // Private Method: _mkURL
    //
    // Resolve the contents of a reference into a hyperlink.
    // Usually called from the PCRE in <_inlineXHTML()>.
    //
    // Parameters:
    // ref    - The reference string.
    // parent - The parent id.
    //
    // Returns:
    // An XHTML anchor ready to display or the original ref (between angle
    // brackets) if all attempts to resolve failed.
    //
    function _mkURL($ref, $parent)
    {
        $result = $ref;
        $blocks = is_array($this->source) ? $this->source : array();
        // Escaped form for use inside href="..." so that a quote in the
        // reference cannot break out of the attribute.
        $href   = htmlspecialchars($ref, ENT_QUOTES);
        $lookup = ($this->path != '') && ($this->URL != '');

        // NOTE(review): anchor markup below is reconstructed; the tags were
        // stripped from this copy of the source.
        if (preg_match('/@/', $ref)) {
            // E-mail addresses
            $result = "<a href=\"mailto:{$href}\">{$ref}</a>";
        } elseif (preg_match('/^[^\s:]+:\//i', $ref)) {
            // Full URL
            $result = "<a href=\"{$href}\">{$ref}</a>";
        } elseif (preg_match('/^www\./i', $ref)) {
            // Short URL
            $result = "<a href=\"http://{$href}\">{$ref}</a>";
        } elseif (preg_match('/^([^:\/]+)((::)|[\/\.])([^(]+)(\(\))?$/', $ref, $matches)) {
            // Classed reference
            $class = $matches[1];
            $id    = $matches[4];

            // Is it in _this_ source?  Exact id or plural form; the first
            // block in the right class assuredly wins.
            foreach ($blocks as $block) {
                if ($block['parent'] != $class) {
                    continue;
                }
                if (($block['id'] == $id) || ($block['id'] . 's' == $id)) {
                    $result = "<a href=\"#" . $this->_anchor($block['parent'], $block['id']) . "\">{$ref}</a>";
                    break;
                }
            }

            // External file reference.
            // I know, this is only approximative.  Hey, we're file-based...
            if (($result == $ref) && $lookup) {
                $link = $this->_externalLink($class);
                if ($link !== false) {
                    $target = htmlspecialchars($this->_anchor($class, $id), ENT_QUOTES);
                    $result = "<a href=\"{$link}-doc#{$target}\">{$ref}</a>";
                }
            }
        } else {
            // Plain reference, optionally followed by "()".
            $id = preg_match('/^([^()]+)(\(\))?$/', $ref, $matches) ? $matches[1] : $ref;

            foreach ($blocks as $block) {
                // Exact id, or plural form.
                if (($block['id'] == $id) || ($block['id'] . 's' == $id)) {
                    $result = "<a href=\"#" . $this->_anchor($block['parent'], $block['id']) . "\">{$ref}</a>";
                    if ($block['parent'] == $parent) {
                        // Reference found in our same parent.  Assuredly
                        // best match.
                        break;
                    }
                    // Otherwise: possible candidate; take note but continue.
                }
            }

            // External file reference, as above but without a target.
            if (($result == $ref) && $lookup) {
                $link = $this->_externalLink($id);
                if ($link !== false) {
                    $result = "<a href=\"{$link}-doc\">{$ref}</a>";
                }
            }
        }

        // Unresolved: hand back the reference as it was written.
        if ($result == $ref) {
            $result = "<{$ref}>";
        }
        return $result;
    }

    // Private Method: _inlineXHTML
    //
    // *Bold*, _emphasis_, English "quotes" and hyperlinks.
    // Usually called from <_linesToXHTML()>.
    //
    // Note that bold only works for up to 40 characters long.  This is to
    // try to avoid cases where two stand-alone symbols are present.
    // Emphasis is even more strict, and works only for single words.
    // (Unless underscores are used between the words.)
    //
    // Parameters:
    // line   - The string to parse.
    // parent - The parent id for URL construction purposes.  (Optional.)
    //
    // Returns:
    // The parsed string.
    //
    function _inlineXHTML($line, $parent = '')
    {
        // The original used preg_replace() with the /e (eval) modifier,
        // which no longer exists in PHP 7+; callbacks do the same job
        // without evaluating document text as code.
        $self = $this;

        // 1a. Hyperlinks
        $line = preg_replace_callback(
            '/<([^>]+)>/',
            function ($m) use ($self, $parent) {
                return $self->_mkURL($m[1], $parent);
            },
            $line
        );

        // 1b. Lone e-mail addresses.  The look-behind skips addresses that
        // step 1a already placed inside an anchor (after "mailto:", '">',
        // or inside a URL), which would otherwise be linked a second time.
        // NOTE(review): replacement markup reconstructed.
        $line = preg_replace(
            '/(?<![:>\/"a-z0-9_.+\-])([a-z0-9_\.\-\+]+@[a-z0-9_\-]+\.[a-z0-9_\-\.]+)/i',
            '<a href="mailto:$1">$1</a>',
            $line
        );

        // 2. English quotes, avoid XML
        $line = preg_replace('/(^|[^="])"([^">]+)"([^>]|$)/', '$1“$2”$3', $line);

        // 3. Bold.  NOTE(review): tag name reconstructed.
        $line = preg_replace(
            '/(^|[\s;\'"\(])\*([^\*]{1,40})\*([\s.,;:!\'"\?\)]|$)/',
            '$1<strong>$2</strong>$3',
            $line
        );

        // 4. Emphasis; underscores inside the word(s) become spaces.
        // NOTE(review): tag name reconstructed.
        $line = preg_replace_callback(
            '/(^|[\s;\'"\(])[_\/]([^\s]{1,40})[_\/]([\s.,;:!\'"\?\)]|$)/',
            function ($m) {
                $tail = isset($m[3]) ? $m[3] : '';
                return $m[1] . '<em>' . strtr($m[2], '_', ' ') . '</em>' . $tail;
            },
            $line
        );

        return $line;
    }

    // Private Method: _isHeading
    //
    // Tell whether a piece of text qualifies as an h4 header: shorter than
    // 40 characters, ending with ':' and containing no period at all.
    //
    // Parameters:
    // text - The trimmed text to test.
    //
    // Returns:
    // True or false.
    //
    function _isHeading($text)
    {
        $len = strlen($text);
        // strpos() === false: a period at offset 0 counts as a period too.
        return ($len > 0) && ($len < 40) && ($text[$len - 1] == ':') && (strpos($text, '.') === false);
    }

    // Private Method: _linesToXHTML
    //
    // Process an array of lines into an XHTML string.
    // Usually called from <toXHTML()>.
    //
    // Note that h4 headers can be multiline, but they cannot exceed 40
    // characters overall.  This helps avoid making single-sentence
    // paragraphs which happen to end with ':' into headers.
    //
    // Parameters:
    // lines  - Array of strings.
    // parent - Parent id for URL construction purposes.  (Optional.)
    //
    // Returns:
    // String representing the XHTML.
    //
    function _linesToXHTML($lines, $parent = '')
    {
        $result = '';

        // Operating mode before the new line to process:
        //  0 = Out of everything, neutral.
        // 10 = In a temporarily-unknown block.
        // 21 = In a DL and DD block.
        // 31 = In a UL and LI block.
        // 40 = In a code block, block syntax.
        // 41 = In a code block, line syntax.
        $mode = 0;
        $tmp  = '';

        $is_dtdd     = "/^[ \t]*([^ \t]+)[ \t]+-[ \t]+(.*)$/";
        $is_ulli     = "/^[ \t]*[-*o+][ \t]+(.*)$/";
        $is_code     = "/^[ \t]*[>|:]([ \t](.*))?$/";
        $begins_code = "/^\(((start)|(begin))(\s+((code)|(sample)|(example)|(diagram)|(table)))?\)$/i";
        $ends_code   = "/^\(((end)|(finish)|(done)|(stop))(\s+((code)|(sample)|(example)|(diagram)|(table)))?\)$/i";

        if (!is_array($lines)) {
            $lines = array();
        }

        // NOTE(review): every tag emitted below is reconstructed from the
        // surrounding newline/tab layout; the tags themselves were stripped
        // from this copy of the source.
        foreach ($lines as $inline) {
            $line = trim($inline);
            switch ($mode) {
                case 0:
                    if ($line == '') {
                        break;
                    }
                    if (preg_match($is_dtdd, $line, $matches)) {
                        $result .= "<dl>\n\t<dt>" . $this->_inlineXHTML($matches[1], $parent) . "</dt>\n\t<dd>";
                        $tmp  = $matches[2];
                        $mode = 21;
                    } elseif (preg_match($is_ulli, $line, $matches)) {
                        $result .= "<ul>\n\t<li>";
                        $tmp  = $matches[1];
                        $mode = 31;
                    } elseif (preg_match($is_code, $line, $matches)) {
                        // Group 2 is absent for a bare ">" line.
                        $code = isset($matches[2]) ? $matches[2] : '';
                        $result .= "<pre>" . htmlspecialchars($code, ENT_NOQUOTES) . "\n";
                        $mode = 41;
                    } elseif (preg_match($begins_code, $line)) {
                        $result .= "<pre>";
                        $mode = 40;
                    } elseif ($this->_isHeading($line)) {
                        $result .= "\n<h4>" . htmlspecialchars(substr($line, 0, strlen($line) - 1), ENT_NOQUOTES) . "</h4>\n";
                    } else {
                        $tmp .= $line . ' ';
                        $mode = 10;
                    }
                    break;

                case 10:
                    if ($line != '') {
                        $tmp .= $line . ' ';
                    } else {
                        $tmp = rtrim($tmp);
                        if ($this->_isHeading($tmp)) {
                            $result .= "\n<h4>" . htmlspecialchars(substr($tmp, 0, strlen($tmp) - 1), ENT_NOQUOTES) . "</h4>\n";
                        } else {
                            $result .= "<p>" . $this->_inlineXHTML($tmp, $parent) . "</p>\n";
                        }
                        $tmp  = '';
                        $mode = 0;
                    }
                    break;

                case 21:
                    if ($line != '') {
                        if (preg_match($is_dtdd, $line, $matches)) {
                            $result .= $this->_inlineXHTML($tmp, $parent) . "</dd>\n\n\t<dt>" . $this->_inlineXHTML($matches[1], $parent) . "</dt>\n\t<dd>";
                            $tmp = $matches[2];
                        } else {
                            $tmp .= ' ' . $line;
                        }
                    } else {
                        $result .= $this->_inlineXHTML($tmp, $parent) . "</dd>\n\n</dl>\n";
                        $tmp  = '';
                        $mode = 0;
                    }
                    break;

                case 31:
                    if ($line != '') {
                        if (preg_match($is_ulli, $line, $matches)) {
                            $result .= $this->_inlineXHTML($tmp, $parent) . "</li>\n\t<li>";
                            $tmp = $matches[1];
                        } else {
                            $tmp .= ' ' . $line;
                        }
                    } else {
                        $result .= $this->_inlineXHTML($tmp, $parent) . "</li>\n</ul>\n";
                        $tmp  = '';
                        $mode = 0;
                    }
                    break;

                case 40:
                    // Separate mode to allow empty lines in code blocks.
                    if (preg_match($ends_code, $line)) {
                        $result .= "</pre>\n";
                        $mode = 0;
                    } else {
                        $result .= htmlspecialchars($inline, ENT_NOQUOTES) . "\n";
                    }
                    break;

                case 41:
                    if ($line != '') {
                        if (preg_match($is_code, $line, $matches)) {
                            $code = isset($matches[2]) ? $matches[2] : '';
                            $result .= htmlspecialchars($code, ENT_NOQUOTES) . "\n";
                        } else {
                            // WARNING: Unspecified behavior!
                            // We're a non-empty line following a code line.
                            // Handling as code.
                            $result .= htmlspecialchars($inline, ENT_NOQUOTES) . "\n";
                        }
                    } else {
                        $result .= "</pre>\n";
                        $mode = 0;
                    }
                    break;

                default:
                    // This is never reached.
                    break;
            }
        }

        // Don't forget to close anything left hanging.
        switch ($mode) {
            case 10:
                $tmp = rtrim($tmp);
                if ($tmp != '') {
                    $result .= "<p>" . $this->_inlineXHTML($tmp, $parent) . "</p>\n";
                }
                break;
            case 21:
                $result .= $this->_inlineXHTML($tmp, $parent) . "</dd>\n\n</dl>\n";
                break;
            case 31:
                $result .= $this->_inlineXHTML($tmp, $parent) . "</li>\n</ul>\n";
                break;
            case 40:
            case 41:
                $result .= "</pre>\n";
                break;
            default:
                // 0 or unknown = do nothing
                break;
        }

        return $result;
    }

    // Method: parseFile
    //
    // Load a file from disk into this instance.  Internally, this is a
    // wrapper around <parseString()> which calls PHP's file_get_contents()
    // for you.  Only regular, readable files are read; anything else is
    // parsed as an empty source.
    //
    // Parameters:
    // filename - The file path to read.
    //
    // Returns:
    // The result of <parseString()>.
    //
    function parseFile($filename)
    {
        // The original wrote "$input == false;", a comparison which left
        // $input undefined whenever the file did not exist.
        $input = '';
        // is_file() rather than file_exists(): never block on devices,
        // pipes or directories.
        if (is_file($filename) && is_readable($filename)) {
            $data = file_get_contents($filename);
            if ($data !== false) {
                $input = $data;
            }
        }
        $this->cwd = dirname($filename) . '/';
        return $this->parseString($input);
    }

    // Method: parseString
    //
    // Load a source in the form of a string, into this instance.
    //
    // Parameters:
    // input - The string containing source code to parse for comments.
    //
    // Returns:
    // Always true as of this version.
    //
    function parseString($input)
    {
        $NDlite_kwd_class   = '(class)|(structure)|(struct)|(package)|(namespace)|(interface)|(object)';
        $NDlite_kwd_group   = '(title)|(group)|(section)|(class)|(structure)|(struct)|(package)|(namespace)|(interface)|(file)|(object)';
        $NDlite_kwd_child   = '(property)|(method)|(callback)|(constructor)|(destructor)';
        $NDlite_kwd_generic = '(function)|(procedure)|(routine)|(subroutine)|(constant)|(type)|(typedef)|(macro)|(define)|(variable)|(var)|(array)|(hash)|(string)|(handle)|(pointer)|(reference)|(topic)|(subtitle)';

        $input     = (string) $input;
        $structure = array();   // List of array(comment lines, code string)
        $idxComment = 0;
        $idxCode    = 1;
        $starter = "/^[ \t\r\n]*(private[ \t]+)?({$NDlite_kwd_group}|{$NDlite_kwd_child}|{$NDlite_kwd_generic}):[ \t][^\r\n]+/i";

        if (preg_match($starter, $input)) {
            // Text file detected.  Crawl line by line to split into blocks,
            // starting a new block at every "Keyword: title" line.
            $lines   = preg_split("/\r?\n/", $input);
            $current = array();
            foreach ($lines as $line) {
                if (preg_match($starter, $line)) {
                    $current = $this->_trimBlankLines($current);
                    if (count($current) > 0) {
                        $structure[] = array($current, '');
                    }
                    $current = array($line);
                } else {
                    $current[] = $line;
                }
            }
            $current = $this->_trimBlankLines($current);
            if (count($current) > 0) {
                $structure[] = array($current, '');
            }
        } else {
            // Source code: let <grabber> pair each comment with its code.
            preg_match_all($this->grabber, $input, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                $structure[] = array(
                    $this->_stringToLines($match[$this->grabberCommentIndex]),
                    $match[$this->grabberCodeIndex]
                );
            }
        }
        // echo "<pre>"; print_r($structure); echo "</pre>\n"; exit();

        $this->source = array();
        $lastClass = '';
        foreach ($structure as $block) {
            $next = $block[$idxComment];
            if (!$next) {
                continue;   // Discarded by _stringToLines() or empty.
            }

            // Parse special first line
            // CAUTION: Not using our NDlite_kwd regex here: we're at the
            // first line of a comment block, which must start in this
            // format.  Anything else is skipped.
            if (!preg_match("/[ \t]*((private)[ \t]?)?([^ :]+):[ \t]*([^ \t].*)/i", $next[0], $headers)) {
                continue;
            }
            array_shift($next);
            while ((count($next) > 0) && ($next[0] == '')) {
                array_shift($next);
            }

            // The header regex is case-insensitive, so compare likewise.
            $private  = (strtolower($headers[2]) == 'private');
            $parent   = '';
            $valid    = true;
            $kwd_type = NDLITE_K_GENERIC;

            // CAUTION: Using class, not group, to determine parents.
            if (preg_match("/^({$NDlite_kwd_class})\$/i", $headers[3])) {
                $lastClass = htmlspecialchars($headers[4]);
                $kwd_type  = NDLITE_K_GROUP;
            } elseif (preg_match("/^({$NDlite_kwd_group})\$/i", $headers[3])) {
                $parent   = $lastClass;
                $kwd_type = NDLITE_K_GROUP;
            } elseif (preg_match("/^({$NDlite_kwd_child})\$/i", $headers[3])) {
                $parent   = $lastClass;
                $kwd_type = NDLITE_K_CHILD;
            } elseif (!preg_match("/^({$NDlite_kwd_generic})\$/i", $headers[3])) {
                $valid = false;
            }

            if ($valid) {
                $this->source[] = array(
                    'private' => $private,
                    'type'    => $headers[3],
                    'NDtype'  => $kwd_type,
                    'id'      => htmlspecialchars($headers[4]),
                    'parent'  => $parent,
                    'lines'   => $next,
                    'code'    => trim($block[$idxCode])
                );
            }
        }

        // First block gets special treatment.
        $this->intro = array_shift($this->source);

        return true;
    }

    // Private Method: _trimBlankLines
    //
    // Remove empty strings from both ends of an array of lines.
    //
    // Parameters:
    // lines - Array of strings.
    //
    // Returns:
    // The trimmed array.
    //
    function _trimBlankLines($lines)
    {
        while ((count($lines) > 0) && ($lines[0] == '')) {
            array_shift($lines);
        }
        while ((count($lines) > 0) && ($lines[count($lines) - 1] == '')) {
            array_pop($lines);
        }
        return $lines;
    }

    // Method: guessTitle
    //
    // Guess the title of this instance.  Only useful after parsing some
    // source.
    //
    // Parameters:
    // fallback - Alternative if no suitable candidate is found.  (Optional.)
    //
    // Returns:
    // The first of "File", "Title", "Class" or "Group" title found, or your
    // fallback if none is found, or false if no fallback is provided.
    //
    function guessTitle($fallback = false)
    {
        // intro is null when nothing was (successfully) parsed.
        if (is_array($this->intro) && ($this->intro['NDtype'] == NDLITE_K_GROUP)) {
            return $this->intro['id'];
        }
        if (is_array($this->source)) {
            foreach ($this->source as $block) {
                if ($block['NDtype'] == NDLITE_K_GROUP) {
                    return $block['id'];
                }
            }
        }
        return $fallback;
    }

    // Private Method: _linesToAbstract
    //
    // Extract the first sentence or paragraph from an array of lines.  Used
    // for summary in <toXHTML()>.
    //
    // Parameters:
    // lines  - Array of strings.
    // parent - Parent id for URL construction purposes.  (Optional.)
    //
    // Returns:
    // The summary string.
    //
    function _linesToAbstract($lines, $parent = '')
    {
        $result = '';
        if (is_array($lines)) {
            foreach ($lines as $line) {
                if (trim($line) == '') {
                    break;      // End of first paragraph.
                }
                if (preg_match('/^(.*?[.;!?]+)([ \t]|$)/', $line, $matches)) {
                    $result .= $matches[1];
                    break;      // End of first sentence.
                }
                $result .= $line . ' ';
            }
        }
        return $this->_inlineXHTML(rtrim($result), $parent);
    }

    // Method: toXHTML
    //
    // Produce XHTML documentation from the current source.  It is up to your
    // application to jazz it up with CSS as you'd like.
    //
    // Parameters:
    // flags - Named array of options.  (Optional.)
    //
    // Valid flags:
    // private - Set true to include private topics, false or omit to limit
    //           display to regular topics only.
    // summary - Set false to explicitly request omitting the summary.
    //
    // Returns:
    // The XHTML string ready to display or save.
    //
    function toXHTML($flags = false)
    {
        $result = '';
        $flag_private = false;
        $flag_summary = true;
        if (is_array($flags)) {
            if (array_key_exists('private', $flags)) {
                $flag_private = $flags['private'];
            }
            if (array_key_exists('summary', $flags)) {
                $flag_summary = $flags['summary'];
            }
        }
        $blocks = is_array($this->source) ? $this->source : array();

        // Introduction (absent when nothing was parsed).
        // if ($this->intro['code'] != '') {
        //     $result .= "<code>" . $this->intro['code'] . "</code>\n\n";
        // };
        if (is_array($this->intro)) {
            $result .= $this->_linesToXHTML($this->intro['lines'], $this->intro['parent']);
        }

        // NOTE(review): all markup below (heading levels, list structure,
        // anchors and the odd/even class) is reconstructed; the tags were
        // stripped from this copy of the source.

        // Table of contents
        if ($flag_summary) {
            $result .= "\n<h2>Summary</h2>\n\n<ul>\n";
            $ingroup = false;
            $odd = true;
            foreach ($blocks as $block) {
                if ($block['private'] && !$flag_private) {
                    continue;
                }
                $anchor   = $this->_anchor($block['parent'], $block['id']);
                $abstract = $this->_linesToAbstract($block['lines'], $block['parent']);
                if ($block['NDtype'] == NDLITE_K_GROUP) {
                    if ($ingroup) {
                        $result .= "\t\t</ul>\n\t</li>\n";
                    } else {
                        $ingroup = true;
                    }
                    $odd = true;    // Reset zebra hint
                    $result .= "\t<li><a href=\"#{$anchor}\">{$block['id']}</a> " . $abstract . "\n\t\t<ul>\n";
                } else {
                    $result .= ($ingroup ? "\t" : '')
                        . "\t<li class=\"" . ($odd ? 'odd' : 'even') . "\"><a href=\"#{$anchor}\">{$block['id']}</a> "
                        . $abstract . "</li>\n";
                    $odd = !$odd;
                }
            }
            if ($ingroup) {
                $result .= "\t\t</ul>\n\t</li>\n";
            }
            $result .= "</ul>\n";
        }

        // Actual contents
        foreach ($blocks as $block) {
            if ($block['private'] && !$flag_private) {
                continue;
            }
            $anchor = $this->_anchor($block['parent'], $block['id']);
            if ($block['NDtype'] == NDLITE_K_GROUP) {
                $result .= "\n<h2 id=\"{$anchor}\">{$block['id']}</h2>\n\n";
            } else {
                $result .= "\n<h3 id=\"{$anchor}\">{$block['id']}</h3>\n\n";
                if ($block['code'] != '') {
                    // Code comes straight from the parsed source: escape it.
                    $result .= "<pre>" . htmlspecialchars($block['code'], ENT_NOQUOTES) . "</pre>\n\n";
                }
            }
            $result .= $this->_linesToXHTML($block['lines'], $block['parent']);
        }

        return $result;
    }
}
?>