commit 0843d76b495769fb1929af9cda136efa86d9ff6b Author: speedie Date: Mon Jan 30 15:26:06 2023 +0100 add speedie.gq wiki diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f7bd968 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2013 Steven Frank + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/config.php b/config.php new file mode 100644 index 0000000..5c31aaa --- /dev/null +++ b/config.php @@ -0,0 +1,173 @@ + + * Code may be re-used as long as the above copyright notice is retained. + * See README.txt for full details. + * + * Written with Coda: + * + */ + +// -------------------- +// Site layout settings +// -------------------- + +// BASE_PATH +// +// The base system path to W2. You only need to change this if we guess wrong. +// You should not use a trailing slash. + +define('BASE_PATH', getcwd()); + +// PAGES_PATH +// +// The path to the raw text documents maintained by W2 +// You should not use a trailing slash. + +define('PAGES_PATH', BASE_PATH . '/pages'); + +// BASE_URI +// +// The base URI for this W2 installation. You only need to change this if we guess wrong. +// You should not use a trailing slash. + +define('BASE_URI', str_replace('/index.php', '', $_SERVER['SCRIPT_NAME'])); + +// SELF +// +// The path component of the URL to the main script, such as: /w2/index.php + +define('SELF', $_SERVER['SCRIPT_NAME']); + +// VIEW +// +// Needed only if your web server spawns PHP as a CGI instead of an internal module. +// For example: define('VIEW', '?action=view&page='); + +define('VIEW', ''); + +// DEFAULT_PAGE +// +// The name of the page to show as the "Home" page. +// Value is a string, the title of a page (case-sensitive!) + +define('DEFAULT_PAGE', 'Home'); + +// CSS_FILE +// +// The CSS file to load to style the wiki, relative to BASE_URI + +define('CSS_FILE', 'index.css'); + +// -------------------- +// File upload settings +// -------------------- + +// DISABLE_UPLOADS +// +// Globally enable/disable file uploads + +define('DISABLE_UPLOADS', false); + +// VALID_UPLOAD_TYPES +// +// Acceptable file types for file uploads. This is a good idea for security. +// Value is a comma-separated string of MIME types. + +define('VALID_UPLOAD_TYPES', 'image/jpeg,image/pjpeg,image/png,image/gif,application/pdf,application/zip,application/x-diskcopy'); + +// VALID_UPLOAD_EXTS +// +// Acceptable filename extensions for file uploads +// Value is a comma-separated string of filename extensions (case-sensitive!) + +define('VALID_UPLOAD_EXTS', 'jpg,jpeg,png,gif,pdf,zip,dmg'); + +// ------------------ +// Interface settings +// ------------------ + +// TITLE_DATE +// +// The format to use when displaying page modification times. +// See the manual for the PHP 'date()' function for the specification: +// http://php.net/manual/en/function.date.php + +define('TITLE_DATE', 'j-M-Y g:i A'); +define('TITLE_DATE_NO_TIME', 'j-M-Y'); + +// EDIT_ROWS +// +// Default size of the text editing area in text rows. + +define('EDIT_ROWS', 18); + +// AUTOLINK_PAGE_TITLES +// +// Automatically converts any page titles appearing in text into links +// to the named page. This might degrade performance if you have many +// thousands of pages. + +define('AUTOLINK_PAGE_TITLES', false); + +// COLORIZE_MISSING_PAGES +// +// Automatically highlights as red links, any linked pages which are +// not yet written. Existing but blank pages are not colorized. This +// might degrade performance if you have thousands of links on a page. + +define('COLORIZE_MISSING_PAGES', true); + +// ----------------------------- +// Security and session settings +// ----------------------------- + +// REQUIRE_PASSWORD +// +// Is a password required to access this wiki? + +define('REQUIRE_PASSWORD', false); + +// W2_PASSWORD +// +// The password for the wiki, if REQUIRE_PASSWORD is true +// Replace 'secret' with your password to set your password. + +define('W2_PASSWORD', 'secret'); + +// W2_PASSWORD_HASH +// +// Alternate (more secure) password storage. +// To use a hashed password, Comment out the W2_PASSWORD definition above and uncomment +// this one, using the result of sha1('your_password') as the value. +// +// In Mac OS X, you can do this from the Terminal: +// echo -n 'your_password' | openssl sha1 +// +// define('W2_PASSWORD_HASH', 'e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4'); + +define('W2_PASSWORD_HASH', ''); + +// allowedIPs +// +// A whitelist of IP addresses that are allowed access to the wiki. +// If empty, all IPs are allowed. + +$allowedIPs = array(); + +// W2_SESSION_LIFETIME +// +// How long before a login session expires? Default is 30 days + +define('W2_SESSION_LIFETIME', 60 * 60 * 24 * 30); + +// W2_SESSION_NAME +// +// Name for session (used in the cookie) + +define('W2_SESSION_NAME', 'W2'); + +?> diff --git a/index.css b/index.css new file mode 100644 index 0000000..0024ed2 --- /dev/null +++ b/index.css @@ -0,0 +1,184 @@ +/* + * W2 + * + * Copyright (C) 2007-2011 Steven Frank + * + * Code may be re-used as long as the above copyright notice is retained. + * See README.txt for full details. + * + * Written with Coda: + * + */ + +.main { + background-color: #000000; + color: #ffffff; +} + +body { + background-color: #000000; +} + +* { + font-family: Monospace; + font-size: 11px; + margin: 0; + padding: 0; +} + +a { + color: #6666ff; +} + +blockquote { + background-color: #0f070f; + margin: 0px 0px 12px 20px; + padding-top: 4px 10px 4px 10px; +} + +form { + display: inline; +} + +li { + line-height: 1.5em; +} + +h1 { + font-size: 20px; + font-weight: bold; + margin-bottom: 8px; +} + +h2 { + font-size: 16px; + font-weight: bold; + margin-bottom: 8px; +} + +h3 { + font-size: 13px; + font-weight: bold; + margin-bottom: 8px; +} + +hr { + border-top: 1px solid #222222; + border-bottom: 0; + border-left: 0; + border-right: 0; + margin-top: 12px; + margin-bottom: 12px; +} + +ul { + padding-left: 12px; + margin-left: 12px; + margin-bottom: 12px; + list-style-position: inside; +} + +ol { + margin-left: 12px; + margin-bottom: 12px; + padding-left: 12px; + list-style-position: inside; +} + +p { + margin: 12px 12px 12px 12px; +} + +pre { + font-family: Monospace, monospace; + font-size: 11px; + margin-left: 12px; + margin-bottom: 12px; + + /* stroke each browser so that they wrap lines in the pre tag */ + white-space: pre-wrap; /* css-3 */ + white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + word-wrap: break-word; /* Internet Explorer 5.5+ */ +} + +code { + font-family: Monospace, monospace; + font-size: 11px; +} + +table { + border-collapse: collapse; +} + +table, td { + background-color: #000000; + padding: 2px; +} + +textarea { + font-family: Monospace, Courier, monospace; + font-size: 11px; + width: 99%; + background-color: #000011; + color: #ffffff; +} + +.main { + padding: 8px; +} + +.note { + background-color: #000011; + padding: 4px; + margin: 0 8px 8px 8px; + color: #9999bb; + border: solid 1px #9999bb; +} + +.titlebar { + background-color: #111111; + color: #ffffff; + font-size: 10px; + font-weight: bold; +} + +.titledate { + font-size: 10px; + color: #777777; +} + +.toolbar { + background-color: #222222; + position: sticky; + top: 0; + font-size: 8px; +} + +a.tool { + font-size: 11px; + margin-right: 8px; + color: #eeeeee; +} + +a.missing-link { + color: #ed123e; +} + +input { + background-color: #111111; + color: #ffffff; + +} + +input.tool { + font-size: 11px; + background-color: #222222; + color: #ffffff; +} + +img { + max-width: 50%; + height: auto; +} diff --git a/index.php b/index.php new file mode 100644 index 0000000..cc6e599 --- /dev/null +++ b/index.php @@ -0,0 +1,555 @@ + + * + * Code may be re-used as long as the above copyright notice is retained. + * See README.txt for full details. + * + * Written with Coda: + * + */ + +// Install PSR-4-compatible class autoloader +spl_autoload_register(function($class){ + require str_replace('\\', DIRECTORY_SEPARATOR, ltrim($class, '\\')).'.php'; +}); + + +// Get Markdown class +use mdlibs\MarkdownExtra; + + +// User configurable options: + +include_once "config.php"; + +ini_set('session.gc_maxlifetime', W2_SESSION_LIFETIME); + +session_set_cookie_params(W2_SESSION_LIFETIME); +session_name(W2_SESSION_NAME); +session_start(); + +if ( count($allowedIPs) > 0 ) +{ + $ip = $_SERVER['REMOTE_ADDR']; + $accepted = false; + + foreach ( $allowedIPs as $allowed ) + { + if ( strncmp($allowed, $ip, strlen($allowed)) == 0 ) + { + $accepted = true; + break; + } + } + + if ( !$accepted ) + { + print "Access from IP address $ip is not allowed"; + print ""; + exit; + } +} + +if ( REQUIRE_PASSWORD && !isset($_SESSION['password']) ) +{ + if ( !defined('W2_PASSWORD_HASH') || W2_PASSWORD_HASH == '' ) + define('W2_PASSWORD_HASH', sha1(W2_PASSWORD)); + + if ( (isset($_POST['p'])) && (sha1($_POST['p']) == W2_PASSWORD_HASH) ) + $_SESSION['password'] = W2_PASSWORD_HASH; + else + { + print "\n"; + print "\n"; + print "\n"; + print ""; + print "\n"; + + print "\n"; + print "Log In\n"; + print "\n"; + print "
"; + print "\n"; + print "
"; + print ""; + exit; + } +} + +// Support functions + +function _handle_links($match) +{ + $link = $match[1]; + if ( COLORIZE_MISSING_PAGES ) { + $link_page = sanitizeFilename($link); + $link_filename = PAGES_PATH . "/$link_page.txt"; + $link_page_exists = file_exists($link_filename); + } else { + $link_page_exists = true; + } + if ($link_page_exists) + return "" . htmlentities($link) . ""; + else + return "" . htmlentities($link) . ""; +} + + +function _handle_images($match) +{ + return "\"""; +} + + +function _handle_message($match) +{ + return "[email]"; +} + + +function printToolbar() +{ + global $upage, $page, $action; + + print "
"; + print "Edit "; + print "New "; + + if ( !DISABLE_UPLOADS ) + print "Upload "; + + print "All "; + print "Recent "; + print "". DEFAULT_PAGE . ""; + + if ( REQUIRE_PASSWORD ) + print 'Exit'; + + print "
\n"; + print "
\n"; + + print "
\n"; +} + + +function descLengthSort($val_1, $val_2) +{ + $retVal = 0; + + $firstVal = strlen($val_1); + $secondVal = strlen($val_2); + + if ( $firstVal > $secondVal ) + $retVal = -1; + + else if ( $firstVal < $secondVal ) + $retVal = 1; + + return $retVal; +} + + +function toHTML($inText) +{ + global $page; + + $inText = preg_replace("/<[\/]*script>/", "", $inText); + + $dir = opendir(PAGES_PATH); + while ( $filename = readdir($dir) ) + { + if ( $filename[0] == '.' ) + continue; + + $filename = preg_replace("/(.*?)\.txt/", "\\1", $filename); + $filenames[] = $filename; + } + closedir($dir); + + uasort($filenames, "descLengthSort"); + + if ( AUTOLINK_PAGE_TITLES ) + { + foreach ( $filenames as $filename ) + { + $inText = preg_replace("/(?\[\/])($filename)(?!\]\>)/im", "\\1", $inText); + } + } + + $inText = preg_replace_callback("/\[\[(.*?)\]\]/", '_handle_links', $inText); + $inText = preg_replace_callback("/\{\{(.*?)\}\}/", '_handle_images', $inText); + $inText = preg_replace_callback("/message:(.*?)\s/", '_handle_message', $inText); + + $html = MarkdownExtra::defaultTransform($inText); + $inText = htmlentities($inText); + + return $html; +} + +function sanitizeFilename($inFileName) +{ + return str_replace(array('..', '~', '/', '\\', ':'), '-', $inFileName); +} + +function destroy_session() +{ + if ( isset($_COOKIE[session_name()]) ) + setcookie(session_name(), '', time() - 42000, '/'); + + session_destroy(); + unset($_SESSION["password"]); + unset($_SESSION); +} + +// Support PHP4 by defining file_put_contents if it doesn't already exist + +if ( !function_exists('file_put_contents') ) +{ + function file_put_contents($n, $d) + { + $f = @fopen($n, "w"); + + if ( !$f ) + { + return false; + } + else + { + fwrite($f, $d); + fclose($f); + return true; + } + } +} +// Support PHP 8.1 by setting two predefined variables to empty strings if +// not already defined. Fixes a bunch of deprecation warnings. + +if (!isset($_SERVER["PATH_INFO"])) + $_SERVER["PATH_INFO"] = ''; +if (!isset($_REQUEST['page'])) + $_REQUEST['page'] = ''; + + +// Main code + +if ( isset($_REQUEST['action']) ) + $action = $_REQUEST['action']; +else + $action = 'view'; + +// Look for page name following the script name in the URL, like this: +// http://stevenf.com/w2demo/index.php/Markdown%20Syntax +// +// Otherwise, get page name from 'page' request variable. + +if ( preg_match('@^/@', @$_SERVER["PATH_INFO"]) ) + $page = sanitizeFilename(substr($_SERVER["PATH_INFO"], 1)); +else + $page = sanitizeFilename(@$_REQUEST['page']); + +$upage = urlencode($page); + +if ( $page == "" ) + $page = DEFAULT_PAGE; + +$filename = PAGES_PATH . "/$page.txt"; + +if ( file_exists($filename) ) +{ + $text = file_get_contents($filename); +} +else +{ + if ( $action != "save" && $action != "all_name" && $action != "all_date" && $action != "upload" && $action != "new" && $action != "logout" && $action != "uploaded" && $action != "search" && $action != "view" ) + { + $action = "edit"; + } +} + +if ( $action == "edit" || $action == "new" ) +{ + $formAction = SELF . (($action == 'edit') ? "/$page" : ""); + $html = "
\n"; + + if ( $action == "edit" ) + $html .= "\n"; + else + $html .= "

Title:

\n"; + + if ( $action == "new" ) + $text = ""; + + $html .= "

\n"; + $html .= "

"; + $html .= "\n"; + $html .= "

\n"; + $html .= "
\n"; +} +else if ( $action == "logout" ) +{ + destroy_session(); + header("Location: " . SELF); + exit; +} +else if ( $action == "upload" ) +{ + if ( DISABLE_UPLOADS ) + { + $html = "

Image uploading has been disabled on this installation.

"; + } + else + { + $html = "

\n"; + $html .= ""; + $html .= "\n"; + $html .= "\n"; + $html .= "\n"; + $html .= "

\n"; + } +} +else if ( $action == "uploaded" ) +{ + if ( !DISABLE_UPLOADS ) + { + $dstName = sanitizeFilename($_FILES['userfile']['name']); + $fileType = $_FILES['userfile']['type']; + preg_match('/\.([^.]+)$/', $dstName, $matches); + $fileExt = isset($matches[1]) ? $matches[1] : null; + + if (in_array($fileType, explode(',', VALID_UPLOAD_TYPES)) && + in_array($fileExt, explode(',', VALID_UPLOAD_EXTS))) + { + $errLevel = error_reporting(0); + + if ( move_uploaded_file($_FILES['userfile']['tmp_name'], + BASE_PATH . "/images/$dstName") === true ) + { + $html = "

File '$dstName' uploaded

\n"; + } + else + { + $html = "

Upload error

\n"; + } + + error_reporting($errLevel); + } else { + $html = "

Upload error: invalid file type

\n"; + } + } + + $html .= toHTML($text); +} +else if ( $action == "save" ) +{ + $newText = $_REQUEST['newText']; + + $errLevel = error_reporting(0); + $success = file_put_contents($filename, $newText); + error_reporting($errLevel); + + if ( $success ) + $html = "

Saved

\n"; + else + $html = "

Error saving changes! Make sure your web server has write access to " . PAGES_PATH . "

\n"; + + $html .= toHTML($newText); +} +/* +else if ( $action == "rename" ) +{ + $html = "
"; + $html .= "

Title: "; + $html .= ""; + $html .= "\n"; + $html .= ""; + $html .= ""; + $html .= "

"; +} +else if ( $action == "renamed" ) +{ + $pp = $_REQUEST['prevpage']; + $pg = $_REQUEST['page']; + + $prevpage = sanitizeFilename($pp); + $prevpage = urlencode($prevpage); + + $prevfilename = PAGES_PATH . "/$prevpage.txt"; + + if ( rename($prevfilename, $filename) ) + { + // Success. Change links in all pages to point to new page + if ( $dh = opendir(PAGES_PATH) ) + { + while ( ($file = readdir($dh)) !== false ) + { + $content = file_get_contents($file); + $pattern = "/\[\[" . $pp . "\]\]/g"; + preg_replace($pattern, "[[$pg]]", $content); + file_put_contents($file, $content); + } + } + } + else + { + $html = "

Error renaming file

\n"; + } +} +*/ +else if ( $action == "all_name" ) +{ + $dir = opendir(PAGES_PATH); + $filelist = array(); + + $color = "#ffffff"; + + while ( $file = readdir($dir) ) + { + if ( $file[0] == "." ) + continue; + + $afile = preg_replace("/(.*?)\.txt/", "\\1", $file); + $efile = preg_replace("/(.*?)\.txt/", "edit", urlencode($file)); + + array_push($filelist, "$afile$efile"); + + if ( $color == "#ffffff" ) + $color = "#f4f4f4"; + else + $color = "#ffffff"; + } + + closedir($dir); + + natcasesort($filelist); + + $html = ""; + + + for ($i = 0; $i < count($filelist); $i++) + { + $html .= $filelist[$i]; + } + + $html .= "
\n"; +} +else if ( $action == "all_date" ) +{ + $html = "\n"; + $dir = opendir(PAGES_PATH); + $filelist = array(); + while ( $file = readdir($dir) ) + { + if ( $file[0] == "." ) + continue; + + $filelist[preg_replace("/(.*?)\.txt/", "\\1", $file)] = filemtime(PAGES_PATH . "/$file"); + } + + closedir($dir); + + $color = "#ffffff"; + arsort($filelist, SORT_NUMERIC); + + foreach ($filelist as $key => $value) + { + $html .= "\n"; + + if ( $color == "#ffffff" ) + $color = "#f4f4f4"; + else + $color = "#ffffff"; + } + $html .= "
$key" . date(TITLE_DATE_NO_TIME, $value) . "
\n"; +} +else if ( $action == "search" ) +{ + $matches = 0; + $q = $_REQUEST['q']; + $html = "

Search: $q

\n
    \n"; + + if ( trim($q) != "" ) + { + $dir = opendir(PAGES_PATH); + + while ( $file = readdir($dir) ) + { + if ( $file[0] == "." ) + continue; + + $text = file_get_contents(PAGES_PATH . "/$file"); + + if ( preg_match("/{$q}/i", $text) || preg_match("/{$q}/i", $file) ) + { + ++$matches; + $file = preg_replace("/(.*?)\.txt/", "\\1", $file); + $html .= "
  • $file
  • \n"; + } + } + + closedir($dir); + } + + $html .= "
\n"; + $html .= "

$matches matched

\n"; +} +else +{ + $html = toHTML($text); +} + +$datetime = ''; + +if ( ($action == "all_name") || ($action == "all_date")) + $title = "All Pages"; + +else if ( $action == "upload" ) + $title = "Upload Image"; + +else if ( $action == "new" ) + $title = "New"; + +else if ( $action == "search" ) + $title = "Search"; + +else +{ + $title = $page; + + if ( TITLE_DATE ) + { + $datetime = "" . date(TITLE_DATE, @filemtime($filename)) . ""; + } +} + +// Disable caching on the client (the iPhone is pretty agressive about this +// and it can cause problems with the editing function) + +header("Cache-Control: no-cache, must-revalidate"); // HTTP/1.1 +header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past + +print "\n"; +print "\n"; +print "\n"; +print ""; +print "\n"; + +print "\n"; +print "$title\n"; +print "\n"; +print "\n"; +print "
$title $datetime
\n"; + +printToolbar(); + +print "
\n"; +print "$html\n"; +print "
\n"; + +print "\n"; +print "\n"; + +?> diff --git a/mdlibs/Markdown.inc.php b/mdlibs/Markdown.inc.php new file mode 100644 index 0000000..e2bd380 --- /dev/null +++ b/mdlibs/Markdown.inc.php @@ -0,0 +1,10 @@ + + * @copyright 2004-2019 Michel Fortin + * @copyright (Original Markdown) 2004-2006 John Gruber + */ + +namespace mdlibs; + +/** + * Markdown Parser Class + */ +class Markdown implements MarkdownInterface { + /** + * Define the package version + * @var string + */ + const MARKDOWNLIB_VERSION = "2.0"; + + /** + * Simple function interface - Initialize the parser and return the result + * of its transform method. This will work fine for derived classes too. + * + * @api + * + * @param string $text + * @return string + */ + public static function defaultTransform($text) { + // Take parser class on which this function was called. + $parser_class = static::class; + + // Try to take parser from the static parser list + static $parser_list; + $parser =& $parser_list[$parser_class]; + + // Create the parser it not already set + if (!$parser) { + $parser = new $parser_class; + } + + // Transform text using parser. + return $parser->transform($text); + } + + /** + * Configuration variables + */ + /** + * Change to ">" for HTML output. + */ + public $empty_element_suffix = " />"; + + /** + * The width of indentation of the output markup + */ + public $tab_width = 4; + + /** + * Change to `true` to disallow markup or entities. + */ + public $no_markup = false; + public $no_entities = false; + + + /** + * Change to `true` to enable line breaks on \n without two trailling spaces + * @var boolean + */ + public $hard_wrap = false; + + /** + * Predefined URLs and titles for reference links and images. + */ + public $predef_urls = array(); + public $predef_titles = array(); + + /** + * Optional filter function for URLs + * @var callable|null + */ + public $url_filter_func = null; + + /** + * Optional header id="" generation callback function. + * @var callable|null + */ + public $header_id_func = null; + + /** + * Optional function for converting code block content to HTML + * @var callable|null + */ + public $code_block_content_func = null; + + /** + * Optional function for converting code span content to HTML. + * @var callable|null + */ + public $code_span_content_func = null; + + /** + * Class attribute to toggle "enhanced ordered list" behaviour + * setting this to true will allow ordered lists to start from the index + * number that is defined first. + * + * For example: + * 2. List item two + * 3. List item three + * + * Becomes: + *
    + *
  1. List item two
  2. + *
  3. List item three
  4. + *
+ */ + public $enhanced_ordered_list = false; + + /** + * Parser implementation + */ + /** + * Regex to match balanced [brackets]. + * Needed to insert a maximum bracked depth while converting to PHP. + */ + protected $nested_brackets_depth = 6; + protected $nested_brackets_re; + + protected $nested_url_parenthesis_depth = 4; + protected $nested_url_parenthesis_re; + + /** + * Table of hash values for escaped characters: + */ + protected $escape_chars = '\`*_{}[]()>#+-.!'; + protected $escape_chars_re; + + /** + * Constructor function. Initialize appropriate member variables. + * @return void + */ + public function __construct() { + $this->_initDetab(); + $this->prepareItalicsAndBold(); + + $this->nested_brackets_re = + str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). + str_repeat('\])*', $this->nested_brackets_depth); + + $this->nested_url_parenthesis_re = + str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). + str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); + + $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; + + // Sort document, block, and span gamut in ascendent priority order. + asort($this->document_gamut); + asort($this->block_gamut); + asort($this->span_gamut); + } + + + /** + * Internal hashes used during transformation. + */ + protected $urls = array(); + protected $titles = array(); + protected $html_hashes = array(); + + /** + * Status flag to avoid invalid nesting. + */ + protected $in_anchor = false; + + /** + * Status flag to avoid invalid nesting. + */ + protected $in_emphasis_processing = false; + + /** + * Called before the transformation process starts to setup parser states. + * @return void + */ + protected function setup() { + // Clear global hashes. + $this->urls = $this->predef_urls; + $this->titles = $this->predef_titles; + $this->html_hashes = array(); + $this->in_anchor = false; + $this->in_emphasis_processing = false; + } + + /** + * Called after the transformation process to clear any variable which may + * be taking up memory unnecessarly. + * @return void + */ + protected function teardown() { + $this->urls = array(); + $this->titles = array(); + $this->html_hashes = array(); + } + + /** + * Main function. Performs some preprocessing on the input text and pass + * it through the document gamut. + * + * @api + * + * @param string $text + * @return string + */ + public function transform($text) { + $this->setup(); + + # Remove UTF-8 BOM and marker character in input, if present. + $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); + + # Standardize line endings: + # DOS to Unix and Mac to Unix + $text = preg_replace('{\r\n?}', "\n", $text); + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = $this->detab($text); + + # Turn block-level HTML blocks into hash entries + $text = $this->hashHTMLBlocks($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ ]*\n+/ . + $text = preg_replace('/^[ ]+$/m', '', $text); + + # Run document gamut methods. + foreach ($this->document_gamut as $method => $priority) { + $text = $this->$method($text); + } + + $this->teardown(); + + return $text . "\n"; + } + + /** + * Define the document gamut + */ + protected $document_gamut = array( + // Strip link definitions, store in hashes. + "stripLinkDefinitions" => 20, + "runBasicBlockGamut" => 30, + ); + + /** + * Strips link definitions from text, stores the URLs and titles in + * hash references + * @param string $text + * @return string + */ + protected function stripLinkDefinitions($text) { + + $less_than_tab = $this->tab_width - 1; + + // Link defs are in the form: ^[id]: url "optional title" + $text = preg_replace_callback('{ + ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + (?: + <(.+?)> # url = $2 + | + (\S+?) # url = $3 + ) + [ ]* + \n? # maybe one newline + [ ]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.*?) # title = $4 + [")] + [ ]* + )? # title is optional + (?:\n+|\Z) + }xm', + array($this, '_stripLinkDefinitions_callback'), + $text + ); + return $text; + } + + /** + * The callback to strip link definitions + * @param array $matches + * @return string + */ + protected function _stripLinkDefinitions_callback($matches) { + $link_id = strtolower($matches[1]); + $url = $matches[2] == '' ? $matches[3] : $matches[2]; + $this->urls[$link_id] = $url; + $this->titles[$link_id] =& $matches[4]; + return ''; // String that will replace the block + } + + /** + * Hashify HTML blocks + * @param string $text + * @return string + */ + protected function hashHTMLBlocks($text) { + if ($this->no_markup) { + return $text; + } + + $less_than_tab = $this->tab_width - 1; + + /** + * Hashify HTML blocks: + * + * We only want to do this for block-level HTML tags, such as headers, + * lists, and tables. That's because we still want to wrap

s around + * "paragraphs" that are wrapped in non-block-level tags, such as + * anchors, phrase emphasis, and spans. The list of tags we're looking + * for is hard-coded: + * + * * List "a" is made of tags which can be both inline or block-level. + * These will be treated block-level when the start tag is alone on + * its line, otherwise they're not matched here and will be taken as + * inline later. + * * List "b" is made of tags which are always block-level; + */ + $block_tags_a_re = 'ins|del'; + $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. + 'script|noscript|style|form|fieldset|iframe|math|svg|'. + 'article|section|nav|aside|hgroup|header|footer|'. + 'figure|details|summary'; + + // Regular expression for the content of a block tag. + $nested_tags_level = 4; + $attr = ' + (?> # optional tag attributes + \s # starts with whitespace + (?> + [^>"/]+ # text outside quotes + | + /+(?!>) # slash not followed by ">" + | + "[^"]*" # text inside double quotes (tolerate ">") + | + \'[^\']*\' # text inside single quotes (tolerate ">") + )* + )? + '; + $content = + str_repeat(' + (?> + [^<]+ # content without tag + | + <\2 # nested opening tag + '.$attr.' # attributes + (?> + /> + | + >', $nested_tags_level). // end of opening tag + '.*?'. // last level nested tag content + str_repeat(' + # closing nested tag + ) + | + <(?!/\2\s*> # other tags with a different name + ) + )*', + $nested_tags_level); + $content2 = str_replace('\2', '\3', $content); + + /** + * First, look for nested blocks, e.g.: + *

+ *
+ * tags for inner block must be indented. + *
+ *
+ * + * The outermost tags must start at the left margin for this to match, + * and the inner nested divs must be indented. + * We need to do this before the next, more liberal match, because the + * next match will start at the first `
` and stop at the + * first `
`. + */ + $text = preg_replace_callback('{(?> + (?> + (?<=\n) # Starting on its own line + | # or + \A\n? # the at beginning of the doc + ) + ( # save in $1 + + # Match from `\n` to `\n`, handling nested tags + # in between. + + [ ]{0,'.$less_than_tab.'} + <('.$block_tags_b_re.')# start tag = $2 + '.$attr.'> # attributes followed by > and \n + '.$content.' # content, support nesting + # the matching end tag + [ ]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + + | # Special version for tags of group a. + + [ ]{0,'.$less_than_tab.'} + <('.$block_tags_a_re.')# start tag = $3 + '.$attr.'>[ ]*\n # attributes followed by > + '.$content2.' # content, support nesting + # the matching end tag + [ ]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + + | # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + + [ ]{0,'.$less_than_tab.'} + <(hr) # start tag = $2 + '.$attr.' # attributes + /?> # the matching end tag + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # Special case for standalone HTML comments: + + [ ]{0,'.$less_than_tab.'} + (?s: + + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # PHP and ASP-style processor instructions ( + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + ) + )}Sxmi', + array($this, '_hashHTMLBlocks_callback'), + $text + ); + + return $text; + } + + /** + * The callback for hashing HTML blocks + * @param string $matches + * @return string + */ + protected function _hashHTMLBlocks_callback($matches) { + $text = $matches[1]; + $key = $this->hashBlock($text); + return "\n\n$key\n\n"; + } + + /** + * Called whenever a tag must be hashed when a function insert an atomic + * element in the text stream. Passing $text to through this function gives + * a unique text-token which will be reverted back when calling unhash. + * + * The $boundary argument specify what character should be used to surround + * the token. By convension, "B" is used for block elements that needs not + * to be wrapped into paragraph tags at the end, ":" is used for elements + * that are word separators and "X" is used in the general case. + * + * @param string $text + * @param string $boundary + * @return string + */ + protected function hashPart($text, $boundary = 'X') { + // Swap back any tag hash found in $text so we do not have to `unhash` + // multiple times at the end. + $text = $this->unhash($text); + + // Then hash the block. + static $i = 0; + $key = "$boundary\x1A" . ++$i . $boundary; + $this->html_hashes[$key] = $text; + return $key; // String that will replace the tag. + } + + /** + * Shortcut function for hashPart with block-level boundaries. + * @param string $text + * @return string + */ + protected function hashBlock($text) { + return $this->hashPart($text, 'B'); + } + + /** + * Define the block gamut - these are all the transformations that form + * block-level tags like paragraphs, headers, and list items. + */ + protected $block_gamut = array( + "doHeaders" => 10, + "doHorizontalRules" => 20, + "doLists" => 40, + "doCodeBlocks" => 50, + "doBlockQuotes" => 60, + ); + + /** + * Run block gamut tranformations. + * + * We need to escape raw HTML in Markdown source before doing anything + * else. This need to be done for each block, and not only at the + * begining in the Markdown function since hashed blocks can be part of + * list items and could have been indented. Indented blocks would have + * been seen as a code block in a previous pass of hashHTMLBlocks. + * + * @param string $text + * @return string + */ + protected function runBlockGamut($text) { + $text = $this->hashHTMLBlocks($text); + return $this->runBasicBlockGamut($text); + } + + /** + * Run block gamut tranformations, without hashing HTML blocks. This is + * useful when HTML blocks are known to be already hashed, like in the first + * whole-document pass. + * + * @param string $text + * @return string + */ + protected function runBasicBlockGamut($text) { + + foreach ($this->block_gamut as $method => $priority) { + $text = $this->$method($text); + } + + // Finally form paragraph and restore hashed blocks. + $text = $this->formParagraphs($text); + + return $text; + } + + /** + * Convert horizontal rules + * @param string $text + * @return string + */ + protected function doHorizontalRules($text) { + return preg_replace( + '{ + ^[ ]{0,3} # Leading space + ([-*_]) # $1: First marker + (?> # Repeated marker group + [ ]{0,2} # Zero, one, or two spaces. + \1 # Marker character + ){2,} # Group repeated at least twice + [ ]* # Tailing spaces + $ # End of line. + }mx', + "\n".$this->hashBlock("empty_element_suffix")."\n", + $text + ); + } + + /** + * These are all the transformations that occur *within* block-level + * tags like paragraphs, headers, and list items. + */ + protected $span_gamut = array( + // Process character escapes, code spans, and inline HTML + // in one shot. + "parseSpan" => -30, + // Process anchor and image tags. Images must come first, + // because ![foo][f] looks like an anchor. + "doImages" => 10, + "doAnchors" => 20, + // Make links out of things like `` + // Must come after doAnchors, because you can use < and > + // delimiters in inline links like [this](). + "doAutoLinks" => 30, + "encodeAmpsAndAngles" => 40, + "doItalicsAndBold" => 50, + "doHardBreaks" => 60, + ); + + /** + * Run span gamut transformations + * @param string $text + * @return string + */ + protected function runSpanGamut($text) { + foreach ($this->span_gamut as $method => $priority) { + $text = $this->$method($text); + } + + return $text; + } + + /** + * Do hard breaks + * @param string $text + * @return string + */ + protected function doHardBreaks($text) { + if ($this->hard_wrap) { + return preg_replace_callback('/ *\n/', + array($this, '_doHardBreaks_callback'), $text); + } else { + return preg_replace_callback('/ {2,}\n/', + array($this, '_doHardBreaks_callback'), $text); + } + } + + /** + * Trigger part hashing for the hard break (callback method) + * @param array $matches + * @return string + */ + protected function _doHardBreaks_callback($matches) { + return $this->hashPart("empty_element_suffix\n"); + } + + /** + * Turn Markdown link shortcuts into XHTML tags. + * @param string $text + * @return string + */ + protected function doAnchors($text) { + if ($this->in_anchor) { + return $text; + } + $this->in_anchor = true; + + // First, handle reference-style links: [link text] [id] + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + ('.$this->nested_brackets_re.') # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }xs', + array($this, '_doAnchors_reference_callback'), $text); + + // Next, inline-style links: [link text](url "optional title") + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + ('.$this->nested_brackets_re.') # link text = $2 + \] + \( # literal paren + [ \n]* + (?: + <(.+?)> # href = $3 + | + ('.$this->nested_url_parenthesis_re.') # href = $4 + ) + [ \n]* + ( # $5 + ([\'"]) # quote char = $6 + (.*?) # Title = $7 + \6 # matching quote + [ \n]* # ignore any spaces/tabs between closing quote and ) + )? # title is optional + \) + ) + }xs', + array($this, '_doAnchors_inline_callback'), $text); + + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link text][1] + // or [link text](/foo) + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + ([^\[\]]+) # link text = $2; can\'t contain [ or ] + \] + ) + }xs', + array($this, '_doAnchors_reference_callback'), $text); + + $this->in_anchor = false; + return $text; + } + + /** + * Callback method to parse referenced anchors + * @param array $matches + * @return string + */ + protected function _doAnchors_reference_callback($matches) { + $whole_match = $matches[1]; + $link_text = $matches[2]; + $link_id =& $matches[3]; + + if ($link_id == "") { + // for shortcut links like [this][] or [this]. + $link_id = $link_text; + } + + // lower-case and turn embedded newlines into spaces + $link_id = strtolower($link_id); + $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); + + if (isset($this->urls[$link_id])) { + $url = $this->urls[$link_id]; + $url = $this->encodeURLAttribute($url); + + $result = "titles[$link_id] ) ) { + $title = $this->titles[$link_id]; + $title = $this->encodeAttribute($title); + $result .= " title=\"$title\""; + } + + $link_text = $this->runSpanGamut($link_text); + $result .= ">$link_text"; + $result = $this->hashPart($result); + } else { + $result = $whole_match; + } + return $result; + } + + /** + * Callback method to parse inline anchors + * @param array $matches + * @return string + */ + protected function _doAnchors_inline_callback($matches) { + $link_text = $this->runSpanGamut($matches[2]); + $url = $matches[3] === '' ? $matches[4] : $matches[3]; + $title =& $matches[7]; + + // If the URL was of the form it got caught by the HTML + // tag parser and hashed. Need to reverse the process before using + // the URL. + $unhashed = $this->unhash($url); + if ($unhashed !== $url) + $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); + + $url = $this->encodeURLAttribute($url); + + $result = "encodeAttribute($title); + $result .= " title=\"$title\""; + } + + $link_text = $this->runSpanGamut($link_text); + $result .= ">$link_text"; + + return $this->hashPart($result); + } + + /** + * Turn Markdown image shortcuts into tags. + * @param string $text + * @return string + */ + protected function doImages($text) { + // First, handle reference-style labeled images: ![alt text][id] + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + !\[ + ('.$this->nested_brackets_re.') # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }xs', + array($this, '_doImages_reference_callback'), $text); + + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + !\[ + ('.$this->nested_brackets_re.') # alt text = $2 + \] + \s? # One optional whitespace character + \( # literal paren + [ \n]* + (?: + <(\S*)> # src url = $3 + | + ('.$this->nested_url_parenthesis_re.') # src url = $4 + ) + [ \n]* + ( # $5 + ([\'"]) # quote char = $6 + (.*?) # title = $7 + \6 # matching quote + [ \n]* + )? # title is optional + \) + ) + }xs', + array($this, '_doImages_inline_callback'), $text); + + return $text; + } + + /** + * Callback to parse references image tags + * @param array $matches + * @return string + */ + protected function _doImages_reference_callback($matches) { + $whole_match = $matches[1]; + $alt_text = $matches[2]; + $link_id = strtolower($matches[3]); + + if ($link_id == "") { + $link_id = strtolower($alt_text); // for shortcut links like ![this][]. + } + + $alt_text = $this->encodeAttribute($alt_text); + if (isset($this->urls[$link_id])) { + $url = $this->encodeURLAttribute($this->urls[$link_id]); + $result = "\"$alt_text\"";titles[$link_id])) { + $title = $this->titles[$link_id]; + $title = $this->encodeAttribute($title); + $result .= " title=\"$title\""; + } + $result .= $this->empty_element_suffix; + $result = $this->hashPart($result); + } else { + // If there's no such link ID, leave intact: + $result = $whole_match; + } + + return $result; + } + + /** + * Callback to parse inline image tags + * @param array $matches + * @return string + */ + protected function _doImages_inline_callback($matches) { + $whole_match = $matches[1]; + $alt_text = $matches[2]; + $url = $matches[3] == '' ? $matches[4] : $matches[3]; + $title =& $matches[7]; + + $alt_text = $this->encodeAttribute($alt_text); + $url = $this->encodeURLAttribute($url); + $result = "\"$alt_text\"";encodeAttribute($title); + $result .= " title=\"$title\""; // $title already quoted + } + $result .= $this->empty_element_suffix; + + return $this->hashPart($result); + } + + /** + * Parse Markdown heading elements to HTML + * @param string $text + * @return string + */ + protected function doHeaders($text) { + /** + * Setext-style headers: + * Header 1 + * ======== + * + * Header 2 + * -------- + */ + $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', + array($this, '_doHeaders_callback_setext'), $text); + + /** + * atx-style headers: + * # Header 1 + * ## Header 2 + * ## Header 2 with closing hashes ## + * ... + * ###### Header 6 + */ + $text = preg_replace_callback('{ + ^(\#{1,6}) # $1 = string of #\'s + [ ]* + (.+?) # $2 = Header text + [ ]* + \#* # optional closing #\'s (not counted) + \n+ + }xm', + array($this, '_doHeaders_callback_atx'), $text); + + return $text; + } + + /** + * Setext header parsing callback + * @param array $matches + * @return string + */ + protected function _doHeaders_callback_setext($matches) { + // Terrible hack to check we haven't found an empty list item. + if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) { + return $matches[0]; + } + + $level = $matches[2][0] == '=' ? 1 : 2; + + // ID attribute generation + $idAtt = $this->_generateIdFromHeaderValue($matches[1]); + + $block = "".$this->runSpanGamut($matches[1]).""; + return "\n" . $this->hashBlock($block) . "\n\n"; + } + + /** + * ATX header parsing callback + * @param array $matches + * @return string + */ + protected function _doHeaders_callback_atx($matches) { + // ID attribute generation + $idAtt = $this->_generateIdFromHeaderValue($matches[2]); + + $level = strlen($matches[1]); + $block = "".$this->runSpanGamut($matches[2]).""; + return "\n" . $this->hashBlock($block) . "\n\n"; + } + + /** + * If a header_id_func property is set, we can use it to automatically + * generate an id attribute. + * + * This method returns a string in the form id="foo", or an empty string + * otherwise. + * @param string $headerValue + * @return string + */ + protected function _generateIdFromHeaderValue($headerValue) { + if (!is_callable($this->header_id_func)) { + return ""; + } + + $idValue = call_user_func($this->header_id_func, $headerValue); + if (!$idValue) { + return ""; + } + + return ' id="' . $this->encodeAttribute($idValue) . '"'; + } + + /** + * Form HTML ordered (numbered) and unordered (bulleted) lists. + * @param string $text + * @return string + */ + protected function doLists($text) { + $less_than_tab = $this->tab_width - 1; + + // Re-usable patterns to match list item bullets and number markers: + $marker_ul_re = '[*+-]'; + $marker_ol_re = '\d+[\.]'; + + $markers_relist = array( + $marker_ul_re => $marker_ol_re, + $marker_ol_re => $marker_ul_re, + ); + + foreach ($markers_relist as $marker_re => $other_marker_re) { + // Re-usable pattern to match any entirel ul or ol list: + $whole_list_re = ' + ( # $1 = whole list + ( # $2 + ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces + ('.$marker_re.') # $4 = first list item marker + [ ]+ + ) + (?s:.+?) + ( # $5 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ ]* + '.$marker_re.'[ ]+ + ) + | + (?= # Lookahead for another kind of list + \n + \3 # Must have the same indentation + '.$other_marker_re.'[ ]+ + ) + ) + ) + '; // mx + + // We use a different prefix before nested lists than top-level lists. + //See extended comment in _ProcessListItems(). + + if ($this->list_level) { + $text = preg_replace_callback('{ + ^ + '.$whole_list_re.' + }mx', + array($this, '_doLists_callback'), $text); + } else { + $text = preg_replace_callback('{ + (?:(?<=\n)\n|\A\n?) # Must eat the newline + '.$whole_list_re.' + }mx', + array($this, '_doLists_callback'), $text); + } + } + + return $text; + } + + /** + * List parsing callback + * @param array $matches + * @return string + */ + protected function _doLists_callback($matches) { + // Re-usable patterns to match list item bullets and number markers: + $marker_ul_re = '[*+-]'; + $marker_ol_re = '\d+[\.]'; + $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; + $marker_ol_start_re = '[0-9]+'; + + $list = $matches[1]; + $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; + + $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); + + $list .= "\n"; + $result = $this->processListItems($list, $marker_any_re); + + $ol_start = 1; + if ($this->enhanced_ordered_list) { + // Get the start number for ordered list. + if ($list_type == 'ol') { + $ol_start_array = array(); + $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array); + if ($ol_start_check){ + $ol_start = $ol_start_array[0]; + } + } + } + + if ($ol_start > 1 && $list_type == 'ol'){ + $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . ""); + } else { + $result = $this->hashBlock("<$list_type>\n" . $result . ""); + } + return "\n". $result ."\n\n"; + } + + /** + * Nesting tracker for list levels + */ + protected $list_level = 0; + + /** + * Process the contents of a single ordered or unordered list, splitting it + * into individual list items. + * @param string $list_str + * @param string $marker_any_re + * @return string + */ + protected function processListItems($list_str, $marker_any_re) { + /** + * The $this->list_level global keeps track of when we're inside a list. + * Each time we enter a list, we increment it; when we leave a list, + * we decrement. If it's zero, we're not in a list anymore. + * + * We do this because when we're not inside a list, we want to treat + * something like this: + * + * I recommend upgrading to version + * 8. Oops, now this line is treated + * as a sub-list. + * + * As a single paragraph, despite the fact that the second line starts + * with a digit-period-space sequence. + * + * Whereas when we're inside a list (or sub-list), that line will be + * treated as the start of a sub-list. What a kludge, huh? This is + * an aspect of Markdown's syntax that's hard to parse perfectly + * without resorting to mind-reading. Perhaps the solution is to + * change the syntax rules such that sub-lists must start with a + * starting cardinal number; e.g. "1." or "a.". + */ + $this->list_level++; + + // Trim trailing blank lines: + $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); + + $list_str = preg_replace_callback('{ + (\n)? # leading line = $1 + (^[ ]*) # leading whitespace = $2 + ('.$marker_any_re.' # list marker and space = $3 + (?:[ ]+|(?=\n)) # space only required if item is not empty + ) + ((?s:.*?)) # list item text = $4 + (?:(\n+(?=\n))|\n) # tailing blank line = $5 + (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) + }xm', + array($this, '_processListItems_callback'), $list_str); + + $this->list_level--; + return $list_str; + } + + /** + * List item parsing callback + * @param array $matches + * @return string + */ + protected function _processListItems_callback($matches) { + $item = $matches[4]; + $leading_line =& $matches[1]; + $leading_space =& $matches[2]; + $marker_space = $matches[3]; + $tailing_blank_line =& $matches[5]; + + if ($leading_line || $tailing_blank_line || + preg_match('/\n{2,}/', $item)) + { + // Replace marker with the appropriate whitespace indentation + $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; + $item = $this->runBlockGamut($this->outdent($item)."\n"); + } else { + // Recursion for sub-lists: + $item = $this->doLists($this->outdent($item)); + $item = $this->formParagraphs($item, false); + } + + return "
  • " . $item . "
  • \n"; + } + + /** + * Process Markdown `
    ` blocks.
    +	 * @param  string $text
    +	 * @return string
    +	 */
    +	protected function doCodeBlocks($text) {
    +		$text = preg_replace_callback('{
    +				(?:\n\n|\A\n?)
    +				(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +				  (?>
    +					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
    +					.*\n+
    +				  )+
    +				)
    +				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +			}xm',
    +			array($this, '_doCodeBlocks_callback'), $text);
    +
    +		return $text;
    +	}
    +
    +	/**
    +	 * Code block parsing callback
    +	 * @param  array $matches
    +	 * @return string
    +	 */
    +	protected function _doCodeBlocks_callback($matches) {
    +		$codeblock = $matches[1];
    +
    +		$codeblock = $this->outdent($codeblock);
    +		if (is_callable($this->code_block_content_func)) {
    +			$codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
    +		} else {
    +			$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    +		}
    +
    +		# trim leading newlines and trailing newlines
    +		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
    +
    +		$codeblock = "
    $codeblock\n
    "; + return "\n\n" . $this->hashBlock($codeblock) . "\n\n"; + } + + /** + * Create a code span markup for $code. Called from handleSpanToken. + * @param string $code + * @return string + */ + protected function makeCodeSpan($code) { + if (is_callable($this->code_span_content_func)) { + $code = call_user_func($this->code_span_content_func, $code); + } else { + $code = htmlspecialchars(trim($code), ENT_NOQUOTES); + } + return $this->hashPart("$code"); + } + + /** + * Define the emphasis operators with their regex matches + * @var array + */ + protected $em_relist = array( + '' => '(?:(? '(? '(? '(?:(? '(? '(? '(?:(? '(? '(?em_relist as $em => $em_re) { + foreach ($this->strong_relist as $strong => $strong_re) { + // Construct list of allowed token expressions. + $token_relist = array(); + if (isset($this->em_strong_relist["$em$strong"])) { + $token_relist[] = $this->em_strong_relist["$em$strong"]; + } + $token_relist[] = $em_re; + $token_relist[] = $strong_re; + + // Construct master expression from list. + $token_re = '{(' . implode('|', $token_relist) . ')}'; + $this->em_strong_prepared_relist["$em$strong"] = $token_re; + } + } + } + + /** + * Convert Markdown italics (emphasis) and bold (strong) to HTML + * @param string $text + * @return string + */ + protected function doItalicsAndBold($text) { + if ($this->in_emphasis_processing) { + return $text; // avoid reentrency + } + $this->in_emphasis_processing = true; + + $token_stack = array(''); + $text_stack = array(''); + $em = ''; + $strong = ''; + $tree_char_em = false; + + while (1) { + // Get prepared regular expression for seraching emphasis tokens + // in current context. + $token_re = $this->em_strong_prepared_relist["$em$strong"]; + + // Each loop iteration search for the next emphasis token. + // Each token is then passed to handleSpanToken. + $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); + $text_stack[0] .= $parts[0]; + $token =& $parts[1]; + $text =& $parts[2]; + + if (empty($token)) { + // Reached end of text span: empty stack without emitting. + // any more emphasis. + while ($token_stack[0]) { + $text_stack[1] .= array_shift($token_stack); + $text_stack[0] .= array_shift($text_stack); + } + break; + } + + $token_len = strlen($token); + if ($tree_char_em) { + // Reached closing marker while inside a three-char emphasis. + if ($token_len == 3) { + // Three-char closing marker, close em and strong. + array_shift($token_stack); + $span = array_shift($text_stack); + $span = $this->runSpanGamut($span); + $span = "$span"; + $text_stack[0] .= $this->hashPart($span); + $em = ''; + $strong = ''; + } else { + // Other closing marker: close one em or strong and + // change current token state to match the other + $token_stack[0] = str_repeat($token[0], 3-$token_len); + $tag = $token_len == 2 ? "strong" : "em"; + $span = $text_stack[0]; + $span = $this->runSpanGamut($span); + $span = "<$tag>$span"; + $text_stack[0] = $this->hashPart($span); + $$tag = ''; // $$tag stands for $em or $strong + } + $tree_char_em = false; + } else if ($token_len == 3) { + if ($em) { + // Reached closing marker for both em and strong. + // Closing strong marker: + for ($i = 0; $i < 2; ++$i) { + $shifted_token = array_shift($token_stack); + $tag = strlen($shifted_token) == 2 ? "strong" : "em"; + $span = array_shift($text_stack); + $span = $this->runSpanGamut($span); + $span = "<$tag>$span"; + $text_stack[0] .= $this->hashPart($span); + $$tag = ''; // $$tag stands for $em or $strong + } + } else { + // Reached opening three-char emphasis marker. Push on token + // stack; will be handled by the special condition above. + $em = $token[0]; + $strong = "$em$em"; + array_unshift($token_stack, $token); + array_unshift($text_stack, ''); + $tree_char_em = true; + } + } else if ($token_len == 2) { + if ($strong) { + // Unwind any dangling emphasis marker: + if (strlen($token_stack[0]) == 1) { + $text_stack[1] .= array_shift($token_stack); + $text_stack[0] .= array_shift($text_stack); + $em = ''; + } + // Closing strong marker: + array_shift($token_stack); + $span = array_shift($text_stack); + $span = $this->runSpanGamut($span); + $span = "$span"; + $text_stack[0] .= $this->hashPart($span); + $strong = ''; + } else { + array_unshift($token_stack, $token); + array_unshift($text_stack, ''); + $strong = $token; + } + } else { + // Here $token_len == 1 + if ($em) { + if (strlen($token_stack[0]) == 1) { + // Closing emphasis marker: + array_shift($token_stack); + $span = array_shift($text_stack); + $span = $this->runSpanGamut($span); + $span = "$span"; + $text_stack[0] .= $this->hashPart($span); + $em = ''; + } else { + $text_stack[0] .= $token; + } + } else { + array_unshift($token_stack, $token); + array_unshift($text_stack, ''); + $em = $token; + } + } + } + $this->in_emphasis_processing = false; + return $text_stack[0]; + } + + /** + * Parse Markdown blockquotes to HTML + * @param string $text + * @return string + */ + protected function doBlockQuotes($text) { + $text = preg_replace_callback('/ + ( # Wrap whole match in $1 + (?> + ^[ ]*>[ ]? # ">" at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + /xm', + array($this, '_doBlockQuotes_callback'), $text); + + return $text; + } + + /** + * Blockquote parsing callback + * @param array $matches + * @return string + */ + protected function _doBlockQuotes_callback($matches) { + $bq = $matches[1]; + // trim one level of quoting - trim whitespace-only lines + $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); + $bq = $this->runBlockGamut($bq); // recurse + + $bq = preg_replace('/^/m', " ", $bq); + // These leading spaces cause problem with
     content,
    +		// so we need to fix that:
    +		$bq = preg_replace_callback('{(\s*
    .+?
    )}sx', + array($this, '_doBlockQuotes_callback2'), $bq); + + return "\n" . $this->hashBlock("
    \n$bq\n
    ") . "\n\n"; + } + + /** + * Blockquote parsing callback + * @param array $matches + * @return string + */ + protected function _doBlockQuotes_callback2($matches) { + $pre = $matches[1]; + $pre = preg_replace('/^ /m', '', $pre); + return $pre; + } + + /** + * Parse paragraphs + * + * @param string $text String to process in paragraphs + * @param boolean $wrap_in_p Whether paragraphs should be wrapped in

    tags + * @return string + */ + protected function formParagraphs($text, $wrap_in_p = true) { + // Strip leading and trailing lines: + $text = preg_replace('/\A\n+|\n+\z/', '', $text); + + $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); + + // Wrap

    tags and unhashify HTML blocks + foreach ($grafs as $key => $value) { + if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { + // Is a paragraph. + $value = $this->runSpanGamut($value); + if ($wrap_in_p) { + $value = preg_replace('/^([ ]*)/', "

    ", $value); + $value .= "

    "; + } + $grafs[$key] = $this->unhash($value); + } else { + // Is a block. + // Modify elements of @grafs in-place... + $graf = $value; + $block = $this->html_hashes[$graf]; + $graf = $block; +// if (preg_match('{ +// \A +// ( # $1 =
    tag +//
    ]* +// \b +// markdown\s*=\s* ([\'"]) # $2 = attr quote char +// 1 +// \2 +// [^>]* +// > +// ) +// ( # $3 = contents +// .* +// ) +// (
    ) # $4 = closing tag +// \z +// }xs', $block, $matches)) +// { +// list(, $div_open, , $div_content, $div_close) = $matches; +// +// // We can't call Markdown(), because that resets the hash; +// // that initialization code should be pulled into its own sub, though. +// $div_content = $this->hashHTMLBlocks($div_content); +// +// // Run document gamut methods on the content. +// foreach ($this->document_gamut as $method => $priority) { +// $div_content = $this->$method($div_content); +// } +// +// $div_open = preg_replace( +// '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); +// +// $graf = $div_open . "\n" . $div_content . "\n" . $div_close; +// } + $grafs[$key] = $graf; + } + } + + return implode("\n\n", $grafs); + } + + /** + * Encode text for a double-quoted HTML attribute. This function + * is *not* suitable for attributes enclosed in single quotes. + * @param string $text + * @return string + */ + protected function encodeAttribute($text) { + $text = $this->encodeAmpsAndAngles($text); + $text = str_replace('"', '"', $text); + return $text; + } + + /** + * Encode text for a double-quoted HTML attribute containing a URL, + * applying the URL filter if set. Also generates the textual + * representation for the URL (removing mailto: or tel:) storing it in $text. + * This function is *not* suitable for attributes enclosed in single quotes. + * + * @param string $url + * @param string $text Passed by reference + * @return string URL + */ + protected function encodeURLAttribute($url, &$text = null) { + if (is_callable($this->url_filter_func)) { + $url = call_user_func($this->url_filter_func, $url); + } + + if (preg_match('{^mailto:}i', $url)) { + $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); + } else if (preg_match('{^tel:}i', $url)) { + $url = $this->encodeAttribute($url); + $text = substr($url, 4); + } else { + $url = $this->encodeAttribute($url); + $text = $url; + } + + return $url; + } + + /** + * Smart processing for ampersands and angle brackets that need to + * be encoded. Valid character entities are left alone unless the + * no-entities mode is set. + * @param string $text + * @return string + */ + protected function encodeAmpsAndAngles($text) { + if ($this->no_entities) { + $text = str_replace('&', '&', $text); + } else { + // Ampersand-encoding based entirely on Nat Irons's Amputator + // MT plugin: + $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', + '&', $text); + } + // Encode remaining <'s + $text = str_replace('<', '<', $text); + + return $text; + } + + /** + * Parse Markdown automatic links to anchor HTML tags + * @param string $text + * @return string + */ + protected function doAutoLinks($text) { + $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', + array($this, '_doAutoLinks_url_callback'), $text); + + // Email addresses: + $text = preg_replace_callback('{ + < + (?:mailto:)? + ( + (?: + [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ + | + ".*?" + ) + \@ + (?: + [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ + | + \[[\d.a-fA-F:]+\] # IPv4 & IPv6 + ) + ) + > + }xi', + array($this, '_doAutoLinks_email_callback'), $text); + + return $text; + } + + /** + * Parse URL callback + * @param array $matches + * @return string + */ + protected function _doAutoLinks_url_callback($matches) { + $url = $this->encodeURLAttribute($matches[1], $text); + $link = "$text"; + return $this->hashPart($link); + } + + /** + * Parse email address callback + * @param array $matches + * @return string + */ + protected function _doAutoLinks_email_callback($matches) { + $addr = $matches[1]; + $url = $this->encodeURLAttribute("mailto:$addr", $text); + $link = "$text"; + return $this->hashPart($link); + } + + /** + * Input: some text to obfuscate, e.g. "mailto:foo@example.com" + * + * Output: the same text but with most characters encoded as either a + * decimal or hex entity, in the hopes of foiling most address + * harvesting spam bots. E.g.: + * + * mailto:foo + * @example.co + * m + * + * Note: the additional output $tail is assigned the same value as the + * ouput, minus the number of characters specified by $head_length. + * + * Based by a filter by Matthew Wickline, posted to BBEdit-Talk. + * With some optimizations by Milian Wolff. Forced encoding of HTML + * attribute special characters by Allan Odgaard. + * + * @param string $text + * @param string $tail Passed by reference + * @param integer $head_length + * @return string + */ + protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { + if ($text == "") { + return $tail = ""; + } + + $chars = preg_split('/(? $char) { + $ord = ord($char); + // Ignore non-ascii chars. + if ($ord < 128) { + $r = ($seed * (1 + $key)) % 100; // Pseudo-random function. + // roughly 10% raw, 45% hex, 45% dec + // '@' *must* be encoded. I insist. + // '"' and '>' have to be encoded inside the attribute + if ($r > 90 && strpos('@"&>', $char) === false) { + /* do nothing */ + } else if ($r < 45) { + $chars[$key] = '&#x'.dechex($ord).';'; + } else { + $chars[$key] = '&#'.$ord.';'; + } + } + } + + $text = implode('', $chars); + $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; + + return $text; + } + + /** + * Take the string $str and parse it into tokens, hashing embeded HTML, + * escaped characters and handling code spans. + * @param string $str + * @return string + */ + protected function parseSpan($str) { + $output = ''; + + $span_re = '{ + ( + \\\\'.$this->escape_chars_re.' + | + (?no_markup ? '' : ' + | + # comment + | + <\?.*?\?> | <%.*?%> # processing instruction + | + <[!$]?[-a-zA-Z0-9:_]+ # regular tags + (?> + \s + (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* + )? + > + | + <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag + | + # closing tag + ').' + ) + }xs'; + + while (1) { + // Each loop iteration seach for either the next tag, the next + // openning code span marker, or the next escaped character. + // Each token is then passed to handleSpanToken. + $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); + + // Create token from text preceding tag. + if ($parts[0] != "") { + $output .= $parts[0]; + } + + // Check if we reach the end. + if (isset($parts[1])) { + $output .= $this->handleSpanToken($parts[1], $parts[2]); + $str = $parts[2]; + } else { + break; + } + } + + return $output; + } + + /** + * Handle $token provided by parseSpan by determining its nature and + * returning the corresponding value that should replace it. + * @param string $token + * @param string $str Passed by reference + * @return string + */ + protected function handleSpanToken($token, &$str) { + switch ($token[0]) { + case "\\": + return $this->hashPart("&#". ord($token[1]). ";"); + case "`": + // Search for end marker in remaining text. + if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', + $str, $matches)) + { + $str = $matches[2]; + $codespan = $this->makeCodeSpan($matches[1]); + return $this->hashPart($codespan); + } + return $token; // Return as text since no ending marker found. + default: + return $this->hashPart($token); + } + } + + /** + * Remove one level of line-leading tabs or spaces + * @param string $text + * @return string + */ + protected function outdent($text) { + return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text); + } + + + /** + * String length function for detab. `_initDetab` will create a function to + * handle UTF-8 if the default function does not exist. + * can be a string or function + */ + protected $utf8_strlen = 'mb_strlen'; + + /** + * Replace tabs with the appropriate amount of spaces. + * + * For each line we separate the line in blocks delemited by tab characters. + * Then we reconstruct every line by adding the appropriate number of space + * between each blocks. + * + * @param string $text + * @return string + */ + protected function detab($text) { + $text = preg_replace_callback('/^.*\t.*$/m', + array($this, '_detab_callback'), $text); + + return $text; + } + + /** + * Replace tabs callback + * @param string $matches + * @return string + */ + protected function _detab_callback($matches) { + $line = $matches[0]; + $strlen = $this->utf8_strlen; // strlen function for UTF-8. + + // Split in blocks. + $blocks = explode("\t", $line); + // Add each blocks to the line. + $line = $blocks[0]; + unset($blocks[0]); // Do not add first block twice. + foreach ($blocks as $block) { + // Calculate amount of space, insert spaces, insert block. + $amount = $this->tab_width - + $strlen($line, 'UTF-8') % $this->tab_width; + $line .= str_repeat(" ", $amount) . $block; + } + return $line; + } + + /** + * Check for the availability of the function in the `utf8_strlen` property + * (initially `mb_strlen`). If the function is not available, create a + * function that will loosely count the number of UTF-8 characters with a + * regular expression. + * @return void + */ + protected function _initDetab() { + + if (function_exists($this->utf8_strlen)) { + return; + } + + $this->utf8_strlen = function ($text) { return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m); }; + } + + /** + * Swap back in all the tags hashed by _HashHTMLBlocks. + * @param string $text + * @return string + */ + protected function unhash($text) { + return preg_replace_callback('/(.)\x1A[0-9]+\1/', + array($this, '_unhash_callback'), $text); + } + + /** + * Unhashing callback + * @param array $matches + * @return string + */ + protected function _unhash_callback($matches) { + return $this->html_hashes[$matches[0]]; + } +} diff --git a/mdlibs/MarkdownExtra.inc.php b/mdlibs/MarkdownExtra.inc.php new file mode 100644 index 0000000..d09bd7a --- /dev/null +++ b/mdlibs/MarkdownExtra.inc.php @@ -0,0 +1,11 @@ + + * @copyright 2004-2019 Michel Fortin + * @copyright (Original Markdown) 2004-2006 John Gruber + */ + +namespace mdlibs; + +/** + * Markdown Extra Parser Class + */ +class MarkdownExtra extends \mdlibs\Markdown { + /** + * Configuration variables + */ + /** + * Prefix for footnote ids. + */ + public $fn_id_prefix = ""; + + /** + * Optional title attribute for footnote links. + */ + public $fn_link_title = ""; + + /** + * Optional class attribute for footnote links and backlinks. + */ + public $fn_link_class = "footnote-ref"; + public $fn_backlink_class = "footnote-backref"; + + /** + * Content to be displayed within footnote backlinks. The default is '↩'; + * the U+FE0E on the end is a Unicode variant selector used to prevent iOS + * from displaying the arrow character as an emoji. + * Optionally use '^^' and '%%' to refer to the footnote number and + * reference number respectively. {@see parseFootnotePlaceholders()} + */ + public $fn_backlink_html = '↩︎'; + + /** + * Optional title and aria-label attributes for footnote backlinks for + * added accessibility (to ensure backlink uniqueness). + * Use '^^' and '%%' to refer to the footnote number and reference number + * respectively. {@see parseFootnotePlaceholders()} + */ + public $fn_backlink_title = ""; + public $fn_backlink_label = ""; + + /** + * Class name for table cell alignment (%% replaced left/center/right) + * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' + * If empty, the align attribute is used instead of a class name. + */ + public $table_align_class_tmpl = ''; + + /** + * Optional class prefix for fenced code block. + */ + public $code_class_prefix = ""; + + /** + * Class attribute for code blocks goes on the `code` tag; + * setting this to true will put attributes on the `pre` tag instead. + */ + public $code_attr_on_pre = false; + + /** + * Predefined abbreviations. + */ + public $predef_abbr = array(); + + /** + * Only convert atx-style headers if there's a space between the header and # + */ + public $hashtag_protection = false; + + /** + * Determines whether footnotes should be appended to the end of the document. + * If true, footnote html can be retrieved from $this->footnotes_assembled. + */ + public $omit_footnotes = false; + + + /** + * After parsing, the HTML for the list of footnotes appears here. + * This is available only if $omit_footnotes == true. + * + * Note: when placing the content of `footnotes_assembled` on the page, + * consider adding the attribute `role="doc-endnotes"` to the `div` or + * `section` that will enclose the list of footnotes so they are + * reachable to accessibility tools the same way they would be with the + * default HTML output. + */ + public $footnotes_assembled = null; + + /** + * Parser implementation + */ + + /** + * Constructor function. Initialize the parser object. + * @return void + */ + public function __construct() { + // Add extra escapable characters before parent constructor + // initialize the table. + $this->escape_chars .= ':|'; + + // Insert extra document, block, and span transformations. + // Parent constructor will do the sorting. + $this->document_gamut += array( + "doFencedCodeBlocks" => 5, + "stripFootnotes" => 15, + "stripAbbreviations" => 25, + "appendFootnotes" => 50, + ); + $this->block_gamut += array( + "doFencedCodeBlocks" => 5, + "doTables" => 15, + "doDefLists" => 45, + ); + $this->span_gamut += array( + "doFootnotes" => 5, + "doAbbreviations" => 70, + ); + + $this->enhanced_ordered_list = true; + parent::__construct(); + } + + + /** + * Extra variables used during extra transformations. + */ + protected $footnotes = array(); + protected $footnotes_ordered = array(); + protected $footnotes_ref_count = array(); + protected $footnotes_numbers = array(); + protected $abbr_desciptions = array(); + protected $abbr_word_re = ''; + + /** + * Give the current footnote number. + */ + protected $footnote_counter = 1; + + /** + * Ref attribute for links + */ + protected $ref_attr = array(); + + /** + * Setting up Extra-specific variables. + */ + protected function setup() { + parent::setup(); + + $this->footnotes = array(); + $this->footnotes_ordered = array(); + $this->footnotes_ref_count = array(); + $this->footnotes_numbers = array(); + $this->abbr_desciptions = array(); + $this->abbr_word_re = ''; + $this->footnote_counter = 1; + $this->footnotes_assembled = null; + + foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { + if ($this->abbr_word_re) + $this->abbr_word_re .= '|'; + $this->abbr_word_re .= preg_quote($abbr_word); + $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); + } + } + + /** + * Clearing Extra-specific variables. + */ + protected function teardown() { + $this->footnotes = array(); + $this->footnotes_ordered = array(); + $this->footnotes_ref_count = array(); + $this->footnotes_numbers = array(); + $this->abbr_desciptions = array(); + $this->abbr_word_re = ''; + + if ( ! $this->omit_footnotes ) + $this->footnotes_assembled = null; + + parent::teardown(); + } + + + /** + * Extra attribute parser + */ + /** + * Expression to use to catch attributes (includes the braces) + */ + protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}'; + + /** + * Expression to use when parsing in a context when no capture is desired + */ + protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}'; + + /** + * Parse attributes caught by the $this->id_class_attr_catch_re expression + * and return the HTML-formatted list of attributes. + * + * Currently supported attributes are .class and #id. + * + * In addition, this method also supports supplying a default Id value, + * which will be used to populate the id attribute in case it was not + * overridden. + * @param string $tag_name + * @param string $attr + * @param mixed $defaultIdValue + * @param array $classes + * @return string + */ + protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) { + if (empty($attr) && !$defaultIdValue && empty($classes)) { + return ""; + } + + // Split on components + preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches); + $elements = $matches[0]; + + // Handle classes and IDs (only first ID taken into account) + $attributes = array(); + $id = false; + foreach ($elements as $element) { + if ($element[0] === '.') { + $classes[] = substr($element, 1); + } else if ($element[0] === '#') { + if ($id === false) $id = substr($element, 1); + } else if (strpos($element, '=') > 0) { + $parts = explode('=', $element, 2); + $attributes[] = $parts[0] . '="' . $parts[1] . '"'; + } + } + + if ($id === false || $id === '') { + $id = $defaultIdValue; + } + + // Compose attributes as string + $attr_str = ""; + if (!empty($id)) { + $attr_str .= ' id="'.$this->encodeAttribute($id) .'"'; + } + if (!empty($classes)) { + $attr_str .= ' class="'. implode(" ", $classes) . '"'; + } + if (!$this->no_markup && !empty($attributes)) { + $attr_str .= ' '.implode(" ", $attributes); + } + return $attr_str; + } + + /** + * Strips link definitions from text, stores the URLs and titles in + * hash references. + * @param string $text + * @return string + */ + protected function stripLinkDefinitions($text) { + $less_than_tab = $this->tab_width - 1; + + // Link defs are in the form: ^[id]: url "optional title" + $text = preg_replace_callback('{ + ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + (?: + <(.+?)> # url = $2 + | + (\S+?) # url = $3 + ) + [ ]* + \n? # maybe one newline + [ ]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.*?) # title = $4 + [")] + [ ]* + )? # title is optional + (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr + (?:\n+|\Z) + }xm', + array($this, '_stripLinkDefinitions_callback'), + $text); + return $text; + } + + /** + * Strip link definition callback + * @param array $matches + * @return string + */ + protected function _stripLinkDefinitions_callback($matches) { + $link_id = strtolower($matches[1]); + $url = $matches[2] == '' ? $matches[3] : $matches[2]; + $this->urls[$link_id] = $url; + $this->titles[$link_id] =& $matches[4]; + $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); + return ''; // String that will replace the block + } + + + /** + * HTML block parser + */ + /** + * Tags that are always treated as block tags + */ + protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure|details|summary'; + + /** + * Tags treated as block tags only if the opening tag is alone on its line + */ + protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; + + /** + * Tags where markdown="1" default to span mode: + */ + protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; + + /** + * Tags which must not have their contents modified, no matter where + * they appear + */ + protected $clean_tags_re = 'script|style|math|svg'; + + /** + * Tags that do not need to be closed. + */ + protected $auto_close_tags_re = 'hr|img|param|source|track'; + + /** + * Hashify HTML Blocks and "clean tags". + * + * We only want to do this for block-level HTML tags, such as headers, + * lists, and tables. That's because we still want to wrap

    s around + * "paragraphs" that are wrapped in non-block-level tags, such as anchors, + * phrase emphasis, and spans. The list of tags we're looking for is + * hard-coded. + * + * This works by calling _HashHTMLBlocks_InMarkdown, which then calls + * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" + * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back + * _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. + * These two functions are calling each other. It's recursive! + * @param string $text + * @return string + */ + protected function hashHTMLBlocks($text) { + if ($this->no_markup) { + return $text; + } + + // Call the HTML-in-Markdown hasher. + list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); + + return $text; + } + + /** + * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. + * + * * $indent is the number of space to be ignored when checking for code + * blocks. This is important because if we don't take the indent into + * account, something like this (which looks right) won't work as expected: + * + *

    + *
    + * Hello World. <-- Is this a Markdown code block or text? + *
    <-- Is this a Markdown code block or a real tag? + *
    + * + * If you don't like this, just don't indent the tag on which + * you apply the markdown="1" attribute. + * + * * If $enclosing_tag_re is not empty, stops at the first unmatched closing + * tag with that name. Nested tags supported. + * + * * If $span is true, text inside must treated as span. So any double + * newline will be replaced by a single newline so that it does not create + * paragraphs. + * + * Returns an array of that form: ( processed text , remaining text ) + * + * @param string $text + * @param integer $indent + * @param string $enclosing_tag_re + * @param boolean $span + * @return array + */ + protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, + $enclosing_tag_re = '', $span = false) + { + + if ($text === '') return array('', ''); + + // Regex to check for the presense of newlines around a block tag. + $newline_before_re = '/(?:^\n?|\n\n)*$/'; + $newline_after_re = + '{ + ^ # Start of text following the tag. + (?>[ ]*)? # Optional comment. + [ ]*\n # Must be followed by newline. + }xs'; + + // Regex to match any tag. + $block_tag_re = + '{ + ( # $2: Capture whole tag. + # Tag name. + ' . $this->block_tags_re . ' | + ' . $this->context_block_tags_re . ' | + ' . $this->clean_tags_re . ' | + (?!\s)'.$enclosing_tag_re . ' + ) + (?: + (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. + (?> + ".*?" | # Double quotes (can contain `>`) + \'.*?\' | # Single quotes (can contain `>`) + .+? # Anything but quotes and `>`. + )*? + )? + > # End of tag. + | + # HTML Comment + | + <\?.*?\?> | <%.*?%> # Processing instruction + | + # CData Block + ' . ( !$span ? ' # If not in span. + | + # Indented code block + (?: ^[ ]*\n | ^ | \n[ ]*\n ) + [ ]{' . ($indent + 4) . '}[^\n]* \n + (?> + (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n + )* + | + # Fenced code block marker + (?<= ^ | \n ) + [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,}) + [ ]* + (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name + [ ]* + (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes + [ ]* + (?= \n ) + ' : '' ) . ' # End (if not is span). + | + # Code span marker + # Note, this regex needs to go after backtick fenced + # code blocks but it should also be kept outside of the + # "if not in span" condition adding backticks to the parser + `+ + ) + }xs'; + + + $depth = 0; // Current depth inside the tag tree. + $parsed = ""; // Parsed text that will be returned. + + // Loop through every tag until we find the closing tag of the parent + // or loop until reaching the end of text if no parent tag specified. + do { + // Split the text using the first $tag_match pattern found. + // Text before pattern will be first in the array, text after + // pattern will be at the end, and between will be any catches made + // by the pattern. + $parts = preg_split($block_tag_re, $text, 2, + PREG_SPLIT_DELIM_CAPTURE); + + // If in Markdown span mode, add a empty-string span-level hash + // after each newline to prevent triggering any block element. + if ($span) { + $void = $this->hashPart("", ':'); + $newline = "\n$void"; + $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; + } + + $parsed .= $parts[0]; // Text before current tag. + + // If end of $text has been reached. Stop loop. + if (count($parts) < 3) { + $text = ""; + break; + } + + $tag = $parts[1]; // Tag to handle. + $text = $parts[2]; // Remaining text after current tag. + + // Check for: Fenced code block marker. + // Note: need to recheck the whole tag to disambiguate backtick + // fences from code spans + if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) { + // Fenced code block marker: find matching end marker. + $fence_indent = strlen($capture[1]); // use captured indent in re + $fence_re = $capture[2]; // use captured fence in re + if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text, + $matches)) + { + // End marker found: pass text unchanged until marker. + $parsed .= $tag . $matches[0]; + $text = substr($text, strlen($matches[0])); + } + else { + // No end marker: just skip it. + $parsed .= $tag; + } + } + // Check for: Indented code block. + else if ($tag[0] === "\n" || $tag[0] === " ") { + // Indented code block: pass it unchanged, will be handled + // later. + $parsed .= $tag; + } + // Check for: Code span marker + // Note: need to check this after backtick fenced code blocks + else if ($tag[0] === "`") { + // Find corresponding end marker. + $tag_re = preg_quote($tag); + if (preg_match('{^(?>.+?|\n(?!\n))*?(?block_tags_re . ')\b}', $tag) || + ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) && + preg_match($newline_before_re, $parsed) && + preg_match($newline_after_re, $text) ) + ) + { + // Need to parse tag and following text using the HTML parser. + list($block_text, $text) = + $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); + + // Make sure it stays outside of any paragraph by adding newlines. + $parsed .= "\n\n$block_text\n\n"; + } + // Check for: Clean tag (like script, math) + // HTML Comments, processing instructions. + else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) || + $tag[1] === '!' || $tag[1] === '?') + { + // Need to parse tag and following text using the HTML parser. + // (don't check for markdown attribute) + list($block_text, $text) = + $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); + + $parsed .= $block_text; + } + // Check for: Tag with same name as enclosing tag. + else if ($enclosing_tag_re !== '' && + // Same name as enclosing tag. + preg_match('{^= 0); + + return array($parsed, $text); + } + + /** + * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. + * + * * Calls $hash_method to convert any blocks. + * * Stops when the first opening tag closes. + * * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. + * (it is not inside clean tags) + * + * Returns an array of that form: ( processed text , remaining text ) + * @param string $text + * @param string $hash_method + * @param bool $md_attr Handle `markdown="1"` attribute + * @return array + */ + protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { + if ($text === '') return array('', ''); + + // Regex to match `markdown` attribute inside of a tag. + $markdown_attr_re = ' + { + \s* # Eat whitespace before the `markdown` attribute + markdown + \s*=\s* + (?> + (["\']) # $1: quote delimiter + (.*?) # $2: attribute value + \1 # matching delimiter + | + ([^\s>]*) # $3: unquoted attribute value + ) + () # $4: make $3 always defined (avoid warnings) + }xs'; + + // Regex to match any tag. + $tag_re = '{ + ( # $2: Capture whole tag. + + ".*?" | # Double quotes (can contain `>`) + \'.*?\' | # Single quotes (can contain `>`) + .+? # Anything but quotes and `>`. + )*? + )? + > # End of tag. + | + # HTML Comment + | + <\?.*?\?> | <%.*?%> # Processing instruction + | + # CData Block + ) + }xs'; + + $original_text = $text; // Save original text in case of faliure. + + $depth = 0; // Current depth inside the tag tree. + $block_text = ""; // Temporary text holder for current text. + $parsed = ""; // Parsed text that will be returned. + $base_tag_name_re = ''; + + // Get the name of the starting tag. + // (This pattern makes $base_tag_name_re safe without quoting.) + if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) + $base_tag_name_re = $matches[1]; + + // Loop through every tag until we find the corresponding closing tag. + do { + // Split the text using the first $tag_match pattern found. + // Text before pattern will be first in the array, text after + // pattern will be at the end, and between will be any catches made + // by the pattern. + $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); + + if (count($parts) < 3) { + // End of $text reached with unbalenced tag(s). + // In that case, we return original text unchanged and pass the + // first character as filtered to prevent an infinite loop in the + // parent function. + return array($original_text[0], substr($original_text, 1)); + } + + $block_text .= $parts[0]; // Text before current tag. + $tag = $parts[1]; // Tag to handle. + $text = $parts[2]; // Remaining text after current tag. + + // Check for: Auto-close tag (like
    ) + // Comments and Processing Instructions. + if (preg_match('{^auto_close_tags_re . ')\b}', $tag) || + $tag[1] === '!' || $tag[1] === '?') + { + // Just add the tag to the block as if it was text. + $block_text .= $tag; + } + else { + // Increase/decrease nested tag count. Only do so if + // the tag's name match base tag's. + if (preg_match('{^contain_span_tags_re . ')\b}', $tag)); + + // Calculate indent before tag. + if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { + $strlen = $this->utf8_strlen; + $indent = $strlen($matches[1], 'UTF-8'); + } else { + $indent = 0; + } + + // End preceding block with this tag. + $block_text .= $tag; + $parsed .= $this->$hash_method($block_text); + + // Get enclosing tag name for the ParseMarkdown function. + // (This pattern makes $tag_name_re safe without quoting.) + preg_match('/^<([\w:$]*)\b/', $tag, $matches); + $tag_name_re = $matches[1]; + + // Parse the content using the HTML-in-Markdown parser. + list ($block_text, $text) + = $this->_hashHTMLBlocks_inMarkdown($text, $indent, + $tag_name_re, $span_mode); + + // Outdent markdown text. + if ($indent > 0) { + $block_text = preg_replace("/^[ ]{1,$indent}/m", "", + $block_text); + } + + // Append tag content to parsed text. + if (!$span_mode) { + $parsed .= "\n\n$block_text\n\n"; + } else { + $parsed .= (string) $block_text; + } + + // Start over with a new block. + $block_text = ""; + } + else $block_text .= $tag; + } + + } while ($depth > 0); + + // Hash last block text that wasn't processed inside the loop. + $parsed .= $this->$hash_method($block_text); + + return array($parsed, $text); + } + + /** + * Called whenever a tag must be hashed when a function inserts a "clean" tag + * in $text, it passes through this function and is automaticaly escaped, + * blocking invalid nested overlap. + * @param string $text + * @return string + */ + protected function hashClean($text) { + return $this->hashPart($text, 'C'); + } + + /** + * Turn Markdown link shortcuts into XHTML tags. + * @param string $text + * @return string + */ + protected function doAnchors($text) { + if ($this->in_anchor) { + return $text; + } + $this->in_anchor = true; + + // First, handle reference-style links: [link text] [id] + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + (' . $this->nested_brackets_re . ') # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }xs', + array($this, '_doAnchors_reference_callback'), $text); + + // Next, inline-style links: [link text](url "optional title") + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + (' . $this->nested_brackets_re . ') # link text = $2 + \] + \( # literal paren + [ \n]* + (?: + <(.+?)> # href = $3 + | + (' . $this->nested_url_parenthesis_re . ') # href = $4 + ) + [ \n]* + ( # $5 + ([\'"]) # quote char = $6 + (.*?) # Title = $7 + \6 # matching quote + [ \n]* # ignore any spaces/tabs between closing quote and ) + )? # title is optional + \) + (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes + ) + }xs', + array($this, '_doAnchors_inline_callback'), $text); + + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link text][1] + // or [link text](/foo) + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + \[ + ([^\[\]]+) # link text = $2; can\'t contain [ or ] + \] + ) + }xs', + array($this, '_doAnchors_reference_callback'), $text); + + $this->in_anchor = false; + return $text; + } + + /** + * Callback for reference anchors + * @param array $matches + * @return string + */ + protected function _doAnchors_reference_callback($matches) { + $whole_match = $matches[1]; + $link_text = $matches[2]; + $link_id =& $matches[3]; + + if ($link_id == "") { + // for shortcut links like [this][] or [this]. + $link_id = $link_text; + } + + // lower-case and turn embedded newlines into spaces + $link_id = strtolower($link_id); + $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); + + if (isset($this->urls[$link_id])) { + $url = $this->urls[$link_id]; + $url = $this->encodeURLAttribute($url); + + $result = "titles[$link_id] ) ) { + $title = $this->titles[$link_id]; + $title = $this->encodeAttribute($title); + $result .= " title=\"$title\""; + } + if (isset($this->ref_attr[$link_id])) + $result .= $this->ref_attr[$link_id]; + + $link_text = $this->runSpanGamut($link_text); + $result .= ">$link_text"; + $result = $this->hashPart($result); + } + else { + $result = $whole_match; + } + return $result; + } + + /** + * Callback for inline anchors + * @param array $matches + * @return string + */ + protected function _doAnchors_inline_callback($matches) { + $link_text = $this->runSpanGamut($matches[2]); + $url = $matches[3] === '' ? $matches[4] : $matches[3]; + $title_quote =& $matches[6]; + $title =& $matches[7]; + $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); + + // if the URL was of the form it got caught by the HTML + // tag parser and hashed. Need to reverse the process before using the URL. + $unhashed = $this->unhash($url); + if ($unhashed !== $url) + $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); + + $url = $this->encodeURLAttribute($url); + + $result = "encodeAttribute($title); + $result .= " title=\"$title\""; + } + $result .= $attr; + + $link_text = $this->runSpanGamut($link_text); + $result .= ">$link_text"; + + return $this->hashPart($result); + } + + /** + * Turn Markdown image shortcuts into tags. + * @param string $text + * @return string + */ + protected function doImages($text) { + // First, handle reference-style labeled images: ![alt text][id] + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + !\[ + (' . $this->nested_brackets_re . ') # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }xs', + array($this, '_doImages_reference_callback'), $text); + + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + $text = preg_replace_callback('{ + ( # wrap whole match in $1 + !\[ + (' . $this->nested_brackets_re . ') # alt text = $2 + \] + \s? # One optional whitespace character + \( # literal paren + [ \n]* + (?: + <(\S*)> # src url = $3 + | + (' . $this->nested_url_parenthesis_re . ') # src url = $4 + ) + [ \n]* + ( # $5 + ([\'"]) # quote char = $6 + (.*?) # title = $7 + \6 # matching quote + [ \n]* + )? # title is optional + \) + (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes + ) + }xs', + array($this, '_doImages_inline_callback'), $text); + + return $text; + } + + /** + * Callback for referenced images + * @param array $matches + * @return string + */ + protected function _doImages_reference_callback($matches) { + $whole_match = $matches[1]; + $alt_text = $matches[2]; + $link_id = strtolower($matches[3]); + + if ($link_id === "") { + $link_id = strtolower($alt_text); // for shortcut links like ![this][]. + } + + $alt_text = $this->encodeAttribute($alt_text); + if (isset($this->urls[$link_id])) { + $url = $this->encodeURLAttribute($this->urls[$link_id]); + $result = "\"$alt_text\"";titles[$link_id])) { + $title = $this->titles[$link_id]; + $title = $this->encodeAttribute($title); + $result .= " title=\"$title\""; + } + if (isset($this->ref_attr[$link_id])) { + $result .= $this->ref_attr[$link_id]; + } + $result .= $this->empty_element_suffix; + $result = $this->hashPart($result); + } + else { + // If there's no such link ID, leave intact: + $result = $whole_match; + } + + return $result; + } + + /** + * Callback for inline images + * @param array $matches + * @return string + */ + protected function _doImages_inline_callback($matches) { + $alt_text = $matches[2]; + $url = $matches[3] === '' ? $matches[4] : $matches[3]; + $title_quote =& $matches[6]; + $title =& $matches[7]; + $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); + + $alt_text = $this->encodeAttribute($alt_text); + $url = $this->encodeURLAttribute($url); + $result = "\"$alt_text\"";encodeAttribute($title); + $result .= " title=\"$title\""; // $title already quoted + } + $result .= $attr; + $result .= $this->empty_element_suffix; + + return $this->hashPart($result); + } + + /** + * Process markdown headers. Redefined to add ID and class attribute support. + * @param string $text + * @return string + */ + protected function doHeaders($text) { + // Setext-style headers: + // Header 1 {#header1} + // ======== + // + // Header 2 {#header2 .class1 .class2} + // -------- + // + $text = preg_replace_callback( + '{ + (^.+?) # $1: Header text + (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes + [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer + }mx', + array($this, '_doHeaders_callback_setext'), $text); + + // atx-style headers: + // # Header 1 {#header1} + // ## Header 2 {#header2} + // ## Header 2 with closing hashes ## {#header3.class1.class2} + // ... + // ###### Header 6 {.class2} + // + $text = preg_replace_callback('{ + ^(\#{1,6}) # $1 = string of #\'s + [ ]'.($this->hashtag_protection ? '+' : '*').' + (.+?) # $2 = Header text + [ ]* + \#* # optional closing #\'s (not counted) + (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes + [ ]* + \n+ + }xm', + array($this, '_doHeaders_callback_atx'), $text); + + return $text; + } + + /** + * Callback for setext headers + * @param array $matches + * @return string + */ + protected function _doHeaders_callback_setext($matches) { + if ($matches[3] === '-' && preg_match('{^- }', $matches[1])) { + return $matches[0]; + } + + $level = $matches[3][0] === '=' ? 1 : 2; + + $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null; + + $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId); + $block = "" . $this->runSpanGamut($matches[1]) . ""; + return "\n" . $this->hashBlock($block) . "\n\n"; + } + + /** + * Callback for atx headers + * @param array $matches + * @return string + */ + protected function _doHeaders_callback_atx($matches) { + $level = strlen($matches[1]); + + $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null; + $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId); + $block = "" . $this->runSpanGamut($matches[2]) . ""; + return "\n" . $this->hashBlock($block) . "\n\n"; + } + + /** + * Form HTML tables. + * @param string $text + * @return string + */ + protected function doTables($text) { + $less_than_tab = $this->tab_width - 1; + // Find tables with leading pipe. + // + // | Header 1 | Header 2 + // | -------- | -------- + // | Cell 1 | Cell 2 + // | Cell 3 | Cell 4 + $text = preg_replace_callback(' + { + ^ # Start of a line + [ ]{0,' . $less_than_tab . '} # Allowed whitespace. + [|] # Optional leading pipe (present) + (.+) \n # $1: Header row (at least one pipe) + + [ ]{0,' . $less_than_tab . '} # Allowed whitespace. + [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline + + ( # $3: Cells + (?> + [ ]* # Allowed whitespace. + [|] .* \n # Row content. + )* + ) + (?=\n|\Z) # Stop at final double newline. + }xm', + array($this, '_doTable_leadingPipe_callback'), $text); + + // Find tables without leading pipe. + // + // Header 1 | Header 2 + // -------- | -------- + // Cell 1 | Cell 2 + // Cell 3 | Cell 4 + $text = preg_replace_callback(' + { + ^ # Start of a line + [ ]{0,' . $less_than_tab . '} # Allowed whitespace. + (\S.*[|].*) \n # $1: Header row (at least one pipe) + + [ ]{0,' . $less_than_tab . '} # Allowed whitespace. + ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline + + ( # $3: Cells + (?> + .* [|] .* \n # Row content + )* + ) + (?=\n|\Z) # Stop at final double newline. + }xm', + array($this, '_DoTable_callback'), $text); + + return $text; + } + + /** + * Callback for removing the leading pipe for each row + * @param array $matches + * @return string + */ + protected function _doTable_leadingPipe_callback($matches) { + $head = $matches[1]; + $underline = $matches[2]; + $content = $matches[3]; + + $content = preg_replace('/^ *[|]/m', '', $content); + + return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); + } + + /** + * Make the align attribute in a table + * @param string $alignname + * @return string + */ + protected function _doTable_makeAlignAttr($alignname) { + if (empty($this->table_align_class_tmpl)) { + return " align=\"$alignname\""; + } + + $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); + return " class=\"$classname\""; + } + + /** + * Calback for processing tables + * @param array $matches + * @return string + */ + protected function _doTable_callback($matches) { + $head = $matches[1]; + $underline = $matches[2]; + $content = $matches[3]; + $attr = []; + + // Remove any tailing pipes for each line. + $head = preg_replace('/[|] *$/m', '', $head); + $underline = preg_replace('/[|] *$/m', '', $underline); + $content = preg_replace('/[|] *$/m', '', $content); + + // Reading alignement from header underline. + $separators = preg_split('/ *[|] */', $underline); + foreach ($separators as $n => $s) { + if (preg_match('/^ *-+: *$/', $s)) + $attr[$n] = $this->_doTable_makeAlignAttr('right'); + else if (preg_match('/^ *:-+: *$/', $s)) + $attr[$n] = $this->_doTable_makeAlignAttr('center'); + else if (preg_match('/^ *:-+ *$/', $s)) + $attr[$n] = $this->_doTable_makeAlignAttr('left'); + else + $attr[$n] = ''; + } + + // Parsing span elements, including code spans, character escapes, + // and inline HTML tags, so that pipes inside those gets ignored. + $head = $this->parseSpan($head); + $headers = preg_split('/ *[|] */', $head); + $col_count = count($headers); + $attr = array_pad($attr, $col_count, ''); + + // Write column headers. + $text = "\n"; + $text .= "\n"; + $text .= "\n"; + foreach ($headers as $n => $header) { + $text .= " " . $this->runSpanGamut(trim($header)) . "\n"; + } + $text .= "\n"; + $text .= "\n"; + + // Split content by row. + $rows = explode("\n", trim($content, "\n")); + + $text .= "\n"; + foreach ($rows as $row) { + // Parsing span elements, including code spans, character escapes, + // and inline HTML tags, so that pipes inside those gets ignored. + $row = $this->parseSpan($row); + + // Split row by cell. + $row_cells = preg_split('/ *[|] */', $row, $col_count); + $row_cells = array_pad($row_cells, $col_count, ''); + + $text .= "\n"; + foreach ($row_cells as $n => $cell) { + $text .= " " . $this->runSpanGamut(trim($cell)) . "\n"; + } + $text .= "\n"; + } + $text .= "\n"; + $text .= "
    "; + + return $this->hashBlock($text) . "\n"; + } + + /** + * Form HTML definition lists. + * @param string $text + * @return string + */ + protected function doDefLists($text) { + $less_than_tab = $this->tab_width - 1; + + // Re-usable pattern to match any entire dl list: + $whole_list_re = '(?> + ( # $1 = whole list + ( # $2 + [ ]{0,' . $less_than_tab . '} + ((?>.*\S.*\n)+) # $3 = defined term + \n? + [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another term + [ ]{0,' . $less_than_tab . '} + (?: \S.*\n )+? # defined term + \n? + [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition + ) + (?! # Negative lookahead for another definition + [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition + ) + ) + ) + )'; // mx + + $text = preg_replace_callback('{ + (?>\A\n?|(?<=\n\n)) + ' . $whole_list_re . ' + }mx', + array($this, '_doDefLists_callback'), $text); + + return $text; + } + + /** + * Callback for processing definition lists + * @param array $matches + * @return string + */ + protected function _doDefLists_callback($matches) { + // Re-usable patterns to match list item bullets and number markers: + $list = $matches[1]; + + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + $result = trim($this->processDefListItems($list)); + $result = "
    \n" . $result . "\n
    "; + return $this->hashBlock($result) . "\n\n"; + } + + /** + * Process the contents of a single definition list, splitting it + * into individual term and definition list items. + * @param string $list_str + * @return string + */ + protected function processDefListItems($list_str) { + + $less_than_tab = $this->tab_width - 1; + + // Trim trailing blank lines: + $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); + + // Process definition terms. + $list_str = preg_replace_callback('{ + (?>\A\n?|\n\n+) # leading line + ( # definition terms = $1 + [ ]{0,' . $less_than_tab . '} # leading whitespace + (?!\:[ ]|[ ]) # negative lookahead for a definition + # mark (colon) or more whitespace. + (?> \S.* \n)+? # actual term (not whitespace). + ) + (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed + # with a definition mark. + }xm', + array($this, '_processDefListItems_callback_dt'), $list_str); + + // Process actual definitions. + $list_str = preg_replace_callback('{ + \n(\n+)? # leading line = $1 + ( # marker space = $2 + [ ]{0,' . $less_than_tab . '} # whitespace before colon + \:[ ]+ # definition mark (colon) + ) + ((?s:.+?)) # definition text = $3 + (?= \n+ # stop at next definition mark, + (?: # next term or end of text + [ ]{0,' . $less_than_tab . '} \:[ ] | +
    | \z + ) + ) + }xm', + array($this, '_processDefListItems_callback_dd'), $list_str); + + return $list_str; + } + + /** + * Callback for
    elements in definition lists + * @param array $matches + * @return string + */ + protected function _processDefListItems_callback_dt($matches) { + $terms = explode("\n", trim($matches[1])); + $text = ''; + foreach ($terms as $term) { + $term = $this->runSpanGamut(trim($term)); + $text .= "\n
    " . $term . "
    "; + } + return $text . "\n"; + } + + /** + * Callback for
    elements in definition lists + * @param array $matches + * @return string + */ + protected function _processDefListItems_callback_dd($matches) { + $leading_line = $matches[1]; + $marker_space = $matches[2]; + $def = $matches[3]; + + if ($leading_line || preg_match('/\n{2,}/', $def)) { + // Replace marker with the appropriate whitespace indentation + $def = str_repeat(' ', strlen($marker_space)) . $def; + $def = $this->runBlockGamut($this->outdent($def . "\n\n")); + $def = "\n". $def ."\n"; + } + else { + $def = rtrim($def); + $def = $this->runSpanGamut($this->outdent($def)); + } + + return "\n
    " . $def . "
    \n"; + } + + /** + * Adding the fenced code block syntax to regular Markdown: + * + * ~~~ + * Code block + * ~~~ + * + * @param string $text + * @return string + */ + protected function doFencedCodeBlocks($text) { + + $text = preg_replace_callback('{ + (?:\n|\A) + # 1: Opening marker + ( + (?:~{3,}|`{3,}) # 3 or more tildes/backticks. + ) + [ ]* + (?: + \.?([-_:a-zA-Z0-9]+) # 2: standalone class name + )? + [ ]* + (?: + ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes + )? + [ ]* \n # Whitespace and newline following marker. + + # 4: Content + ( + (?> + (?!\1 [ ]* \n) # Not a closing marker. + .*\n+ + )+ + ) + + # Closing marker. + \1 [ ]* (?= \n ) + }xm', + array($this, '_doFencedCodeBlocks_callback'), $text); + + return $text; + } + + /** + * Callback to process fenced code blocks + * @param array $matches + * @return string + */ + protected function _doFencedCodeBlocks_callback($matches) { + $classname =& $matches[2]; + $attrs =& $matches[3]; + $codeblock = $matches[4]; + + if ($this->code_block_content_func) { + $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname); + } else { + $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); + } + + $codeblock = preg_replace_callback('/^\n+/', + array($this, '_doFencedCodeBlocks_newlines'), $codeblock); + + $classes = array(); + if ($classname !== "") { + if ($classname[0] === '.') { + $classname = substr($classname, 1); + } + $classes[] = $this->code_class_prefix . $classname; + } + $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes); + $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; + $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; + $codeblock = "$codeblock
    "; + + return "\n\n".$this->hashBlock($codeblock)."\n\n"; + } + + /** + * Replace new lines in fenced code blocks + * @param array $matches + * @return string + */ + protected function _doFencedCodeBlocks_newlines($matches) { + return str_repeat("empty_element_suffix", + strlen($matches[0])); + } + + /** + * Redefining emphasis markers so that emphasis by underscore does not + * work in the middle of a word. + * @var array + */ + protected $em_relist = array( + '' => '(?:(? '(? '(? '(?:(? '(? '(? '(?:(? '(? '(? tags + * @return string HTML output + */ + protected function formParagraphs($text, $wrap_in_p = true) { + // Strip leading and trailing lines: + $text = preg_replace('/\A\n+|\n+\z/', '', $text); + + $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); + + // Wrap

    tags and unhashify HTML blocks + foreach ($grafs as $key => $value) { + $value = trim($this->runSpanGamut($value)); + + // Check if this should be enclosed in a paragraph. + // Clean tag hashes & block tag hashes are left alone. + $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); + + if ($is_p) { + $value = "

    $value

    "; + } + $grafs[$key] = $value; + } + + // Join grafs in one text, then unhash HTML tags. + $text = implode("\n\n", $grafs); + + // Finish by removing any tag hashes still present in $text. + $text = $this->unhash($text); + + return $text; + } + + + /** + * Footnotes - Strips link definitions from text, stores the URLs and + * titles in hash references. + * @param string $text + * @return string + */ + protected function stripFootnotes($text) { + $less_than_tab = $this->tab_width - 1; + + // Link defs are in the form: [^id]: url "optional title" + $text = preg_replace_callback('{ + ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?: # note_id = $1 + [ ]* + \n? # maybe *one* newline + ( # text = $2 (no blank lines allowed) + (?: + .+ # actual text + | + \n # newlines but + (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker. + (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed + # by non-indented content + )* + ) + }xm', + array($this, '_stripFootnotes_callback'), + $text); + return $text; + } + + /** + * Callback for stripping footnotes + * @param array $matches + * @return string + */ + protected function _stripFootnotes_callback($matches) { + $note_id = $this->fn_id_prefix . $matches[1]; + $this->footnotes[$note_id] = $this->outdent($matches[2]); + return ''; // String that will replace the block + } + + /** + * Replace footnote references in $text [^id] with a special text-token + * which will be replaced by the actual footnote marker in appendFootnotes. + * @param string $text + * @return string + */ + protected function doFootnotes($text) { + if (!$this->in_anchor) { + $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); + } + return $text; + } + + /** + * Append footnote list to text + * @param string $text + * @return string + */ + protected function appendFootnotes($text) { + $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', + array($this, '_appendFootnotes_callback'), $text); + + if ( ! empty( $this->footnotes_ordered ) ) { + $this->_doFootnotes(); + if ( ! $this->omit_footnotes ) { + $text .= "\n\n"; + $text .= "
    \n"; + $text .= "empty_element_suffix . "\n"; + $text .= $this->footnotes_assembled; + $text .= "
    "; + } + } + return $text; + } + + + /** + * Generates the HTML for footnotes. Called by appendFootnotes, even if + * footnotes are not being appended. + * @return void + */ + protected function _doFootnotes() { + $attr = array(); + if ($this->fn_backlink_class !== "") { + $class = $this->fn_backlink_class; + $class = $this->encodeAttribute($class); + $attr['class'] = " class=\"$class\""; + } + $attr['role'] = " role=\"doc-backlink\""; + $num = 0; + + $text = "
      \n\n"; + while (!empty($this->footnotes_ordered)) { + $footnote = reset($this->footnotes_ordered); + $note_id = key($this->footnotes_ordered); + unset($this->footnotes_ordered[$note_id]); + $ref_count = $this->footnotes_ref_count[$note_id]; + unset($this->footnotes_ref_count[$note_id]); + unset($this->footnotes[$note_id]); + + $footnote .= "\n"; // Need to append newline before parsing. + $footnote = $this->runBlockGamut("$footnote\n"); + $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', + array($this, '_appendFootnotes_callback'), $footnote); + + $num++; + $note_id = $this->encodeAttribute($note_id); + + // Prepare backlink, multiple backlinks if multiple references + // Do not create empty backlinks if the html is blank + $backlink = ""; + if (!empty($this->fn_backlink_html)) { + for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) { + if (!empty($this->fn_backlink_title)) { + $attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"'; + } + if (!empty($this->fn_backlink_label)) { + $attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"'; + } + $parsed_attr = $this->parseFootnotePlaceholders( + implode('', $attr), + $num, + $ref_num + ); + $backlink_text = $this->parseFootnotePlaceholders( + $this->fn_backlink_html, + $num, + $ref_num + ); + $ref_count_mark = $ref_num > 1 ? $ref_num : ''; + $backlink .= " $backlink_text"; + } + $backlink = trim($backlink); + } + + // Add backlink to last paragraph; create new paragraph if needed. + if (!empty($backlink)) { + if (preg_match('{

      $}', $footnote)) { + $footnote = substr($footnote, 0, -4) . " $backlink

      "; + } else { + $footnote .= "\n\n

      $backlink

      "; + } + } + + $text .= "
    1. \n"; + $text .= $footnote . "\n"; + $text .= "
    2. \n\n"; + } + $text .= "
    \n"; + + $this->footnotes_assembled = $text; + } + + /** + * Callback for appending footnotes + * @param array $matches + * @return string + */ + protected function _appendFootnotes_callback($matches) { + $node_id = $this->fn_id_prefix . $matches[1]; + + // Create footnote marker only if it has a corresponding footnote *and* + // the footnote hasn't been used by another marker. + if (isset($this->footnotes[$node_id])) { + $num =& $this->footnotes_numbers[$node_id]; + if (!isset($num)) { + // Transfer footnote content to the ordered list and give it its + // number + $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; + $this->footnotes_ref_count[$node_id] = 1; + $num = $this->footnote_counter++; + $ref_count_mark = ''; + } else { + $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; + } + + $attr = ""; + if ($this->fn_link_class !== "") { + $class = $this->fn_link_class; + $class = $this->encodeAttribute($class); + $attr .= " class=\"$class\""; + } + if ($this->fn_link_title !== "") { + $title = $this->fn_link_title; + $title = $this->encodeAttribute($title); + $attr .= " title=\"$title\""; + } + $attr .= " role=\"doc-noteref\""; + + $attr = str_replace("%%", $num, $attr); + $node_id = $this->encodeAttribute($node_id); + + return + "". + "$num". + ""; + } + + return "[^" . $matches[1] . "]"; + } + + /** + * Build footnote label by evaluating any placeholders. + * - ^^ footnote number + * - %% footnote reference number (Nth reference to footnote number) + * @param string $label + * @param int $footnote_number + * @param int $reference_number + * @return string + */ + protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) { + return str_replace( + array('^^', '%%'), + array($footnote_number, $reference_number), + $label + ); + } + + + /** + * Abbreviations - strips abbreviations from text, stores titles in hash + * references. + * @param string $text + * @return string + */ + protected function stripAbbreviations($text) { + $less_than_tab = $this->tab_width - 1; + + // Link defs are in the form: [id]*: url "optional title" + $text = preg_replace_callback('{ + ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?: # abbr_id = $1 + (.*) # text = $2 (no blank lines allowed) + }xm', + array($this, '_stripAbbreviations_callback'), + $text); + return $text; + } + + /** + * Callback for stripping abbreviations + * @param array $matches + * @return string + */ + protected function _stripAbbreviations_callback($matches) { + $abbr_word = $matches[1]; + $abbr_desc = $matches[2]; + if ($this->abbr_word_re) { + $this->abbr_word_re .= '|'; + } + $this->abbr_word_re .= preg_quote($abbr_word); + $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); + return ''; // String that will replace the block + } + + /** + * Find defined abbreviations in text and wrap them in elements. + * @param string $text + * @return string + */ + protected function doAbbreviations($text) { + if ($this->abbr_word_re) { + // cannot use the /x modifier because abbr_word_re may + // contain significant spaces: + $text = preg_replace_callback('{' . + '(?abbr_word_re . ')' . + '(?![\w\x1A])' . + '}', + array($this, '_doAbbreviations_callback'), $text); + } + return $text; + } + + /** + * Callback for processing abbreviations + * @param array $matches + * @return string + */ + protected function _doAbbreviations_callback($matches) { + $abbr = $matches[0]; + if (isset($this->abbr_desciptions[$abbr])) { + $desc = $this->abbr_desciptions[$abbr]; + if (empty($desc)) { + return $this->hashPart("$abbr"); + } + $desc = $this->encodeAttribute($desc); + return $this->hashPart("$abbr"); + } + return $matches[0]; + } +} diff --git a/mdlibs/MarkdownInterface.inc.php b/mdlibs/MarkdownInterface.inc.php new file mode 100644 index 0000000..c4e9ac7 --- /dev/null +++ b/mdlibs/MarkdownInterface.inc.php @@ -0,0 +1,9 @@ + + * @copyright 2004-2021 Michel Fortin + * @copyright (Original Markdown) 2004-2006 John Gruber + */ + +namespace mdlibs; + +/** + * Markdown Parser Interface + */ +interface MarkdownInterface { + /** + * Initialize the parser and return the result of its transform method. + * This will work fine for derived classes too. + * + * @api + * + * @param string $text + * @return string + */ + public static function defaultTransform($text); + + /** + * Main function. Performs some preprocessing on the input text + * and pass it through the document gamut. + * + * @api + * + * @param string $text + * @return string + */ + public function transform($text); +} diff --git a/mktree.sh b/mktree.sh new file mode 100755 index 0000000..6700cc8 --- /dev/null +++ b/mktree.sh @@ -0,0 +1,3 @@ +#!/bin/sh +URL="https://codeberg.org/speedie/speedwm/raw/branch/master" +tree -I md5 -f --noreport --charset ascii | grep -vE "profiles|metadata|md5|Manifest|README|xml|repo_name|conf|directories|push|tree|LICENSE" | sed -e 's/| \+/ /g' -e 's/[|`]-\+/ */g' -e 's:\(* \)\(\(.*/\)\([^/]\+\)\):\1[\4](\2):g' | tail -n+2 | sed "s|]([.]|](${URL}|g" diff --git a/pages/Home.txt b/pages/Home.txt new file mode 100644 index 0000000..de6a91f --- /dev/null +++ b/pages/Home.txt @@ -0,0 +1,4 @@ +Welcome to the speedie.gq wiki! +------------------------------- + +Welcome to the very much work in progress speedie.gq wiki!