Well, this patch is so long that it likely has a bug, but this is the limit of my ingenuity. If someone has a better solution, I would like to know it. I hope the patch helps anyone who wants compliant HTML output.
Yes, the display is corrupted because UseMod doesn't support nested <pre></pre>.
It is hard to change this patch, so I will just add a few comments here. If you have a </pre> in the source, that breaks formatting, so I suggest you change </pre> to </pre" . "> or something similar... Plus I think that the newline handling can be changed: s/[\r\n]/\n/g is the same as s/\r/\n/g. -- AlexSchroeder
Thanks for the comments. I agree that the patch is complicated, but I don't know how it can be done in a simpler way. --TakuyaMurata
Summary of this patch. This paragraph is commentary only; patch skips any text that precedes the first file header.
The page-to-HTML code no longer runs CommonMarkup(..., 1, 0) and WikiLinesToHtml over the whole page. Instead it strips $FS separators, turns \r\n and \r into \n, and calls GetParsedPage.
regex_prematch and regex_postmatch return the part of a string before and after the most recent successful match, using the offsets in $-[0] and $+[0]. They must be called before any other match succeeds in the caller.
GetParsedInline runs CommonMarkup($t, 1, 1) on one piece of text and trims leading and trailing whitespace.
GetParsedFlow recursively splits text around headings (= Title =), horizontal rules (----), space-indented preformatted lines and blank-line paragraph breaks, and emits <hN>, <hr />, <pre> and <p> elements. $ThinLine is hard-coded to 0, so only the plain <hr /> branch is taken.
GetParsedPage recursively consumes list lines that start with *, #, ; or : from the referenced text. It opens <ul>, <ol> or <dl> when the marker count exceeds the current depth, and hands all other text to GetParsedFlow (only at depth 0).
The heading substitution in CommonMarkup and the subs WikiLinesToHtml and WikiHeading are removed.
The closing pre tag is written as "</pre" . ">", apparently so that a literal closing pre tag never appears in this page's source.
Note: the original whitespace of this listing was lost. Indentation below is reconstructed (two spaces per level, one space before trailing comments), so use "patch -l" if the context lines do not match your copy.

--- wiki_orig.cgi 2002-12-25 18:56:20.000000000 -0600
+++ wiki_text.cgi 2002-12-25 23:59:59.000000000 -0600
@@ -1163,13 +1163,174 @@
   }
   $pageText = &QuoteHtml($pageText);
   $pageText =~ s/\\ *\r?\n/ /g; # Join lines with backslash at end
-  $pageText = &CommonMarkup($pageText, 1, 0); # Multi-line markup
-  $pageText = &WikiLinesToHtml($pageText); # Line-oriented markup
+  #$pageText = &CommonMarkup($pageText, 1, 0); # Multi-line markup
+
+  $pageText =~ s/$FS[123]//g; # Remove separators (paranoia)
+  $pageText =~ s/\r\n/\n/g;
+  $pageText =~ s/\r/\n/g; # In Mac only '\r' is a newline.
+  $pageText = &GetParsedPage(\$pageText);
+
   $pageText =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge; # Restore saved text
   $pageText =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge; # Restore nested saved text
   return $pageText;
 }
 
+sub regex_prematch {
+  my ($string) = @_;
+  return substr($string, 0, $-[0]);
+}
+
+sub regex_postmatch {
+  my ($string) = @_;
+  return substr($string, $+[0]);
+}
+
+sub GetParsedInline {
+  my ($t) = @_;
+  $t = &CommonMarkup($t, 1, 1); # do both
+  $t =~ s/\A\s+//g; # trim
+  $t =~ s/\s+\Z//g; # trim
+  return $t;
+}
+
+sub GetParsedFlow {
+  my ($t) = @_;
+  my ($left, $right, $depth);
+  my ($ThinLine) = 0;
+
+  $t || return '';
+
+  if ($t =~ /^(\=+) +([^\n]+) +\1(?:\n|\Z)/m) {
+    $left = &regex_prematch($t);
+    $depth = length($1);
+    $depth = 6 if ($depth > 6);
+    return &GetParsedFlow($left) . "<h$depth>$2</h$depth>\n" . GetParsedFlow(&regex_postmatch($t));
+  }
+
+  if ($ThinLine) {
+    if ($t =~ /----+/) {
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr noshade size=\"1\" />\n" . GetParsedFlow($t);
+    }
+    if ($t =~ /====+/) {
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr noshade size=\"2\" />\n" . GetParsedFlow($t);
+    }
+  }
+  else {
+    if ($t =~ /----+/) {
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr />\n" . GetParsedFlow($t);
+    }
+  }
+
+  if ($t =~ /((?:^ +[^\n]+(?:\n|\Z))+)/m) {
+    $left = &regex_prematch($t);
+    $t = &regex_postmatch($t);
+    my ($pre) = $1;
+    $pre =~ s/^ +//mg;
+    return &GetParsedFlow($left) . "<pre>$pre</pre" . ">\n" . GetParsedFlow($t);
+  }
+
+  ($left, $right) = split(/\n\s*\n/, $t, 2);
+  if ($right) {
+    return &GetParsedFlow($left) . &GetParsedFlow($right);
+  }
+
+  $t =~ s/\n/ /g; # I suspect that some browser concatenates two line into one string.
+  $t =~ s/\A\s+//g; # trim
+  $t =~ s/\s+\Z//g; # trim
+  return $t && ("<p>" . &GetParsedInline($t) . "</p>\n");
+}
+
+sub GetParsedPage {
+  my ($ref_text, $depth, $tag) = @_;
+  my (%item_tag) = ('' => '', 'dl' => 'dd', 'ol' => 'li', 'ul' => 'li',);
+  my (%list_tag) = ('*' => 'ul', '#' => 'ol', );
+  my ($html, $left);
+  my ($t) = $$ref_text;
+
+  $t || return '';
+
+  if ($t =~ /^((?:\*+)|(?:\#+)) *([^\n]*)(?:\n|\Z)/m) {
+    $left = &regex_prematch($t);
+    if (!$left) {
+      my ($en_tag) = $list_tag{substr($1, 0, 1)};
+      if (length($1) > $depth) {
+        $html = "<$en_tag>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, $en_tag);
+        $html .= "</$en_tag>\n";
+        if ($item_tag{$tag}) {
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq $en_tag) {
+        $$ref_text = substr($$ref_text, $+[0]); # consume
+        $html = $2 && "<li>" . &GetParsedInline($2) . "</li>\n";
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;
+    }
+    $t = $left;
+  }
+
+  if ($t =~ /^(;+)([^:\n]*)\:([^\n]*)(?:\n|\Z)/m) {
+    $left = &regex_prematch($t);
+    if (!$left) {
+      if (length($1) > $depth) {
+        $html = "<dl>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, 'dl');
+        $html .= "</dl>\n";
+        if ($item_tag{$tag}) {
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq 'dl') {
+        $$ref_text = substr($$ref_text, $+[0]); # consume
+        $html = $2 && ("<dt>" . &GetParsedInline($2) . "</dt>\n");
+        $html .= $3 && ("<dd>" . &GetParsedInline($3) . "</dd>\n");
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;
+    }
+    $t = $left;
+  }
+
+  if ($t =~ /^(\:+)([^\n]*)(?:\n|\Z)/m) {
+    $left = &regex_prematch($t);
+    if (!$left) {
+      if (length($1) > $depth) {
+        $html = "<dl>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, 'dl');
+        $html .= "</dl>\n";
+        if ($item_tag{$tag}) {
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq 'dl') {
+        $$ref_text = substr($$ref_text, $+[0]); # consume
+        $html .= $2 && ("<dd>" . &GetParsedInline($2) . "</dd>\n");
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;
+    }
+    $t = $left;
+  }
+
+  if ($depth > 0) {
+    return '';
+  }
+
+  $$ref_text = substr($$ref_text, length($t));
+  return &GetParsedFlow($t) . &GetParsedPage($ref_text, $depth, $tag);
+}
+
 sub CommonMarkup {
   my ($text, $useImage, $doLines) = @_;
   local $_ = $text;
@@ -1233,66 +1394,10 @@
     # by matching the inner quotes for the strong pattern.
     s/('*)'''(.*?)'''/$1<strong>$2<\/strong>/g;
     s/''(.*?)''/<em>$1<\/em>/g;
-    if ($UseHeadings) {
-      s/(^|\n)\s*(\=+)\s+([^\n]+)\s+\=+/&WikiHeading($1, $2, $3)/geo;
-    }
   }
   return $_;
 }
 
-sub WikiLinesToHtml {
-  my ($pageText) = @_;
-  my ($pageHtml, @htmlStack, $code, $depth, $oldCode);
-
-  @htmlStack = ();
-  $depth = 0;
-  $pageHtml = "";
-  foreach (split(/\n/, $pageText)) { # Process lines one-at-a-time
-    $_ .= "\n";
-    if (s/^(\;+)([^:]+\:?)\:/<dt>$2<dd>/) {
-      $code = "DL";
-      $depth = length $1;
-    } elsif (s/^(\:+)/<dt><dd>/) {
-      $code = "DL";
-      $depth = length $1;
-    } elsif (s/^(\*+)/<li>/) {
-      $code = "UL";
-      $depth = length $1;
-    } elsif (s/^(\#+)/<li>/) {
-      $code = "OL";
-      $depth = length $1;
-    } elsif (/^[ \t].*\S/) {
-      $code = "PRE";
-      $depth = 1;
-    } else {
-      $depth = 0;
-    }
-    while (@htmlStack > $depth) { # Close tags as needed
-      $pageHtml .= "</" . pop(@htmlStack) . ">\n";
-    }
-    if ($depth > 0) {
-      $depth = $IndentLimit if ($depth > $IndentLimit);
-      if (@htmlStack) { # Non-empty stack
-        $oldCode = pop(@htmlStack);
-        if ($oldCode ne $code) {
-          $pageHtml .= "</$oldCode><$code>\n";
-        }
-        push(@htmlStack, $code);
-      }
-      while (@htmlStack < $depth) {
-        push(@htmlStack, $code);
-        $pageHtml .= "<$code>\n";
-      }
-    }
-    s/^\s*$/<p>\n/; # Blank lines become <p> tags
-    $pageHtml .= &CommonMarkup($_, 1, 2); # Line-oriented common markup
-  }
-  while (@htmlStack > 0) { # Clear stack
-    $pageHtml .= "</" . pop(@htmlStack) . ">\n";
-  }
-  return $pageHtml;
-}
-
 sub QuoteHtml {
   my ($html) = @_;
 
@@ -1512,14 +1617,6 @@
   return $url;
 }
 
-sub WikiHeading {
-  my ($pre, $depth, $text) = @_;
-
-  $depth = length($depth);
-  $depth = 6 if ($depth > 6);
-  return $pre . "<H$depth>$text</H$depth>\n";
-}
-
 # ==== Difference markup and HTML ====
 sub GetDiffHTML {
   my ($diffType, $id, $rev, $newText) = @_;