| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
/**
 * @fileoverview
 * Some functions for browser-side pretty printing of code contained in html.
 *
 * The lexer should work on a number of languages including C and friends,
 * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.
 * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,
 * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or
 * CAML-like languages.
 *
 * If there's a language not mentioned here, then I don't know it, and don't
 * know whether it works.  If it has a C-like, Bash-like, or XML-like syntax
 * then it should work passably.
 *
 * Usage:
 * 1) include this source file in an html page via
 *    <script type="text/javascript" src="/path/to/prettify.js"></script>
 * 2) define style rules.  See the example page for examples.
 * 3) mark the <pre> and <code> tags in your source with class=prettyprint.
 *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
 *    needs to do more substantial DOM manipulations to support that, so some
 *    css styles may not be preserved.
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in.
 *
 * Change log:
 * cbeust, 2006/08/22
 *   Java annotations (start with "@") are now captured as literals ("lit")
 */
| |
/**
 * The set of words that are highlighted as keywords.  Each keyword is a
 * property name whose value is true.  The lexer does not know which language
 * it is looking at, so the set is the union of the keyword lists of all the
 * target languages.
 */
var PR_keywords = {};
/** initialize the keyword list for our target languages. */
(function () {
  // NOTE: the C++ list used to contain the words "declaration," and
  // "directive".  They are not keywords; they were residue of the phrases
  // "using declaration, using directive" in the documentation the list was
  // copied from, so only "using" is kept.
  var CPP_KEYWORDS = "abstract bool break case catch char class const " +
      "const_cast continue default delete deprecated dllexport dllimport do " +
      "double dynamic_cast else enum explicit extern false float for friend " +
      "goto if inline int long mutable naked namespace new noinline noreturn " +
      "nothrow novtable operator private property protected public register " +
      "reinterpret_cast return selectany short signed sizeof static " +
      "static_cast struct switch template this thread throw true try typedef " +
      "typeid typename union unsigned using uuid " +
      "virtual void volatile while typeof";
  var CSHARP_KEYWORDS = "as base by byte checked decimal delegate descending " +
      "event finally fixed foreach from group implicit in interface internal " +
      "into is lock null object out override orderby params readonly ref sbyte " +
      "sealed stackalloc string select uint ulong unchecked unsafe ushort var";
  var JAVA_KEYWORDS = "package synchronized boolean implements import throws " +
      "instanceof transient extends final strictfp native super";
  var JSCRIPT_KEYWORDS = "debugger export function with NaN Infinity";
  var PERL_KEYWORDS = "require sub unless until use elsif BEGIN END";
  var PYTHON_KEYWORDS = "and assert def del elif except exec global lambda " +
      "not or pass print raise yield False True None";
  var RUBY_KEYWORDS = "then end begin rescue ensure module when undef next " +
      "redo retry alias defined";
  var SH_KEYWORDS = "done fi";

  var KEYWORDS = [CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, PYTHON_KEYWORDS,
                  RUBY_KEYWORDS, SH_KEYWORDS];
  for ( var k = 0; k < KEYWORDS.length; k++ ) {
    var kw = KEYWORDS[k].split(' ');
    for ( var i = 0; i < kw.length; i++ ) {
      // split(' ') yields empty strings for doubled spaces; skip them
      if ( kw[i] ) {
        PR_keywords[kw[i]] = true;
      }
    }
  }
}).call(this);
| |
// Token style names.  Each value is also the css class that is applied to
// the markup wrapped around a token of that kind.
var PR_STRING = 'str',       // a string literal
    PR_KEYWORD = 'kwd',      // a keyword
    PR_COMMENT = 'com',      // a comment
    PR_TYPE = 'typ',         // a type name
    PR_LITERAL = 'lit',      // a literal value, e.g. 1, null, true
    PR_PUNCTUATION = 'pun',  // a run of punctuation
    PR_PLAIN = 'pln';        // plain text that fits no other category

// Styles that only occur when lexing markup.
var PR_TAG = 'tag',            // an sgml tag
    PR_DECLARATION = 'dec',    // a markup declaration such as a DOCTYPE
    PR_SOURCE = 'src',         // source code embedded in markup
    PR_ATTRIB_NAME = 'atn',    // an sgml attribute name
    PR_ATTRIB_VALUE = 'atv';   // an sgml attribute value

/** how many character cells lie between consecutive tab stops. */
var PR_TAB_WIDTH = 8;
| |
/**
 * Records where a token stops and how it should be styled.  A string that is
 * divided into tokens can be described by the list of positions at which each
 * token ends, provided runs of whitespace are tokens in their own right.
 * @constructor
 * @param {number} end position just past the last character of the token.
 * @param style a token style name, or null.  Must not be undefined.
 * @throws {Error} when style is undefined or end is not a number.
 * @private
 */
function PR_TokenEnd( end, style ) {
  var styleMissing = style === undefined;
  if ( styleMissing ) {
    throw new Error('BAD');
  }
  if ( typeof end != 'number' ) {
    throw new Error('BAD');
  }
  this.end = end;
  this.style = style;
}

/** debugging aid: renders as [PR_TokenEnd end:style], omitting empty styles. */
PR_TokenEnd.prototype.toString = function () {
  var styleSuffix = this.style ? ':' + this.style : '';
  return '[PR_TokenEnd ' + this.end + styleSuffix + ']';
};
| |
/**
 * A piece of text paired with a style.  Instances carry both the final
 * output of the lexing functions and their intermediate results.
 * @constructor
 * @param token the text of the token.
 * @param style one of the token style names, or null for a token that has
 *     no style, such as an html tag embedded in the source.  Must not be
 *     undefined.
 * @throws {Error} when style is undefined.
 * @private
 */
function PR_Token( token, style ) {
  var styleMissing = style === undefined;
  if ( styleMissing ) {
    throw new Error('BAD');
  }
  this.token = token;
  this.style = style;
}

/** debugging aid: renders as [PR_Token text:style], omitting empty styles. */
PR_Token.prototype.toString = function () {
  var styleSuffix = this.style ? ':' + this.style : '';
  return '[PR_Token ' + this.token + styleSuffix + ']';
};
| |
/** a helper class that decodes common html entities used to escape special
  * characters in source code.
  *
  * After a call to decode, the field ch holds the decoded character and the
  * field next holds the index in the input at which decoding should resume.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}

/** named entities understood by PR_DecodeHelper, keyed by lower case name. */
var PR_NAMED_ENTITIES = {
  'lt': '<',
  'gt': '>',
  'quot': '"',
  'apos': "'",
  'nbsp': '\xa0',
  'amp': '&'  // reencoding requires that & always be decoded properly
};

/**
 * decodes the character or html entity that starts at index i of s.
 *
 * Recognized entities are the names in PR_NAMED_ENTITIES (case insensitive)
 * and decimal or hexadecimal numeric character references.  A '&' that is
 * followed closely by a ';' but does not form a recognized entity is reported
 * as '\0' and only the '&' itself is consumed.  A '&' with no nearby ';' is
 * reported as itself.
 *
 * @param {String} s the html escaped text.
 * @param {Number} i index of the character to decode.
 * @return {String} the decoded character, which is also stored in this.ch.
 *     this.next is set to the index following the consumed input.
 */
PR_DecodeHelper.prototype.decode = function ( s, i ) {
  // Longest entity body that is considered, not counting '&' and ';'.
  // "#x10FFFF" and "#1114111" are both 8 characters long.  The previous limit
  // of 3 meant that &quot;, &apos; and 3+ digit numeric entities were never
  // decoded.
  var maxEntityLength = 8;

  var next = i + 1;
  var ch = s.charAt(i);
  if ( '&' === ch ) {
    var semi = s.indexOf(';', next);
    if ( semi >= 0 && semi - next <= maxEntityLength ) {
      var entityName = s.substring(next, semi);
      var decoded = null;
      if ( entityName.charAt(0) === '#' ) {  // check for numeric entity
        var ch1 = entityName.charAt(1);
        var charCode;
        if ( ch1 === 'x' || ch1 === 'X' ) {  // like &#xA0;
          charCode = parseInt(entityName.substring(2), 16);
        }
        else {  // like &#160;
          charCode = parseInt(entityName.substring(1), 10);
        }
        if ( !isNaN(charCode) && charCode >= 0 && charCode <= 0x10FFFF ) {
          if ( charCode > 0xFFFF ) {
            // String.fromCharCode truncates to 16 bits, so spell out the
            // surrogate pair for supplementary code points.
            var offset = charCode - 0x10000;
            decoded = String.fromCharCode(0xD800 + (offset >> 10),
                                          0xDC00 + (offset & 0x3FF));
          }
          else {
            decoded = String.fromCharCode(charCode);
          }
        }
      }
      if ( !decoded ) {
        var key = entityName.toLowerCase();
        // own-property check so that names inherited from Object.prototype
        // are not mistaken for entities
        if ( Object.prototype.hasOwnProperty.call(PR_NAMED_ENTITIES, key) ) {
          decoded = PR_NAMED_ENTITIES[key];
        }
      }
      if ( decoded ) {
        ch = decoded;
        next = semi + 1;
      }
      else {  // unrecognized entity: consume only the '&' and report '\0'
        next = i + 1;
        ch = '\0';
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};
| |
// some string utilities

/** true iff ch is an unaccented latin letter, a-z or A-Z. */
function PR_isWordChar( ch ) {
  var isLowerCase = 'a' <= ch && ch <= 'z';
  var isUpperCase = 'A' <= ch && ch <= 'Z';
  return isLowerCase || isUpperCase;
}
| |
/** true iff ch may begin an identifier: a letter, '_', '$' or '@'. */
function PR_isIdentifierStart( ch ) {
  if ( PR_isWordChar(ch) ) {
    return true;
  }
  return ch == '_' || ch == '$' || ch == '@';
}
| |
/** true iff ch may continue an identifier: an identifier start or a digit. */
function PR_isIdentifierPart( ch ) {
  if ( PR_isIdentifierStart(ch) ) {
    return true;
  }
  return PR_isDigitChar(ch);
}
| |
/** true iff ch is a tab, a space, a carriage return or a line feed. */
function PR_isSpaceChar( ch ) {
  var SPACE_CHARS = "\t \r\n";
  return SPACE_CHARS.indexOf(ch) !== -1;
}
| |
/** true iff ch is a decimal digit, 0-9. */
function PR_isDigitChar( ch ) {
  return '0' <= ch && ch <= '9';
}
| |
/**
 * strips leading and trailing whitespace, as defined by PR_isSpaceChar.
 * @param {String} s
 * @return {String} s without the surrounding whitespace.
 */
function PR_trim( s ) {
  var first = 0;
  var last = s.length - 1;
  // advance past leading whitespace
  for ( ; first <= last; ++first ) {
    if ( !PR_isSpaceChar(s.charAt(first)) ) {
      break;
    }
  }
  // back up over trailing whitespace
  for ( ; last > first; --last ) {
    if ( !PR_isSpaceChar(s.charAt(last)) ) {
      break;
    }
  }
  return s.substring(first, last + 1);
}
| |
/** true iff the string s begins with the string prefix. */
function PR_startsWith( s, prefix ) {
  var n = prefix.length;
  if ( s.length < n ) {
    return false;
  }
  return s.substring(0, n) == prefix;
}
| |
/** true iff the string s ends with the string suffix. */
function PR_endsWith( s, suffix ) {
  var total = s.length;
  var n = suffix.length;
  if ( total < n ) {
    return false;
  }
  return s.substring(total - n, total) == suffix;
}
| |
/**
 * true iff the characters of prefix equal the leading elements of chars,
 * where only chars[0:len] is considered to be populated.
 * @param {Array} chars an array of single character strings.
 * @param {Number} len the number of valid entries at the start of chars.
 * @param {String} prefix the string to look for.
 * @private
 */
function PR_prefixMatch( chars, len, prefix ) {
  var needed = prefix.length;
  if ( needed > len ) {
    return false;
  }
  var at = 0;
  while ( at < needed ) {
    if ( chars[at] != prefix.charAt(at) ) {
      return false;
    }
    ++at;
  }
  return true;
}
| |
/**
 * like textToHtml but escapes double quotes to be attribute safe.
 * @param {String} str plain text.
 * @return {String} str with &amp;, &lt;, &gt;, double quotes and non-breaking
 *     spaces replaced by the corresponding html entities, so that the result
 *     can be placed inside a double quoted attribute value.
 */
function PR_attribToHtml( str ) {
  // '&' must be escaped first so that the entities produced by the later
  // replacements are not escaped a second time.
  return str.replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/\"/g, '&quot;')
      .replace(/\xa0/g, '&nbsp;');
}
| |
/**
 * escapes html special characters to html.
 * @param {String} str plain text.
 * @return {String} str with &amp;, &lt;, &gt; and non-breaking spaces replaced
 *     by the corresponding html entities.  Quotes are left alone, so the
 *     result is suitable for element content but not for attribute values.
 */
function PR_textToHtml( str ) {
  // '&' must be escaped first so that the entities produced by the later
  // replacements are not escaped a second time.
  return str.replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/\xa0/g, '&nbsp;');
}
| |
/**
 * does innerHTML of the given node come back without entity escaping?
 * That is the case for XMP elements only.
 */
function PR_isRawContent( node ) {
  var tag = node.tagName;
  return tag == 'XMP';
}
| |
/**
 * caches whether the browser's innerHTML escapes text properly inside PRE.
 * null until the first call to PR_getInnerHtml has probed the browser.
 */
var PR_innerHtmlWorks = null;

/**
 * returns the html escaped content of node.
 * @param node a DOM element.
 * @return {String} the markup of the children of node, with the text
 *     entity escaped, even when node is an XMP element or the browser's own
 *     innerHTML cannot be trusted.
 */
function PR_getInnerHtml( node ) {
  // innerHTML is hopelessly broken in Safari 2.0.4 when the content is an
  // html description of well formed XML inside a PRE tag.  Probe for that
  // once: a working browser escapes every '<' of a text node.
  if ( PR_innerHtmlWorks == null ) {
    var probe = document.createElement('PRE');
    var probeText =
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />');
    probe.appendChild(probeText);
    PR_innerHtmlWorks = !/</.test(probe.innerHTML);
  }

  if ( !PR_innerHtmlWorks ) {
    // emulate innerHTML by serializing the children ourselves
    var pieces = [];
    var kid = node.firstChild;
    while ( kid ) {
      PR_normalizedHtml(kid, pieces);
      kid = kid.nextSibling;
    }
    return pieces.join('');
  }

  var html = node.innerHTML;
  // XMP tags contain unescaped entities so require special handling.
  return PR_isRawContent(node) ? PR_textToHtml(html) : html;
}
| |
/**
 * walks the DOM returning a properly escaped version of innerHTML.
 *
 * Elements are written as lower case tags with their specified attributes,
 * followed by their children.  Void elements without children get no close
 * tag.  Attribute values and text are entity escaped.  Node types other than
 * element (1), attribute (2), text (3) and CDATA (4) produce no output.
 *
 * @param node the DOM node to serialize.
 * @param {Array} out receives the pieces of markup, to be joined by the
 *     caller.
 */
function PR_normalizedHtml( node, out ) {
  // Elements that never have a close tag in html.
  var VOID_ELEMENT =
      /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/;

  switch ( node.nodeType ) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      // '\x3c' is '<'.  It is spelled as an escape so that this source can be
      // inlined in an html page; a hex escape is used because octal escapes
      // are rejected in strict mode code.
      out.push('\x3c', name);
      for ( var i = 0; i < node.attributes.length; ++i ) {
        var attr = node.attributes[i];
        // skip attributes that were not explicitly given a value
        if ( !attr.specified ) {
          continue;
        }
        out.push(' ');
        PR_normalizedHtml(attr, out);
      }
      out.push('>');
      for ( var child = node.firstChild; child; child = child.nextSibling ) {
        PR_normalizedHtml(child, out);
      }
      if ( node.firstChild || !VOID_ELEMENT.test(name) ) {
        out.push('<\/', name, '>');
      }
      break;
    case 2:  // an attribute
      out.push(node.name.toLowerCase(), '="', PR_attribToHtml(node.value), '"');
      break;
    case 3: case 4:  // text
      out.push(PR_textToHtml(node.nodeValue));
      break;
  }
}
| |
/**
 * replaces each tab with the run of spaces that reaches the next tab stop.
 *
 * The column is tracked across chunks and reset at every line break.  An
 * html entity counts as one column.  Chunks without a style, i.e. embedded
 * tags, are passed through untouched and do not advance the column.
 *
 * @param {Array} chunks PR_Tokens possibly containing tabs
 * @param {Number} tabWidth number of spaces between tab columns
 * @return {Array} chunks with tabs replaced with spaces
 */
function PR_expandTabs( chunks, tabWidth ) {
  var BLANKS = ' ';

  // number of columns already occupied on the current line
  var column = 0;
  var decoder = new PR_DecodeHelper();

  var expanded = [];
  for ( var c = 0; c < chunks.length; ++c ) {
    var chunk = chunks[c];
    if ( chunk.style == null ) {
      expanded.push(chunk);
      continue;
    }

    var text = chunk.token;
    // start of the text that has not been copied to pieces yet
    var copiedUpTo = 0;
    var pieces = [];

    var at = 0;
    var len = text.length;
    while ( at < len ) {
      decoder.decode(text, at);
      var ch = decoder.ch;

      if ( ch === '\t' ) {
        pieces.push(text.substring(copiedUpTo, at));
        // distance to the next column that is a multiple of tabWidth
        var padding = tabWidth - (column % tabWidth);
        column += padding;
        for ( var left = padding; left >= 0; left -= BLANKS.length ) {
          pieces.push(BLANKS.substring(0, left));
        }
        copiedUpTo = decoder.next;
      }
      else if ( ch === '\n' || ch === '\r' ) {
        column = 0;
      }
      else {
        ++column;
      }
      at = decoder.next;
    }
    pieces.push(text.substring(copiedUpTo));
    expanded.push(new PR_Token(pieces.join(''), chunk.style));
  }
  return expanded;
}
| |
/**
 * divides markup into html tags, which get a null style, and the text
 * between them, which gets the style {@link #PR_PLAIN}.
 *
 * @param {String} s html.
 * @return {Array} of PR_Tokens of style PR_PLAIN, and null.
 * @private
 */
function PR_chunkify( s ) {
  // Each match of the pattern is one of
  // (1) /[^<]+/ : a run of characters other than '<'
  // (2) /<\/?[a-zA-Z][^>]*>/ : probably a tag, which should not be highlighted
  // (3) /</ : a lone '<' that starts nothing larger.  Handled like (1)
  // With the 'g' modifier and no capturing groups, match returns every piece.
  var pieces = s.match(/(?:[^<]+|<\/?[a-zA-Z][^>]*>|<)/g);
  var chunks = [];
  if ( !pieces ) {
    return chunks;
  }

  var previous = null;
  for ( var p = 0, count = pieces.length; p < count; ++p ) {
    var text = pieces[p];
    var looksLikeTag = text.length >= 2 && text.charAt(0) === '<';
    if ( looksLikeTag ) {
      previous = new PR_Token(text, null);
      chunks.push(previous);
    }
    else if ( previous && previous.style === PR_PLAIN ) {
      // adjacent runs of text are folded into a single chunk
      previous.token += text;
    }
    else {
      previous = new PR_Token(text, PR_PLAIN);
      chunks.push(previous);
    }
  }
  return chunks;
}
| |
/** walk the tokenEnds list and the chunk list in parallel to generate a list
  * of split tokens.
  *
  * Positions in tokenEnds are measured in "absolute space", i.e. in the
  * string formed by concatenating the text of all chunks.  A token that spans
  * several chunks is emitted as several pieces.  Pieces of chunks that have a
  * null style keep the null style, other pieces get the style of the token
  * end that covers them.
  *
  * @param {Array} chunks PR_Tokens.
  * @param {Array} tokenEnds PR_TokenEnds in ascending order of end, none of
  *     which lies past the end of the concatenated chunk text.
  * @return {Array} of PR_Tokens.
  * @private
  */
function PR_splitChunks( chunks, tokenEnds ) {
  var tokens = [];
  // the output

  var ci = 0;
  // index into chunks
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;

  // current chunk
  var chunk = new PR_Token('', null);

  // end of the last region that was processed.  This used to stay at 0
  // forever, so only regions ending at 0 were recognized as empty.
  var lastEnd = 0;

  for ( var ei = 0, ne = tokenEnds.length; ei < ne; ++ei ) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;
    if ( end === lastEnd ) {
      continue;
    }  // skip empty regions
    lastEnd = end;

    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    // emit the rest of every chunk that ends inside the current token
    while ( remainingInChunk <= tokLen ) {
      if ( remainingInChunk > 0 ) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      if ( ci >= chunks.length ) {
        // Out of chunks, so there is nothing left to emit.  Without this
        // exit the loop spins forever when the final chunk is empty.
        chunk = new PR_Token('', null);
        tokLen = 0;
        break;
      }
      chunk = chunks[ci++];

      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }

    // the token ends inside the current chunk: emit the covered part of it
    if ( tokLen ) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }

  return tokens;
}
| |
/** splits markup tokens into declarations, tags, and source chunks.
  *
  * @param {Array} chunks PR_Tokens.  Only chunks of style PR_PLAIN are
  *     examined, the others merely advance the position.
  * @return {Array} of PR_TokenEnds in absolute space.  For every recognized
  *     construct there is one end of style PR_PLAIN at its start and one end
  *     of style PR_DECLARATION, PR_COMMENT, PR_TAG or PR_SOURCE at its end.
  *     The list is terminated by a PR_PLAIN end at the total length.
  * @private
  */
function PR_splitMarkup( chunks ) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by i, to generate a list of
  // the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.

  // Known failure cases
  // Server side scripting sections such as <?...?> in attributes.
  // i.e. <span class="<? foo ?>">
  // Handling this would require a stack, and we don't use PHP.

  // The output: a list of PR_TokenEnd instances
  var tokenEnds = [];

  // FSM state variable
  //  0      : in text
  //  1      : just saw '<'
  //  2      : just saw '<!'
  //  3      : in a declaration such as <!DOCTYPE ...>, until '>'
  //  4, 5, 6: in a comment, tracking the dashes seen toward the closing '-->'
  //  7      : just saw '</'
  //  8      : in a tag, until '>'
  //  9, 10  : in a <? ... ?> section; 10 means a '?' was just seen
  //  11, 12 : in a <% ... %> section; 12 means a '%' was just seen
  var state = 0;
  // position in absolute space of the start of the current chunk
  var k = 0;
  // the start of the current token
  var tokenStart = -1;

  // Try to find a closing tag for any open <style> or <script> tags
  // We can't do this at a later stage because then the following case
  // would fail:
  // <script>document.writeln('<!--');</script>

  // We use tokenChars[:tokenCharsI] to accumulate the tag name so that we
  // can check whether to enter into a no scripting section when the tag ends.
  var tokenChars = new Array(12);
  var tokenCharsI = 0;
  // if non null, the tag prefix that we need to see to break out.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  for ( var ci = 0, nc = chunks.length; ci < nc; ++ci ) {
    var chunk = chunks[ci];
    if ( PR_PLAIN != chunk.style ) {
      k += chunk.token.length;
      continue;
    }

    var s = chunk.token;

    for ( var i = 0, n = s.length; i < n; /* i = next at bottom */ ) {
      decodeHelper.decode(s, i);
      var ch = decodeHelper.ch;
      var next = decodeHelper.next;

      // set to a style when ch completes a token
      var tokenStyle = null;
      switch ( state ) {
        case 0:
          if ( '<' == ch ) {
            state = 1;
          }
          break;
        case 1:
          tokenCharsI = 0;
          if ( '/' == ch ) {  // only consider close tags if we're in script/style
            state = 7;
          }
          else if ( null == endScriptTag ) {
            if ( '!' == ch ) {
              state = 2;
            }
            else if ( PR_isWordChar(ch) ) {
              state = 8;
            }
            else if ( '?' == ch ) {
              state = 9;
            }
            else if ( '%' == ch ) {
              state = 11;
            }
            else if ( '<' != ch ) {
              state = 0;
            }
          }
          else if ( '<' != ch ) {
            state = 0;
          }
          break;
        case 2:
          if ( '-' == ch ) {
            state = 4;
          }
          else if ( PR_isWordChar(ch) ) {
            state = 3;
          }
          else if ( '<' == ch ) {
            state = 1;
          }
          else {
            state = 0;
          }
          break;
        case 3:
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_DECLARATION;
          }
          break;
        case 4:
          if ( '-' == ch ) {
            state = 5;
          }
          break;
        case 5:
          if ( '-' == ch ) {
            state = 6;
          }
          break;
        case 6:
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_COMMENT;
          }
          else if ( '-' == ch ) {
            state = 6;
          }
          else {
            state = 4;
          }
          break;
        case 7:
          if ( PR_isWordChar(ch) ) {
            state = 8;
          }
          else if ( '<' == ch ) {
            state = 1;
          }
          else {
            state = 0;
          }
          break;
        case 8:
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_TAG;
          }
          break;
        case 9:
          if ( '?' == ch ) {
            state = 10;
          }
          break;
        case 10:
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_SOURCE;
          }
          else if ( '?' != ch ) {
            state = 9;
          }
          break;
        case 11:
          if ( '%' == ch ) {
            state = 12;
          }
          break;
        case 12:
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_SOURCE;
          }
          else if ( '%' != ch ) {
            state = 11;
          }
          break;
      }

      // remember the first few characters after '<', lower cased
      if ( tokenCharsI < tokenChars.length ) {
        tokenChars[tokenCharsI++] = ch.toLowerCase();
      }
      // while in state 1, ch is a '<' that may start a token
      if ( 1 == state ) {
        tokenStart = k + i;
      }
      i = next;
      if ( tokenStyle != null ) {
        if ( endScriptTag ) {
          if ( PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag) ) {
            endScriptTag = null;
          }
        }
        else {
          if ( PR_prefixMatch(tokenChars, tokenCharsI, 'script') ) {
            endScriptTag = '/script';
          }
          else if ( PR_prefixMatch(tokenChars, tokenCharsI, 'style') ) {
            endScriptTag = '/style';
          }
          else if ( PR_prefixMatch(tokenChars, tokenCharsI, 'xmp') ) {
            endScriptTag = '/xmp';
          }
        }
        // disallow the tag if endScriptTag is still set and this was a close
        // tag, i.e. a close tag other than the one that is being waited for.
        if ( endScriptTag && tokenCharsI && '/' == tokenChars[0] ) {
          tokenStyle = null;
        }
        if ( null != tokenStyle ) {
          tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));
          tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));
        }
      }
    }
    k += chunk.token.length;
  }
  tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));

  return tokenEnds;
}
| |
/**
 * divides the text of the given chunks into comments, string literals, and
 * everything else.
 *
 * Strings are delimited by a double quote, a single quote or a back quote
 * and may contain backslash escapes.  Comments run from // or # to the end of
 * the line, or from a slash-star to the matching star-slash.
 *
 * @param {Array} chunks PR_Tokens.  Only chunks of style PR_PLAIN are lexed.
 * @return {Array} of PR_Tokens with style in
 *   (PR_STRING, PR_COMMENT, PR_PLAIN, null)
 *   The result array may contain spurious zero length tokens.  Ignore them.
 *
 * @private
 */
function PR_splitStringAndCommentTokens( chunks ) {
  // positions, in absolute space, at which tokens end
  var ends = [];
  // state of the lexer, which is kept across chunk boundaries
  //   0: plain text            1: in a string         2: after '\' in string
  //   3: just saw '/'          4: in a line comment
  //   5: in a block comment    6: in a block comment, just saw '*'
  var mode = 0;
  // the character that closes the string that is being read
  var quote = -1;
  // absolute position at which the current chunk starts
  var base = 0;

  for ( var c = 0, chunkCount = chunks.length; c < chunkCount; ++c ) {
    var chunk = chunks[c];
    var text = chunk.token;
    if ( PR_PLAIN == chunk.style ) {
      var decoder = new PR_DecodeHelper();
      // index of the previously examined character
      var prev = -1;
      var following;
      for ( var at = 0, len = text.length; at < len;
            prev = at, at = following ) {
        decoder.decode(text, at);
        var ch = decoder.ch;
        following = decoder.next;
        switch ( mode ) {
          case 0:
            if ( ch == '"' || ch == '\'' || ch == '`' ) {
              ends.push(new PR_TokenEnd(base + at, PR_PLAIN));
              mode = 1;
              quote = ch;
            }
            else if ( ch == '/' ) {
              mode = 3;
            }
            else if ( ch == '#' ) {
              ends.push(new PR_TokenEnd(base + at, PR_PLAIN));
              mode = 4;
            }
            break;
          case 1:
            if ( ch == quote ) {
              mode = 0;
              ends.push(new PR_TokenEnd(base + following, PR_STRING));
            }
            else if ( ch == '\\' ) {
              mode = 2;
            }
            break;
          case 2:
            // whatever follows a backslash is part of the string
            mode = 1;
            break;
          case 3:
            if ( ch == '/' ) {
              mode = 4;
              // the comment started at the preceding slash
              ends.push(new PR_TokenEnd(base + prev, PR_PLAIN));
            }
            else if ( ch == '*' ) {
              mode = 5;
              ends.push(new PR_TokenEnd(base + prev, PR_PLAIN));
            }
            else {
              mode = 0;
              // Do not advance, so that the next iteration looks at this
              // same character again, as the possible start of a new token.
              following = at;
            }
            break;
          case 4:
            if ( ch == '\r' || ch == '\n' ) {
              mode = 0;
              ends.push(new PR_TokenEnd(base + at, PR_COMMENT));
            }
            break;
          case 5:
            if ( ch == '*' ) {
              mode = 6;
            }
            break;
          case 6:
            if ( ch == '/' ) {
              mode = 0;
              ends.push(new PR_TokenEnd(base + following, PR_COMMENT));
            }
            else if ( ch != '*' ) {
              mode = 5;
            }
            break;
        }
      }
    }
    base += text.length;
  }

  // A token ends at the end of the input.  It is unclosed when the lexer is
  // still inside a string or comment, which is legal for line comments.
  var finalStyle;
  if ( mode == 1 || mode == 2 ) {
    finalStyle = PR_STRING;
  }
  else if ( mode == 4 || mode == 5 || mode == 6 ) {
    finalStyle = PR_COMMENT;
  }
  else {
    finalStyle = PR_PLAIN;
  }
  ends.push(new PR_TokenEnd(base, finalStyle));

  return PR_splitChunks(chunks, ends);
}
| |
/** used by lexSource to split a non string, non comment token.
  *
  * The text is divided into runs of whitespace, identifiers, number literals
  * and punctuation.  Identifiers are classified as keywords, literals
  * (when they start with '@'), types or plain text.
  *
  * @param {String} s the html escaped text to split.
  * @param {Array} outlist receives the resulting PR_Tokens.
  * @private
  */
function PR_splitNonStringNonCommentToken( s, outlist ) {
  // start of the token that is being read
  var pos = 0;
  // 0: whitespace, 1: identifier, 2: number literal, 3: punctuation
  var state = 0;
  // the next state.
  // if set to -1 then it will cause a reentry to state 0 without consuming
  // another character.
  var nstate;
  var t;

  var decodeHelper = new PR_DecodeHelper();
  var next;
  for ( var i = 0; i <= s.length; i = next ) {
    if ( i == s.length ) {
      // nstate will not be equal to state, so it will append the token
      nstate = -2;
      next = i + 1;
    }
    else {
      decodeHelper.decode(s, i);
      next = decodeHelper.next;
      var ch = decodeHelper.ch;

      nstate = state;

      switch ( state ) {
        case 0:  // whitespace state
          if ( PR_isIdentifierStart(ch) ) {
            nstate = 1;
          }
          else if ( PR_isDigitChar(ch) ) {
            nstate = 2;
          }
          else if ( !PR_isSpaceChar(ch) ) {
            nstate = 3;
          }
          // leaving whitespace: emit the run of whitespace that was read
          if ( nstate && pos < i ) {
            t = s.substring(pos, i);
            outlist.push(new PR_Token(t, PR_PLAIN));
            pos = i;
          }
          break;
        case 1:  // identifier state
          if ( !PR_isIdentifierPart(ch) ) {
            nstate = -1;
          }
          break;
        case 2:  // number literal state
          // handle numeric literals like
          // 0x7f 300UL 100_000

          // this does not treat floating point values as a single literal
          //   0.1 and 3e-6
          // are each split into multiple tokens
          if ( !(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_') ) {
            nstate = -1;
          }
          break;
        case 3:  // punctuation state
          if ( PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||
               PR_isSpaceChar(ch) ) {
            nstate = -1;
          }
          break;
      }
    }

    if ( nstate != state ) {
      if ( nstate < 0 ) {
        if ( i > pos ) {
          t = s.substring(pos, i);
          var wordDecodeHelper = new PR_DecodeHelper();
          wordDecodeHelper.decode(t, 0);
          var ch0 = wordDecodeHelper.ch;
          var isSingleCharacter = wordDecodeHelper.next == t.length;
          var style;
          if ( PR_isIdentifierStart(ch0) ) {
            // Only own properties count.  A plain lookup also finds the
            // members of Object.prototype, which styled identifiers such as
            // constructor, toString and valueOf as keywords.
            if ( Object.prototype.hasOwnProperty.call(PR_keywords, t) &&
                 PR_keywords[t] ) {
              style = PR_KEYWORD;
            }
            else if ( ch0 === '@' ) {
              style = PR_LITERAL;
            }
            else {
              // Treat any word that starts with an uppercase character and
              // contains at least one lowercase character as a type, or
              // ends with _t.
              // This works perfectly for Java, pretty well for C++, and
              // passably for Python.  The _t catches C structs.
              var isType = false;
              if ( ch0 >= 'A' && ch0 <= 'Z' ) {
                for ( var j = wordDecodeHelper.next;
                      j < t.length; j = wordDecodeHelper.next ) {
                  wordDecodeHelper.decode(t, j);
                  var ch1 = wordDecodeHelper.ch;
                  if ( ch1 >= 'a' && ch1 <= 'z' ) {
                    isType = true;
                    break;
                  }
                }
                if ( !isType && !isSingleCharacter &&
                     t.substring(t.length - 2) == '_t' ) {
                  isType = true;
                }
              }
              style = isType ? PR_TYPE : PR_PLAIN;
            }
          }
          else if ( PR_isDigitChar(ch0) ) {
            style = PR_LITERAL;
          }
          else if ( !PR_isSpaceChar(ch0) ) {
            style = PR_PUNCTUATION;
          }
          else {
            style = PR_PLAIN;
          }
          pos = i;
          outlist.push(new PR_Token(t, style));
        }

        state = 0;
        if ( nstate == -1 ) {
          // don't increment.  This allows us to use state 0 to redispatch based
          // on the current character.
          next = i;
          continue;
        }
      }
      state = nstate;
    }
  }

}
| |
/**
 * splits chunks of markup into declaration, comment, tag, source and plain
 * text tokens.
 * @param {Array} chunks PR_Tokens.
 * @return {Array} of PR_Tokens.  When chunks is empty or missing, it is
 *     returned as is.
 * @private
 */
function PR_tokenizeMarkup( chunks ) {
  var nothingToDo = !chunks || !chunks.length;
  if ( nothingToDo ) {
    return chunks;
  }
  return PR_splitChunks(chunks, PR_splitMarkup(chunks));
}
| |
/**
 * breaks tokens of style PR_TAG into the tag proper, attribute names and
 * attribute values.  Tokens of other styles are copied to the output.
 *
 * @param {Array} tokens PR_Tokens.
 * @return {Array} of PR_Tokens in which every PR_TAG token is replaced by
 *     pieces of style PR_TAG, PR_ATTRIB_NAME and PR_ATTRIB_VALUE.
 * @private
 */
function PR_splitTagAttributes( tokens ) {
  var result = [];
  // where in a tag the scanner is
  //   0: before '<'                 1: in the tag name
  //   2: in space before an attribute name
  //   3: in an attribute name       4: in space after an attribute name
  //   5: after '='                  6: in a quoted value
  //   7: in an unquoted value
  var mode = 0;
  // style of the piece that is being read
  var pieceStyle = PR_TAG;
  // the quote character that closes the value, while mode is 6
  var quote = null;
  var decoder = new PR_DecodeHelper();

  for ( var t = 0; t < tokens.length; ++t ) {
    var token = tokens[t];
    if ( PR_TAG != token.style ) {
      // a styled token between tags restarts the scanner, an unstyled one
      // (an embedded html tag) does not
      if ( token.style ) {
        mode = 0;
        pieceStyle = PR_TAG;
      }
      result.push(token);
      continue;
    }

    var text = token.token;
    // start of the text that has not been emitted yet
    var emitted = 0;
    var at = 0;
    while ( at < text.length ) {
      decoder.decode(text, at);
      var ch = decoder.ch;
      var after = decoder.next;

      // null or the position at which the current piece ends
      var cut = null;
      // null or the style of the piece that starts at cut
      var styleAfterCut = null;
      if ( ch == '>' ) {
        if ( PR_TAG != pieceStyle ) {
          cut = at;
          styleAfterCut = PR_TAG;
        }
      }
      else {
        switch ( mode ) {
          case 0:
            if ( '<' == ch ) {
              mode = 1;
            }
            break;
          case 1:
            if ( PR_isSpaceChar(ch) ) {
              mode = 2;
            }
            break;
          case 2:
            if ( !PR_isSpaceChar(ch) ) {
              styleAfterCut = PR_ATTRIB_NAME;
              cut = at;
              mode = 3;
            }
            break;
          case 3:
            if ( '=' == ch ) {
              cut = at;
              styleAfterCut = PR_TAG;
              mode = 5;
            }
            else if ( PR_isSpaceChar(ch) ) {
              cut = at;
              styleAfterCut = PR_TAG;
              mode = 4;
            }
            break;
          case 4:
            if ( '=' == ch ) {
              mode = 5;
            }
            else if ( !PR_isSpaceChar(ch) ) {
              cut = at;
              styleAfterCut = PR_ATTRIB_NAME;
              mode = 3;
            }
            break;
          case 5:
            if ( '"' == ch || '\'' == ch ) {
              cut = at;
              styleAfterCut = PR_ATTRIB_VALUE;
              mode = 6;
              quote = ch;
            }
            else if ( !PR_isSpaceChar(ch) ) {
              cut = at;
              styleAfterCut = PR_ATTRIB_VALUE;
              mode = 7;
            }
            break;
          case 6:
            if ( ch == quote ) {
              // the closing quote belongs to the value
              cut = after;
              styleAfterCut = PR_TAG;
              mode = 2;
            }
            break;
          case 7:
            if ( PR_isSpaceChar(ch) ) {
              cut = at;
              styleAfterCut = PR_TAG;
              mode = 2;
            }
            break;
        }
      }
      // NOTE: a cut at position 0 is falsy and is therefore ignored, exactly
      // like a null cut.
      if ( cut ) {
        if ( cut > emitted ) {
          result.push(new PR_Token(text.substring(emitted, cut), pieceStyle));
          emitted = cut;
        }
        pieceStyle = styleAfterCut;
      }
      at = after;
    }
    if ( text.length > emitted ) {
      result.push(
          new PR_Token(text.substring(emitted, text.length), pieceStyle));
    }
  }
  return result;
}
| |
/**
 * Identify regions of markup that are really source code, and recursively
 * lex them.
 *
 * A source region starts either at a PR_SOURCE token that begins with
 * "<%" or "<?", or after a PR_TAG token that opens a script, style or xmp
 * element.  The PR_PLAIN / PR_SOURCE tokens inside the region are collected,
 * passed through PR_lexSource, and emitted between two null-style tokens
 * holding '<span class=embsrc>' and '</span>'.  A region ends at a trailing
 * "%>" / "?>" in a PR_SOURCE token, at a PR_TAG token starting with "</", or
 * at the end of the input (so unclosed regions are still emitted).
 *
 * Null-style tokens are passed through in document order: they are appended
 * to the pending source chunks when a region is being collected, and to the
 * output otherwise.  The input array is never modified.
 *
 * @param {Array} tokens list of PR_Token objects.
 * @return {Array} a new list of PR_Token objects.
 * @private
 */
function PR_splitSourceNodes(tokens) {
  var tokensOut = [];
  // While inside a source region this holds the character that identifies the
  // end of the region: '/' after a <script>, <style> or <xmp> tag, or
  // '%' / '?' after a "<%" / "<?" opener.  null outside of source regions.
  var endScriptTag = null;
  // NOTE(review): PR_DecodeHelper is defined elsewhere; from its use here,
  // decode(s, i) returns the character at i (presumably entity-decoded) and
  // leaves that character in .ch and the index following it in .next.
  var decodeHelper = new PR_DecodeHelper();

  // tokens collected for the current source region, or null if none yet.
  var sourceChunks = null;

  for (var ci = 0, nc = tokens.length; /* break below */; ++ci) {
    var tok;

    if (ci < nc) {
      tok = tokens[ci];
      if (null == tok.style) {
        // Keep unstyled tokens in document order.  (This used to push onto
        // the input array, which dropped them from the result.)
        (sourceChunks || tokensOut).push(tok);
        continue;
      }
    } else if (!endScriptTag) {
      break;
    } else {
      // else pretend there's an end tag so we can gracefully handle
      // unclosed source blocks
      tok = new PR_Token('', null);
    }

    var s = tok.token;

    if (null == endScriptTag) {
      if (PR_SOURCE == tok.style) {
        // split off a leading <? or <% and remember which one it was
        if ('<' == decodeHelper.decode(s, 0)) {
          decodeHelper.decode(s, decodeHelper.next);
          if ('%' == decodeHelper.ch || '?' == decodeHelper.ch) {
            endScriptTag = decodeHelper.ch;
            tokensOut.push(
                new PR_Token(s.substring(0, decodeHelper.next), PR_TAG));
            s = s.substring(decodeHelper.next, s.length);
          }
        }
      } else if (PR_TAG == tok.style) {
        if ('<' == decodeHelper.decode(s, 0) &&
            '/' != s.charAt(decodeHelper.next)) {
          var tagContent = s.substring(decodeHelper.next).toLowerCase();
          // FIXME(msamuel): this does not mirror exactly the code in
          // in PR_splitMarkup that defers splitting tags inside script and
          // style blocks.
          if (PR_startsWith(tagContent, 'script') ||
              PR_startsWith(tagContent, 'style') ||
              PR_startsWith(tagContent, 'xmp')) {
            endScriptTag = '/';
          }
        }
      }
    }

    if (null != endScriptTag) {
      // set when the current token terminates the source region
      var endTok = null;
      if (PR_SOURCE == tok.style) {
        if (endScriptTag == '%' || endScriptTag == '?') {
          // split off a trailing %> or ?> if the token ends with one
          var pos = s.lastIndexOf(endScriptTag);
          if (pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&
              s.length == decodeHelper.next) {
            endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);
            s = s.substring(0, pos);
          }
        }
        if (null == sourceChunks) {
          sourceChunks = [];
        }
        sourceChunks.push(new PR_Token(s, PR_PLAIN));
      } else if (PR_PLAIN == tok.style) {
        if (null == sourceChunks) {
          sourceChunks = [];
        }
        sourceChunks.push(tok);
      } else if (PR_TAG == tok.style) {
        // if it starts with </ then it must be the end tag.
        if ('<' == decodeHelper.decode(tok.token, 0) &&
            tok.token.length > decodeHelper.next &&
            '/' == decodeHelper.decode(tok.token, decodeHelper.next)) {
          endTok = tok;
        } else {
          tokensOut.push(tok);
        }
      } else if (ci >= nc) {
        // the synthetic token created above: force the region to close
        endTok = tok;
      } else {
        if (sourceChunks) {
          sourceChunks.push(tok);
        } else {
          // push remaining tag and attribute tokens from the opening tag
          tokensOut.push(tok);
        }
      }
      if (endTok) {
        if (sourceChunks) {
          var sourceTokens = PR_lexSource(sourceChunks);
          tokensOut.push(new PR_Token('<span class=embsrc>', null));
          for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
            tokensOut.push(sourceTokens[si]);
          }
          tokensOut.push(new PR_Token('</span>', null));
          sourceChunks = null;
        }
        // the synthetic end token has an empty string and is not emitted
        if (endTok.token) {
          tokensOut.push(endTok);
        }
        endScriptTag = null;
      }
    } else {
      tokensOut.push(tok);
    }
  }
  return tokensOut;
}
| |
/**
 * Splits the quotes from an attribute value.
 *   ['"foo"'] -> ['"', 'foo', '"']
 *
 * The opening quote is looked for at the start of the first PR_PLAIN token
 * and the closing quote at the end of the last PR_PLAIN token.  Quote tokens
 * are emitted with style PR_ATTRIB_VALUE and the text between them with style
 * PR_PLAIN; all other tokens are copied through in their original order.
 * If there is no PR_PLAIN token, or the first one does not start with a
 * single or double quote, the input list itself is returned.
 *
 * The input array is never modified.
 *
 * @param {Array} tokens list of PR_Token objects.
 * @return {Array} list of PR_Token objects.
 * @private
 */
function PR_splitAttributeQuotes(tokens) {
  var firstPlain = null, lastPlain = null;
  for (var i = 0; i < tokens.length; ++i) {
    if (PR_PLAIN == tokens[i].style) {
      firstPlain = i;
      break;
    }
  }
  for (var i = tokens.length; --i >= 0;) {
    if (PR_PLAIN == tokens[i].style) {
      lastPlain = i;
      break;
    }
  }
  if (null == firstPlain) {
    return tokens;
  }

  // NOTE(review): PR_DecodeHelper is defined elsewhere; from its use here,
  // decode(s, i) returns the character at i (presumably entity-decoded) and
  // leaves the index following it in .next.
  var decodeHelper = new PR_DecodeHelper();
  var fs = tokens[firstPlain].token;
  var fc = decodeHelper.decode(fs, 0);
  if ('"' != fc && '\'' != fc) {
    return tokens;
  }
  // index just past the opening quote
  var fpos = decodeHelper.next;

  var ls = tokens[lastPlain].token;
  // the closing quote is either the last character or a trailing entity
  var lpos = ls.lastIndexOf('&');
  if (lpos < 0) {
    lpos = ls.length - 1;
  }
  var lc = decodeHelper.decode(ls, lpos);
  if (lc != fc || decodeHelper.next != ls.length ||
      // a lone quote must not be counted as both opening and closing quote
      (lastPlain == firstPlain && lpos < fpos)) {
    lc = null;
    lpos = ls.length;
  }

  var tokensOut = [];
  for (var i = 0; i < firstPlain; ++i) {
    tokensOut.push(tokens[i]);
  }
  tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE));
  if (lastPlain == firstPlain) {
    tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN));
  } else {
    tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN));
    for (var i = firstPlain + 1; i < lastPlain; ++i) {
      tokensOut.push(tokens[i]);
    }
    // These two pushes used to target the input array, which both mutated
    // the caller's list and emitted the token after the trailing tokens.
    if (lc) {
      tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
    } else {
      tokensOut.push(tokens[lastPlain]);
    }
  }
  if (lc) {
    // styled like the opening quote so that it is not re-lexed as source
    tokensOut.push(
        new PR_Token(ls.substring(lpos, ls.length), PR_ATTRIB_VALUE));
  }
  for (var i = lastPlain + 1; i < tokens.length; ++i) {
    tokensOut.push(tokens[i]);
  }
  return tokensOut;
}
| |
/**
 * Identify attribute values that really contain source code and recursively
 * lex them.
 *
 * Attribute names are accumulated from PR_ATTRIB_NAME tokens.  When a PR_TAG
 * token containing '=' follows a name that starts with "on" or equals
 * "style" (case-insensitively), the following PR_ATTRIB_VALUE tokens are
 * collected as source, restyled as PR_PLAIN.  At the next PR_TAG token, or at
 * the end of the input, the collected chunks are passed through
 * PR_splitAttributeQuotes and PR_lexSource and emitted between two null-style
 * tokens holding '<span class=embsrc>' and '</span>'.
 *
 * @param {Array} tokens list of PR_Token objects.
 * @return {Array} a new list of PR_Token objects.
 * @private
 */
function PR_splitSourceAttributes(tokens) {
  var tokensOut = [];

  // chunks of the attribute value currently being collected, or null
  var sourceChunks = null;
  // true between the '=' of a source attribute and the next tag token
  var inSource = false;
  // name of the attribute currently being read
  var name = '';

  // Lexes the collected chunks, if any, and appends them to the output.
  function flushSourceChunks() {
    if (!sourceChunks) {
      return;
    }
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    var sourceTokens = PR_lexSource(PR_splitAttributeQuotes(sourceChunks));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    // list that receives tok at the end of this iteration
    var outList = tokensOut;
    if (PR_TAG == tok.style) {
      if (inSource) {
        // a tag token ends the attribute value
        inSource = false;
        name = '';
        flushSourceChunks();
      } else if (name && tok.token.indexOf('=') >= 0) {
        var nameLower = name.toLowerCase();
        if (PR_startsWith(nameLower, 'on') || 'style' == nameLower) {
          inSource = true;
        }
      } else {
        name = '';
      }
    } else if (PR_ATTRIB_NAME == tok.style) {
      name += tok.token;
    } else if (PR_ATTRIB_VALUE == tok.style) {
      if (inSource) {
        if (null == sourceChunks) {
          sourceChunks = [];
        }
        outList = sourceChunks;
        // restyled so that the value is treated as source text
        tok = new PR_Token(tok.token, PR_PLAIN);
      }
    } else {
      // other tokens inside a value being collected stay with the value
      if (sourceChunks) {
        outList = sourceChunks;
      }
    }
    outList.push(tok);
  }
  // Do not lose a value that is still pending when the input ends without a
  // closing tag token.
  flushSourceChunks();
  return tokensOut;
}
| |
/**
 * Lexes chunks of source code into a list of PR_Token objects.
 *
 * According to the original notes, ", ', and ` are treated as string
 * delimiters and \ as a string escape.  Perl's qq() style strings are not
 * recognized, and there is no special handling for doubled delimiter escapes
 * as in Basic or the tripled delimiters used in Python; those should still
 * work, although a single string literal may then be broken up into multiple
 * adjacent string literals.
 *
 * C, C++, and shell style comments are recognized.
 *
 * @param chunks PR_Tokens with style in (null, PR_PLAIN)
 * @return a new list of PR_Tokens
 */
function PR_lexSource(chunks) {
  // First pass: separate strings and comments from everything else.  They
  // are easy to recognize and may contain text that looks like other tokens,
  // so they are marked early and not examined any further.
  var coarseTokens = PR_splitStringAndCommentTokens(chunks);

  // Second pass: only the remaining plain text is split up further; every
  // other token is copied through unchanged.
  var result = [];
  for (var idx = 0, count = coarseTokens.length; idx < count; ++idx) {
    var current = coarseTokens[idx];
    if (PR_PLAIN === current.style) {
      PR_splitNonStringNonCommentToken(current.token, result);
    } else {
      result.push(current);
    }
  }

  return result;
}
| |
/**
 * Lexes markup into a list of PR_Token objects.
 *
 * According to the original notes, the input is expected to have its "<"
 * characters html escaped while double quotes are not escaped; an escaped
 * double quote is not recognized as starting a string.
 *
 * Constructs listed as recognized:
 *   <!-- ... -->    comment
 *   <!\w ... >      declaration
 *   <\w ... >       tag
 *   </\w ... >      tag
 *   <?...?>         embedded source
 *   &[#\w]...;      entity
 *
 * %foo; entities are not recognized.
 *
 * Script, style and xmp element bodies, and on* and style attribute values,
 * are handed to PR_lexSource by the later passes.
 *
 * @param chunks the chunks to lex, as accepted by PR_tokenizeMarkup
 * @return a list of PR_Tokens
 */
function PR_lexMarkup(chunks) {
  // The markup is refined by four passes, each consuming the token list
  // produced by the previous one:
  //   1) PR_tokenizeMarkup        initial tokenization of the chunks
  //   2) PR_splitTagAttributes    NOTE(review): presumably splits attribute
  //                               names and values out of tag tokens
  //   3) PR_splitSourceNodes      lexes the bodies of embedded source
  //                               regions such as <script> elements
  //   4) PR_splitSourceAttributes lexes on* and style attribute values
  var markupTokens = PR_tokenizeMarkup(chunks);
  var withAttributes = PR_splitTagAttributes(markupTokens);
  var withSourceNodes = PR_splitSourceNodes(withAttributes);
  return PR_splitSourceAttributes(withSourceNodes);
}
| |
/**
 * Classifies the given html as either source code or markup and runs the
 * matching lexer over it.
 *
 * The content counts as markup when the first PR_PLAIN chunk starts with
 * '<' and the last PR_PLAIN chunk ends with '>', ignoring whatever PR_trim
 * strips from them.
 *
 * @param {String} html
 * @return the tokens produced by PR_lexMarkup or PR_lexSource
 */
function PR_lexOne(html) {
  var chunks = PR_expandTabs(PR_chunkify(html), PR_TAB_WIDTH);
  var total = chunks.length;

  // locate the first plain chunk, if there is one
  var head = 0;
  while (head < total && PR_PLAIN != chunks[head].style) {
    ++head;
  }

  var isMarkup = false;
  if (head < total && PR_startsWith(PR_trim(chunks[head].token), '<')) {
    // Walk back from the end to the last plain chunk.  The chunk at head is
    // plain, so this stops there at the latest.
    var tail = total - 1;
    while (PR_PLAIN != chunks[tail].style) {
      --tail;
    }
    isMarkup = PR_endsWith(PR_trim(chunks[tail].token), '>');
  }

  if (isMarkup) {
    return PR_lexMarkup(chunks);
  }
  return PR_lexSource(chunks);
}
| |
/**
 * Pretty print a chunk of code.
 *
 * The code is lexed with PR_lexOne and every run of tokens sharing a
 * non-null style is wrapped in <span class=STYLE>...</span>.  Tokens with a
 * null style are emitted verbatim.  If anything throws, the error is logged
 * to the console when one is available and the input is returned unchanged.
 *
 * @param s code as html
 * @return code as html, but prettier
 */
function prettyPrintOne(s) {
  try {
    var tokens = PR_lexOne(s);
    var out = [];
    // style of the span that is currently open, or null if none is open
    var lastStyle = null;
    for (var i = 0; i < tokens.length; i++) {
      var t = tokens[i];
      if (t.style != lastStyle) {
        if (lastStyle != null) {
          out.push('</span>');
        }
        if (t.style != null) {
          out.push('<span class=', t.style, '>');
        }
        lastStyle = t.style;
      }
      var html = t.token;
      if (null != t.style) {
        // This interacts badly with some wikis which introduces paragraph tags
        // into pre blocks for some strange reason.
        // It's necessary for IE though which seems to lose the preformattedness
        // of <pre> tags when their innerHTML is assigned.
        // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
        //
        // A space that follows a newline or another space becomes a
        // non-breaking space, spelled as an entity so that the replacement
        // does not depend on an invisible character in this file; newlines
        // then become explicit line breaks.
        html = html
            .replace(/(\r\n?|\n| ) /g, '$1&nbsp;')
            .replace(/\r\n?|\n/g, '<br>');
      }
      out.push(html);
    }
    if (lastStyle != null) {
      out.push('</span>');
    }
    return out.join('');
  } catch (e) {
    // typeof never throws, so the error handler itself cannot fail where
    // there is no console (or no window) at all.
    if (typeof console !== 'undefined') {
      console.log(e);
      if (console.trace) {
        console.trace();
      }
    }
    return s;
  }
}
| |
/**
 * Find all the <pre>, <code> and <xmp> tags in the DOM with class=prettyprint
 * and prettify them.
 *
 * Elements nested inside another prettyprint <pre>, <code> or <xmp> element
 * are skipped, since they are rewritten together with that ancestor.  The
 * work is done in slices of roughly 250ms, rescheduled with setTimeout, so
 * that rewriting a large page does not make the browser unresponsive.
 */
function prettyPrint() {
  // fetch a list of nodes to rewrite
  var codeSegments = [
      document.getElementsByTagName('pre'),
      document.getElementsByTagName('code'),
      document.getElementsByTagName('xmp') ];
  // copied into a plain array before any element is rewritten
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0; j < codeSegments[i].length; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // the loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = new Date().getTime() + 250;
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p != null; p = p.parentNode) {
          // tagName is upper case for elements of HTML documents, so the
          // comparison has to be case-insensitive; nodes such as the
          // document itself have no tagName at all.
          var tag = p.tagName ? p.tagName.toLowerCase() : '';
          if ((tag == 'pre' || tag == 'code' || tag == 'xmp') &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // fetch the content as a snippet of properly escaped HTML.
          // Firefox adds newlines at the end.
          var content = PR_getInnerHtml(cs);
          content = content.replace(/(?:\r\n?|\n)$/, '');

          // do the pretty printing
          var newContent = prettyPrintOne(content);

          // push the prettified html back into the tag.
          if (!PR_isRawContent(cs)) {
            // just replace the old html with the new
            cs.innerHTML = newContent;
          } else {
            // we need to change the tag to a <pre> since <xmp>s do not allow
            // embedded tags such as the span tags used to attach styles to
            // sections of source code.
            var pre = document.createElement('PRE');
            for (var i = 0; i < cs.attributes.length; ++i) {
              var a = cs.attributes[i];
              if (a.specified) {
                pre.setAttribute(a.name, a.value);
              }
            }
            pre.innerHTML = newContent;
            // remove the old
            cs.parentNode.replaceChild(pre, cs);
          }
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    }
  }

  doWork();
}