/* ***** BEGIN LICENSE BLOCK ***** * Distributed under the BSD license: * * Copyright (c) 2010, Ajax.org B.V. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Ajax.org B.V. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ***** END LICENSE BLOCK ***** */ define(function(require, exports, module) { "use strict"; /** * * * This class takes a set of highlighting rules, and creates a tokenizer out of them. For more information, see [the wiki on extending highlighters](https://github.com/ajaxorg/ace/wiki/Creating-or-Extending-an-Edit-Mode#wiki-extendingTheHighlighter). * @class Tokenizer **/ /** * Constructs a new tokenizer based on the given rules and flags. * @param {Object} rules The highlighting rules * @param {String} flag Any additional regular expression flags to pass (like "i" for case insensitive) * * * * * @constructor **/ var Tokenizer = function(rules, flag) { flag = flag ? "g" + flag : "g"; this.rules = rules; this.regExps = {}; this.matchMappings = {}; for ( var key in this.rules) { var rule = this.rules[key]; var state = rule; var ruleRegExps = []; var matchTotal = 0; var mapping = this.matchMappings[key] = {}; for ( var i = 0; i < state.length; i++) { if (state[i].regex instanceof RegExp) state[i].regex = state[i].regex.toString().slice(1, -1); // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; // Replace any backreferences and offset appropriately. var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) { return "\\" + (parseInt(digit, 10) + matchTotal + 1); }); if (matchcount > 1 && state[i].token.length !== matchcount-1) throw new Error("For " + state[i].regex + " the matching groups (" +(matchcount-1) + ") and length of the token array (" + state[i].token.length + ") don't match (rule #" + i + " of state " + key + ")"); mapping[matchTotal] = { rule: i, len: matchcount }; matchTotal += matchcount; ruleRegExps.push(adjustedregex); } this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", flag); } }; (function() { /** * Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state. * @returns {Object} **/ this.getLineTokens = function(line, startState) { var currentState = startState || "start"; var state = this.rules[currentState]; var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; var match, tokens = []; var lastIndex = 0; var token = { type: null, value: "" }; while (match = re.exec(line)) { var type = "text"; var rule = null; var value = [match[0]]; for (var i = 0; i < match.length-2; i++) { if (match[i + 1] === undefined) continue; rule = state[mapping[i].rule]; if (mapping[i].len > 1) value = match.slice(i+2, i+1+mapping[i].len); // compute token type if (typeof rule.token == "function") type = rule.token.apply(this, value); else type = rule.token; if (rule.next) { currentState = rule.next; state = this.rules[currentState]; mapping = this.matchMappings[currentState]; lastIndex = re.lastIndex; re = this.regExps[currentState]; if (re === undefined) { throw new Error("You indicated a state of " + rule.next + " to go to, but it doesn't exist!"); } re.lastIndex = lastIndex; } break; } if (value[0]) { if (typeof type == "string") { value = [value.join("")]; type = [type]; } for (var i = 0; i < value.length; i++) { if (!value[i]) continue; if ((!rule || rule.merge || type[i] === "text") && token.type === type[i]) { token.value += value[i]; } else { if (token.type) tokens.push(token); token = { type: type[i], value: value[i] }; } } } if (lastIndex == line.length) break; lastIndex = re.lastIndex; } if (token.type) tokens.push(token); return { tokens : tokens, state : currentState }; }; }).call(Tokenizer.prototype); exports.Tokenizer = Tokenizer; });