/* ***** BEGIN LICENSE BLOCK ***** * Distributed under the BSD license: * * Copyright (c) 2010, Ajax.org B.V. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Ajax.org B.V. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ***** END LICENSE BLOCK ***** */ define(function(require, exports, module) { /* * version of Tokenizer with additional logging * and infinite loop checks * can be used for developing/testing new modes **/ var Tokenizer = function(rules, flag) { flag = flag ? "g" + flag : "g"; this.rules = rules; this.regExps = {}; this.matchMappings = {}; for ( var key in this.rules) { var rule = this.rules[key]; var state = rule; var ruleRegExps = []; var matchTotal = 0; var mapping = this.matchMappings[key] = {}; for ( var i = 0; i < state.length; i++) { if (state[i].regex instanceof RegExp) state[i].regex = state[i].regex.toString().slice(1, -1); // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; // Replace any backreferences and offset appropriately. var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) { return "\\" + (parseInt(digit, 10) + matchTotal + 1); }); if (matchcount > 1 && state[i].token.length !== matchcount-1) throw new Error("For " + state[i].regex + " the matching groups and length of the token array don't match (rule #" + i + " of state " + key + ")"); mapping[matchTotal] = { rule: i, len: matchcount }; matchTotal += matchcount; ruleRegExps.push(adjustedregex); } this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", flag); } }; (function() { this.getLineTokens = function(line, startState) { var currentState = startState || "start"; var state = this.rules[currentState]; var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; var match, tokens = []; var lastIndex = 0; var stateTransitions = []; function onStateChange() { stateTransitions.push(startState+"@"+lastIndex); } function initState() { onStateChange(); stateTransitions = []; onStateChange(); } var token = { type: null, value: "", state: currentState }; initState(); var maxRecur = 10000; while (match = re.exec(line)) { var type = "default.text"; var rule = null; var value = [match[0]]; for (var i = 0; i < match.length-2; i++) { if (match[i + 1] === undefined) continue; if (!maxRecur--) { throw "infinite" + mapping[i].rule + currentState } rule = state[mapping[i].rule]; if (mapping[i].len > 1) value = match.slice(i+2, i+1+mapping[i].len); // compute token type if (typeof rule.token == "function") type = rule.token.apply(this, value); else type = rule.token; if (rule.next) { currentState = rule.next; state = this.rules[currentState]; mapping = this.matchMappings[currentState]; lastIndex = re.lastIndex; re = this.regExps[currentState]; if (re === undefined) { throw new Error("You indicated a state of " + rule.next + " to go to, but it doesn't exist!"); } re.lastIndex = lastIndex; onStateChange(); } break; } if (value[0]) { if (typeof type == "string") { value = [value.join("")]; type = [type]; } for (var i = 0; i < value.length; i++) { if (!value[i]) continue; var mergeable = (!rule || rule.merge || type[i] === "text") && token.type === type[i]; if (false && mergeable) { token.value += value[i]; } else { if (token.type) { token.stateTransitions = stateTransitions; tokens.push(token); initState() } token = { type: type[i], value: value[i], state: currentState, mergeable: mergeable }; } } } if (lastIndex == line.length) break; lastIndex = re.lastIndex; } if (token.type) { token.stateTransitions = stateTransitions; tokens.push(token); } return { tokens : tokens, state : currentState }; }; }).call(Tokenizer.prototype); exports.Tokenizer = Tokenizer; });