/*
 * Gherkin lexer template.
 *
 * The text below is a Ragel state-machine definition (machine "lexer") whose
 * embedded actions are JavaScript, wrapped in an immediately-invoked function.
 * The grammar itself comes from the included "lexer_common" machine, whose
 * file name contains an ERB-style placeholder; neither the grammar nor the
 * placeholder value is visible in this file.
 *
 * What the actions visible here do:
 *   - begin_content, begin_docstring_content, begin_cell_content,
 *     start_docstring_content_type and end_docstring_content_type save the
 *     current byte offset (p) so that a later store action can slice the data.
 *   - start_keyword, end_keyword and next_keyword_start save keyword offsets;
 *     end_keyword decodes the keyword text and strips one trailing colon.
 *   - inc_line_number increments this.line_number; last_newline stores p + 1
 *     in this.last_newline.
 *   - store_step_content, store_comment_content, store_tag_content,
 *     store_docstring_content and store_row decode the saved byte range with
 *     this.bytesToString and pass the result to the matching method of
 *     this.listener.
 *   - store_feature_content, store_background_content, store_scenario_content,
 *     store_scenario_outline_content and store_examples_content delegate to
 *     this.store_keyword_content and assign its return value back to p.
 *   - end_feature throws an Error ("Lexing error on line N ...") when
 *     this.cs is below lexer_first_final, otherwise it calls listener.eof().
 *
 * NOTE(review): store_cell_content chains three replace() calls whose patterns
 * have no g flag, so only the first escaped pipe, the first escaped newline
 * and the first escaped backslash of a cell are unescaped.
 * NOTE(review): start_row assigns current_row without a declaration that is
 * visible in this file, which would make it an implicit global - confirm.
 * NOTE(review): the Lexer constructor text looks truncated: the for-loop
 * header runs straight into "127 ? v-256 : v;", which presumably belongs to
 * the signedCharValue helper named in the getkey directive. Restore the
 * missing text from the upstream template before generating a lexer.
 */
;(function() { %%{ machine lexer; action begin_content { this.content_start = p; this.current_line = this.line_number; this.start_col = p - this.last_newline - (this.keyword+':').length; } action start_docstring { this.current_line = this.line_number; this.start_col = p - this.last_newline; } action begin_docstring_content { this.content_start = p; } action start_docstring_content_type { this.docstring_content_type_start = p; } action end_docstring_content_type { this.docstring_content_type_end = p; } action store_docstring_content { var con = this.unindent( this.start_col, this.bytesToString(data.slice(this.content_start, this.next_keyword_start-1)).replace(/(\r?\n)?([\t ])*$/, '').replace(/ESCAPED_TRIPLE_QUOTE/mg, '"""') ); var con_type = this.bytesToString(data.slice(this.docstring_content_type_start, this.docstring_content_type_end)).trim(); this.listener.doc_string(con_type, con, this.current_line); } action store_feature_content { p = this.store_keyword_content('feature', data, p, eof); } action store_background_content { p = this.store_keyword_content('background', data, p, eof); } action store_scenario_content { p = this.store_keyword_content('scenario', data, p, eof); } action store_scenario_outline_content { p = this.store_keyword_content('scenario_outline', data, p, eof); } action store_examples_content { p = this.store_keyword_content('examples', data, p, eof); } action store_step_content { var con = this.bytesToString(data.slice(this.content_start, p)).trim(); this.listener.step(this.keyword, con, this.current_line); } action store_comment_content { var con = this.bytesToString(data.slice(this.content_start, p)).trim(); this.listener.comment(con, this.line_number); this.keyword_start = null; } action store_tag_content { var con = this.bytesToString(data.slice(this.content_start, p)).trim(); this.listener.tag(con, this.line_number); this.keyword_start = null; } action inc_line_number { this.line_number++; } action last_newline { this.last_newline = p + 
1; } action start_keyword { this.keyword_start = this.keyword_start || p; } action end_keyword { this.keyword = this.bytesToString(data.slice(this.keyword_start, p)).replace(/:$/, ''); this.keyword_start = null; } action next_keyword_start { this.next_keyword_start = p; } action start_row { p = p - 1; current_row = []; this.current_line = this.line_number; } action begin_cell_content { this.content_start = p; } action store_cell_content { var con = this.bytesToString(data.slice(this.content_start, p)).trim(); current_row.push(con.replace(/\\\|/, "|").replace(/\\n/, "\n").replace(/\\\\/, "\\")); } action store_row { this.listener.row(current_row, this.current_line); } action end_feature { if(this.cs < lexer_first_final) { var content = this.current_line_content(data, p); throw new Error("Lexing error on line " + this.line_number + ": '" + content + "'. See http://wiki.github.com/cucumber/gherkin/lexingerror for more information."); } else { this.listener.eof(); } } include lexer_common "lexer_common.<%= @i18n.underscored_iso_code %>.rl"; }%% %% write data; %% access this.; %% variable data data; %% getkey signedCharValue(data[p]); var Lexer = function(listener) { // Check that listener has the required functions var events = ['comment', 'tag', 'feature', 'background', 'scenario', 'scenario_outline', 'examples', 'step', 'doc_string', 'row', 'eof']; for(var i=0, len=events.length; i 127 ? v-256 : v; }; %% write init; %% write exec; }; /* * Decode utf-8 byte sequence to string. 
*/ var decodeUtf8 = function(bytes) { var result = ""; var i = 0; var wc; var c; while (i < bytes.length) { /* parse as UTF-8 lead byte */ wc = bytes[i++]; if (wc < 0x80) { count = 0; } else if (wc < 0xC2 || wc >= 0xF8) { throw new Error("input is not a valid UTF-8 lead octet"); } else if (wc < 0xE0) { count = 1; wc = (wc & 0x1F) << 6; } else if (wc < 0xF0) { count = 2; wc = (wc & 0x0F) << 12; } else /* wc < 0xF8 */ { count = 3; wc = (wc & 0x07) << 18; } /* parse trail bytes, if any */ while (count) { if (!(i < bytes.length)) { throw new Error("short read"); } if ((c = bytes[i++] ^ 0x80) > 0x3F) { throw new Error("input is not a valid UTF-8 trail octet"); } wc |= c << (6 * --count); if (wc < (1 << (5 * count + 6))) { throw new Error("invalid non-minimal encoded input"); } } /* handle conversion to UTF-16 if needed */ if (wc > 0xFFFF) { wc -= 0x10000; result += String.fromCharCode(0xD800 + (wc >> 10)); wc = 0xDC00 + (wc & 0x3FF); } result += String.fromCharCode(wc); } return result; }; /* * Encode string to an array of bytes using utf8 encoding. * * Javascript internally stores character data as utf16 (like java). * String.charCodeAt() does *not* produce unicode points, but simply * reflects this internal representation. Thus, it is necessary * to first decode the utf-16 representation before encoding to * utf-8. 
*/ var encodeUtf8 = function(string) { var bytes = []; var i = 0; var j = 0; var wc; while (i < string.length) { wc = string.charCodeAt(i++); if (wc >= 0xD800 && wc <= 0xDBFF && i < string.length && string.charCodeAt(i) >= 0xDC00 && string.charCodeAt(i) <= 0xDFFF) { /* decode UTF-16 */ wc = 0x10000 + ((wc & 0x3FF) << 10) + (string.charCodeAt(i++) & 0x3FF); } /* emit lead byte */ if (wc < 0x80) { bytes[j++] = wc; count = 0; } else if (wc < 0x800) { bytes[j++] = 0xC0 | (wc >> 6); count = 1; } else if (wc < 0x10000) { bytes[j++] = 0xE0 | (wc >> 12); count = 2; } else { /* SMP: 21-bit Unicode */ bytes[j++] = 0xF0 | (wc >> 18); count = 3; } /* emit trail bytes, if any */ while (count) { bytes[j++] = 0x80 | ((wc >> (6 * --count)) & 0x3F); } } return bytes; }; Lexer.prototype.bytesToString = function(bytes) { if(typeof bytes.write == 'function') { // Node.js return bytes.toString('utf-8'); } return decodeUtf8(bytes); }; Lexer.prototype.stringToBytes = function(string) { return encodeUtf8(string); }; Lexer.prototype.unindent = function(startcol, text) { startcol = startcol || 0; return text.replace(new RegExp('^[\t ]{0,' + startcol + '}', 'gm'), ''); }; Lexer.prototype.store_keyword_content = function(event, data, p, eof) { var end_point = (!this.next_keyword_start || (p == eof)) ? p : this.next_keyword_start; var content = this.unindent(this.start_col + 2, this.bytesToString(data.slice(this.content_start, end_point))).replace(/\s+$/,""); var content_lines = content.split("\n") var name = content_lines.shift() || ""; name = name.trim(); var description = content_lines.join("\n"); this.listener[event](this.keyword, name, description, this.current_line); var nks = this.next_keyword_start; this.next_keyword_start = null; return nks ? 
nks - 1 : p; }; Lexer.prototype.current_line_content = function(data, p) { var rest = Array.prototype.slice.call(data,this.last_newline, -1); var end = rest.indexOf(10) || -1; return this.bytesToString(rest.slice(0, end)).trim(); }; // Node.js export if(typeof module !== 'undefined') { module.exports = Lexer; } // Require.js export if (typeof define !== 'undefined') { if(define.amd) { define('gherkin/lexer/<%= @i18n.underscored_iso_code %>', [], function() { return Lexer; }); } else { define('gherkin/lexer/<%= @i18n.underscored_iso_code %>', function(require, exports, module) { exports.Lexer = Lexer; }); } } })();