lib/src_lexer.rb in src_lexer-1.0.2 vs lib/src_lexer.rb in src_lexer-1.0.3
- old
+ new
@@ -1,227 +1,227 @@
-# -*- encoding: utf-8 -*-
-require "src_lexer/version"
-
-module SrcLexer
- class Token
- attr_reader :str, :line_no, :char_no
-
- def initialize(str, line_no, char_no)
- @str = str
- @line_no = line_no
- @char_no = char_no
- end
-
- def ==(other_object)
- @str == other_object.str && @line_no == other_object.line_no && @char_no == other_object.char_no
- end
- end
-
- class Lexer
- END_TOKEN = [false, nil]
- NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
- STRING_REGEX = /^\"(.*)\"\z/m
- attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
-
- def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
- @keywords = (keywords ? keywords.uniq.compact : [])
- @symbols = (symbols ? symbols.uniq.compact : [])
- @string_literal_marker = string_literal_marker
- @line_comment_marker = line_comment_marker
- @comment_markers = comment_markers
- end
-
- def analyze(str)
- @str = str
- tokenize
- end
-
- def pop_token
- token = @tokens.shift
- return END_TOKEN if token.nil?
- case token[0]
- when NUMBER_REGEX
- [:NUMBER, Token.new(token[0], token[1], token[2])]
- when STRING_REGEX
- [:STRING, Token.new(token[0], token[1], token[2])]
- else
- [is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
- end
- end
-
- private
-
- class PosInfo
- attr_accessor :index, :line_no, :char_no
-
- def initialize
- @index = 0
- @line_no = 1
- @char_no = 1
- end
- end
-
- class StringIterator
- def initialize(str)
- @str = str
- @current_pos = PosInfo.new
- @marked_pos = PosInfo.new
- mark_clear()
- end
-
- def mark_clear
- @marked_pos.index = -1
- @marked_pos.line_no = 0
- @marked_pos.char_no = 0
- end
-
- def mark_set
- @marked_pos = @current_pos.clone
- end
-
- def is(target_string)
- return false if target_string.length.zero?
- end_pos = (@current_pos.index + target_string.length - 1)
- @str[@current_pos.index..end_pos] == target_string
- end
-
- def is_in(target_list)
- target_list.find { |target| is(target) } != nil
- end
-
- def move_next
- if /\n/.match @str[@current_pos.index]
- @current_pos.line_no += 1
- @current_pos.char_no = 1
- else
- @current_pos.char_no += 1
- end
- @current_pos.index += 1
- end
-
- def move_to_the_end_of_the_line
- char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
- @current_pos.index += char_count_to_the_end_of_the_line
- @current_pos.char_no += char_count_to_the_end_of_the_line
- end
-
- def move_to(target)
- char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
- chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
- @current_pos.index += char_count_to_target
- match = /.*\n(.*)$/m.match(chopped_string)
- p match[1].length if match
- if match
- @current_pos.char_no = match[1].length
- else
- @current_pos.char_no += char_count_to_target
- end
- @current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
- end
-
- def <(index)
- @current_pos.index < index
- end
-
- def is_white_space
- /\s/.match(@str[@current_pos.index])
- end
-
- def marked?
- @marked_pos.index != -1
- end
-
- def shift
- result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
- mark_clear()
- return result
- end
- end
-
- def tokenize()
- @tokens = []
- iterator = StringIterator.new(@str)
-
- while iterator < @str.length do
- if iterator.is_white_space then
- @tokens.push iterator.shift if iterator.marked?
- iterator.move_next
- elsif @line_comment_marker && iterator.is(@line_comment_marker) then
- @tokens.push iterator.shift if iterator.marked?
- iterator.move_to_the_end_of_the_line
- iterator.move_next
- elsif @comment_markers && iterator.is(@comment_markers[0]) then
- @tokens.push iterator.shift if iterator.marked?
- iterator.move_to(@comment_markers[1])
- iterator.move_next
- elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
- @tokens.push iterator.shift if iterator.marked?
- iterator.mark_set
- iterator.move_next
- iterator.move_to(@string_literal_marker[1])
- iterator.move_next
- @tokens.push iterator.shift
- elsif iterator.is_in(@symbols) then
- @tokens.push iterator.shift if iterator.marked?
- iterator.mark_set
- @symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
- @tokens.push iterator.shift
- elsif !iterator.marked? then
- iterator.mark_set
- else
- iterator.move_next
- end
- end
- @tokens.push iterator.shift if iterator.marked?
-
- return self
- end
-
- def is_reserved?(token)
- @keywords.include?(token) || @symbols.include?(token)
- end
- end
-
- class CSharpLexer < Lexer
- def initialize
- super(
- [ # C# keywords
- 'abstract', 'as', 'base', 'bool', 'break',
- 'byte', 'case', 'catch', 'char', 'checked',
- 'class', 'const', 'continue', 'decimal', 'default',
- 'delegate', 'do', 'double', 'else', 'enum',
- 'event', 'explicit', 'extern', 'false', 'finally',
- 'fixed', 'float', 'for', 'foreach', 'goto',
- 'if', 'implicit', 'in', 'int', 'interface',
- 'internal', 'is', 'lock', 'long', 'namespace',
- 'new', 'null', 'object', 'operator', 'out',
- 'override', 'params', 'private', 'protected', 'public',
- 'readonly', 'ref', 'return', 'sbyte', 'sealed',
- 'short', 'sizeof', 'stackalloc', 'static', 'string',
- 'struct', 'switch', 'this', 'throw', 'true',
- 'try', 'typeof', 'uint', 'ulong', 'unchecked',
- 'unsafe', 'ushort', 'using', 'virtual', 'void',
- 'volatile', 'while',
- # C# context keywords
- 'add', 'alias', 'ascending', 'async', 'await',
- 'descending', 'dynamic', 'from', 'get', 'global',
- 'group', 'into', 'join', 'let', 'orderby',
- 'partial', 'remove', 'select', 'set', 'value',
- 'var', 'where', 'yield'
- ],
- [
- '<<=', '>>=', '<<', '>>', '<=',
- '>=', '==', '!=', '&&', '||',
- '??', '+=', '-=', '*=', '/=',
- '%=', '&=', '|=', '^=', '=>',
- '*', '/', '%', '+', '-',
- '<', '>', '&', '^', '|',
- '?', ':', '=', '{', '}',
- '(', ')', '[', ']', ';',
- ','
- ],
- ['"', '"'], # comment markers
- '//', # line comment marker
- ['/*', '*/']) # multi line comment markers
- end
- end
-end
+# -*- encoding: utf-8 -*-
+require "src_lexer/version"
+
+module SrcLexer
+ class Token
+ attr_reader :str, :line_no, :char_no
+
+ def initialize(str, line_no, char_no)
+ @str = str
+ @line_no = line_no
+ @char_no = char_no
+ end
+
+ def ==(other_object)
+ @str == other_object.str && @line_no == other_object.line_no && @char_no == other_object.char_no
+ end
+ end
+
+ class Lexer
+ END_TOKEN = [false, nil]
+ NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
+ STRING_REGEX = /^\"(.*)\"\z/m
+ attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
+
+ def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
+ @keywords = (keywords ? keywords.uniq.compact : [])
+ @symbols = (symbols ? symbols.uniq.compact : [])
+ @string_literal_marker = string_literal_marker
+ @line_comment_marker = line_comment_marker
+ @comment_markers = comment_markers
+ end
+
+ def analyze(str)
+ @str = str
+ tokenize
+ end
+
+ def pop_token
+ token = @tokens.shift
+ return END_TOKEN if token.nil?
+ case token[0]
+ when NUMBER_REGEX
+ [:NUMBER, Token.new(token[0], token[1], token[2])]
+ when STRING_REGEX
+ [:STRING, Token.new(token[0], token[1], token[2])]
+ else
+ [is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
+ end
+ end
+
+ private
+
+ class PosInfo
+ attr_accessor :index, :line_no, :char_no
+
+ def initialize
+ @index = 0
+ @line_no = 1
+ @char_no = 1
+ end
+ end
+
+ class StringIterator
+ def initialize(str)
+ @str = str
+ @current_pos = PosInfo.new
+ @marked_pos = PosInfo.new
+ mark_clear()
+ end
+
+ def mark_clear
+ @marked_pos.index = -1
+ @marked_pos.line_no = 0
+ @marked_pos.char_no = 0
+ end
+
+ def mark_set
+ @marked_pos = @current_pos.clone
+ end
+
+ def is(target_string)
+ return false if target_string.length.zero?
+ end_pos = (@current_pos.index + target_string.length - 1)
+ @str[@current_pos.index..end_pos] == target_string
+ end
+
+ def is_in(target_list)
+ target_list.find { |target| is(target) } != nil
+ end
+
+ def move_next
+ if /\n/.match @str[@current_pos.index]
+ @current_pos.line_no += 1
+ @current_pos.char_no = 1
+ else
+ @current_pos.char_no += 1
+ end
+ @current_pos.index += 1
+ end
+
+ def move_to_the_end_of_the_line
+ char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
+ @current_pos.index += char_count_to_the_end_of_the_line
+ @current_pos.char_no += char_count_to_the_end_of_the_line
+ end
+
+ def move_to(target)
+ char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
+ chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
+ @current_pos.index += char_count_to_target
+ match = /.*\n(.*)$/m.match(chopped_string)
+ p match[1].length if match
+ if match
+ @current_pos.char_no = match[1].length
+ else
+ @current_pos.char_no += char_count_to_target
+ end
+ @current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
+ end
+
+ def <(index)
+ @current_pos.index < index
+ end
+
+ def is_white_space
+ /\s/.match(@str[@current_pos.index])
+ end
+
+ def marked?
+ @marked_pos.index != -1
+ end
+
+ def shift
+ result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
+ mark_clear()
+ return result
+ end
+ end
+
+ def tokenize()
+ @tokens = []
+ iterator = StringIterator.new(@str)
+
+ while iterator < @str.length do
+ if iterator.is_white_space then
+ @tokens.push iterator.shift if iterator.marked?
+ iterator.move_next
+ elsif @line_comment_marker && iterator.is(@line_comment_marker) then
+ @tokens.push iterator.shift if iterator.marked?
+ iterator.move_to_the_end_of_the_line
+ iterator.move_next
+ elsif @comment_markers && iterator.is(@comment_markers[0]) then
+ @tokens.push iterator.shift if iterator.marked?
+ iterator.move_to(@comment_markers[1])
+ iterator.move_next
+ elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
+ @tokens.push iterator.shift if iterator.marked?
+ iterator.mark_set
+ iterator.move_next
+ iterator.move_to(@string_literal_marker[1])
+ iterator.move_next
+ @tokens.push iterator.shift
+ elsif iterator.is_in(@symbols) then
+ @tokens.push iterator.shift if iterator.marked?
+ iterator.mark_set
+ @symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
+ @tokens.push iterator.shift
+ elsif !iterator.marked? then
+ iterator.mark_set
+ else
+ iterator.move_next
+ end
+ end
+ @tokens.push iterator.shift if iterator.marked?
+
+ return self
+ end
+
+ def is_reserved?(token)
+ @keywords.include?(token) || @symbols.include?(token)
+ end
+ end
+
+ class CSharpLexer < Lexer
+ def initialize
+ super(
+ [ # C# keywords
+ 'abstract', 'as', 'base', 'bool', 'break',
+ 'byte', 'case', 'catch', 'char', 'checked',
+ 'class', 'const', 'continue', 'decimal', 'default',
+ 'delegate', 'do', 'double', 'else', 'enum',
+ 'event', 'explicit', 'extern', 'false', 'finally',
+ 'fixed', 'float', 'for', 'foreach', 'goto',
+ 'if', 'implicit', 'in', 'int', 'interface',
+ 'internal', 'is', 'lock', 'long', 'namespace',
+ 'new', 'null', 'object', 'operator', 'out',
+ 'override', 'params', 'private', 'protected', 'public',
+ 'readonly', 'ref', 'return', 'sbyte', 'sealed',
+ 'short', 'sizeof', 'stackalloc', 'static', 'string',
+ 'struct', 'switch', 'this', 'throw', 'true',
+ 'try', 'typeof', 'uint', 'ulong', 'unchecked',
+ 'unsafe', 'ushort', 'using', 'virtual', 'void',
+ 'volatile', 'while',
+ # C# context keywords
+ 'add', 'alias', 'ascending', 'async', 'await',
+ 'descending', 'dynamic', 'from', 'get', 'global',
+ 'group', 'into', 'join', 'let', 'orderby',
+ 'partial', 'remove', 'select', 'set', 'value',
+ 'var', 'where', 'yield'
+ ],
+ [
+ '<<=', '>>=', '<<', '>>', '<=',
+ '>=', '==', '!=', '&&', '||',
+ '??', '+=', '-=', '*=', '/=',
+ '%=', '&=', '|=', '^=', '=>',
+ '*', '/', '%', '+', '-',
+ '<', '>', '&', '^', '|',
+ '?', ':', '=', '{', '}',
+ '(', ')', '[', ']', ';',
+ ','
+ ],
+ ['"', '"'], # comment markers
+ '//', # line comment marker
+ ['/*', '*/']) # multi line comment markers
+ end
+ end
+end