lib/dentaku/tokenizer.rb in dentaku-0.2.10 vs lib/dentaku/tokenizer.rb in dentaku-0.2.11

- old
+ new

module Dentaku
  # Tokenizer splits an expression string into Token objects using the
  # scanner table exposed by TokenScanner.scanners (defined elsewhere in
  # this gem; presumably the per-category scanners that lived in the old
  # inline SCANNERS constant — confirm against token_scanner.rb).
  class Tokenizer
    # Matchers used solely to track parenthesis nesting while scanning.
    LPAREN = TokenMatcher.new(:grouping, :open)
    RPAREN = TokenMatcher.new(:grouping, :close)

    # Tokenizes +string+ and returns the accumulated array of tokens
    # (whitespace tokens are dropped by #scan).
    #
    # Raises a RuntimeError when no scanner matches the remaining input,
    # when parentheses are unbalanced, or when a scanner produces a
    # zero-width match (which would loop forever).
    def tokenize(string)
      @nesting = 0
      @tokens  = []
      input    = string.dup

      until input.empty?
        # NOTE: the do...end block binds to #any? (not to raise) because
        # the modifier-unless boundary stops block association; #scan
        # reassigns +input+ to the unconsumed remainder on each match.
        raise "parse error at: '#{ input }'" unless TokenScanner.scanners.any? do |scanner|
          scanned, input = scan(input, scanner)
          scanned
        end
      end

      raise "too many opening parentheses" if @nesting > 0

      @tokens
    end

    # Attempts to match +scanner+ against the head of +string+.
    #
    # On a match: updates parenthesis nesting, appends the token to
    # @tokens (unless it is whitespace), and returns
    # [true, remainder_of_string]. On no match: returns [false, string].
    #
    # Raises when a zero-width token is produced or when a closing
    # parenthesis drops nesting below zero.
    def scan(string, scanner)
      # Defensive init: scan is public and mutates tokenizer state, but the
      # state was previously created only inside #tokenize — a standalone
      # call would hit NoMethodError on nil. No-op when #tokenize ran first.
      @nesting ||= 0
      @tokens  ||= []

      if token = scanner.scan(string)
        raise "unexpected zero-width match (:#{ token.category }) at '#{ string }'" if token.length == 0

        @nesting += 1 if LPAREN == token
        @nesting -= 1 if RPAREN == token
        raise "too many closing parentheses" if @nesting < 0

        @tokens << token unless token.is?(:whitespace)

        [true, string[token.length..-1]]
      else
        [false, string]
      end
    end
  end
end