lib/dentaku/tokenizer.rb in dentaku-0.2.10 vs lib/dentaku/tokenizer.rb in dentaku-0.2.11

- old
+ new

module Dentaku
  # Tokenizer splits an expression string into Token objects using the
  # scanner table exposed by TokenScanner.scanners (defined elsewhere in
  # this gem; presumably the per-category scanners that lived in the old
  # inline SCANNERS constant — confirm against token_scanner.rb).
  class Tokenizer
    # Matchers used solely to track parenthesis nesting while scanning.
    LPAREN = TokenMatcher.new(:grouping, :open)
    RPAREN = TokenMatcher.new(:grouping, :close)

    # Tokenizes +string+ and returns the accumulated array of tokens
    # (whitespace tokens are dropped by #scan).
    #
    # Raises a RuntimeError when no scanner matches the remaining input,
    # when parentheses are unbalanced, or when a scanner produces a
    # zero-width match (which would loop forever).
    def tokenize(string)
      @nesting = 0
      @tokens  = []
      input    = string.dup

      until input.empty?
        # NOTE: the do...end block binds to #any? (not to raise) because
        # the modifier-unless boundary stops block association; #scan
        # reassigns +input+ to the unconsumed remainder on each match.
        raise "parse error at: '#{ input }'" unless TokenScanner.scanners.any? do |scanner|
          scanned, input = scan(input, scanner)
          scanned
        end
      end

      raise "too many opening parentheses" if @nesting > 0

      @tokens
    end

    # Attempts to match +scanner+ against the head of +string+.
    #
    # On a match: updates parenthesis nesting, appends the token to
    # @tokens (unless it is whitespace), and returns
    # [true, remainder_of_string]. On no match: returns [false, string].
    #
    # Raises when a zero-width token is produced or when a closing
    # parenthesis drops nesting below zero.
    def scan(string, scanner)
      # Defensive init: scan is public and mutates tokenizer state, but the
      # state was previously created only inside #tokenize — a standalone
      # call would hit NoMethodError on nil. No-op when #tokenize ran first.
      @nesting ||= 0
      @tokens  ||= []

      if token = scanner.scan(string)
        raise "unexpected zero-width match (:#{ token.category }) at '#{ string }'" if token.length == 0

        @nesting += 1 if LPAREN == token
        @nesting -= 1 if RPAREN == token
        raise "too many closing parentheses" if @nesting < 0

        @tokens << token unless token.is?(:whitespace)

        [true, string[token.length..-1]]
      else
        [false, string]
      end
    end
  end
end