Sha256: a60fad3371329e9a69c8dafee8b9532de3f9dc5590d41b53d2d5e5deb23bfbb2

Contents?: true

Size: 1.39 KB

Versions: 3

Compression:

Stored size: 1.39 KB

Contents

require 'dentaku/token'
require 'dentaku/token_matcher'
require 'dentaku/token_scanner'

module Dentaku
  # Splits an input string into an ordered array of Token objects by
  # trying each registered TokenScanner in turn until the whole string
  # is consumed. Tracks parenthesis nesting and raises TokenizerError
  # on any unparseable input or unbalanced grouping.
  class Tokenizer
    # Matchers used to recognize grouping tokens for nesting bookkeeping.
    LPAREN = TokenMatcher.new(:grouping, :open)
    RPAREN = TokenMatcher.new(:grouping, :close)

    # Tokenizes +string+ and returns the token list (whitespace tokens
    # are discarded). Raises TokenizerError when no scanner matches the
    # remaining input or when parentheses are unbalanced.
    def tokenize(string)
      @nesting  = 0
      @tokens   = []
      remaining = strip_comments(string.to_s.dup)

      until remaining.empty?
        # Each scanner gets a chance at the head of the remaining input;
        # the first successful one advances the string.
        matched = TokenScanner.scanners.any? do |scanner|
          ok, remaining = scan(remaining, scanner)
          ok
        end

        raise TokenizerError, "parse error at: '#{ remaining }'" unless matched
      end

      raise TokenizerError, "too many opening parentheses" if @nesting > 0

      @tokens
    end

    # The most recently accepted token, or nil when none have been
    # collected yet; scanners use it for context-sensitive matching.
    def last_token
      @tokens[-1]
    end

    # Applies +scanner+ to the head of +string+. Returns a two-element
    # array: [true, rest-of-string] on a match, [false, string] otherwise.
    # Updates @nesting and @tokens as a side effect.
    def scan(string, scanner)
      matched_tokens = scanner.scan(string, last_token)
      return [false, string] unless matched_tokens

      consumed = 0
      matched_tokens.each do |token|
        # A zero-width match would loop forever; fail loudly instead.
        if token.length == 0
          raise TokenizerError, "unexpected zero-width match (:#{ token.category }) at '#{ string }'"
        end

        @nesting += 1 if LPAREN == token
        @nesting -= 1 if RPAREN == token
        raise TokenizerError, "too many closing parentheses" if @nesting < 0

        consumed += token.length
        @tokens << token unless token.is?(:whitespace)
      end

      [true, string[consumed..-1]]
    end

    # Removes C-style /* ... */ comments (including multi-line ones)
    # before tokenization begins.
    def strip_comments(input)
      input.gsub(/\/\*[^*]*\*+(?:[^*\/][^*]*\*+)*\//, '')
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
dentaku-2.0.11 lib/dentaku/tokenizer.rb
dentaku-2.0.10 lib/dentaku/tokenizer.rb
dentaku-2.0.9 lib/dentaku/tokenizer.rb