Sha256: 607c8a0756133d395cd3d01fe932db938a316a26d22968f057f761f66343a7ba

Contents?: true

Size: 878 Bytes

Versions: 1

Compression:

Stored size: 878 Bytes

Contents

require 'forwardable'
require 'strscan'
require 'search_lingo/token'

module SearchLingo
  # Splits a query string into tokens by walking it with a StringScanner.
  #
  # Tokens are produced lazily through #each / #next (and therefore through
  # every Enumerable method); each one is built by handing the matched text
  # to Token.new.
  class Tokenizer
    include Enumerable
    extend Forwardable

    # A double-quoted run without embedded quotes, or a run of visible
    # (non-space, non-control) characters.
    SIMPLE    = %r{"[^"]*"|[[:graph:]]+}
    # A SIMPLE term, optionally preceded by "prefix:" and optional whitespace.
    COMPOUND  = %r{(?:[[:graph:]]+:[[:space:]]*)?#{SIMPLE}}
    # Whitespace separating tokens. Note that it can match the empty string.
    DELIMITER = %r{[[:space:]]*}

    # @param query [String] the raw query; surrounding whitespace is stripped
    #   before scanning.
    def initialize(query)
      @scanner = StringScanner.new query.strip
    end

    # Builds an Enumerator that yields one Token per COMPOUND match, starting
    # at the scanner's current position.
    #
    # Every call returns a fresh Enumerator, but they all share the same
    # scanner, so the position in the query is carried by the scanner.
    #
    # @return [Enumerator]
    def enum
      Enumerator.new do |yielder|
        until scanner.eos?
          token = scanner.scan COMPOUND
          if token
            yielder << Token.new(token)
          else
            # A character that is neither [[:graph:]] nor [[:space:]] (for
            # example a control character) matches neither COMPOUND nor
            # DELIMITER. Discard it so the loop always makes progress instead
            # of spinning forever on the same position.
            scanner.getch
          end
          scanner.skip DELIMITER
        end
      end
    end

    def_delegator :scanner, :reset
    def_delegators :enum, :each, :next

    # Rewinds the scanner to where its most recent match began and re-reads
    # from there using SIMPLE instead of COMPOUND, then skips any trailing
    # delimiter.
    #
    # @return [Token] a token built from the SIMPLE match
    def simplify
      scanner.unscan
      Token.new(scanner.scan(SIMPLE)).tap do
        scanner.skip DELIMITER
      end
    end

    private

    attr_reader :scanner
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
search_lingo-1.0.0.beta2 lib/search_lingo/tokenizer.rb