Sha256: 0600086bc6157258c784778b67e76129daf6c4380c4fbb2b33fd3647d0c36f07
Contents?: true
Size: 1.66 KB
Versions: 1
Compression:
Stored size: 1.66 KB
Contents
require 'strscan'

module EyeOfNewt
  # Splits a string into tokens, handing them out one at a time through
  # #next_token. Each token is a two-element array: [type, matched_text].
  #
  # Token types produced: :TEXT, :NUMBER, :OF, :A, :TO_TASTE, :UNIT,
  # :UNIT_MODIFIER and :WORD. Any character that fits none of those is
  # returned as [char, char].
  class Tokenizer
    NO_MATCH = /a^/ # should never match anything
    WHITESPACE = /\s+/
    ANYTHING = /[^()]+/
    WORD = /[\w-]+/
    NUMBER = /\d+/
    OF = /of/
    A = /an?/
    TO_TASTE = /to taste/
    COMMA = ','

    # Word-boundary-terminated forms of the patterns above, compiled once
    # here rather than being rebuilt on every call to #next_token.
    TEXT_TOKEN = /#{ANYTHING}\b/
    OF_TOKEN = /#{OF}\b/
    A_TOKEN = /#{A}\b/
    TO_TASTE_TOKEN = /#{TO_TASTE}\b/
    WORD_TOKEN = /#{WORD}\b/

    attr_reader :string, :units, :unit_modifiers

    # string         - the text to tokenize.
    # units          - collection of unit names recognised as :UNIT tokens.
    # unit_modifiers - collection of names recognised as :UNIT_MODIFIER tokens.
    #
    # The keyword defaults are read from EyeOfNewt.units and are only
    # evaluated when the caller does not supply a value.
    def initialize(string, units: EyeOfNewt.units.all, unit_modifiers: EyeOfNewt.units.unit_modifiers)
      @string = string
      @units = units
      @unit_modifiers = unit_modifiers
      @ss = StringScanner.new(string)
      # Switched on once a ',' or '(' has been consumed; from then on a run
      # of non-parenthesis characters is returned as a single :TEXT token.
      @scan_text = false
    end

    # Returns the next [type, text] token, or nil once the input is exhausted.
    #
    # Patterns are tried in a fixed order, so the earlier alternatives win
    # when several could match at the current position.
    def next_token
      return if @ss.eos?

      @ss.scan(WHITESPACE)
      # Input that ends in whitespace has nothing left to tokenize; without
      # this check the fallback branch below would emit a [nil, nil] token.
      return if @ss.eos?

      case
      when @scan_text && text = @ss.scan(TEXT_TOKEN)
        [:TEXT, text]
      when text = @ss.scan(NUMBER)
        [:NUMBER, text]
      when text = @ss.scan(OF_TOKEN)
        [:OF, text]
      when text = @ss.scan(A_TOKEN)
        [:A, text]
      when text = @ss.scan(TO_TASTE_TOKEN)
        [:TO_TASTE, text]
      when text = @ss.scan(unit_token)
        [:UNIT, text]
      when text = @ss.scan(unit_modifier_token)
        [:UNIT_MODIFIER, text]
      when text = @ss.scan(WORD_TOKEN)
        [:WORD, text]
      else
        # Nothing matched: emit the single character as its own token type.
        x = @ss.getch
        @scan_text = true if x == COMMA || x == '('
        [x, x]
      end
    end

    private

    # Boundary-terminated unit pattern, compiled once per tokenizer.
    def unit_token
      @unit_token ||= /#{unit_matcher}\b/
    end

    # Boundary-terminated unit-modifier pattern, compiled once per tokenizer.
    def unit_modifier_token
      @unit_modifier_token ||= /#{unit_modifier}\b/
    end

    # Regexp matching any configured unit modifier.
    def unit_modifier
      @unit_modifier_matcher ||= match_any(unit_modifiers)
    end

    # Regexp matching any configured unit.
    def unit_matcher
      @unit_matcher ||= match_any(units)
    end

    # Builds an alternation Regexp matching any of +elements+ literally, or
    # NO_MATCH when there are none.
    def match_any(elements)
      return NO_MATCH unless elements.any?

      # Reverse-sorted order puts a longer name ahead of any name that is a
      # prefix of it (e.g. "cups" before "cup"), so the longer one is tried
      # first in the alternation.
      alternatives = elements
        .sort
        .reverse
        .map { |u| Regexp.escape(u) }
        .join("|")
      Regexp.new(alternatives)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
eye-of-newt-0.0.8 | lib/eye_of_newt/tokenizer.rb |