lib/eye_of_newt/tokenizer.rb in eye-of-newt-0.0.6 vs lib/eye_of_newt/tokenizer.rb in eye-of-newt-0.0.8

- old
+ new

@@ -11,15 +11,16 @@ OF = /of/ A = /an?/ TO_TASTE = /to taste/ COMMA = ',' - attr_reader :string, :units + attr_reader :string, :units, :unit_modifiers - def initialize(string, units: EyeOfNewt.units.all) + def initialize(string, units: EyeOfNewt.units.all, unit_modifiers: EyeOfNewt.units.unit_modifiers) @string = string @units = units + @unit_modifiers = unit_modifiers @ss = StringScanner.new(string) end def next_token return if @ss.eos? @@ -37,10 +38,12 @@ [:A, text] when text = @ss.scan(/#{TO_TASTE}\b/) [:TO_TASTE, text] when text = @ss.scan(/#{unit_matcher}\b/) [:UNIT, text] + when text = @ss.scan(/#{unit_modifier}\b/) + [:UNIT_MODIFIER, text] when text = @ss.scan(/#{WORD}\b/) [:WORD, text] else x = @ss.getch @scan_text = true if x == ',' || x == '(' @@ -48,12 +51,20 @@ end end private + def unit_modifier + @unit_modifier_matcher ||= match_any(unit_modifiers) + end + def unit_matcher - @unit_matcher ||= if units.any? - r = units + @unit_matcher ||= match_any(units) + end + + def match_any(elements) + if elements.any? + r = elements .sort .reverse .map{|u|Regexp.escape(u)} .join("|") Regexp.new(r)