lib/appstats/parser.rb in appstats-0.9.0 vs lib/appstats/parser.rb in appstats-0.9.1

- old
+ new

@@ -1,10 +1,12 @@ module Appstats class Parser - attr_reader :raw_rules, :rules, :repeating, :raw_tokenize, :tokenize, :tokenize_no_spaces, :tokenize_regex, :tokenize_regex_no_spaces, :results, :raw_results, :constants + attr_reader :raw_rules, :rules, :repeating, :raw_tokenize, :results, :raw_results, + :tokenize, :tokenize_no_spaces, :tokenize_regex, :tokenize_regex_no_spaces, + :constants, :constants_no_spaces def initialize(data = {}) @raw_rules = data[:rules] @raw_tokenize = data[:tokenize] @repeating = data[:repeating] == true @@ -23,10 +25,11 @@ @rule_index = 0 @max_rule_index = @rules.size - 1 @previous_text_so_far = input.strip @text_so_far = @previous_text_so_far @remaining_constants = @constants.dup + @remaining_constants_no_spaces = @constants_no_spaces.dup while !@text_so_far.blank? process_constant_if_present break if @rule_index > @max_rule_index && !@repeating @rule_index = 0 if @rule_index > @max_rule_index @@ -35,16 +38,17 @@ @rule_index += 1 if rule.kind_of?(Hash) if rule[:stop] == :constant was_found = false - @remaining_constants.each_with_index do |k,index| + @remaining_constants_no_spaces.each_with_index do |k,index| p = parse_word(@text_so_far,k,true) if p[0].nil? unset_rules_until(k) else (index-1).downto(0) do |i| + @remaining_constants_no_spaces.delete_at(i) @remaining_constants.delete_at(i) end add_results(rule[:rule],p[0]) @text_so_far = p[1] was_found = true @@ -77,44 +81,43 @@ answer[0] = m[1] unless m.nil? answer[1] = m.nil? ? current_text : m[2] clean_parsed_word(answer) end - def self.merge_regex_filter(a,b) - return "" if a.blank? && b.blank? - return "(#{a})" if b.blank? - return "(#{b})" if a.blank? - "(#{a}|#{b})" + def self.merge_regex_filter(inputs = []) + inputs.collect! { |x| x unless x.blank? }.compact! + return "" if inputs.empty? + "(#{inputs.join('|')})" end def parse_word(current_text,stop_on,strict = false) answer = [nil,nil] return answer if current_text.blank? || stop_on.nil? current_text.strip! current_text = remove_tokens_at_start(current_text) if stop_on == :end - filter = Parser.merge_regex_filter(nil,@tokenize_regex) + filter = Parser.merge_regex_filter([nil,@tokenize_regex]) m = current_text.match(/^(.*?)(#{filter}.*)$/im) if m.nil? || m[1].blank? answer[0] = current_text else answer[0] = m[1] answer[1] = m[2] end elsif stop_on == :space - filter = Parser.merge_regex_filter('\s',@tokenize_regex) + filter = Parser.merge_regex_filter(['\s',@tokenize_regex,remaining_constants_regex]) m = current_text.match(/^(.*?)(#{filter}.*)$/im) if m.nil? answer[0] = current_text else answer[0] = m[1] answer[1] = m[2] end else - filter = Parser.merge_regex_filter(stop_on,@tokenize_regex) + filter = Parser.merge_regex_filter([stop_on,@tokenize_regex,remaining_constants_regex]) m = current_text.match(/^(.*?)(#{filter}.*)$/im) if strict answer[0] = m[1] unless m.nil? answer[1] = m.nil? ? current_text : m[2] else @@ -136,19 +139,20 @@ end def process_constant_if_present while process_tokens_if_present; end to_delete = nil - @remaining_constants.each do |k| + @remaining_constants_no_spaces.each do |k| p = Parser.parse_constant(@text_so_far,k) next if p[0].nil? to_delete = k unset_rules_until(k) add_constant(p[0]) @text_so_far = p[1] end @remaining_constants.delete(to_delete) unless to_delete.nil? + @remaining_constants_no_spaces.delete(to_delete) unless to_delete.nil? end def process_tokens_if_present found = false @tokenize.each do |k| @@ -189,28 +193,33 @@ end def update_rules @rules = [] @constants = [] + @constants_no_spaces = [] current_rule = nil return if @raw_rules.blank? @raw_rules.split(" ").each do |rule| + current_rule_no_spaces = nil if rule.starts_with?(":") && rule.size > 1 - current_rule = { :rule => rule[1..-1].to_sym, :stop => :end } + current_rule_no_spaces = { :rule => rule[1..-1].to_sym, :stop => :end } previous_stop_on = :space else current_rule = rule.upcase + current_rule_no_spaces = current_rule + @constants_no_spaces<< current_rule_no_spaces + current_rule = "\\s+#{current_rule}" unless current_rule.match(/.*[a-z].*/i).nil? @constants<< current_rule previous_stop_on = :constant end if @rules.last.kind_of?(Hash) @rules.last[:stop] = previous_stop_on end - @rules<< current_rule + @rules<< current_rule_no_spaces end end def add_constant(value) @raw_results<< value @@ -232,8 +241,13 @@ current_text = m[2] current_text.strip! unless current_text.nil? end current_text end - + + def remaining_constants_regex + return "" if @remaining_constants.nil? + @remaining_constants.join("|") + end + end end \ No newline at end of file