lib/appstats/parser.rb in appstats-0.9.0 vs lib/appstats/parser.rb in appstats-0.9.1
- old
+ new
@@ -1,10 +1,12 @@
module Appstats
class Parser
- attr_reader :raw_rules, :rules, :repeating, :raw_tokenize, :tokenize, :tokenize_no_spaces, :tokenize_regex, :tokenize_regex_no_spaces, :results, :raw_results, :constants
+ attr_reader :raw_rules, :rules, :repeating, :raw_tokenize, :results, :raw_results,
+ :tokenize, :tokenize_no_spaces, :tokenize_regex, :tokenize_regex_no_spaces,
+ :constants, :constants_no_spaces
def initialize(data = {})
@raw_rules = data[:rules]
@raw_tokenize = data[:tokenize]
@repeating = data[:repeating] == true
@@ -23,10 +25,11 @@
@rule_index = 0
@max_rule_index = @rules.size - 1
@previous_text_so_far = input.strip
@text_so_far = @previous_text_so_far
@remaining_constants = @constants.dup
+ @remaining_constants_no_spaces = @constants_no_spaces.dup
while !@text_so_far.blank?
process_constant_if_present
break if @rule_index > @max_rule_index && !@repeating
@rule_index = 0 if @rule_index > @max_rule_index
@@ -35,16 +38,17 @@
@rule_index += 1
if rule.kind_of?(Hash)
if rule[:stop] == :constant
was_found = false
- @remaining_constants.each_with_index do |k,index|
+ @remaining_constants_no_spaces.each_with_index do |k,index|
p = parse_word(@text_so_far,k,true)
if p[0].nil?
unset_rules_until(k)
else
(index-1).downto(0) do |i|
+ @remaining_constants_no_spaces.delete_at(i)
@remaining_constants.delete_at(i)
end
add_results(rule[:rule],p[0])
@text_so_far = p[1]
was_found = true
@@ -77,44 +81,43 @@
answer[0] = m[1] unless m.nil?
answer[1] = m.nil? ? current_text : m[2]
clean_parsed_word(answer)
end
# Combines regex source fragments into a single alternation group.
#
# inputs - Array of regex source strings. nil/blank entries are ignored;
#          a nil array is treated as empty.
#
# Returns "" when no usable fragment remains, otherwise "(a|b|...)".
# The array passed in is never modified, so callers may keep using it.
def self.merge_regex_filter(inputs = [])
  # reject builds a new array; the previous collect!/compact! chain rewrote
  # the caller's array in place.
  fragments = (inputs || []).reject { |fragment| fragment.blank? }
  return "" if fragments.empty?
  "(#{fragments.join('|')})"
end
def parse_word(current_text,stop_on,strict = false)
answer = [nil,nil]
return answer if current_text.blank? || stop_on.nil?
current_text.strip!
current_text = remove_tokens_at_start(current_text)
if stop_on == :end
- filter = Parser.merge_regex_filter(nil,@tokenize_regex)
+ filter = Parser.merge_regex_filter([nil,@tokenize_regex])
m = current_text.match(/^(.*?)(#{filter}.*)$/im)
if m.nil? || m[1].blank?
answer[0] = current_text
else
answer[0] = m[1]
answer[1] = m[2]
end
elsif stop_on == :space
- filter = Parser.merge_regex_filter('\s',@tokenize_regex)
+ filter = Parser.merge_regex_filter(['\s',@tokenize_regex,remaining_constants_regex])
m = current_text.match(/^(.*?)(#{filter}.*)$/im)
if m.nil?
answer[0] = current_text
else
answer[0] = m[1]
answer[1] = m[2]
end
else
- filter = Parser.merge_regex_filter(stop_on,@tokenize_regex)
+ filter = Parser.merge_regex_filter([stop_on,@tokenize_regex,remaining_constants_regex])
m = current_text.match(/^(.*?)(#{filter}.*)$/im)
if strict
answer[0] = m[1] unless m.nil?
answer[1] = m.nil? ? current_text : m[2]
else
@@ -136,19 +139,20 @@
end
# Consumes a constant sitting at the front of @text_so_far, if there is one.
#
# Leading tokens are processed first. Every remaining constant (plain form)
# is then tried through Parser.parse_constant; a nil first element in its
# result is treated as "no match". On a match the rules up to that constant
# are unset, the matched text is recorded via add_constant and @text_so_far
# advances to the remainder.
#
# Afterwards the last matched constant is removed from BOTH remaining lists.
# Returns the removed constant (plain form), or nil when nothing matched.
def process_constant_if_present
  while process_tokens_if_present; end

  matched = nil
  @remaining_constants_no_spaces.each do |constant|
    parsed = Parser.parse_constant(@text_so_far, constant)
    next if parsed[0].nil?
    matched = constant
    unset_rules_until(constant)
    add_constant(parsed[0])
    @text_so_far = parsed[1]
  end
  return if matched.nil?

  # @remaining_constants holds the regex form of each constant (letter
  # constants are stored as "\s+CONST"), so deleting by the plain value would
  # miss them and leave the two index-parallel lists out of step. Remove by
  # position instead, walking backwards so earlier indices stay valid.
  (@remaining_constants_no_spaces.size - 1).downto(0) do |index|
    next unless @remaining_constants_no_spaces[index] == matched
    @remaining_constants_no_spaces.delete_at(index)
    @remaining_constants.delete_at(index)
  end
  matched
end
def process_tokens_if_present
found = false
@tokenize.each do |k|
@@ -189,28 +193,33 @@
end
# Rebuilds @rules, @constants and @constants_no_spaces from @raw_rules.
#
# @raw_rules is split on spaces. A word that starts with ":" (and is longer
# than the colon itself) becomes a variable rule hash
# { :rule => <name as symbol>, :stop => ... }; any other word becomes an
# upper-cased constant.
#
# * @rules               - variable hashes and plain constants, in order.
# * @constants_no_spaces - the plain constants, in order.
# * @constants           - same positions as @constants_no_spaces, but a
#                          constant containing a letter is stored as the
#                          regex source "\s+CONST".
#
# A variable's :stop starts as :end and is rewritten by whatever follows it:
# :space when followed by another variable, :constant when followed by a
# constant. All three lists are left empty when @raw_rules is blank.
def update_rules
  @rules = []
  @constants = []
  @constants_no_spaces = []
  return if @raw_rules.blank?

  @raw_rules.split(" ").each do |rule|
    if rule.start_with?(":") && rule.size > 1
      entry = { :rule => rule[1..-1].to_sym, :stop => :end }
      stop_for_previous = :space
    else
      entry = rule.upcase
      @constants_no_spaces << entry
      # Word-like constants must be preceded by whitespace to count as a stop.
      @constants << (entry =~ /[a-z]/i ? "\\s+#{entry}" : entry)
      stop_for_previous = :constant
    end
    @rules.last[:stop] = stop_for_previous if @rules.last.kind_of?(Hash)
    @rules << entry
  end
end
def add_constant(value)
@raw_results<< value
@@ -232,8 +241,13 @@
current_text = m[2]
current_text.strip! unless current_text.nil?
end
current_text
end
-
+
# Joins the entries of @remaining_constants with "|" so they can be used as
# alternatives inside a larger regex. Returns "" when @remaining_constants
# has not been set (nil); an empty list also yields "".
+ def remaining_constants_regex
+ return "" if @remaining_constants.nil?
+ @remaining_constants.join("|")
+ end
+
end
end
\ No newline at end of file