lib/appstats/parser.rb in appstats-0.20.2 vs lib/appstats/parser.rb in appstats-0.20.6

- old
+ new

@@ -192,17 +192,43 @@ @tokenize = [] @tokenize_no_spaces = [] @tokenize_regex = nil @tokenize_regex_no_spaces = nil return if @raw_tokenize.blank? + is_multi_work_token = false + current_token = nil @raw_tokenize.split(" ").each do |token| - current_token = token.upcase + + start_token = token.starts_with?("'") + end_token = is_multi_work_token and token.ends_with?("'") + mid_token = !start_token && !end_token && is_multi_work_token + add_token = false + + if start_token + current_token = token.upcase[1..-1] + is_multi_work_token = true + elsif mid_token + current_token = "#{current_token}#{token.upcase}" + elsif end_token + current_token = "#{current_token}#{token.upcase.chop}" + is_multi_work_token = false + add_token = true + else + current_token = token.upcase + add_token = true + end + current_token.gsub!("(",'\(') current_token.gsub!(")",'\)') current_token.gsub!("|",'\|') - @tokenize_no_spaces<< current_token - current_token = "\\s+#{current_token}(\\s|$)" unless current_token.match(/.*[a-z].*/i).nil? - @tokenize<< current_token + + if add_token + @tokenize_no_spaces<< current_token + current_token = "\\s+#{current_token}(\\s|$)" unless current_token.match(/.*[a-z].*/i).nil? + @tokenize<< current_token + else is_multi_work_token + current_token = "#{current_token}\\s+" + end end @tokenize_regex_no_spaces = @tokenize_no_spaces.join("|") @tokenize_regex = @tokenize.join("|") end \ No newline at end of file