Sha256: 0ff46ee60791e38b7fb8a53dfd14a829de833de71db11d08876473b03cc32bf9
Contents?: true
Size: 1.8 KB
Versions: 2
Compression:
Stored size: 1.8 KB
Contents
module MagickColumns
  # Breaks a free-text search query into groups of search terms.
  #
  # The query is first split on the OR operators and each resulting part is
  # split again on the AND operators. Both operator sets come from
  # +MagickColumns.or_operators+ / +MagickColumns.and_operators+ and are
  # interpolated into regular expressions, so they are treated as regexp
  # fragments (e.g. an alternation of words), not as literal text.
  class Tokenizer
    # @param query [String, nil] raw query text. Anything else is converted
    #   with +to_s+, so +nil+ behaves like an empty query instead of failing
    #   later when the query is stripped.
    def initialize(query = '')
      @query = query.to_s
    end

    # Tokenizes the whole query.
    #
    # @return [Array<Array>] one inner array per OR-separated part of the
    #   query, holding the tokens of all of its AND-separated terms. Parts
    #   that yield no tokens are omitted.
    def extract_terms
      groups = split_on_operators(clean_query, MagickColumns.or_operators).map do |or_part|
        split_on_operators(or_part, MagickColumns.and_operators).flat_map do |and_part|
          split_term_in_terms(and_part)
        end
      end

      groups.reject(&:empty?)
    end

    # Removes surrounding whitespace and any run of dangling operators at the
    # start or at the end of the query.
    #
    # AND and OR operators are stripped together, so mixed runs such as
    # "or and foo" or "foo and or" are cleaned completely regardless of the
    # order in which the operators appear.
    #
    # @return [String] the query without leading/trailing operators
    def clean_query
      operators = "(?:#{MagickColumns.and_operators})|(?:#{MagickColumns.or_operators})"

      @query.strip
        .sub(%r{\A(\s*(#{operators})\s+)+}, '')
        .sub(%r{(\s+(#{operators})\s*)+\z}, '')
    end

    # Tokenizes a single operator-free term.
    #
    # Every rule in +MagickColumns.replacement_rules+ is applied first,
    # rewriting the text in place; then every rule in
    # +MagickColumns.tokenize_rules+ extracts the tokens it recognizes and
    # removes them from the text. Whatever is left is split on whitespace into
    # plain <tt>{ term: word }</tt> tokens.
    #
    # @param term [String] the text to tokenize (it is not modified)
    # @return [Array] rule-produced tokens followed by the plain word tokens
    def split_term_in_terms(term)
      remaining = term.dup
      tokens = []

      MagickColumns.replacement_rules.each do |_rule, options|
        pattern = resolve_pattern(options)

        # Re-checked after every substitution: the loop only ends once the
        # rewritten text no longer matches the pattern.
        while (match = remaining.match(pattern))
          remaining.sub!(pattern, options[:replacement].call(match))
        end
      end

      MagickColumns.tokenize_rules.each do |_rule, options|
        pattern = resolve_pattern(options)

        while (match = remaining.match(pattern))
          tokens << options[:tokenizer].call(match)
          remaining.sub!(pattern, '')
        end
      end

      tokens + remaining.strip.split(/\s+/).map { |word| { term: word } }
    end

    private

    # Splits +text+ on whitespace-delimited +operators+ and drops the operator
    # pieces that String#split keeps because the separator has a capture group.
    def split_on_operators(text, operators)
      text.split(%r{\s+(#{operators})\s+}).reject do |piece|
        piece =~ %r{\A(#{operators})\z}
      end
    end

    # Returns the rule's pattern, calling it first when it is a callable.
    def resolve_pattern(options)
      pattern = options[:pattern]

      pattern.respond_to?(:call) ? pattern.call : pattern
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
magick_columns-0.0.4 | lib/magick_columns/tokenizer.rb |
magick_columns-0.0.3 | lib/magick_columns/tokenizer.rb |