Sha256: c94b43f9f8d544848943b7f835f55c4cdd01d55327878ab30f25d5a8512fdfcb
Contents?: true
Size: 1.99 KB
Versions: 1
Compression:
Stored size: 1.99 KB
Contents
# encoding: utf-8
module Mongoid
  module FTS
    # Turns free text into a list of lowercase word stems with stopwords
    # removed. The module extends itself, so it is used as
    # Stemming.stem(...) or Stemming.for(...).
    module Stemming
      # Stem every whitespace-separated word found in +args+.
      #
      # args  - any number of objects; they are joined with single spaces
      #         and the result is split on runs of whitespace.
      # block - optional; when given, each surviving stem is yielded to it
      #         instead of being collected.
      #
      # Returns an Array of stems when no block is given, nil otherwise.
      #
      # Relies on String#stem, which is not defined in this file.
      def stem(*args, &block)
        words = args.join(' ').strip.split(/\s+/)
        list = []

        words.each do |word|
          stem = word.stem.downcase

          # A word is dropped when it, its lowercase form, or its stem is a
          # stopword. The lowercase check matters for capitalised input whose
          # stem is not itself a stopword (e.g. "This" stems to "thi").
          next if Stopwords.stopword?(word)
          next if Stopwords.stopword?(word.downcase)
          next if Stopwords.stopword?(stem)

          block ? block.call(stem) : list.push(stem)
        end

        block ? nil : list
      end

      alias_method('for', 'stem')

      # Stopword lists loaded from "<this file minus .rb>/stopwords/*.txt".
      #
      # List  - Hash of list name (file basename up to the first dot) to the
      #         Array of words read from that file.
      # All   - sorted, de-duplicated Array of the words from the enabled lists.
      # Index - Hash of word => word built from All, used for lookups.
      module Stopwords
        dirname = __FILE__.sub(/\.rb\Z/, '')
        glob = File.join(dirname, 'stopwords', '*.txt')

        List = {}

        Dir.glob(glob).each do |wordlist|
          basename = File.basename(wordlist)
          list_name = basename.split(/\./).first

          # File.readlines rather than Kernel#open: Kernel#open treats a path
          # starting with "|" as a command to run.
          words = File.readlines(wordlist).map { |line| line.strip }
          words.delete_if { |word| word.empty? }
          # The empty string is appended on purpose so that '' is itself
          # looked up as a stopword.
          words.push('')

          List[list_name] = words
        end

        # TODO - this needs to be configurable...
        #
        # const_defined?(..., false) only looks at this module. A bare
        # defined?(All) would also be satisfied by a constant of the same name
        # in an enclosing namespace or at top level.
        unless const_defined?(:All, false)
          All = []

          %w[english full_english extended_english].each do |list_name|
            words = List[list_name]

            if words.nil?
              # Previously a missing list raised TypeError from concat(nil).
              warn("mongoid-fts: stopword list #{list_name.inspect} not found in #{File.join(dirname, 'stopwords')}")
              next
            end

            All.concat(words)
          end

          # Other lists that may be enabled by adding their names above:
          #   full_french full_spanish full_portuguese full_italian
          #   full_german full_dutch full_norwegian full_danish
          #   full_russian full_russian_koi8_r full_finnish

          All.sort!
          All.uniq!
        end

        unless const_defined?(:Index, false)
          Index = {}

          All.each do |word|
            Index[word] = word
          end
        end

        # Returns true when +word+ is present in Index, false otherwise.
        # The lookup is an exact, case-sensitive Hash lookup.
        def stopword?(word)
          !!Index[word]
        end

        extend(Stopwords)
      end

      extend(Stemming)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
mongoid-fts-2.0.0 | lib/mongoid-fts/stemming.rb |