Sha256: 0c51d997919432ab9721231be3a4d64d84934cec482faee79919f4045bdce745
Contents?: true
Size: 759 Bytes
Versions: 2
Compression:
Stored size: 759 Bytes
Contents
require 'middleman-blog-similar/algorithm/word_frequency'

# Word-frequency algorithm variant that obtains its word list by piping the
# article's text through an external tagger command.
#
# The command line is taken from ENV['TREETAGGER_COMMAND'] and run through the
# shell. Its output is read line by line; each line is split on whitespace and
# the first two fields are treated as the word and its part-of-speech tag.
# NOTE(review): assumes the configured command prints "word POS ..." per line
# (e.g. TreeTagger run with its token option) - confirm against the setup docs.
class Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger < ::Middleman::Blog::Similar::Algorithm::WordFrequency
  # Raised by #words when ENV['TREETAGGER_COMMAND'] is not set.
  class CommandNotFound < StandardError; end

  # Runs the tagger over the article's untagged body and title and collects
  # the words whose tag starts with NN, JJ or NP.
  #
  # Returns an Array of word Strings in the order the tagger printed them
  # (duplicates are kept).
  # Raises CommandNotFound when ENV['TREETAGGER_COMMAND'] is unset.
  def words
    raise CommandNotFound.new "You need to tree tagger command with ENV['TREETAGGER_COMMAND']" unless ENV['TREETAGGER_COMMAND']

    # http://courses.washington.edu/hypertxt/csar-v02/penntable.html
    wanted_prefixes = %w{NN JJ NP}
    res = []
    # The command's stderr is discarded; only its stdout is parsed.
    IO.popen("#{ ENV['TREETAGGER_COMMAND'] } 2>/dev/null", 'r+') do |f|
      f.puts article.untagged_body
      f.puts article.title
      # Close our write end so the command sees EOF and flushes its output.
      f.close_write
      f.each_line do |line|
        word, pos = line.split(/\s+/)
        # Lines with fewer than two fields (blank lines, lone tokens) carry
        # no tag; skip them instead of calling [] on nil.
        next unless pos
        # Compare only the two-character tag prefix so inflected variants
        # such as NNS, JJR, JJS and NPS are accepted as well.
        res << word if wanted_prefixes.include?(pos[0, 2])
      end
    end
    res
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems