Sha256: 59d36bfaf16e9318fcb84df0c81d7c3bc0d4327a52b18f084de83da3cebac47f

Contents?: true

Size: 641 Bytes

Versions: 1

Compression:

Stored size: 641 Bytes

Contents

# -*- coding: UTF-8 -*-

require 'open3'

require 'middleman-blog-similar/algorithm/word_frequency'

class Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab < ::Middleman::Blog::Similar::Algorithm::WordFrequency
  class CommandNotFound < StandardError; end
  def words
    res = []
    IO.popen("mecab 2>/dev/null", 'r+') {|f|
      f.puts article.untagged_body
      f.puts article.title
      f.close_write
      while line = f.gets
        word, pos = line.split(/[\t\s]+/)
        next unless pos
        pos = pos.split(',')
        res << word if pos[0] == '名詞' && %w{一般 固有名詞}.include?(pos[1])
      end
    }
    res
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
middleman-blog-similar-1.1.1 lib/middleman-blog-similar/algorithm/word_frequency/mecab.rb