Sha256: a61ade82a868d682582c4fac18619bc347aa5befab41f9f7927ad44f4245022d
Contents?: true
Size: 951 Bytes
Versions: 2
Compression:
Stored size: 951 Bytes
Contents
module MeCab
  # Counts how often each noun (or run of consecutive nouns) occurs in the
  # texts queued with #<<.
  #
  # Consecutive noun nodes are concatenated into one compound entry. A node is
  # treated as a noun when the first comma-separated field of its feature
  # string equals NOUN_FEATURE.
  #
  # @example
  #   parser = MeCab::NounParser.new(except: %w[こと])
  #   parser << text_a << text_b
  #   parser.parse.nouns # => [{ noun: "...", count: 3 }, ...]
  class NounParser
    # Value of the first feature field that marks a node as a noun.
    NOUN_FEATURE = '名詞'.freeze

    # @param options [Hash] keyword options
    # @option options [Array<String>] :except surfaces that are never added
    #   to a noun; defaults to an empty list
    def initialize(**options)
      @except = options[:except] || []
      @nouns = Hash.new(0)
      @text = []
    end

    # Queues a text for the next #parse call. +nil+ is ignored.
    #
    # Returns the parser itself (not the internal queue) so that chained calls
    # always pass through the nil guard.
    #
    # @param text [String, nil]
    # @return [NounParser] self
    def <<(text)
      @text << text unless text.nil?
      self
    end

    # Analyses every queued text and adds its nouns to the running counts.
    #
    # The queue is emptied afterwards, so each text is counted exactly once
    # even if #parse is called repeatedly; counts accumulate across calls.
    #
    # @return [NounParser] self
    def parse
      @text.each { |text| count_nouns(text) }
      @text.clear
      self
    end

    # @return [Array<Hash>] one <tt>{ noun:, count: }</tt> hash per distinct
    #   noun, ordered by descending count
    def nouns
      sorted_nouns.map { |noun, count| { noun: noun, count: count } }
    end

    private

    # Walks the node list for one text and records each run of noun nodes.
    def count_nouns(text)
      parts = []
      node = mecab.parseToNode(text)

      while node
        if noun?(node)
          # An excluded surface is skipped but does not split the run.
          parts << node.surface unless @except.include?(node.surface)
        else
          record(parts)
        end
        node = node.next
      end

      # Flush a run that reaches the end of the node list. Presumably MeCab
      # always ends with a non-noun terminator node, but do not rely on it.
      record(parts)
    end

    # True when the first field of the node's feature string marks a noun.
    def noun?(node)
      node.feature.split(',', 2).first == NOUN_FEATURE
    end

    # Counts the accumulated surfaces as one noun and resets the accumulator.
    def record(parts)
      return if parts.empty?

      @nouns[parts.join] += 1
      parts.clear
    end

    # Lazily built tagger, shared by all texts of this parser.
    def mecab
      @mecab ||= MeCab::Tagger.new
    end

    def sorted_nouns
      @nouns.sort_by { |_, count| -count }
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
mecab-noun_parser-0.1.2 | lib/mecab/noun_parser.rb |
mecab-noun_parser-0.1.1 | lib/mecab/noun_parser.rb |