Sha256: a61ade82a868d682582c4fac18619bc347aa5befab41f9f7927ad44f4245022d

Contents?: true

Size: 951 Bytes

Versions: 2

Compression:

Stored size: 951 Bytes

Contents

module MeCab
  # Counts noun occurrences in queued text using a MeCab tagger.
  #
  # Consecutive nodes whose first feature field is the noun tag are joined
  # into a single compound and counted as one entry.
  #
  # Usage:
  #   parser = MeCab::NounParser.new(except: %w[foo])
  #   parser << text_a << text_b
  #   parser.parse.nouns # => [{ noun: "...", count: 2 }, ...]
  class NounParser
    # Value of the first comma-separated field of +node.feature+ that marks
    # a node as a noun.
    NOUN_TAG = '名詞'.freeze

    # @param options [Hash] keyword options
    # @option options [Array<String>] :except surfaces to leave out of the
    #   counted compounds (defaults to none)
    def initialize(**options)
      @except = options[:except] || []
      @nouns = Hash.new(0)
      @text = []
    end

    # Queues +text+ for the next call to #parse. +nil+ is ignored.
    #
    # @param text [String, nil]
    # @return [self] so that calls can be chained (<tt>parser << a << b</tt>)
    def <<(text)
      @text << text unless text.nil?
      self
    end

    # Parses every queued text and adds the nouns found to the running
    # counts. Each text is removed from the queue before it is parsed, so
    # calling #parse again without queuing new text does not count anything
    # twice.
    #
    # @return [self]
    def parse
      count_nouns(mecab.parseToNode(@text.shift)) until @text.empty?

      self
    end

    # @return [Array<Hash>] one <tt>{ noun:, count: }</tt> hash per distinct
    #   noun, highest count first; equal counts keep first-seen order
    def nouns
      sorted_nouns.map { |noun, count| { noun: noun, count: count } }
    end

    private

    def mecab
      @mecab ||= MeCab::Tagger.new
    end

    # Walks the linked list of nodes starting at +node+ and records each run
    # of adjacent noun nodes as one compound.
    def count_nouns(node)
      run = []

      while node
        if noun?(node)
          # An excluded surface is dropped from the compound but does not
          # end the current run; only a non-noun node does.
          run << node.surface unless @except.include?(node.surface)
        else
          record(run)
        end

        node = node.next
      end

      # Flush a run that reaches the end of the list without being closed
      # by a trailing non-noun node.
      record(run)
    end

    # True when the first field of the node's feature string is the noun tag.
    def noun?(node)
      node.feature.split(',').first == NOUN_TAG
    end

    # Counts the compound built from +run+ (if any) and empties +run+.
    def record(run)
      compound = run.join
      run.clear
      @nouns[compound] += 1 unless compound.empty?
    end

    # [noun, count] pairs ordered by descending count. The insertion index is
    # used as a tie-breaker because sort_by alone is not stable.
    def sorted_nouns
      @nouns.each_with_index
            .sort_by { |(_noun, count), index| [-count, index] }
            .map(&:first)
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
mecab-noun_parser-0.1.2 lib/mecab/noun_parser.rb
mecab-noun_parser-0.1.1 lib/mecab/noun_parser.rb