Sha256: 60f542f914eb2da65a35fe1b3a512e09affb94fae659848d24d449754c744803

Contents?: true

Size: 934 Bytes

Versions: 1

Compression:

Stored size: 934 Bytes

Contents

module MeCab
  # Counts noun phrases in buffered text.
  #
  # Text is tokenised with MeCab::Tagger#parseToNode. Every maximal run of
  # consecutive nodes whose first feature field is NOUN_FEATURE is joined
  # (by surface) into one phrase, and occurrences of each phrase are counted.
  #
  # @example
  #   parser = MeCab::NounParser.new(except: %w[こと])
  #   parser << text
  #   parser.parse.nouns # => [{ noun: "...", count: 3 }, ...]
  class NounParser
    # Value of the first comma-separated feature field that marks a noun node.
    NOUN_FEATURE = '名詞'.freeze

    # @param options [Hash] only +:except+ is read.
    # @option options [#include?] :except surfaces left out of noun phrases
    #   (defaults to an empty Array).
    def initialize(**options)
      @except = options[:except] || []
      @nouns = Hash.new(0)
      @text = []
    end

    # Buffers +text+ for the next call to #parse.
    #
    # @param text [String] text to be tokenised later.
    # @return [Array] the internal buffer (the result of Array#<<).
    def <<(text)
      @text << text
    end

    # Tokenises every buffered text and rebuilds the phrase counts.
    #
    # Counts are recomputed from scratch on each call, so calling #parse
    # repeatedly never counts the same buffered text more than once.
    #
    # @return [NounParser] self, so that +parse.nouns+ can be chained.
    def parse
      @nouns = Hash.new(0)
      @text.each { |text| count_phrases(text) }
      self
    end

    # @return [Array<Hash>] one +{ noun:, count: }+ hash per phrase, highest
    #   count first; phrases with equal counts keep first-seen order.
    def nouns
      sorted_nouns.map { |phrase, count| { noun: phrase, count: count } }
    end

    private

    # Walks the node list for one text and records each noun run it contains.
    def count_phrases(text)
      phrase = ''.dup
      node = mecab.parseToNode(text)

      while node
        if noun?(node)
          # An excepted surface is dropped from the phrase but does not end
          # the run: nouns on either side of it are still joined together.
          phrase << node.surface unless @except.include?(node.surface)
        elsif !phrase.empty?
          @nouns[phrase] += 1
          phrase = ''.dup
        end

        node = node.next
      end

      # Record a run that is still open when the node list ends, so the
      # result does not rely on the tagger emitting a trailing non-noun node.
      @nouns[phrase] += 1 unless phrase.empty?
    end

    # True when the first comma-separated feature field equals NOUN_FEATURE.
    def noun?(node)
      node.feature.split(',').first == NOUN_FEATURE
    end

    # Lazily built tagger shared by all #parse calls on this instance.
    def mecab
      @mecab ||= MeCab::Tagger.new
    end

    # [phrase, count] pairs ordered by descending count. The insertion index
    # is used as a tie-breaker because Enumerable#sort_by is not stable.
    def sorted_nouns
      @nouns.each_with_index
            .sort_by { |(_, count), position| [-count, position] }
            .map(&:first)
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
mecab-noun_parser-0.1.0 lib/mecab/noun_parser.rb