Sha256: 1fbaf5f7fba7ec7d85e552d398d0673dcfa6027de514fd3b299573759edbe25a

Contents?: true

Size: 950 Bytes

Versions: 3

Compression:

Stored size: 950 Bytes

Contents

# -*- coding: utf-8 -*-
require 'natto'

module RNlp
  # Term-frequency counter for Japanese ('ja') or English ('en') text.
  #
  # Japanese text is tokenized with Natto::MeCab; English text is split on
  # whitespace. Tokens are counted exactly as produced (no case folding,
  # no punctuation stripping).
  class Tf
    # Language codes accepted by the constructor.
    SUPPORTED_LANGS = %w[ja en].freeze

    # @return [String] the language code this counter was created with
    attr_reader :lang

    # @param lang [String] 'ja' or 'en'
    # @raise [ArgumentError] if lang is not one of SUPPORTED_LANGS
    def initialize(lang)
      @lang = lang
      return if SUPPORTED_LANGS.include?(lang)

      # Raise instead of printing and calling `exit`: a library must not
      # terminate the host process (and `exit` reported success, status 0).
      raise ArgumentError, "lang #{@lang} is not compatible."
    end

    # Counts how many times each token occurs in text.
    #
    # @param text [String] input text; processed line by line ("\n"-separated)
    # @return [Hash{String => Integer}] token => occurrence count, in order of
    #   first appearance; a plain Hash without a default value
    def count(text)
      tf = {}
      each_token(text) { |token| tf[token] = tf.fetch(token, 0) + 1 }
      tf
    end

    private

    # Yields every token of text according to @lang. Yields nothing when
    # @lang matches neither supported code.
    def each_token(text, &block)
      if @lang == 'ja'
        each_japanese_token(text, &block)
      elsif @lang == 'en'
        text.split("\n").each { |line| line.split(' ').each(&block) }
      end
    end

    # Yields the surface form of every node MeCab produces for each line.
    def each_japanese_token(text)
      nm = Natto::MeCab.new
      text.split("\n").each do |line|
        nm.parse(line) do |word|
          # NOTE(review): stat 3 is presumably MeCab's end-of-sentence marker
          # node, which carries no surface text - confirm against natto docs.
          next if word.stat == 3

          yield word.surface
        end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
r_nlp-0.1.7 lib/r_nlp/tf.rb
r_nlp-0.1.6 lib/r_nlp/tf.rb
r_nlp-0.1.5 lib/r_nlp/tf.rb