Sha256: c0517de9b496689b1e6eda89729c1558cec448dc4ee59599f67c6909d515bb59

Contents?: true

Size: 908 Bytes

Versions: 2

Compression:

Stored size: 908 Bytes

Contents

# -*- coding: utf-8 -*-
require 'natto'

module RNlp
  # Term-frequency counter for Japanese ('ja') or English ('en') text.
  class Tf
    # @return [Object] the language code handed to the constructor
    attr_reader :lang

    # @param lang [String] 'ja' or 'en'. The value is stored as-is and is
    #   only checked when #count is called.
    def initialize(lang)
      @lang = lang
    end

    # Counts how many times each token occurs in +text+.
    #
    # For 'ja' the text is split on newlines and each line is tokenized with
    # Natto::MeCab; for 'en' the text is split on whitespace.
    #
    # @param text [String] the text to analyse
    # @return [Hash{String => Integer}] token => occurrence count, keyed in
    #   order of first appearance (plain Hash: missing keys yield nil)
    # @raise [ArgumentError] if the configured language is neither 'ja' nor 'en'
    def count(text)
      case @lang
      when 'ja' then frequencies(japanese_tokens(text))
      # A whitespace split already separates on spaces, tabs and newlines,
      # so no per-line pass is needed.
      when 'en' then frequencies(text.split(' '))
      else
        # Raise instead of printing and calling `exit`: library code should
        # not terminate the host process (and with a success status at that).
        raise ArgumentError, "lang #{@lang} is not compatible."
      end
    end

    private

    # Tokenizes +text+ line by line with MeCab and returns the surface forms.
    def japanese_tokens(text)
      mecab = Natto::MeCab.new
      surfaces = []
      text.split("\n").each do |line|
        mecab.parse(line) do |node|
          # stat 3 is MeCab's end-of-sentence marker node (MECAB_EOS_NODE),
          # not a real token.
          next if node.stat == 3
          surfaces << node.surface
        end
      end
      surfaces
    end

    # Builds the token => count hash from a list of tokens.
    def frequencies(tokens)
      tokens.each_with_object({}) do |token, tf|
        tf[token] = tf.fetch(token, 0) + 1
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
r_nlp-0.1.4 lib/r_nlp/tf.rb
r_nlp-0.1.3 lib/r_nlp/tf.rb