Sha256: a7611def64f6f0724b3d161f859f905358f4bb09bb5207240bed5c852d8dc33c

Contents?: true

Size: 758 Bytes

Versions: 2

Compression:

Stored size: 758 Bytes

Contents

# -*- coding: utf-8 -*-
require 'natto'

module RNlp
  # Term-frequency counter for Japanese ('ja') or English ('en') text.
  class Tf
    # compatible with ja or en
    attr_reader :lang

    # @param lang [String] language code; 'ja' and 'en' are handled by #count
    def initialize(lang)
      @lang = lang
    end

    # Counts how many times each term occurs in +text+.
    #
    # For 'ja' the text is tokenized with Natto::MeCab and keyed by each
    # node's surface form; for 'en' each line is split on whitespace.
    # For any other language a message is printed and the process exits.
    #
    # @param text [String, #each] either a whole String (walked line by line)
    #   or a collection that yields one String per line
    # @return [Hash{String => Integer}] term => number of occurrences
    def count(text)
      tf = {}
      case @lang
      when 'ja'
        nm = Natto::MeCab.new
        each_line_of(text) do |line|
          sentence = line.chomp
          next if sentence.empty?

          # The block form of parse yields nodes; without a block natto
          # returns the formatted result as a single String.
          nm.parse(sentence) do |word|
            # The end-of-sentence node has an empty surface; it is not a term.
            next if word.is_eos?

            tf[word.surface] = tf.fetch(word.surface, 0) + 1
          end
        end
      when 'en'
        each_line_of(text) do |line|
          # Start from 0 so the first occurrence is counted as 1, not 2.
          line.split(' ').each { |word| tf[word] = tf.fetch(word, 0) + 1 }
        end
      else
        puts "lang #{@lang} is not compatible."
        exit
      end
      tf
    end

    private

    # Yields each line of +text+, whether it is a String/IO (each_line) or
    # an Array-like collection of lines (each).
    def each_line_of(text, &block)
      if text.respond_to?(:each_line)
        text.each_line(&block)
      else
        text.each(&block)
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
r_nlp-0.1.2 lib/r_nlp/tf.rb
r_nlp-0.1.1 lib/r_nlp/tf.rb