Sha256: a7611def64f6f0724b3d161f859f905358f4bb09bb5207240bed5c852d8dc33c
Contents?: true
Size: 758 Bytes
Versions: 2
Compression:
Stored size: 758 Bytes
Contents
# -*- coding: utf-8 -*-

begin
  require 'natto'
rescue LoadError
  # natto (the MeCab binding) is only used for Japanese input. A missing gem
  # must not prevent English-only use; the Japanese path requires it again
  # so the LoadError surfaces where it actually matters.
end

module RNlp
  # Term-frequency counter for Japanese ('ja') or English ('en') text.
  class Tf
    # Language code passed to the constructor.
    attr_reader :lang

    # @param lang [String] 'ja' or 'en'
    def initialize(lang)
      @lang = lang
    end

    # Counts how many times each term occurs in +text+.
    #
    # Japanese lines are tokenized with MeCab (via natto); English lines are
    # split on whitespace.
    #
    # If the language is neither 'ja' nor 'en', a message is printed to
    # stdout and the process exits.
    #
    # @param text [#each] yields one String per line (e.g. an Array of lines)
    # @return [Hash{String => Integer}] term => number of occurrences; the
    #   hash has no default value, so unseen terms read as nil
    def count(text)
      case @lang
      when 'ja' then count_japanese(text)
      when 'en' then count_english(text)
      else
        puts "lang #{@lang} is not compatible."
        exit
      end
    end

    private

    # Tallies the surface form of every MeCab node in every line.
    def count_japanese(text)
      require 'natto' # no-op when already loaded; raises LoadError otherwise
      tagger = Natto::MeCab.new
      tally = {}
      text.each do |line|
        # The block form of parse yields nodes; without a block natto
        # returns a String, which cannot be iterated node by node.
        tagger.parse(line) do |node|
          next if node.is_eos? # end-of-sentence sentinel, not a real term

          add_occurrence(tally, node.surface)
        end
      end
      tally
    end

    # Tallies whitespace-separated words in every line.
    def count_english(text)
      tally = {}
      text.each do |line|
        line.split(' ').each { |word| add_occurrence(tally, word) }
      end
      tally
    end

    # Increments the count for +term+, starting from zero on first sight.
    def add_occurrence(tally, term)
      tally[term] = tally.fetch(term, 0) + 1
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
r_nlp-0.1.2 | lib/r_nlp/tf.rb |
r_nlp-0.1.1 | lib/r_nlp/tf.rb |