Sha256: c56b8593a08ba9a35f1b84b3ac6af58b9045c2e4c86356525d0fb7b75f597045
Contents?: true
Size: 1.49 KB
Versions: 2
Compression:
Stored size: 1.49 KB
Contents
#!/usr/bin/env ruby
# coding: utf-8

require 'rjb'

# Reopens Rjb's Java proxy class to add a Hash conversion helper.
class Rjb::Rjb_JavaProxy
  # Collects the attributes of the proxied object into a Hash with String keys.
  #
  # NOTE(review): this method becomes available on every Rjb Java proxy, but
  # the accessors it calls look like those of a Kuromoji token -- presumably it
  # is only meant for the items returned by the tokenizer; confirm.
  #
  # @return [Hash] keys: 'parts_of_speech', 'reading', 'base_form',
  #   'surface_form', 'position', 'is_known', 'is_user'
  def to_kuromoji_hash
    {
      'parts_of_speech' => part_of_speech,
      'reading' => getReading,
      'base_form' => base_form,
      'surface_form' => surface_form,
      'position' => position,
      'is_known' => isKnown,
      'is_user' => isUser
    }
  end
end

module Kuromoji
  # Thin wrapper around the Java class org.atilika.kuromoji.Tokenizer,
  # reached through Rjb.
  class Core
    # Loads the bundled Kuromoji jar through Rjb and builds a tokenizer.
    #
    # @param user_dictionary [Object, nil] when not nil, passed as-is to the
    #   builder's +userDictionary+ before the tokenizer is built
    def initialize(user_dictionary = nil)
      # The jar path is resolved relative to this source file.
      jar = File.expand_path('../../../vendor/kuromoji-0.7.7/lib/kuromoji-0.7.7.jar', __FILE__)
      Rjb.load(jar)

      builder = Rjb.import('org.atilika.kuromoji.Tokenizer').builder
      builder = builder.userDictionary(user_dictionary) unless user_dictionary.nil?
      @tokenizer = builder.build
    end

    # Maps each token's surface form to the result of its +all_features+ call.
    #
    # @param sentence [String] text to tokenize
    # @return [Hash] surface form => all_features result (see #process)
    def tokenize(sentence)
      process(:all_features, sentence)
    end

    # Maps each token's surface form to the result of its +getReading+ call.
    #
    # @param sentence [String] text to tokenize
    # @return [Hash] surface form => getReading result (see #process)
    def reading(sentence)
      process(:getReading, sentence)
    end

    # Tokenizes the sentence and converts every token with +to_kuromoji_hash+.
    #
    # @param sentence [String] text to tokenize
    # @return [Array<Hash>] one Hash per token, in iteration order
    def tokenize_with_hash(sentence)
      hashes = []
      each_token(sentence) { |token| hashes << token.to_kuromoji_hash }
      hashes
    end

    # Tokenizes the sentence and calls +method+ on every token.
    #
    # The result is a Hash keyed by surface form, so when the same surface form
    # occurs more than once only the value of its last occurrence is kept.
    #
    # NOTE(review): +send+ bypasses method visibility; only pass trusted
    # method names here.
    #
    # @param method [Symbol, String] name of the method to call on each token
    # @param sentence [String] text to tokenize
    # @return [Hash] surface form => result of calling +method+ on the token
    def process(method, sentence)
      tokenized = {}
      each_token(sentence) { |token| tokenized[token.surface_form] = token.send(method) }
      tokenized
    end

    private

    # Yields each item of the list returned by the tokenizer, in order.
    # The list is walked through its iterator (+has_next+ / +next+).
    def each_token(sentence)
      iterator = @tokenizer.tokenize(sentence).iterator
      yield iterator.next while iterator.has_next
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
kuromoji-ruby-0.0.2 | lib/kuromoji/core.rb |
kuromoji-ruby-0.0.1 | lib/kuromoji/core.rb |