Sha256: 21b063b57a432b8aeea5f311eb511e27ebc7dc2116a6d08161594c6ba84798db

Contents?: true

Size: 618 Bytes

Versions: 10

Compression:

Stored size: 618 Bytes

Contents

#!/usr/bin/env ruby
# coding: utf-8
# 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
# mecab-modern gemが必要です

require "mecab-modern"
require "sosowa"

# Sample script body: make sure the thdic-mecab dictionary file is present
# locally, build a MeCab tagger, fetch the plain text of one randomly sampled
# Sosowa entry, and print the MeCab feature string of every token.
dict_path = "thdic-mecab.dic"
dict_url = "https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic"

puts "Fetching thdic-mecab..."
# File.exist? replaces FileTest.exists?, which no longer exists on Ruby >= 3.2.
unless File.exist?(dict_path)
  # Argument-list form bypasses the shell. -f makes curl exit non-zero on HTTP
  # errors, and -o lets curl write the file itself instead of a shell redirect.
  fetched = system("curl", "-fL", "-o", dict_path, dict_url)
  unless fetched
    # system returns false on a non-zero exit and nil when curl cannot be run.
    warn "Failed to download #{dict_url}"
    # Remove any partial file so the next run retries the download instead of
    # treating a truncated dictionary as already fetched.
    File.delete(dict_path) if File.exist?(dict_path)
  end
end

puts "Done. Initialize MeCab::Tagger"
# The user-dictionary option is currently disabled; pass "-u thdic-mecab.dic"
# to MeCab::Tagger.new to load the downloaded dictionary.
mecab = MeCab::Tagger.new

text = Sosowa.get.sample.fetch.plain
mecab.parseToNode(text).each do |token|
  puts token.feature
end

Version data entries

10 entries across 10 versions & 1 rubygems

Version Path
sosowa-3.0.1 samples/token_segment.rb
sosowa-3.0 samples/token_segment.rb
sosowa-2.0 samples/token_segment.rb
sosowa-1.1.2 samples/token_segment.rb
sosowa-1.1.1 samples/token_segment.rb
sosowa-1.1 samples/token_segment.rb
sosowa-1.0 samples/token_segment.rb
sosowa-0.9 samples/token_segment.rb
sosowa-0.8 samples/token_segment.rb
sosowa-0.7 samples/token_segment.rb