Sha256: 6c41a74e4b6f1ccf0988ef6155b1dbbaf9a8326e3bbf1dcb3acefdb6763e494b

Contents?: true

Size: 1.17 KB

Versions: 15

Compression:

Stored size: 1.17 KB

Contents

#!/usr/bin/env ruby
# coding: utf-8
# 創想話の最新版から適当なSSのテキストを取得してMeCab(+ 東方MeCab辞書)を用いてテキスト中のセリフの発言者を予測します。
# 精度低いので誰かちゃんとしたの作ってください!

require "mecab-modern"
require "kconv"
require "sosowa"
require "pp"

# Make sure a local copy of the Touhou MeCab dictionary exists in the working
# directory, then create the MeCab tagger used below.
dict_file = "thdic-mecab.dic"
dict_url = "https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic"

puts "東方MeCab辞書をダウンロード中..."
# File.exist? is used because FileTest.exists? / File.exists? were deprecated
# and then removed in Ruby 3.2.
unless File.exist?(dict_file)
  # The argument-list form of system bypasses the shell, so no empty file is
  # created by a redirection when curl is missing or fails. -f makes curl exit
  # non-zero on HTTP errors instead of saving the error page, -L follows the
  # GitHub redirect, and -o names the output file.
  downloaded = system("curl", "-f", "-L", "-o", dict_file, dict_url)
  # Only warn on failure: the tagger below does not load this dictionary, so
  # the rest of the script can still run.
  warn "東方MeCab辞書のダウンロードに失敗しました" unless downloaded
end

puts "完了. MeCab::Taggerを初期化します"
# The user-dictionary argument is commented out, so the tagger is created with
# its default options and the downloaded dictionary is not passed to it.
mecab = MeCab::Tagger.new # ("-u thdic-mecab.dic")

# Fetch one fixed novel (log 170, key 1342037924), print its title and author
# between two horizontal rules, and split its text into trimmed lines.
# Disabled alternative that picks a sample instead of a fixed entry:
#   novel = Sosowa.get.sample.fetch
novel = Sosowa.get(:log => 170, :key => 1342037924)
rule = "-" * 30
puts rule
puts novel.title
puts "作者: #{novel.author.name}"
puts rule
# Raw newlines are removed and "<br>" is treated as the line separator.
# Segments are stripped BEFORE empties are discarded, so a whitespace-only
# segment does not survive as an empty line and sit between a narration line
# and the dialogue line that follows it.
lines = novel.text.gsub(/\r?\n/, "").split("<br>").map(&:strip).reject(&:empty?)
# Guess who speaks each line of dialogue: when a line contains a token whose
# MeCab feature string matches "名詞,固有名詞,人名" and the line right after it
# contains a Japanese quotation bracket, print "<name>: <following line>".
#
# each_cons(2) yields every line together with its successor, which replaces
# a manually incremented index. The final line has no successor and is never
# yielded as a candidate, so no output is lost.
lines.each_cons(2) do |line, following|
  # The first matching token on the line is taken as the speaker.
  speaker = mecab.parseToNode(line).select { |node| node.feature =~ /名詞,固有名詞,人名/ }.first
  next unless speaker
  # Only lines containing an opening or closing bracket count as dialogue.
  next unless following =~ /[「」]/

  puts "#{speaker.surface}: #{following}"
end

Version data entries

15 entries across 15 versions & 1 rubygems

Version Path
sosowa-3.0.1 samples/chara_recognize.rb
sosowa-3.0 samples/chara_recognize.rb
sosowa-2.0 samples/chara_recognize.rb
sosowa-1.1.2 samples/chara_recognize.rb
sosowa-1.1.1 samples/chara_recognize.rb
sosowa-1.1 samples/chara_recognize.rb
sosowa-1.0 samples/chara_recognize.rb
sosowa-0.9 samples/chara_recognize.rb
sosowa-0.8 samples/chara_recognize.rb
sosowa-0.7 samples/chara_recognize.rb
sosowa-0.6 samples/chara_recognize.rb
sosowa-0.5.1 samples/chara_recognize.rb
sosowa-0.5 samples/chara_recognize.rb
sosowa-0.4 samples/chara_recognize.rb
sosowa-0.3 samples/chara_recognize.rb