lib/ruby.rb in narou-1.6.2 vs lib/ruby.rb in narou-1.6.3

- old
+ new

@@ -1,152 +1,152 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2012 whiteleaf. All rights reserved. -# - -# ルビふりスクリプト - -require_relative "mecab" - -=begin -訂正画面イメージ -┌─────────────────────────────────┐ -│...シャルティアの|妾である吸血鬼の花嫁《ヴァンパイア・ブライド》 │ -│ ^ │ -│ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*):ffff │ -└─────────────────────────────────┘ -↓ -┌─────────────────────────────────┐ -│...シャルティアの妾である|吸血鬼の花嫁《ヴァンパイア・ブライド》 │ -│ ^ │ -│ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*):y │ -└─────────────────────────────────┘ -=end - -def make_text(prev_text, name, ruby) - "#{prev_text}|#{name}《#{ruby}》" -end - -def output_input_message - print "ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*): " -end - -def output_interface(prev_text, name, ruby) - is_omit = prev_text.length > 7 - omit_text = (is_omit ? "‥‥" : "") + prev_text[(is_omit ? -7 : 0)..-1] - puts make_text(omit_text, name, ruby) - puts " " * omit_text.length + "^" - output_input_message -end - -def wait_user_input(prev_text, name, ruby) - output_interface(prev_text, name, ruby) - while input = STDIN.gets - case input[0].downcase - when "y" - return make_text(prev_text, name, ruby) - when "s" - return nil - when "f" - count = input.match(/(f+)/i)[1].length - prev_text += name[0...count] - _name = name[count..-1] - name = (_name ? _name : "") - output_interface(prev_text, name, ruby) - when "b" - count = input.match(/(b+)/i)[1].length - _prev_text = prev_text[-count..-1] - name = (_prev_text ? _prev_text : prev_text) + name - prev_text = (_prev_text ? prev_text[0...-count] : "") - output_interface(prev_text, name, ruby) - else - output_input_message - end - end -end - -# -# ルビっぽい文字列かどうか判定 -# -# ひらがな、カタカナ、アルファベット、数字、・ のみで構成された文字列かどうか -# 漢数字が含まれていた場合、全角アラビア数字に直す -# -def is_ruby_strings?(str) - str =~ /^[ぁ-んァ-ヴーゞ・A-Za-zA-Za-z]+$/ -end - -Mecab = MecabLib::Mecab.new("") - -def extract_ruby(line) - line.gsub(/(.+?)[<<〈((](.+?)[>>〉))]/) do - match_name_message = $1 - match_ruby = $2 - match_all = $& - next match_all unless is_ruby_strings?(match_ruby) - node = Mecab.sparse_tonode(match_name_message.force_encoding("Windows-31J").encode("UTF-8")) - nodes = [] - while node.hasNext - node = node.next - break if node.surface == "EOS" - nodes << { - surface: node.surface.force_encoding("Windows-31J").encode("UTF-8")), - pos: node.pos.force_encoding("Windows-31J").encode("UTF-8"), - root: node.root.force_encoding("Windows-31J").encode("UTF-8"), - reading: node.reading.force_encoding("Windows-31J").encode("UTF-8"), - pronunciation: node.pronunciation.force_encoding("Windows-31J").encode("UTF-8") - } - end - name = "" - prev_text = "" - is_before_word_alphabet = false - detected = false - nodes.reverse.each do |node| - unless detected - sujou = node[:pos].split(",") - if ["接頭詞", "名詞", "助詞", "助動詞", "形容詞"].include?(sujou[0]) - if (sujou[0] == "助詞" && sujou[1] != "連体化") || - (sujou[0] == "助動詞" && sujou[5] != "体言接続") - detected = true - else - if node[:surface] =~ /^[a-zA-Z]+$/ - if is_before_word_alphabet - name = " " + name - end - is_before_word_alphabet = true - else - is_before_word_alphabet = false - end - name = node[:surface] + name - next - end - else - detected = true - end - end - prev_text = node[:surface] + prev_text - end - result = wait_user_input(prev_text, name, match_ruby) - result ? result : match_all - end -end - -if ARGV.count == 0 - puts "ファイル名を指定して下さい" - exit -end - -ARGV.each do |fname| - puts "#{fname} の処理を開始します" - puts "-" * 70 - open(fname) do |read_fp| - open("[ルビ変換]#{fname}", "w") do |write_fp| - read_fp.each do |line| - result = extract_ruby(line) - write_fp.puts(result) - end - end - end - puts "-" * 30 - puts "#{fname} の変換が完了しました。" -end - -Mecab.destroy +# -*- coding: utf-8 -*- +# +# Copyright 2012 whiteleaf. All rights reserved. +# + +# ルビふりスクリプト + +require_relative "mecab" + +=begin +訂正画面イメージ +┌─────────────────────────────────┐ +│...シャルティアの|妾である吸血鬼の花嫁《ヴァンパイア・ブライド》 │ +│ ^ │ +│ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*):ffff │ +└─────────────────────────────────┘ +↓ +┌─────────────────────────────────┐ +│...シャルティアの妾である|吸血鬼の花嫁《ヴァンパイア・ブライド》 │ +│ ^ │ +│ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*):y │ +└─────────────────────────────────┘ +=end + +def make_text(prev_text, name, ruby) + "#{prev_text}|#{name}《#{ruby}》" +end + +def output_input_message + print "ルビ開始位置の確認 (Yes/Skip/Forward*/Backward*): " +end + +def output_interface(prev_text, name, ruby) + is_omit = prev_text.length > 7 + omit_text = (is_omit ? "‥‥" : "") + prev_text[(is_omit ? -7 : 0)..-1] + puts make_text(omit_text, name, ruby) + puts " " * omit_text.length + "^" + output_input_message +end + +def wait_user_input(prev_text, name, ruby) + output_interface(prev_text, name, ruby) + while input = STDIN.gets + case input[0].downcase + when "y" + return make_text(prev_text, name, ruby) + when "s" + return nil + when "f" + count = input.match(/(f+)/i)[1].length + prev_text += name[0...count] + _name = name[count..-1] + name = (_name ? _name : "") + output_interface(prev_text, name, ruby) + when "b" + count = input.match(/(b+)/i)[1].length + _prev_text = prev_text[-count..-1] + name = (_prev_text ? _prev_text : prev_text) + name + prev_text = (_prev_text ? prev_text[0...-count] : "") + output_interface(prev_text, name, ruby) + else + output_input_message + end + end +end + +# +# ルビっぽい文字列かどうか判定 +# +# ひらがな、カタカナ、アルファベット、数字、・ のみで構成された文字列かどうか +# 漢数字が含まれていた場合、全角アラビア数字に直す +# +def is_ruby_strings?(str) + str =~ /^[ぁ-んァ-ヴーゞ・A-Za-zA-Za-z]+$/ +end + +Mecab = MecabLib::Mecab.new("") + +def extract_ruby(line) + line.gsub(/(.+?)[<<〈((](.+?)[>>〉))]/) do + match_name_message = $1 + match_ruby = $2 + match_all = $& + next match_all unless is_ruby_strings?(match_ruby) + node = Mecab.sparse_tonode(match_name_message.force_encoding("Windows-31J").encode("UTF-8")) + nodes = [] + while node.hasNext + node = node.next + break if node.surface == "EOS" + nodes << { + surface: node.surface.force_encoding("Windows-31J").encode("UTF-8")), + pos: node.pos.force_encoding("Windows-31J").encode("UTF-8"), + root: node.root.force_encoding("Windows-31J").encode("UTF-8"), + reading: node.reading.force_encoding("Windows-31J").encode("UTF-8"), + pronunciation: node.pronunciation.force_encoding("Windows-31J").encode("UTF-8") + } + end + name = "" + prev_text = "" + is_before_word_alphabet = false + detected = false + nodes.reverse.each do |node| + unless detected + sujou = node[:pos].split(",") + if ["接頭詞", "名詞", "助詞", "助動詞", "形容詞"].include?(sujou[0]) + if (sujou[0] == "助詞" && sujou[1] != "連体化") || + (sujou[0] == "助動詞" && sujou[5] != "体言接続") + detected = true + else + if node[:surface] =~ /^[a-zA-Z]+$/ + if is_before_word_alphabet + name = " " + name + end + is_before_word_alphabet = true + else + is_before_word_alphabet = false + end + name = node[:surface] + name + next + end + else + detected = true + end + end + prev_text = node[:surface] + prev_text + end + result = wait_user_input(prev_text, name, match_ruby) + result ? result : match_all + end +end + +if ARGV.count == 0 + puts "ファイル名を指定して下さい" + exit +end + +ARGV.each do |fname| + puts "#{fname} の処理を開始します" + puts "-" * 70 + open(fname) do |read_fp| + open("[ルビ変換]#{fname}", "w") do |write_fp| + read_fp.each do |line| + result = extract_ruby(line) + write_fp.puts(result) + end + end + end + puts "-" * 30 + puts "#{fname} の変換が完了しました。" +end + +Mecab.destroy