Sha256: 5b5a85b76e55bbdfb817ef7b53e1a8b90827cf8fb93da58e4ea67191bce20058
Contents?: true
Size: 804 Bytes
Versions: 3
Compression:
Stored size: 804 Bytes
Contents
# coding: utf-8

# Text filter that re-flows its input so that a line break follows every
# sentence-ending character, one input line at a time.
class Tefil::EachSentence < Tefil::TextFilterBase
  # Characters after which a line break is inserted.
  # NOTE(review): "." is listed twice; the second entry may originally have
  # been a different (e.g. full-width) character -- confirm against upstream.
  END_CHAR = %w(. ? . 。)

  # Tokens that end with an END_CHAR but must not terminate a sentence.
  NOT_END_WORDS = ["Fig.", "FIG."]

  # @param options [Hash] filter options; :minimum is stored in @minimum
  #   (not read anywhere in this class). :smart_filename is always forced to
  #   true before the options are handed to the superclass.
  def initialize(options = {})
    # merge returns a new hash, so the caller's hash is left untouched.
    options = options.merge(smart_filename: true)
    @minimum = options[:minimum]
    super(options)
  end

  # Reads all of in_io, strips surrounding whitespace, splits every line into
  # sentences and writes the result to out_io.
  #
  # @param in_io [#read] input stream
  # @param out_io [#puts] output stream
  def process_stream(in_io, out_io)
    results = in_io.read.strip.split("\n").map { |line| split_sentences(line) }
    out_io.puts results.join("\n")
  end

  private

  # Breaks a single line after each END_CHAR, re-joins NOT_END_WORDS with the
  # text that follows them, and normalises spacing.
  #
  # @param line [String] one input line without its trailing newline
  # @return [String] the line with one sentence per output line
  def split_sentences(line)
    broken = ''.dup
    line.each_char do |char|
      broken << char
      broken << "\n" if END_CHAR.include?(char)
    end

    NOT_END_WORDS.each do |word|
      # Escape the word so that its "." is matched literally; unescaped it
      # would also match e.g. "Fig?" and rewrite it to "Fig.".
      broken.gsub!(/#{Regexp.escape(word)}\n/) { word }
    end

    # Drop the spaces that followed a sentence end in the original text.
    broken.gsub!(/\n */, "\n")
    broken.strip!
    # Collapse runs of spaces. `/ +/` rather than `/ */`: the latter also
    # matches the empty string and would insert a space between every
    # character.
    broken.gsub!(/ +/, ' ')
    broken
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
tefil-0.1.4 | lib/tefil/eachsentence.rb |
tefil-0.1.3 | lib/tefil/eachsentence.rb |
tefil-0.1.2 | lib/tefil/eachsentence.rb |