Sha256: 3eb7e72ec7a7fa2ba63cf8b80aa592a5bff13818efc42dd1b668b5ada58b8527
Contents?: true
Size: 798 Bytes
Versions: 1
Compression:
Stored size: 798 Bytes
Contents
# coding: utf-8
# frozen_string_literal: true

# Text filter that re-flows its input so that each sentence sits on its own
# output line.
#
# A line break is inserted after every character listed in END_CHAR, except
# where that character is the tail of an abbreviation listed in
# NOT_END_WORDS (e.g. "Fig. 1" stays on one line).
class Tefil::EachSentence < Tefil::TextFilterBase
  # Characters after which a line break is inserted.
  # NOTE(review): the list previously contained "." twice; the second entry may
  # have been a full-width full stop lost in transcription -- confirm upstream.
  END_CHAR = %w[. ? 。].freeze

  # Abbreviations whose trailing period must not terminate a sentence.
  NOT_END_WORDS = %w[Fig. FIG.].freeze

  # Matches any single sentence-terminating character from END_CHAR.
  SENTENCE_END = Regexp.union(END_CHAR)
  private_constant :SENTENCE_END

  # @param options [Hash] filter options. +:minimum+ is stored in @minimum;
  #   +:smart_filename+ is always forced to true before the options are handed
  #   to the superclass (its meaning is defined by Tefil::TextFilterBase).
  def initialize(options = {})
    @minimum = options[:minimum]
    # Merge into a copy so the caller's hash is not modified.
    super(options.merge(smart_filename: true))
  end

  # Reads all of +in_io+, splits every input line into sentences and writes
  # the result to +out_io+.
  #
  # @param in_io [#read] source of the text to filter
  # @param out_io [#puts] destination for the filtered text
  # @return [Object] whatever +out_io.puts+ returns
  def process_stream(in_io, out_io)
    lines = in_io.read.strip.split("\n")
    out_io.puts lines.map { |line| split_sentences(line) }.join("\n")
  end

  private

  # Breaks one input line into newline-separated sentences.
  #
  # @param line [String] a single line without its trailing newline
  # @return [String] the line with one sentence per line, spaces normalised
  def split_sentences(line)
    broken = line.gsub(SENTENCE_END) { |mark| "#{mark}\n" }

    # Undo the break after known abbreviations. A String pattern is matched
    # literally, so the "." in "Fig." cannot match arbitrary characters
    # (with a Regexp, "Fig?\n" would have been rewritten to "Fig.").
    NOT_END_WORDS.each { |word| broken = broken.gsub("#{word}\n", word) }

    broken
      .gsub(/\n +/, "\n") # drop spaces that now lead a sentence
      .strip              # drop the break after the final sentence
      .squeeze(' ')       # collapse runs of spaces into one
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
tefil-0.1.1 | lib/tefil/eachsentence.rb |