Sha256: a9b24a39266cff18d9fd381365d978d456d75efe5cbc5ffb81d54fe1d1718cc8

Contents?: true

Size: 453 Bytes

Versions: 19

Compression:

Stored size: 453 Bytes

Contents

require 'cgi/util'

# Extracts the plain-text body of an HTML document.
#
# Reads the whole HTML file, then copies to the text file every line from the
# one containing <div class="main_text"> up to (but not including) the one
# containing <div class="bibliographical_information">. Each copied line has
# its tags removed and its HTML entities decoded.
htmlfile = 'nisendouka.html'
textfile = 'nisendouka.txt'

# The input is read completely before the output file is opened, so a failed
# read does not create or truncate the output file.
html = File.read(htmlfile)

File.open(textfile, 'w') do |out|
  body_started = false
  html.each_line do |row|
    unless body_started
      # Still in the header: discard lines until the body marker appears.
      next unless /<div class="main_text">/ =~ row
      body_started = true
    end

    # The bibliographic section marks the end of the body; stop before it.
    break if /<div class="bibliographical_information">/ =~ row

    # Tags are removed first, then entities such as &amp; are decoded.
    out.write CGI.unescapeHTML(row.gsub(/<[^>]+>/, ''))
  end
end

Version data entries

19 entries across 19 versions & 1 rubygem

Version Path
ruby_learner-1.2.9 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.8 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.7 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.6 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.5 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.4 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.3 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.2 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.1 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.2.0 contents/takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.15 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.14 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.13 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.11 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.10 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.9 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.8 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.7 takahashi/sample_prog/final_check/cut_nisen.rb
ruby_learner-1.1.6 takahashi/sample_prog/final_check/cut_nisen.rb