Sha256: 4d5b2b8f50bbc4b38a2f79cd2648c0173f147fb87c57a59b253c9792515600c4
Contents?: true
Size: 968 Bytes
Versions: 1
Compression:
Stored size: 968 Bytes
Contents
require "law/japan/e_gov"
require "find"
require "fileutils"
require "nokogiri"
require "logger"

# Converts a directory tree of law HTML files into plain-text files.
#
# Every "*.html" file found recursively under +html_dir+ is parsed with
# Nokogiri, and the text content of its <body> element is written to a
# ".txt" file at the same relative path under +text_dir+.
class Law::Japan::EGov::Converter
  attr_reader :html_dir, :text_dir

  # @param html_dir [String] directory searched recursively for "*.html" files
  # @param text_dir [String] directory under which the ".txt" files are written
  def initialize(html_dir, text_dir)
    @html_dir = html_dir
    @text_dir = text_dir
  end

  # Converts every HTML file under +html_dir+, logging before and after.
  def convert!
    logger.info "Start converting all laws"
    convert
    logger.info "Finish converting all laws"
  end

  private

  # Lazily created logger that writes to standard output.
  def logger
    @logger ||= Logger.new STDOUT
  end

  # Walks +html_dir+ and converts each HTML file it contains.
  def convert
    # Resolve the output root BEFORE changing directory; otherwise a relative
    # text_dir would be interpreted relative to html_dir instead of the
    # caller's working directory.
    output_root = File.expand_path(text_dir)

    Dir.chdir(html_dir) do
      Dir.glob(File.join("**", "*.html")) do |path|
        convert_html(path, output_root)
      end
    end
  end

  # Converts a single HTML file to text.
  #
  # @param path [String] path of the HTML file, relative to +html_dir+
  #   (the current working directory while #convert is running)
  # @param output_root [String] directory under which the text file is
  #   created, mirroring the relative location of +path+
  def convert_html(path, output_root = text_dir)
    dirname = File.dirname(path)
    basename = File.basename(path, ".html")
    target_dir = File.join(output_root, dirname)
    target_file = File.join(target_dir, "#{basename}.txt")

    logger.info "Converting to #{target_file}"
    FileUtils.mkdir_p target_dir

    # File.open with a block closes the handle once parsing is done, and,
    # unlike Kernel#open, never treats a leading "|" in the path as a command.
    document = File.open(path) { |file| Nokogiri::HTML(file) }

    # A document without a <body> (e.g. an empty file) yields an empty text
    # file rather than raising NoMethodError on nil.
    body = document.at_css("body")
    text = body ? body.text : ""

    File.write(target_file, text)
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
law-japan-0.0.1 | lib/law/japan/e_gov/converter.rb |