Sha256: 4d5b2b8f50bbc4b38a2f79cd2648c0173f147fb87c57a59b253c9792515600c4

Contents?: true

Size: 968 Bytes

Versions: 1

Compression:

Stored size: 968 Bytes

Contents

require "law/japan/e_gov"
require "fileutils"
require "find"
require "logger"
require "nokogiri"

# Converts a directory tree of HTML files into plain-text files.
#
# Every "*.html" file found recursively under +html_dir+ is parsed with
# Nokogiri, and the text of its <body> element is written to a ".txt" file at
# the same relative path under +text_dir+.
class Law::Japan::EGov::Converter
  attr_reader :html_dir, :text_dir

  # @param html_dir [String] root directory searched for "*.html" files
  # @param text_dir [String] root directory that receives the "*.txt" files
  def initialize(html_dir, text_dir)
    @html_dir = html_dir
    @text_dir = text_dir
  end

  # Converts every HTML file under +html_dir+, logging the start and the end
  # of the run to STDOUT.
  def convert!
    logger.info "Start converting all laws"
    convert
    logger.info "Finish converting all laws"
  end

  private

  # Lazily created logger that writes to STDOUT.
  def logger
    @logger ||= Logger.new STDOUT
  end

  # Walks +html_dir+ and converts each HTML file found below it.
  def convert
    # Resolve the output root BEFORE changing directory; otherwise a relative
    # text_dir would be interpreted relative to html_dir instead of the
    # caller's working directory.
    target_root = File.expand_path(text_dir)

    Dir.chdir(html_dir) do
      Dir.glob(File.join("**", "*.html")) do |path|
        convert_html(path, target_root)
      end
    end
  end

  # Converts a single HTML file to text.
  #
  # @param path [String] path of the HTML file, relative to +html_dir+
  #   (the current working directory while this method runs)
  # @param target_root [String] directory under which the text file is
  #   created, mirroring the relative directory of +path+
  def convert_html(path, target_root = text_dir)
    dirname = File.dirname(path)
    basename = File.basename(path, ".html")
    target_dir = File.join(target_root, dirname)
    target_file = File.join(target_dir, "#{basename}.txt")
    logger.info "Converting to #{target_file}"

    FileUtils.mkdir_p target_dir

    # File.open with a block closes the handle deterministically and, unlike
    # Kernel#open, never interprets a path starting with "|" as a command.
    # The IO (not a String) is handed to Nokogiri so that its own encoding
    # detection keeps working as before.
    body = File.open(path) { |file| Nokogiri::HTML(file) }.at_css("body")
    logger.warn "No <body> found in #{path}" unless body

    File.write(target_file, body ? body.text : "")
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
law-japan-0.0.1 lib/law/japan/e_gov/converter.rb