Sha256: cf52e45d525f092baeea3f7654dd31063da0241fa808fb5a2c70849c4f023594

Contents?: true

Size: 1.92 KB

Versions: 1

Compression:

Stored size: 1.92 KB

Contents

require "law/japan/e_gov"
require "mechanize"

class Law::Japan::EGov::Downloader
  module Mechanize::Form::Clearable
    refine Mechanize::Form do
      def clear_buttons
        @clicked_buttons = []
      end
    end
  end
  using Mechanize::Form::Clearable

  attr_reader :root_dir
  def initialize(root_dir)
    @root_dir = root_dir
  end

  def download!
    logger.info "Start downloading all laws"
    download
    logger.info "Finish downloading all laws"
  end

  private

  def logger
    @logger ||= Logger.new STDOUT
  end

  def agent
    unless @agent
      @agent = Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }
      @agent.log = Logger.new STDOUT
    end
    @agent
  end

  def index_page
    @index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
  end

  def category_form
    index_page.forms_with(name: "index")[2]
  end

  def download
    category_form.buttons.each do |button|
      category_name = button.node.next.text.gsub(/[  ]+/, "")

      category_form.clear_buttons
      list_page = agent.submit(category_form, button)
      sleep 1

      list_page.links.each do |link|
        law_name = link.text
        h_file_name = CGI.parse(link.uri.query)["H_FILE_NAME"].first
        if h_file_name =~ /^([MTSH]\d{2})/
          law_url = "http://law.e-gov.go.jp/htmldata/#{$1}/#{h_file_name}.html"
          law_file = File.join(root_dir, category_name, "#{h_file_name}.html")
          if File.exists? law_file
            logger.info "File already exists for #{law_name} (#{law_file})"
          else
            logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
             agent.download(law_url, law_file)
            logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
             sleep 2
          end
        else
          logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_file}"
        end
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
law-japan-0.0.1 lib/law/japan/e_gov/downloader.rb