Sha256: cf52e45d525f092baeea3f7654dd31063da0241fa808fb5a2c70849c4f023594
Contents?: true
Size: 1.92 KB
Versions: 1
Compression:
Stored size: 1.92 KB
Contents
require "cgi"
require "fileutils"
require "logger"

require "law/japan/e_gov"
require "mechanize"

# Walks the e-Gov law index, submits the index form once per category button,
# and saves the HTML of every law linked from each resulting list page under
# <root_dir>/<category name>/<H_FILE_NAME>.html.
class Law::Japan::EGov::Downloader
  # Refinement giving Mechanize::Form a way to reset its @clicked_buttons list.
  # The same form object is submitted once per button, so the list is reset
  # before each submit (presumably so earlier buttons are not sent again --
  # depends on Mechanize internals; verify when upgrading Mechanize).
  module Mechanize::Form::Clearable
    refine Mechanize::Form do
      def clear_buttons
        @clicked_buttons = []
      end
    end
  end

  using Mechanize::Form::Clearable

  # Leading part of an H_FILE_NAME value: one of M/T/S/H followed by two
  # digits. The captured prefix is also the directory name in the law URL.
  H_FILE_NAME_PATTERN = /\A([MTSH]\d{2})/

  # Directory under which downloaded files are stored.
  attr_reader :root_dir

  # @param root_dir [String] base directory for the downloaded HTML files
  def initialize(root_dir)
    @root_dir = root_dir
  end

  # Downloads every law reachable from the index page, logging start and end.
  def download!
    logger.info "Start downloading all laws"
    download
    logger.info "Finish downloading all laws"
  end

  private

  # Memoized logger writing to standard output.
  def logger
    @logger ||= Logger.new(STDOUT)
  end

  # Memoized Mechanize agent with its own STDOUT logger attached.
  def agent
    @agent ||= Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }.tap do |new_agent|
      new_agent.log = Logger.new(STDOUT)
    end
  end

  # Memoized index page; fetched once per downloader instance.
  def index_page
    @index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
  end

  # The third form named "index" on the index page; its buttons are the
  # categories iterated by #download.
  def category_form
    index_page.forms_with(name: "index")[2]
  end

  # Submits the category form once per button and downloads every link found
  # on each resulting list page.
  def download
    category_form.buttons.each do |button|
      # The category label is the text of the node following the button.
      category_name = button.node.next.text.gsub(/[ ]+/, "")
      category_form.clear_buttons
      list_page = agent.submit(category_form, button)
      sleep 1
      list_page.links.each { |link| download_law(link, category_name) }
    end
  end

  # Downloads the law behind a single list-page link unless its file already
  # exists. Links without a usable H_FILE_NAME are logged and skipped.
  def download_law(link, category_name)
    law_name = link.text
    h_file_name = h_file_name_for(link)
    era_prefix = h_file_name.to_s[H_FILE_NAME_PATTERN, 1]

    unless era_prefix
      logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_name}"
      return
    end

    # NOTE(review): h_file_name comes from the remote page and only its prefix
    # is validated before it is interpolated into a local path.
    law_url = "http://law.e-gov.go.jp/htmldata/#{era_prefix}/#{h_file_name}.html"
    law_file = File.join(root_dir, category_name, "#{h_file_name}.html")

    if File.exist?(law_file)
      logger.info "File already exists for #{law_name} (#{law_file})"
      return
    end

    # Make sure the category directory exists before writing; a no-op when it
    # is already there.
    FileUtils.mkdir_p(File.dirname(law_file))

    logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
    agent.download(law_url, law_file)
    logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
    sleep 2
  end

  # Returns the first H_FILE_NAME query parameter of the link, or nil when the
  # link has no URI, no query string, or no such parameter.
  def h_file_name_for(link)
    uri = link.uri
    query = uri && uri.query
    return nil unless query

    CGI.parse(query)["H_FILE_NAME"].first
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
law-japan-0.0.1 | lib/law/japan/e_gov/downloader.rb |