# frozen_string_literal: true

require 'uri'
require 'cgi'
require 'json'
require 'mechanize'
require 'logger'
require 'English'
require 'io/console'

module AtCoderFriends
  # scrapes AtCoder contest site and
  # - fetches problems
  # - submits sources
  # - runs tests on custom_test page
  class ScrapingAgent
    include PathUtil

    BASE_URL = 'https://atcoder.jp/'
    # Positional (%s) template: pass the heading text as a positional
    # argument to format, not as a keyword.
    XPATH_SECTION = '//h3[.="%s"]/following-sibling::section'
    XPATH_USERNAME = '//*[@id="navbar-collapse"]/ul[2]/li[2]/a'
    SESSION_STORE =
      File.join(Dir.home, '.at_coder_friends', '%<user>s_session.yml')

    attr_reader :ctx, :agent

    # Builds the Mechanize agent used for every request.
    #
    # @param ctx context object; this class reads ctx.options, ctx.config
    #   and ctx.path from it
    def initialize(ctx)
      @ctx = ctx
      @agent = Mechanize.new
      # short pause before every connection
      agent.pre_connect_hooks << proc { sleep 0.1 }
      agent.log = Logger.new(STDERR) if ctx.options[:debug]
      # reuse cookies from a previous run when a session file exists
      agent.cookie_jar.load(session_store) if File.exist?(session_store)
    end

    # Writes the agent's cookie jar to the session store file,
    # creating the containing directory first if it is missing.
    def save_session
      dir = File.dirname(session_store)
      Dir.mkdir(dir) unless Dir.exist?(dir)
      agent.cookie_jar.save_as(session_store)
    end

    # @return contest name derived from ctx.path via contest_name (memoized)
    def contest
      @contest ||= contest_name(ctx.path)
    end

    # @return the configuration object held by the context
    def config
      ctx.config
    end

    # @param path [String] path relative to BASE_URL
    # @return [String] site-wide URL
    def common_url(path)
      File.join(BASE_URL, path)
    end

    # @param path [String] path relative to the current contest
    # @return [String] URL under BASE_URL/contests/<contest>/
    def contest_url(path = '')
      File.join(BASE_URL, 'contests', contest, path)
    end

    # @return [String] session file path for config['user'] (memoized)
    def session_store
      @session_store ||= format(SESSION_STORE, user: config['user'])
    end

    # @return [String] regexp source matching the constraints section title
    def constraints_pat
      config['constraints_pat'] || '^制約$'
    end

    # @return [String] regexp source matching the input format section title
    def input_fmt_pat
      config['input_fmt_pat'] || '^入出?力$'
    end

    # @return [String] regexp source matching a sample input title;
    #   the named group "no" captures the sample number
    def input_smp_pat
      config['input_smp_pat'] || '^入力例\s*(?<no>[\d0-9]+)$'
    end

    # @return [String] regexp source matching a sample output title;
    #   the named group "no" captures the sample number
    def output_smp_pat
      config['output_smp_pat'] || '^出力例\s*(?<no>[\d0-9]+)$'
    end

    # Fetches a page, going through the login form when necessary.
    #
    # @param url [String] page to fetch
    # @return the fetched page
    # @raise [Mechanize::ResponseCodeError] for non-404 errors, or for a 404
    #   whose error page already shows a logged-in user
    # @raise [AppError] when the final page is still the login page
    def fetch_with_auth(url)
      begin
        page = agent.get(url)
      rescue Mechanize::ResponseCodeError => e
        raise e unless e.response_code == '404'
        # a 404 while logged in is a genuine "not found"
        raise e if username_link(e.page)

        # otherwise retry through the login page, returning to url afterwards
        page = agent.get(common_url('login') + '?continue=' + CGI.escape(url))
      end

      if page.uri.path == '/login'
        user, pass = read_auth
        form = page.forms[1]
        form.field_with(name: 'username').value = user
        form.field_with(name: 'password').value = pass
        page = form.submit
      end

      # still on the login page after submitting: credentials were rejected
      page.uri.path == '/login' && (raise AppError, 'Authentication failed.')
      show_username(page)
      page
    end

    # Obtains credentials from config, prompting on the console for any
    # that are missing or empty. The password is read without echo.
    #
    # @return [Array(String, String)] user name and password
    def read_auth
      user = config['user'].to_s
      if user.empty?
        print('Enter username:')
        user = STDIN.gets.chomp
      end

      pass = config['password'].to_s
      if pass.empty?
        print("Enter password for #{user}:")
        pass = STDIN.noecho(&:gets).chomp
        puts
      end
      [user, pass]
    end

    # Remembers the user name shown on the page ('-' when none is found) and
    # prints it when it differs from the previously remembered one.
    #
    # @param page page to inspect
    def show_username(page)
      username_old = @username
      link = username_link(page)
      @username = (link ? link.text.strip : '-')
      return if @username == username_old || @username == '-'

      puts "Logged in as #{@username}"
    end

    # @param page page to inspect
    # @return the first node matching XPATH_USERNAME when its href is '#';
    #   a falsy value (nil or false) otherwise
    def username_link(page)
      link = page.search(XPATH_USERNAME)[0]
      link && link[:href] == '#' && link
    end

    # Fetches every problem listed for the contest.
    #
    # @yield [pbm] each problem right after it has been fetched (optional)
    # @return [Array] the fetched problems
    def fetch_all
      puts "***** fetch_all #{contest} *****"
      fetch_assignments.map do |q, url|
        pbm = fetch_problem(q, url)
        yield pbm if block_given?
        pbm
      end
    end

    # Reads the contest's tasks page.
    #
    # @return [Hash] link text => href, taken from the links in the first
    #   column of the first table
    def fetch_assignments
      url = contest_url('tasks')
      puts "fetch list from #{url} ..."
      page = fetch_with_auth(url)
      page
        .search('//table[1]//td[1]//a')
        .each_with_object({}) do |a, h|
          h[a.text] = a[:href]
        end
    end

    # Fetches one problem page and parses its sections.
    #
    # @param q problem identifier passed to Problem.new
    # @param url [String] problem page URL
    # @return the object returned by Problem.new
    def fetch_problem(q, url)
      puts "fetch problem from #{url} ..."
      page = fetch_with_auth(url)
      Problem.new(q) do |pbm|
        pbm.html = page.body
        if contest == 'arc001'
          # arc001: the <section> is looked up as a following sibling of
          # each <h3> instead of as its parent
          page.search('//h3').each do |h3|
            # XPATH_SECTION is a positional template; passing the title as
            # a keyword would interpolate a Hash into the XPath.
            query = format(XPATH_SECTION, h3.content)
            sections = page.search(query)
            sections[0] && parse_section(pbm, h3, sections[0])
          end
        else
          # every element that has an <h3> child is treated as a section
          page.search('//*[./h3]').each do |section|
            h3 = section.search('h3')[0]
            parse_section(pbm, h3, section)
          end
        end
      end
    end

    # Classifies a section by its heading and stores its content in pbm.
    #
    # @param pbm problem being built
    # @param h3 heading node of the section
    # @param section node holding the section content
    def parse_section(pbm, h3, section)
      title = h3.content.strip
      title.delete!("\u008f\u0090") # agc002
      text = section.content
      # first <pre> of the section, or '' when there is none
      code = section.search('pre')[0]&.content || ''
      case title
      when /#{constraints_pat}/
        pbm.desc += text
      when /#{input_fmt_pat}/
        pbm.desc += text
        pbm.fmt = code
      when /#{input_smp_pat}/
        pbm.add_smp($LAST_MATCH_INFO[:no], :in, code)
      when /#{output_smp_pat}/
        pbm.add_smp($LAST_MATCH_INFO[:no], :exp, code)
      end
    end

    # Submits the source file at ctx.path through the contest's submit form.
    #
    # @raise [AppError] when no task option starts with the problem id, or
    #   when no submit language is configured for the file extension
    def submit
      path, _dir, prg, _base, ext, q = split_prg_path(ctx.path)
      puts "***** submit #{prg} *****"
      src = File.read(path, encoding: Encoding::UTF_8)

      page = fetch_with_auth(contest_url('submit'))
      form = page.forms[1]
      form.field_with(name: 'data.TaskScreenName') do |sel|
        option = sel.options.find { |op| op.text.start_with?(q) }
        option&.select || (raise AppError, "unknown problem:#{q}.")
      end
      form.add_field!('data.LanguageId', lang_id(ext))
      form.field_with(name: 'sourceCode').value = src
      form.submit
    end

    # Runs the source file at ctx.path on the custom_test page.
    #
    # @param infile [String] path of the file whose content is sent as input
    # @return [Hash, nil] parsed JSON result; nil when the response has no
    #   'Result' or when polling gives up after 100 attempts
    # @raise [AppError] when the submit request returns a non-empty body
    def code_test(infile)
      path, _dir, _prg, _base, ext, _q = split_prg_path(ctx.path)
      src = File.read(path, encoding: Encoding::UTF_8)
      input = File.read(infile)

      page = fetch_with_auth(contest_url('custom_test'))
      # the CSRF token is read from the page's inline script text
      script = page.search('script').text
      csrf_token = script.scan(/var csrfToken = "(.*)"/)[0][0]

      page = agent.post(
        contest_url('custom_test/submit/json'),
        'data.LanguageId' => lang_id(ext),
        'sourceCode' => src,
        'input' => input,
        'csrf_token' => csrf_token
      )
      # a non-empty body is treated as an error message
      msg = page.body
      raise AppError, msg unless msg.empty?

      100.times do
        page = agent.get(contest_url('custom_test/json?reload=true'))
        data = JSON.parse(page.body)
        return nil unless data.is_a?(Hash) && data['Result']
        # NOTE(review): Status 3 presumably means "finished" - confirm
        return data if data.dig('Result', 'Status') == 3
        return data unless data['Interval']

        # Interval is divided by 1000 here, i.e. treated as milliseconds
        sleep 1.0 * data['Interval'] / 1000
      end

      nil
    end

    # Lists the languages offered by the custom_test page (memoized).
    #
    # @return [Array<Hash>, nil] { v: value, t: text } for every option with
    #   a non-empty value; nil when the language field is not found
    def lang_list
      @lang_list ||= begin
        page = fetch_with_auth(contest_url('custom_test'))
        form = page.forms[1]
        sel = form.field_with(name: 'data.LanguageId')
        sel && sel
          .options
          .reject { |opt| opt.value.empty? }
          .map do |opt|
            { v: opt.value, t: opt.text }
          end
      end
    end

    # @return [String, nil] lang_list as "value - text" lines;
    #   nil when lang_list is nil
    def lang_list_txt
      lang_list
        &.map { |opt| "#{opt[:v]} - #{opt[:t]}" }
        &.join("\n")
    end

    # Looks up the submit language configured for a file extension.
    #
    # @param ext [String] file extension without the leading dot
    # @return configured 'submit_lang' value
    # @raise [AppError] when none is configured; the message lists the
    #   available languages
    def lang_id(ext)
      config.dig('ext_settings', ext, 'submit_lang') || (
        msg = <<~MSG
          submit_lang for .#{ext} is not specified.
          Available languages:
          #{lang_list_txt || '(failed to fetch)'}
        MSG
        raise AppError, msg
      )
    end
  end
end