# frozen_string_literal: true

require 'date'
require 'json'
require 'nokogiri'

require_relative 'curlable'

# Thin client for the "Save Page Now" endpoints under WEB_ARCHIVE.
#
# Design decision to not use a class as only 'state' is in 2 env vars
# (SPN2_ACCESS_KEY and SPN2_SECRET_KEY).
#
# The HTTP helpers `get` and `post` are not defined in this file; they are
# presumably supplied by Curlable (see curlable.rb).
module Spn2
  extend Curlable

  # Value of the 'message' key that marks a response as an authentication failure.
  BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'

  # Numeric codes (read from an error page's <title>) that get their own
  # dynamically generated error class, e.g. Spn2Error502.
  ERROR_CODES = [502].freeze

  class Spn2Error < StandardError; end
  class Spn2ErrorBadAuth < Spn2Error; end
  class Spn2ErrorFailedCapture < Spn2Error; end
  class Spn2ErrorInvalidOption < Spn2Error; end
  class Spn2ErrorMissingKeys < Spn2Error; end
  class Spn2ErrorNoOutlinks < Spn2Error; end
  class Spn2ErrorTooManyRequests < Spn2Error; end
  class Spn2ErrorUnknownResponse < Spn2Error; end
  class Spn2ErrorUnknownResponseCode < Spn2Error; end

  ERROR_CODES.each { |i| Spn2.const_set("Spn2Error#{i}", Class.new(Spn2Error)) }

  # Keys that a single-job status response must contain.
  ESSENTIAL_STATUS_KEYS = %w[job_id resources status].freeze

  # Anchored with \A/\z (not ^/$) so that only a string consisting entirely of
  # a job id matches; ^/$ would also accept a multi-line string containing one.
  JOB_ID_REGEXP = /\A(spn2-([a-f]|\d){40})\z/

  WEB_ARCHIVE = 'https://web.archive.org'

  # 'delay_wb_availability' is the spelling used by the SPN2 API documentation.
  # The misspelt 'delay_wb_availabilty' is kept only so that existing callers
  # passing it are not suddenly rejected.
  # NOTE(review): confirm the option names against the current API docs.
  BINARY_OPTS = %w[capture_all capture_outlinks capture_screenshot delay_wb_availability delay_wb_availabilty
                   force_get skip_first_archive outlinks_availability email_result].freeze

  OTHER_OPTS = %w[if_not_archived_within js_behavior_timeout capture_cookie target_username target_password].freeze

  # Every option name accepted by .save; built once rather than per lookup.
  VALID_OPTS = (BINARY_OPTS + OTHER_OPTS).freeze

  class << self
    # @return [Array<Class>] every constant of Spn2 that is an Exception subclass
    def error_classes
      Spn2.constants.map { |e| Spn2.const_get(e) }.select { |e| e.is_a?(Class) && e < Exception }
    end

    # @return [String] value of ENV['SPN2_ACCESS_KEY']
    # @raise [KeyError] if the variable is not set
    def access_key
      ENV.fetch('SPN2_ACCESS_KEY')
    end

    # @return [String] value of ENV['SPN2_SECRET_KEY']
    # @raise [KeyError] if the variable is not set
    def secret_key
      ENV.fetch('SPN2_SECRET_KEY')
    end

    # Fetches /save/status/system without authentication headers.
    #
    # @return [Object] the parsed JSON response
    def system_status
      json(get(url: "#{WEB_ARCHIVE}/save/status/system")) # no auth
    end

    # Fetches /save/status/user with authentication headers. The current time
    # in milliseconds is appended as the `t` query parameter.
    #
    # @return [Object] the parsed JSON response
    def user_status
      json(auth_get(url: "#{WEB_ARCHIVE}/save/status/user?t=#{DateTime.now.strftime('%Q').to_i}"))
    end

    # Requests a capture of +url+.
    #
    # @param url [String] the page to save
    # @param opts [Hash] extra request params; every key (as a string) must be in VALID_OPTS
    # @return [Hash] the parsed JSON response, which contains a 'job_id'
    # @raise [Spn2ErrorInvalidOption] if +opts+ has a key outside VALID_OPTS
    # @raise [Spn2ErrorBadAuth] if the response 'message' equals BAD_AUTH_MSG
    # @raise [Spn2ErrorFailedCapture] if the response has no 'job_id'
    def save(url:, opts: {})
      raise Spn2ErrorInvalidOption, "One or more invalid options: #{opts}" unless options_valid?(opts)

      response = json(auth_post(url: "#{WEB_ARCHIVE}/save/#{url}", params: { url: url }.merge(opts)))
      raise Spn2ErrorBadAuth, response.inspect if response['message'] == BAD_AUTH_MSG
      raise Spn2ErrorFailedCapture, response.inspect unless response['job_id']

      response
    end
    alias capture save

    # Fetches the status of a single job.
    #
    # @param job_id [String]
    # @return [Hash] the parsed JSON response, containing all ESSENTIAL_STATUS_KEYS
    # @raise [Spn2ErrorBadAuth] if the response 'message' equals BAD_AUTH_MSG
    # @raise [Spn2ErrorMissingKeys] if any of ESSENTIAL_STATUS_KEYS is absent
    def status_job_id(job_id:)
      response = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_id: job_id }))
      raise Spn2ErrorBadAuth, response.inspect if response['message'] == BAD_AUTH_MSG
      raise Spn2ErrorMissingKeys, response.inspect unless (ESSENTIAL_STATUS_KEYS - response.keys).empty?

      response
    end
    alias status status_job_id

    # Fetches the status of several jobs in one request.
    #
    # @param job_ids [Object] sent as the `job_ids` request param
    # @return [Array] the parsed JSON response
    # @raise [Spn2Error] if the response is not a JSON array
    def status_job_ids(job_ids:)
      response = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_ids: job_ids }))
      raise Spn2Error, response.inspect unless response.is_a?(Array)

      response
    end
    alias statuses status_job_ids

    # Fetches the status of the outlinks captured for a job.
    #
    # @param job_id [String] sent as the `job_id_outlinks` request param
    # @return [Array] the parsed JSON response
    # @raise [Spn2ErrorNoOutlinks] if the response is not a JSON array
    def status_job_id_outlinks(job_id:)
      response = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_id_outlinks: job_id }))
      raise Spn2ErrorNoOutlinks, response.inspect unless response.is_a?(Array)

      response
    end
    alias status_outlinks status_job_id_outlinks

    private

    # GET with the Accept and Authorization headers set.
    def auth_get(url:)
      get(url: url, headers: accept_header.merge(auth_header))
    end

    # POST with the Accept and Authorization headers set.
    def auth_post(url:, params: {})
      post(url: url, headers: accept_header.merge(auth_header), params: params)
    end

    def accept_header
      { Accept: 'application/json' }
    end

    def auth_header
      { Authorization: "LOW #{Spn2.access_key}:#{Spn2.secret_key}" }
    end

    # @return [Nokogiri::HTML::Document] +html_string+ parsed as HTML
    def doc(html_string)
      Nokogiri::HTML(html_string)
    end

    # Parses a response body as JSON. A body that is not valid JSON is treated
    # as an HTML error page and converted into one of the Spn2Error subclasses.
    #
    # The raw body is handed to JSON.parse directly. Routing it through the HTML
    # parser first (and parsing the document's text) lets markup-significant
    # characters such as '<' or '&' inside JSON string values be altered before
    # the JSON parser ever sees them.
    #
    # @param html_string [String, nil] the response body
    # @return [Object] the parsed JSON value
    # @raise [Spn2Error] a subclass describing the HTML error page
    def json(html_string)
      # to_s so that a nil body reaches the rescue path instead of raising TypeError.
      JSON.parse(html_string.to_s)
    rescue JSON::ParserError # an html response & therefore an error
      page_title = doc(html_string).title
      parse_error_code_from_page_title(page_title) if page_title
      parse_error_from_page_body(html_string) # if no title parse body
    end

    # Always raises. The leading integer of the title picks the error class.
    #
    # @param title_string [String] content of the error page's <title>
    # @raise [Spn2Error] the Spn2Error<code> class when the code is in ERROR_CODES
    # @raise [Spn2ErrorUnknownResponseCode] for any other title
    def parse_error_code_from_page_title(title_string)
      code = title_string.to_i
      raise Spn2.const_get("Spn2Error#{code}") if ERROR_CODES.include?(code)

      # Carry the title so the unexpected response can be diagnosed from the exception.
      raise Spn2ErrorUnknownResponseCode, title_string
    end

    # Always raises. Used when the error page has no <title>.
    #
    # @param html_string [String, nil] the response body
    # @raise [Spn2ErrorTooManyRequests] if the page's <h1> text is 'Too Many Requests'
    # @raise [Spn2ErrorUnknownResponse] otherwise, with the body as the message
    def parse_error_from_page_body(html_string)
      h1 = doc(html_string).xpath('//h1')
      raise Spn2ErrorTooManyRequests if !h1.empty? && h1.text == 'Too Many Requests'

      raise Spn2ErrorUnknownResponse, html_string # fall through
    end

    # @param opts [Hash]
    # @return [Boolean] true when every key of +opts+ (as a string) is in VALID_OPTS
    def options_valid?(opts)
      opts.keys.all? { |k| VALID_OPTS.include?(k.to_s) }
    end
  end
end