lib/scrapers/rubytapas/dpdcart.rb in scrapers-3.0.0 vs lib/scrapers/rubytapas/dpdcart.rb in scrapers-3.1.0

- old
+ new

@@ -1,114 +1,150 @@ -require 'netrc' -require 'mechanize' +require "mechanize" +require "netrc" module Scrapers module RubyTapas # DpdCart is a remote service gateway object (Gateway Pattern) # that provides a connection to rubytapas.dpdcart.com where the # RubyTapas episodes and download files are available, as well as # the episode feed. + class DpdCart - RUBYTAPAS_HOST = 'rubytapas.dpdcart.com' - ENV_RUBYTAPAS_USER = 'RUBYTAPAS_USER' - ENV_RUBYTAPAS_PASSWORD = 'RUBYTAPAS_PASSWORD' - LOGIN_PATH = '/subscriber/login' - LOGIN_URL = "https://#{RUBYTAPAS_HOST}#{LOGIN_PATH}" - FEED_PATH = '/feed' - FEED_URL = "https://#{RUBYTAPAS_HOST}#{FEED_PATH}" - CONTENT_PATH = "/subscriber/content" - CONTENT_URL = "https://#{RUBYTAPAS_HOST}#{CONTENT_PATH}" + # NOTE: Updating this since now I have *two* subscriptions that + # use DPD Cart, rubytapas and elixirsips. Generalizing this + # accordingly. :) + # The subscription name will be filled in depending on which + # subscription I'm downloading from. This is a stock sprintf-type + # fill in where you pass in the subscription parameter with a + # value, thusly: + # + # DPDCART_HOST % {subscription: "rubytapas"} + # + DPDCART_HOST_FORMAT = "%{subscription}.dpdcart.com" + ENV_DPDCART_USER_FORMAT = "%{subscription}_USER" + ENV_DPDCART_PASSWORD_FORMAT = "%{subscription}_USER" + LOGIN_PATH = '/subscriber/login' + FEED_PATH = '/feed' + CONTENT_PATH = "/subscriber/content" + + # Subscription at dbdcart + attr_accessor :subscription + # User name for dpdcart account attr_accessor :user # Password for dpdcart acount attr_accessor :password - attr_accessor :dry_run, :debug + def dpdcart_host ; @dpdcart_host ||= DPDCART_HOST_FORMAT % {subscription: subscription} ; end + def env_dpdcart_user ; @env_dpdcart_user ||= ENV_DPDCART_PASSWORD_FORMAT % {subscription: subscription} ; end + def env_dpdcart_password ; @env_dpdcart_password ||= ENV_DPDCART_PASSWORD_FORMAT % {subscription: subscription} ; end + def debug ; @debug ||= options[:debug] ; end + def dry_run ; @dry_run ||= options[:dry_run] ; end + def feed_url ; @feed_url ||= URI("https://#{dpdcart_host}#{FEED_PATH}") ; end + def login_url ; @login_url ||= URI("https://#{dpdcart_host}#{LOGIN_PATH}") ; end # Create a new instance of the DpdCart gateway. # # @param user [String] - the DpdCart account name, typically an # email address. # @param password [String] - password associated with the # account. + # @param subscription [String] - subscription name at DPD Cart + # (e.g. 'rubytapas' or 'elixirsips') # # If the user and password are empty, the information will be # obtained in the following order: # - # - reading the environment variables `RUBYTAPAS_USER` and - # `RUBYTAPAS_PASSWORD` + # - reading the environment variables `<subscriptiion>_USER` and + # `<subscription>_PASSWORD` # + # Note that <subscription> will be the subscription passed in + # above. + # # - reading the user's `$HOME/.netrc` file and pulling the - # credentials that match the host name for the rubytapas + # credentials that match the host name for the subscription # account. # - # If no credentials can be found, it will raise and error: + # If no credentials can be found, it will raise an error: # `NoCredentialsError`. - def initialize(user=nil, password=nil, options={}) - self.dry_run = options[:dry_run] - self.debug = options[:debug] - if user && password - @user = user - @password = password - else - @user, @password = get_credentials_from_environment - unless user && password - @user, @password = get_credentials_from_netrc - end - end + # + def initialize(user=nil, password=nil, subscription='rubytapas', options={}) + self.options = options + self.subscription = subscription + set_user_and_password(user, password) self.agent = Mechanize.new end - # Return the episode feed from dpdcart + # Retreive the episode feed from dpdcart def feed! - uri = URI(FEED_URL) - request = Net::HTTP::Get.new(uri) - request.basic_auth user, password - Net::HTTP.start(uri.host, uri.port, {:use_ssl => true}) {|http| http.request(request)}.body + http_fetch(feed_url) end - # Login to dpdcart before downloading - def login! - page = agent.get LOGIN_URL - page.form.field_with(name: "username").value = user - page.form.field_with(name: "password").value = password - page.form.submit - unless agent.page.title.match(/Subscription Content/) - raise "Could not log in" - end - agent - end - # Download the file from dpdcart def download!(file) + login warn "DEBUG: downloading #{file}" if debug if dry_run warn "DEBUG: download skipped for dry run" if dry_run filename = file body = "no body" else - page = agent.get(file) unless dry_run + page = agent.get(file) filename = page.filename body = page.body end [ filename, body ] end private attr_accessor :options, :agent + def set_user_and_password(user, password) + if user && password + @user = user + @password = password + else + @user, @password = get_credentials_from_environment + unless user && password + @user, @password = get_credentials_from_netrc + end + end + end + def get_credentials_from_environment - [ ENV[ENV_RUBYTAPAS_USER], ENV[ENV_RUBYTAPAS_PASSWORD] ] + [ ENV[env_dpdcart_user], ENV[env_dpdcart_password] ] end def get_credentials_from_netrc - creds = Netrc.read[RUBYTAPAS_HOST] + creds = Netrc.read[dpdcart_host] + if creds.nil? + warn "Could not find credentials for #{dpdcart_host}" + exit -1 + end [ creds.login, creds.password ] + end + + # Login to dpdcart before downloading + def login + page = agent.get login_url + page.form.field_with(name: "username").value = user + page.form.field_with(name: "password").value = password + page.form.submit + unless agent.page.title.match(/Subscription Content/) + raise "Could not log in" + end + agent + end + + def http_fetch(uri) + request = Net::HTTP::Get.new(uri) + request.basic_auth user, password + Net::HTTP.start(uri.host, uri.port, {:use_ssl => true}) {|http| http.request(request)}.body end end end end