lib/harvestdor.rb in harvestdor-0.0.14 vs lib/harvestdor.rb in harvestdor-0.1.0

- old
+ new

@@ -1,121 +1,25 @@ require 'harvestdor/errors' -require 'harvestdor/oai_harvest' require 'harvestdor/purl_xml' require 'harvestdor/version' +require 'harvestdor/client' # external gems require 'confstruct' -require 'oai' # stdlib require 'logger' require 'open-uri' require 'yaml' module Harvestdor - + LOG_NAME_DEFAULT = "harvestdor.log" LOG_DIR_DEFAULT = File.join(File.dirname(__FILE__), "..", "logs") PURL_DEFAULT = 'http://purl.stanford.edu' - HTTP_OPTIONS_DEFAULT = { 'ssl' => { - 'verify' => false - }, - 'request' => { - 'timeout' => 60, # open/read timeout (seconds) - 'open_timeout' => 60 # connection open timeout (seconds) - } - } - OAI_CLIENT_DEBUG_DEFAULT = false - OAI_REPOSITORY_URL_DEFAULT = 'https://dor-oaiprovider-prod.stanford.edu/oai' - DEFAULT_METADATA_PREFIX = 'mods' - DEFAULT_FROM_DATE = nil - DEFAULT_UNTIL_DATE = nil - DEFAULT_SET = nil - - class Client - - # Set default values for the construction of Harvestdor::Client objects - def self.default_config - @class_config ||= Confstruct::Configuration.new({ - :log_dir => LOG_DIR_DEFAULT, - :log_name => LOG_NAME_DEFAULT, - :purl => PURL_DEFAULT, - :http_options => HTTP_OPTIONS_DEFAULT, - :oai_repository_url => OAI_REPOSITORY_URL_DEFAULT, - :oai_client_debug => OAI_CLIENT_DEBUG_DEFAULT, - :default_metadata_prefix => DEFAULT_METADATA_PREFIX, - :default_from_date => DEFAULT_FROM_DATE, - :default_until_date => DEFAULT_UNTIL_DATE, - :default_set => DEFAULT_SET - }) - end - - # Initialize a new instance of Harvestdor::Client - # @param Hash options - # @example - # client = Harvestdor::Client.new({ # Example with all possible options - # :log_dir => File.join(File.dirname(__FILE__), "..", "logs"), - # :log_name => 'harvestdor.log', - # :purl => 'http://purl.stanford.edu', - # :http_options => { 'ssl' => { - # 'verify' => false - # }, - # 'request' => { - # 'timeout' => 30, # open/read timeout (seconds) - # 'open_timeout' => 30 # connection open timeout (seconds) - # } - # }, - # :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai', # The OAI repository to connect to - # :oai_client_debug => false, - # :default_metadata_prefix => 'mods', - # :default_from_date => '2012-12-01', - # :default_until_date => '2014-12-01', - # :default_set => nil, - # }) - def initialize options = {} - config.configure(YAML.load_file(options[:config_yml_path])) if options[:config_yml_path] - config.configure options - yield(config) if block_given? - end - - def config - @config ||= Confstruct::Configuration.new(self.class.default_config) - end - - # @return OAI::Client an instantiated OAI::Client object, based on config options - def oai_client - @oai_client ||= OAI::Client.new config.oai_repository_url, :debug => config.oai_client_debug, :http => oai_http_client - end - - def logger - @logger ||= self.class.logger(config.log_dir, config.log_name) - end - - protected #--------------------------------------------------------------------- - - def oai_http_client - logger.info "Constructing OAI http client with faraday options #{config.http_options.to_hash.inspect}" - @oai_http_client ||= Faraday.new config.oai_repository_url, config.http_options.to_hash - end - - # Global, memoized, lazy initialized instance of a logger - # @param [String] log_dir directory for to get log file - # @param [String] log_name name of log file - def self.logger(log_dir, log_name) - Dir.mkdir(log_dir) unless File.directory?(log_dir) - @logger ||= Logger.new(File.join(log_dir, log_name), 'daily') - end - - end # class Client - - # @param [Object] arg OAI::Header object or OAI::Record object or String (oai identifier) - # @return [String] the druid part of an OAI identifier in an OAI header, e.g. bb134cc1324 - def self.druid(arg) - oai_id = arg - if arg.is_a?(OAI::Header) - oai_id = arg.identifier - elsif arg.is_a?(OAI::Record) - oai_id = arg.header.identifier - end - oai_id.split('druid:').last - end - + HTTP_OPTIONS_DEFAULT = { 'ssl' => { + 'verify' => false + }, + 'request' => { + 'timeout' => 60, # open/read timeout (seconds) + 'open_timeout' => 60 # connection open timeout (seconds) + } +} end # module Harvestdor \ No newline at end of file