Sha256: ce6ad3e502a6c4a144731bcb8cb7e09a12a3783f289c3154d1d2d58a627e2657

Contents?: true

Size: 1.86 KB

Versions: 3

Compression:

Stored size: 1.86 KB

Contents

require 'date'
require 'find'
require 'optparse'

module NewspaperWorks
  module Ingest
    module NDNP
      class BatchIngester
        extend NewspaperWorks::Ingest::FromCommand
        include NewspaperWorks::Logging

        attr_accessor :path, :batch, :opts

        # normalize path, possibly from directory, to contained batch
        #   manifest XML path:
        # @param path [String]
        def self.normalize_path(path)
          return path unless File.directory?(path)
          batch_xml_path = Find.find(path).select do |f|
            f.downcase.end_with?('batch_1.xml', 'batch.xml')
          end
          batch_xml_path.find { |f| f.end_with?('_1.xml') } || batch_xml_path[0]
        end

        # @param path [String] path to batch xml or directory
        # @param opts [Hash]
        #   global ingest options, to be passed to ingester components,
        #   may include administrative metadata.
        def initialize(path, opts = {})
          @path = self.class.normalize_path(path)
          raise IOError, "No batch file found: #{path}" if @path.empty?
          @opts = opts
          @batch = batch_enumerator
          configure_logger('ingest')
        end

        def ingest
          write_log("Beginning NDNP batch ingest for #{@path}")
          batch.each do |issue|
            issue_ingester(issue).ingest
          end
          write_log(
            "NDNP batch ingest complete!"
          )
        end

        private

          # Return BatchIngest object as enumerable of issues:
          def batch_enumerator
            NewspaperWorks::Ingest::NDNP::BatchXMLIngest.new(path)
          end

          def issue_ingester(issue)
            NewspaperWorks::Ingest::NDNP::IssueIngester.new(issue, @opts)
          end

          def normalize_date(v)
            (v.is_a?(String) ? Date.parse(v) : v).to_s
          end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
newspaper_works-1.0.1 lib/newspaper_works/ingest/ndnp/batch_ingester.rb
newspaper_works-1.0.0 lib/newspaper_works/ingest/ndnp/batch_ingester.rb
newspaper_works-0.1.0 lib/newspaper_works/ingest/ndnp/batch_ingester.rb