require 'nokogiri' require 'marc/xml_parsers' require 'berkeley_library/util/files' module BerkeleyLibrary module Alma module SRU # A customized XML reader for reading MARC records from SRU search results. class XMLReader include Enumerable include ::MARC::NokogiriReader include BerkeleyLibrary::Util::Files # ############################################################ # Constants NS_SRW = 'http://www.loc.gov/zing/srw/'.freeze NS_MARC = 'http://www.loc.gov/MARC21/slim'.freeze # ############################################################ # Attributes # @return [Integer, nil] the record identifier of the most recently parsed record, if any attr_reader :last_record_id # @return [Integer, nil] the record position of the most recently parsed record, if any attr_reader :last_record_position # @return [Integer, nil] the next record position, if present attr_reader :next_record_position # Returns the total number of records, based on the `` tag # returned in the SRU response. # # Note that the total is not guaranteed to be present, and if present, # may not be present unless at least some records have been parsed. # # @return [Integer, nil] the total number of records, or `nil` if the total has not been read yet def num_records @num_records&.to_i end # Returns the number of records yielded. # # @return [Integer] the number of records yielded. def records_yielded @records_yielded ||= 0 end # ############################################################ # Initializer def initialize(source, freeze: false) @handle = ensure_io(source) @freeze = freeze init end class << self # Reads MARC records from an XML datasource given either as an XML string, a file path, # or as an IO object. # # @param source [String, Pathname, IO] an XML string, the path to a file, or an IO to read from directly # @param freeze [Boolean] whether to freeze each record after reading def read(source, freeze: false) new(source, freeze: freeze) end end # ############################################################ # MARC::GenericPullParser overrides def yield_record @record[:record].tap do |record| record.freeze if @freeze end super ensure increment_records_yielded! end # ############################################################ # Nokogiri::XML::SAX::Document overrides # @see Nokogiri::XML::Sax::Document#start_element_namespace # rubocop:disable Metrics/ParameterLists def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) super @current_element_ns = uri @current_element_name = name end # rubocop:enable Metrics/ParameterLists # @see Nokogiri::XML::Sax::Document#end_element_namespace def end_element_namespace(name, prefix = nil, uri = nil) # Delay yielding record till we reach the end of the outer SRU # element (not the inner MARC element), so we can record the # values of and if name.downcase == 'record' yield_record if uri == NS_SRW elsif uri == NS_MARC super end @current_element_name = nil end # @see Nokogiri::XML::Sax::Document#characters # rubocop:disable Metrics/MethodLength def characters(string) return super unless NS_SRW == @current_element_ns return unless (name = @current_element_name) case name when 'numberOfRecords' @num_records = string when 'recordIdentifier' @last_record_id = string when 'recordPosition' @last_record_position = string.to_i when 'nextRecordPosition' @next_record_position = string.to_i end end # rubocop:enable Metrics/MethodLength # ############################################################ # Private private def ensure_io(file) return file if reader_like?(file) return File.new(file) if file_exists?(file) return StringIO.new(file) if file =~ /^\s*