import 'org.marc4j.ErrorHandler'
require 'logger'

$LOG ||= Logger.new(STDERR)

# Ruby-side sugar for every marc4j MarcReader implementation: adds #each
# (and, through Enumerable, map/select/etc.) on top of hasNext/next.
module Java::OrgMarc4j::MarcReader
  include Enumerable

  # Yield each record in turn, optionally calling #hashify on it first.
  #
  # When reading a record raises a MarcException and the reader exposes an
  # `errors` object, every collected error is logged to $LOG at a level
  # matching its severity and the unreadable record is skipped. Readers
  # without `errors` print the exception and re-raise it.
  #
  # @param [Boolean] hashify Whether to call #hashify on each record before
  #   yielding it (#hashify is defined outside this file)
  # @yield [record] each successfully read record
  # @raise [org.marc4j.MarcException] if reading fails and the reader has no
  #   `errors` accessor
  def each(hashify = true)
    while self.hasNext
      begin
        record = self.next
      rescue org.marc4j.MarcException => e
        # respond_to? works whether #methods returns Strings (Ruby 1.8) or
        # Symbols (1.9+); the old `methods.include? 'errors'` test was always
        # false on 1.9+, so the logging below could never run.
        unless respond_to?(:errors)
          puts "#{e}"
          raise e
        end

        self.errors.getErrors.each do |err|
          case err.severity
          when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR, ErrorHandler::MAJOR_ERROR
            $LOG.warn err.toString
          when ErrorHandler::INFO
            $LOG.info err.toString
          when ErrorHandler::FATAL
            $LOG.error err.toString
          end
        end

        # The read raised, so no record was produced for this iteration.
        # Skip it here, at the `while` level: a `next` inside the error block
        # above would only advance to the next error and fall through to
        # yield nil or the previous record.
        next
      end

      record.hashify if hashify
      yield record
    end
  end
end

module MARC4J4R
  # Factory for marc4j readers; Reader.new returns the underlying Java reader
  # object rather than an instance of this class.
  class Reader
    ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
    ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}

    # @attr_reader [File] handle The handle of the File (or IO) object being read from
    # NOTE(review): Reader.new below stores @handle on the class and returns a
    # Java reader, so this instance-level reader looks unreachable -- confirm.
    attr_reader :handle

    # Get a marc reader of the appropriate type
    # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
    # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
    # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
    # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g., each)
    # @raise [ArgumentError] if the encoding or the reader type is not recognized
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc'
    #   reader = MARC4J4R::Reader.new('test.mrc')
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
    #
    # @example Get a permissive binary MARC reader
    #   reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
    #
    # @example Get a reader for an xml file
    #   reader = MARC4J4R::Reader.new('test.xml', :marcxml)
    #
    # @example Get a reader based on an existing IO object
    #   require 'open-uri'
    #   infile = open('http://my.machine.com/test.mrc')
    #   reader = MARC4J4R::Reader.new(infile)
    def self.new(input, type = :strictmarc, encoding = nil)
      if encoding
        # Accept the symbolic aliases (:utf8, :marc8, :iso) as well as the canonical names.
        encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding]
        unless ENCODINGS.include? encoding
          raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
        end
      end

      @handle = IOConvert.byteinstream(input)

      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamReader.module_eval("include Enumerable")
        return Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
      when :permissivemarc
        # With no explicit encoding, hand the permissive reader 'BESTGUESS'.
        encoding ||= 'BESTGUESS'
        Java::org.marc4j.MarcPermissiveStreamReader.module_eval("include Enumerable")
        return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
      when :marcxml
        Java::org.marc4j.MarcXmlReader.module_eval("include Enumerable")
        return Java::org.marc4j.MarcXmlReader.new(@handle)
      when :alephsequential
        Java::org.marc4j.MarcAlephSequentialReader.module_eval("include Enumerable")
        return Java::org.marc4j.MarcAlephSequentialReader.new(@handle)
      else
        raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
      end
    end
  end
end