lib/marc4j4r/reader.rb in marc4j4r-0.2.4 vs lib/marc4j4r/reader.rb in marc4j4r-0.9.0

- old
+ new

@@ -11,36 +11,32 @@ end end module MARC4J4R - # Add some sugar to the MarcReader interface - # - # Adjust the interface so that a #new call to any implementations that - # implement it can take a java.io.InputStream, ruby IO obejct, or String - # (that will be interpreted as a filename) without complaining. - # - # The mechanism -- running module_eval on a string-representation of the - # new method in each of the hard-coded implementations of MarcReader - # (MarcStreamReader,MarcPermissiveStreamReader,MarcXmlReader) -- is ugly - # and deeply unsettling. - # - # @author Bill Dueber - + # First, add Enumerable to the interface Java::org.marc4j.MarcReader.module_eval("include Enumerable") - class Reader - + + ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8'] + ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'} + attr_reader :handle + # Get a marc reader of the appropriate type # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read # @param [:strictmarc, :permissivemarc, :marcxml] The type of MARC reader you want. + # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] An explicit encoding # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each) # # @example Get a strict binary MARC reader for the file 'test.mrc' # reader = MARC4J4R::Reader.new('test.mrc') + # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default + # + # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8 + # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8) # # @example Get a permissive binary MARC reader # reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc) # # @example Get a reader for an xml file @@ -49,17 +45,23 @@ # @example Get a reader based on an existing IO object # require 'open-uri' # infile = open('http://my.machine.com/test.mrc') # reader = MARC4J4R::Reader.new(infile) - attr_reader :handle - def self.new(input, type = :strictmarc) + def self.new(input, type = :strictmarc, encoding = nil) + if encoding + encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding] + unless ENCODINGS.include? encoding + raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\"" + end + end @handle = IOConvert.byteinstream(input) case type when :strictmarc then - return Java::org.marc4j.MarcStreamReader.new(@handle) + return Java::org.marc4j.MarcStreamReader.new(@handle, encoding) when :permissivemarc then - return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true) + encoding ||= 'BESTGUESS' + return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding) when :marcxml then return Java::org.marc4j.MarcXmlReader.new(@handle) when :alephsequential then return MARC4J4R::AlephSequentialReader.new(@handle) else \ No newline at end of file