lib/marc4j4r/reader.rb in marc4j4r-0.2.4 vs lib/marc4j4r/reader.rb in marc4j4r-0.9.0
- old
+ new
@@ -11,36 +11,32 @@
end
end
module MARC4J4R
- # Add some sugar to the MarcReader interface
- #
- # Adjust the interface so that a #new call to any implementations that
- # implement it can take a java.io.InputStream, ruby IO obejct, or String
- # (that will be interpreted as a filename) without complaining.
- #
- # The mechanism -- running module_eval on a string-representation of the
- # new method in each of the hard-coded implementations of MarcReader
- # (MarcStreamReader,MarcPermissiveStreamReader,MarcXmlReader) -- is ugly
- # and deeply unsettling.
- #
- # @author Bill Dueber
-
+
# First, add Enumerable to the interface
Java::org.marc4j.MarcReader.module_eval("include Enumerable")
-
class Reader
-
+
+ ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
+ ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}
+ attr_reader :handle
+
# Get a marc reader of the appropriate type
# @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
# @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
+ # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding for the input (optional)
# @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g., each)
#
# @example Get a strict binary MARC reader for the file 'test.mrc'
# reader = MARC4J4R::Reader.new('test.mrc')
+ # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
+ #
+ # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
+ # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
#
# @example Get a permissive binary MARC reader
# reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
#
# @example Get a reader for an xml file
@@ -49,17 +45,23 @@
# @example Get a reader based on an existing IO object
# require 'open-uri'
# infile = open('http://my.machine.com/test.mrc')
# reader = MARC4J4R::Reader.new(infile)
- attr_reader :handle
- def self.new(input, type = :strictmarc)
+ def self.new(input, type = :strictmarc, encoding = nil)
+ if encoding
+ encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding]
+ unless ENCODINGS.include? encoding
+ raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
+ end
+ end
@handle = IOConvert.byteinstream(input)
case type
when :strictmarc then
- return Java::org.marc4j.MarcStreamReader.new(@handle)
+ return Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
when :permissivemarc then
- return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true)
+ encoding ||= 'BESTGUESS'
+ return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
when :marcxml then
return Java::org.marc4j.MarcXmlReader.new(@handle)
when :alephsequential then
return MARC4J4R::AlephSequentialReader.new(@handle)
else
\ No newline at end of file