unless defined? JRUBY_VERSION
  raise "Only works under JRUBY"
end

# Use marc4j from the classpath when it is already there; otherwise fall back
# to the jar in ../jars relative to this file.
begin
  include_class Java::org.marc4j.marc.impl.RecordImpl
rescue NameError
  jardir = File.join(File.dirname(__FILE__), '..', 'jars')
  require "#{jardir}/marc4j.jar"
end

require 'set'

# Add some sugar to the MarcReader interface
#
# Adjust the interface so that a #new call to any implementations that
# implement it can take a java.io.InputStream, ruby IO object, or String
# (that will be interpreted as a filename) without complaining.
#
# The mechanism -- running module_eval on a string-representation of the
# new method in each of the hard-coded implementations of MarcReader
# (MarcStreamReader, MarcPermissiveStreamReader, MarcXmlReader) -- is ugly
# and deeply unsettling.
#
# @author Bill Dueber
module MARC4J4R

  # A string used to override the initializer for each stream reader.
  # Need to do it this ugly way because of the way java and ruby interact;
  # can't just add it to the MarcReader interface the way I wanted to.
  #
  # The replacement initializer accepts, in order of precedence:
  # * a java.io.InputStream (used as-is),
  # * a ruby IO, or any other object that responds to #to_inputstream
  #   (e.g. IO-like objects that are not IO subclasses),
  # * anything else, which is treated as a filename.
  #
  # MarcPermissiveStreamReader is additionally passed (true, true) for its
  # two boolean constructor arguments.
  NEWINIT = <<-ENDBINDER
    alias_method :oldinit, :initialize
    def initialize(fromwhere)
      stream = nil
      if fromwhere.is_a? Java::JavaIO::InputStream
        stream = fromwhere
      elsif fromwhere.is_a?(IO) || fromwhere.respond_to?(:to_inputstream)
        stream = fromwhere.to_inputstream
      else
        stream = java.io.FileInputStream.new(fromwhere.to_java_string)
      end
      if self.class == Java::org.marc4j.MarcPermissiveStreamReader
        self.oldinit(stream, true, true)
      else
        self.oldinit(stream)
      end
    end
  ENDBINDER

  Java::org.marc4j.MarcStreamReader.module_eval(NEWINIT)
  Java::org.marc4j.MarcPermissiveStreamReader.module_eval(NEWINIT)
  Java::org.marc4j.MarcXmlReader.module_eval(NEWINIT)

  # Get a marc reader of the appropriate type
  # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
  # @param [:strictmarc, :permissivemarc, :marcxml] type The type of MARC reader you want.
  # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
  # @raise [ArgumentError] if type is not one of the three supported symbols
  #
  # @example Get a strict binary MARC reader for the file 'test.mrc'
  #   reader = MARC4J4R.reader('test.mrc')
  #
  # @example Get a permissive binary MARC reader
  #   reader = MARC4J4R.reader('test.mrc', :permissivemarc)
  #
  # @example Get a reader for an xml file
  #   reader = MARC4J4R.reader('test.xml', :marcxml)
  #
  # @example Get a reader based on an existing IO object
  #   require 'open-uri'
  #   infile = open('http://my.machine.com/test.mrc')
  #   reader = MARC4J4R.reader(infile)
  def reader(input, type = :strictmarc)
    case type
    when :strictmarc
      Java::org.marc4j.MarcStreamReader.new(input)
    when :permissivemarc
      Java::org.marc4j.MarcPermissiveStreamReader.new(input)
    when :marcxml
      Java::org.marc4j.MarcXmlReader.new(input)
    else
      raise ArgumentError, "Reader type must be :strictmarc, :permissivemarc, or :marcxml"
    end
  end
  module_function :reader

end

# Re-open the MarcReader interface, define #each and include Enumerable
#
# We also automatically call #hashify on the records that stream through
# #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
# record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
# by another 520)
module Java::OrgMarc4j::MarcReader
  include Enumerable

  # Yield every remaining record in turn, after calling #hashify on it
  def each
    while self.hasNext
      record = self.next
      record.hashify
      yield record
    end
  end
end

include_class Java::org.marc4j.marc.impl::RecordImpl
include_class Java::org.marc4j.marc.impl::ControlFieldImpl
include_class Java::org.marc4j.marc.impl::DataFieldImpl
include_class Java::org.marc4j.marc.impl::SubfieldImpl

# Open up RecordImpl to add some sugar, including Enumerable as well
# @author Bill Dueber
class RecordImpl
  include Enumerable

  # Create a local hash by tag number; makes some stuff faster
  # Called automatically if you use reader.each
  #
  # The hash maps each tag to an Array of its fields in record order.
  def hashify
    return if @hashedtags # don't do it more than once
    @hashedtags = {}
    self.getVariableFields.each do |field|
      (@hashedtags[field.tag] ||= []).push field
    end
  end

  # Create a nice string of the record: the leader line followed by one
  # line per field
  # @return [String]
  def to_s
    lines = ['LEADER ' + self.leader]
    self.each { |field| lines.push field.to_s }
    lines.join("\n")
  end

  # Get the leader as a string (marc4j would otherwise return Leader object)
  # @return [String]
  def leader
    self.get_leader.toString
  end

  # Cycle through the fields in the order they appear in the record
  def each
    self.getVariableFields.each { |field| yield field }
  end

  # Get the first field associated with a tag
  # @param [String] tag The tag
  # @return [Field] The first matching field, or nil if none. Note that
  #   to mirror ruby-marc, this returns a single field
  def [](tag)
    return self.getVariableField(tag) unless defined? @hashedtags
    # A tag that never occurred in the record has no entry in the hash;
    # answer nil (as documented) instead of calling [] on nil.
    fields = @hashedtags[tag]
    fields ? fields[0] : nil
  end

  # Get a (possibly empty) list of fields with the given tag(s)
  #
  # @param [String, Array] tags A string (or Array of strings) with the tags you're interested in
  # @param [Boolean] originalorder Whether or not results should be presented in the original order within the
  #   record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
  #   in the record
  # @return [Array] Either an empty list or a list of one or more matched fields will be returned.
  #
  # originalorder == false will use an internal hash and be faster in many cases (see #hashify)
  #
  # @example originalorder == false
  #   # Given a record that looks like
  #   # 010    $a 68027371
  #   # 035    $a (RLIN)MIUG0001728-B
  #   # 035    $a (CaOTULAS)159818044
  #   # 035    $a (OCoLC)ocm00001728
  #
  #   r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
  #   # 035    $a (RLIN)MIUG0001728-B
  #   # 035    $a (CaOTULAS)159818044
  #   # 035    $a (OCoLC)ocm00001728
  #   # 010    $a 68027371
  #
  #   # The results are ordered first by tag as passed in, then by original order within the tag
  #
  # @example Just get all fields for a single tag
  #   ohThirtyFives = r.find_by_tag('035')
  #
  # @example Get a bunch of standard identifiers
  #   standardIDs = r.find_by_tag(['022', '020', '010'])
  #
  # @example originalorder == true
  #   r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
  #   # 010    $a 68027371
  #   # 035    $a (RLIN)MIUG0001728-B
  #   # 035    $a (CaOTULAS)159818044
  #   # 035    $a (OCoLC)ocm00001728
  def find_by_tag(tags, originalorder = false)
    self.hashify unless @hashedtags

    # Anything that is not an Array is treated as a single tag
    return @hashedtags[tags] || [] unless tags.is_a? Array

    if originalorder
      self.find_all { |field| tags.include? field.tag }
    else
      # values_at yields nil for tags that are absent; compact drops those
      @hashedtags.values_at(*tags).flatten.compact
    end
  end

  # Return the record as valid MARC-XML
  #
  # The result is computed once and cached on the record.
  # @return [String] A MARC-XML representation of the record, including the XML header
  def to_xml
    return @xml if @xml
    buffer = java.io.StringWriter.new
    res = javax.xml.transform.stream.StreamResult.new(buffer)
    writer = org.marc4j.MarcXmlWriter.new(res)
    writer.write(self)
    # Closing the writer is what emits the end of the XML document.
    writer.close
    # Cache the String (not the StringWriter) so every call returns a String.
    @xml = buffer.toString
  end
end

# Add a couple of convenience methods to control fields
class ControlFieldImpl
  # @return [String] The data of the control field
  def value
    self.data
  end

  # Pretty-print
  # @return [String] The tag and the value, separated by a space
  def to_s
    self.tag + " " + self.value
  end
end

# Open up DataFieldImpl to add some sugar; Enumerable iterates over subfields
class DataFieldImpl
  include Enumerable

  # Pretty-print
  # @param [String] joiner What string to use to join the subfields
  # @return [String] The pretty string: tag and indicators, then each subfield
  def to_s(joiner = ' ')
    parts = [self.tag + ' ' + self.indicator1 + self.indicator2]
    self.each { |sub| parts.push sub.to_s }
    parts.join(joiner)
  end

  # Get the value of the first subfield of this field with the given code
  # @param [String] code 1-character string of the subfield code
  # @return [String, nil] The value of the first matched subfield, or nil
  #   when the field has no subfield with that code
  # @raise [ArgumentError] if code is not a one-character String
  def [](code)
    raise ArgumentError, "Code must be a one-character string, not #{code}" unless code.is_a? String and code.size == 1
    # code[0] hands the single character to the underlying java method
    # (on ruby 1.8 this is the integer char value).
    sub = self.getSubfield(code[0])
    sub ? sub.getData : nil
  end

  # Get all values from the subfields for the given code or array of codes
  # @param [String, Array, Set] code (Array of?) 1-character string(s) of the subfield code
  # @param [Boolean] myorder Use the order of subfields that I gave instead of the order they're in the record
  # @return [Array<String>] A possibly-empty array of Strings made up of the values in the subfields whose
  #   code is included in the given codes. If myorder == true, use the order in which they are passed in; if a code
  #   is repeated (occasionally legal) subfield values will appear first ordered by the passed array, then by order
  #   within the document.
  #
  # If myorder is false, just return the values for matching subfields in the order they appear in the field.
  #
  # @example Quick examples:
  #   # 260    $a New York, $b Van Nostrand Reinhold Co. $c 1969
  #   rec['260'].sub_values('a') #=> ["New York,"]
  #   rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
  #   rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
  #   rec['260'].sub_values(['c', 'a'], true) #=> ["1969", "New York,"]
  def sub_values(code, myorder = false)
    # Do a little razzle-dazzle for the common case when a single code is given
    single = nil
    if not [Set, Array].include? code.class
      single = code
    elsif code.size == 1
      single = code.first
    end

    if single
      return self.find_all { |sub| single == sub.code }.map { |sub| sub.data }
    end

    if myorder
      # One pass over the field per requested code, in the caller's order
      grouped = []
      code.each do |wanted|
        grouped << self.find_all { |sub| wanted == sub.code }
      end
      grouped.flatten.map { |sub| sub.data }
    else
      self.find_all { |sub| code.include? sub.code }.map { |sub| sub.data }
    end
  end

  # Get first indicator as a one-character string
  def indicator1
    self.getIndicator1.chr
  end

  # Get second indicator as a one-character string
  def indicator2
    self.getIndicator2.chr
  end

  # Iterate over the subfields
  def each
    self.getSubfields.each { |sub| yield sub }
  end

  # Get the concatenated values of the subfields in the order they appear in the field
  # @param [String] joiner The string used to join the subfield values
  # @return [String]
  def value(joiner = ' ')
    self.getSubfields.map { |sub| sub.data }.join(joiner)
  end
end

# Add a couple of convenience methods to subfields
class SubfieldImpl
  # @return [String] The data of the subfield
  def value
    self.data
  end

  # @return [String] The subfield code as a one-character string
  def code
    self.getCode.chr
  end

  # Pretty-print
  # @return [String] '$', the code, a space, and the data
  def to_s
    '$' + self.code + " " + self.data
  end
end