# Benchmark comparing record-reading speed of several MARC readers under JRuby:
# the Java MarcAlephSequentialReader used directly, and the MARC4J4R readers for
# strict binary MARC, MARC-XML and Aleph sequential input.
#
# The library path and the data files (test.seq, test.mrc, test.xml) are given
# as relative paths, so they are resolved against the current working directory.

require 'benchmark'
require 'java'
require '../lib/marc4j4r.rb'

# Load every jar shipped with jruby_marc_to_solr; the glob is anchored on this
# file's directory rather than on the working directory.
curdir = File.dirname(__FILE__)
Dir.glob("#{curdir}/../../jruby_marc_to_solr/jars/*.jar") do |jar|
  require jar
end

# java_import replaces the deprecated include_class.
java_import Java::org.solrmarc.marc.MarcAlephSequentialReader

# [report label, input file, MARC4J4R reader type] for each MARC4J4R benchmark.
marc4j4r_cases = [
  ['strict',          'test.mrc', :strictmarc],
  ['xml',             'test.xml', :marcxml],
  ['alephsequential', 'test.seq', :alephsequential]
]

# bmbm runs a rehearsal pass before the measured pass, so every report block
# below is executed twice.
Benchmark.bmbm do |x|
  x.report('Java AS') do
    count = 0
    3.times do
      stream = java.io.FileInputStream.new('test.seq'.to_java_string)
      begin
        reader = MarcAlephSequentialReader.new(stream)
        reader.each { |_record| count += 1 }
      ensure
        # Release the file descriptor on every pass instead of leaking it.
        stream.close
      end
    end
    # puts "AS read #{count} records"
  end

  marc4j4r_cases.each do |label, path, reader_type|
    x.report(label) do
      count = 0
      3.times do
        reader = MARC4J4R.reader(path, reader_type)
        reader.each { |_record| count += 1 }
      end
      # puts "#{label} read #{count} records"
    end
  end
end

__END__

# Everything below __END__ is not evaluated by Ruby. It is a parked pure-Ruby
# reader for Aleph sequential input, kept here for reference.

# Enumerates MARC records read line by line from an Aleph sequential source.
# Consecutive lines that share the same leading id are collected into one record.
class AlephSequentialReader
  include Enumerable

  # Matches a subfield delimiter ("$$" followed by the one-character code).
  SUBREGEXP = /\$\$(.)/

  # fromwhere - a Java InputStream (converted with #to_io), a Ruby IO, or
  #             anything File.new accepts (e.g. a filename).
  # NOTE(review): a File opened here is never closed by this class.
  def initialize(fromwhere)
    @handle =
      if fromwhere.is_a? Java::JavaIO::InputStream
        fromwhere.to_io
      elsif fromwhere.is_a? IO
        fromwhere
      else
        File.new(fromwhere)
      end
  end

  # Yields one record per run of lines sharing the same id. Blank lines are
  # skipped; nothing is yielded when the input holds no non-blank lines.
  def each
    record = nil
    current_id = nil

    @handle.each_line do |line|
      line.chomp!
      next unless line =~ /\S/

      # Unpacked layout: 9-byte id, 1 byte, 3-byte tag, two indicator bytes
      # (as integers), 3 bytes, then the field data. The 1- and 3-byte filler
      # fields (indexes 1 and 5) are discarded.
      # id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
      id, tag, ind1, ind2, data =
        line.unpack('a9 a a3 c c a3 a*').values_at(0, 2, 3, 4, 6)

      if id != current_id
        # A new id starts a new record; hand over the finished one first.
        yield record if record
        # NOTE(review): RecordImpl is referenced unqualified here, unlike the
        # other marc4j classes in this file - confirm it is in scope.
        record = RecordImpl.new
        current_id = id
      end

      if tag == 'LDR'
        record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
      else
        record << buildField(tag, ind1, ind2, data)
      end
    end

    # Flush the last record; guarded so empty input does not yield nil.
    yield record if record
  end

  # Builds a marc4j field object from one parsed line.
  #
  # tag        - three-character field tag.
  # ind1, ind2 - indicator bytes as integers (only used for data fields).
  # data       - raw field content; subfields are introduced by "$$<code>".
  #
  # Returns a ControlFieldImpl when the marc4j Verifier reports the tag as a
  # control field, otherwise a DataFieldImpl with one subfield per "$$<code>".
  def buildField(tag, ind1, ind2, data)
    if Java::org.marc4j.marc.impl.Verifier.isControlField tag
      return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
    end

    field = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
    # split yields [text before first "$$", code, value, code, value, ...];
    # drop(1) discards the leading text and, unlike [1..-1], returns an empty
    # array (not nil) when data is empty.
    data.split(SUBREGEXP).drop(1).each_slice(2) do |code, value|
      field.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
    end
    field
  end
end # End of class AlephSequentialReader