lib/solrizer/fedora/solrizer.rb in solrizer-fedora-2.0.0.rc2 vs lib/solrizer/fedora/solrizer.rb in solrizer-fedora-2.0.0

- old
+ new

@@ -5,13 +5,10 @@
 require 'solrizer/xml'
 require 'solrizer/html'
 require 'active_support/core_ext/hash'
-require 'fastercsv' # this is used by solrize_objects when you pass it a csv file of pids
-
-
 module Solrizer::Fedora
 class Solrizer
   ALL_FIELDS = [
     :pid, :label, :fType, :cModel, :state, :ownerId, :cDate, :mDate, :dcmDate, :bMech, :title, :creator, :subject, :description, :contributor,
@@ -93,50 +90,30 @@
     # retrieve a list of all the pids in the fedora repository
     num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
     puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
     if @@index_list == false
-
-      objects = find_objects(:limit=>num_docs)
-
-      puts "Shelving #{objects.length} Fedora objects"
-      objects.each do |object|
-        solrize( object, opts )
-      end
-
+      solrize_from_fedora_search(opts)
     else
-      if File.exists?(@@index_list)
-        arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
-
-        puts "Indexing from list at #{@@index_list}"
-        puts "Shelving #{arr_of_pids.length} Fedora objects"
-
-        arr_of_pids.each do |row|
-          pid = row[0]
-          solrize( pid )
-        end #FASTERCSV
-      else
-        puts "#{@@index_list} does not exists!"
-      end #if File.exists
-
-    end #if Index_LISTS
-  end #solrize_objects
+      solrize_from_csv
+    end
+  end
 
-  def find_objects(*args)
-    raise ArgumentError, "Missing query string" unless args.length >= 1
-    options = args.last.is_a?(Hash) ? args.pop : {}
-
-    params = {}
-    params[:query] = ''
-    params[:maxResults] = options[:limit] if options[:limit]
-    params[:pid] = true
-
-    pids = []
-    connections.each do |conn|
-      response = Hash.from_xml(conn.find_objects(params))
-      pids << response["result"]["resultList"]["objectFields"].map{|x| x["pid"]}
+  def solrize_from_fedora_search(opts)
+    connections.each do |conn|
+      conn.search(nil) do |object|
+        solrize( object.pid, opts )
       end
-    pids.flatten
+    end
+  end
+
+  def solrize_from_csv
+    raise ArgumentException, "#{@@index_list} does not exists!" unless File.exists?(@@index_list)
+    puts "Indexing from list at #{@@index_list}"
+    CSV.foreach(@@index_list) do |row|
+      pid = row[0]
+      solrize( pid )
+    end
   end
 
   private
 
   def connections