lib/solrizer/fedora/solrizer.rb in solrizer-fedora-2.0.0.rc2 vs lib/solrizer/fedora/solrizer.rb in solrizer-fedora-2.0.0
- old
+ new
@@ -5,13 +5,10 @@
require 'solrizer/xml'
require 'solrizer/html'
require 'active_support/core_ext/hash'
-require 'fastercsv' # this is used by solrize_objects when you pass it a csv file of pids
-
-
module Solrizer::Fedora
class Solrizer
ALL_FIELDS = [
:pid, :label, :fType, :cModel, :state, :ownerId, :cDate, :mDate, :dcmDate,
:bMech, :title, :creator, :subject, :description, :contributor,
@@ -93,50 +90,30 @@
# retrieve a list of all the pids in the fedora repository
num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
if @@index_list == false
-
- objects = find_objects(:limit=>num_docs)
-
- puts "Shelving #{objects.length} Fedora objects"
- objects.each do |object|
- solrize( object, opts )
- end
-
+ solrize_from_fedora_search(opts)
else
- if File.exists?(@@index_list)
- arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
-
- puts "Indexing from list at #{@@index_list}"
- puts "Shelving #{arr_of_pids.length} Fedora objects"
-
- arr_of_pids.each do |row|
- pid = row[0]
- solrize( pid )
- end #FASTERCSV
- else
- puts "#{@@index_list} does not exists!"
- end #if File.exists
-
- end #if Index_LISTS
- end #solrize_objects
+ solrize_from_csv
+ end
+ end
- def find_objects(*args)
- raise ArgumentError, "Missing query string" unless args.length >= 1
- options = args.last.is_a?(Hash) ? args.pop : {}
-
- params = {}
- params[:query] = ''
- params[:maxResults] = options[:limit] if options[:limit]
- params[:pid] = true
-
- pids = []
- connections.each do |conn|
- response = Hash.from_xml(conn.find_objects(params))
- pids << response["result"]["resultList"]["objectFields"].map{|x| x["pid"]}
+ def solrize_from_fedora_search(opts)
+ connections.each do |conn|
+ conn.search(nil) do |object|
+ solrize( object.pid, opts )
end
- pids.flatten
+ end
+ end
+
+ def solrize_from_csv
+ raise ArgumentException, "#{@@index_list} does not exists!" unless File.exists?(@@index_list)
+ puts "Indexing from list at #{@@index_list}"
+ CSV.foreach(@@index_list) do |row|
+ pid = row[0]
+ solrize( pid )
+ end
end
private
def connections