lib/solrizer/fedora/solrizer.rb in solrizer-fedora-1.2.5 vs lib/solrizer/fedora/solrizer.rb in solrizer-fedora-2.0.0.rc1
- old
+ new
@@ -4,139 +4,150 @@
require 'solrizer/fedora/indexer'
require 'solrizer/xml'
require 'solrizer/html'
require 'active_support/core_ext/hash'
-# Let people explicitly require xml support if they want it ...
-# require 'solrizer/xml.rb'
require 'fastercsv' # this is used by solrize_objects when you pass it a csv file of pids
module Solrizer::Fedora
-class Solrizer
- ALL_FIELDS = [
- :pid, :label, :fType, :cModel, :state, :ownerId, :cDate, :mDate, :dcmDate,
- :bMech, :title, :creator, :subject, :description, :contributor,
- :date, :type, :format, :identifier, :source, :language, :relation, :coverage, :rights
- ]
+ class Solrizer
+ ALL_FIELDS = [
+ :pid, :label, :fType, :cModel, :state, :ownerId, :cDate, :mDate, :dcmDate,
+ :bMech, :title, :creator, :subject, :description, :contributor,
+ :date, :type, :format, :identifier, :source, :language, :relation, :coverage, :rights
+ ]
- attr_accessor :indexer, :index_full_text
+ attr_accessor :indexer, :index_full_text
- #
- # This method initializes the indexer
- # If passed an argument of :index_full_text=>true, it will perform full-text indexing instead of indexing fields only.
- #
- def initialize( opts={} )
- @@index_list = false unless defined?(@@index_list)
- if opts[:index_full_text] == true || opts[:index_full_text] == "true"
- @index_full_text = true
- else
- @index_full_text = false
+ #
+ # Build a Solrizer together with the Indexer it delegates to.
+ #
+ # @param [Hash] opts optional parameters
+ # @option opts [Boolean, String] :index_full_text full-text indexing is enabled only for
+ # true or the string "true"; any other value (or no value) disables it
+ #
+ def initialize( opts={} )
+ # The class-level index list defaults to false when nothing has defined it yet.
+ @@index_list = false unless defined?(@@index_list)
+ full_text = opts[:index_full_text]
+ @index_full_text = (full_text == true || full_text == "true")
+ @indexer = Indexer.new( :index_full_text=>@index_full_text )
end
- @indexer = Indexer.new( :index_full_text=>@index_full_text )
- end
- # Solrize the given Fedora object's full-text and facets into the search index
- #
- # @param [String or ActiveFedora::Base] obj the object to solrize
- # @param [Hash] opts optional parameters
- # @example Suppress errors using :suppress_errors option
- # solrizer.solrize("my:pid", :suppress_errors=>true)
- def solrize( obj, opts={} )
- # retrieve the Fedora object based on the given unique id
-
+ # Solrize the given Fedora object's full-text and facets into the search index
+ #
+ # @param [String or ActiveFedora::Base] obj the object to solrize
+ # @param [Hash] opts optional parameters
+ # @example Suppress errors using :suppress_errors option
+ # solrizer.solrize("my:pid", :suppress_errors=>true)
+ def solrize( obj, opts={} )
+ # retrieve the Fedora object based on the given unique id
+
begin
-
- start = Time.now
- logger.debug "SOLRIZER Retrieving object #{obj} ..."
+
+ start = Time.now
+ logger.debug "SOLRIZER Retrieving object #{obj} ..."
- if obj.kind_of? ActiveFedora::Base
- # do nothing
- elsif obj.kind_of? String
- obj = Repository.get_object( obj )
- elsif obj.respond_to? :pid
- obj = Repository.get_object( obj.pid )
- else
- raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
- end
-
- obj_done = Time.now
- obj_done_elapse = obj_done - start
- logger.debug " completed. Duration: #{obj_done_elapse}"
+ if obj.kind_of? ActiveFedora::Base
+ # do nothing
+ elsif obj.kind_of? String
+ obj = Repository.get_object( obj )
+ elsif obj.respond_to? :pid
+ obj = Repository.get_object( obj.pid )
+ else
+ raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
+ end
- logger.debug "\t Indexing object #{obj.pid} ... "
- # add the keywords and facets to the search index
- index_start = Time.now
- indexer.index( obj )
-
- index_done = Time.now
- index_elapsed = index_done - index_start
-
- logger.debug "completed. Duration: #{index_elapsed} ."
+ obj_done = Time.now
+ obj_done_elapse = obj_done - start
+ logger.debug " completed. Duration: #{obj_done_elapse}"
+
+ logger.debug "\t Indexing object #{obj.pid} ... "
+ # add the keywords and facets to the search index
+ index_start = Time.now
+ indexer.index( obj )
+
+ index_done = Time.now
+ index_elapsed = index_done - index_start
+
+ logger.debug "completed. Duration: #{index_elapsed} ."
+
-
rescue Exception => e
- if opts[:suppress_errors]
- logger.debug "SOLRIZER unable to index #{obj}. Failed with #{e.inspect}"
- else
- raise e
- end
- end #begin
+ if opts[:suppress_errors]
+ logger.debug "SOLRIZER unable to index #{obj}. Failed with #{e.inspect}"
+ else
+ raise e
+ end
+ end
- end
-
- # Retrieve a comprehensive list of all the unique identifiers in Fedora and
- # solrize each object's full-text and facets into the search index
- #
- # @example Suppress errors using :suppress_errors option
- # solrizer.solrize_objects( :suppress_errors=>true )
- def solrize_objects(opts={})
- # retrieve a list of all the pids in the fedora repository
- num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
- puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
+ end
+
+ # Retrieve a comprehensive list of all the unique identifiers in Fedora and
+ # solrize each object's full-text and facets into the search index
+ #
+ # @example Suppress errors using :suppress_errors option
+ # solrizer.solrize_objects( :suppress_errors=>true )
+ def solrize_objects(opts={})
+ # retrieve a list of all the pids in the fedora repository
+ num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
+ puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
- if @@index_list == false
-
- objects = find_objects(:limit=>num_docs)
+ if @@index_list == false
+
+ objects = find_objects(:limit=>num_docs)
- puts "Shelving #{objects.length} Fedora objects"
- objects.each do |object|
- solrize( object, opts )
- end
-
- else
-
- if File.exists?(@@index_list)
- arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
-
- puts "Indexing from list at #{@@index_list}"
- puts "Shelving #{arr_of_pids.length} Fedora objects"
-
- arr_of_pids.each do |row|
- pid = row[0]
- solrize( pid )
- end #FASTERCSV
- else
- puts "#{@@index_list} does not exists!"
- end #if File.exists
-
- end #if Index_LISTS
- end #solrize_objects
+ puts "Shelving #{objects.length} Fedora objects"
+ objects.each do |object|
+ solrize( object, opts )
+ end
+
+ else
+ if File.exists?(@@index_list)
+ arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
+
+ puts "Indexing from list at #{@@index_list}"
+ puts "Shelving #{arr_of_pids.length} Fedora objects"
+
+ arr_of_pids.each do |row|
+ pid = row[0]
+ solrize( pid )
+ end #FASTERCSV
+ else
+ puts "#{@@index_list} does not exists!"
+ end #if File.exists
+
+ end #if Index_LISTS
+ end #solrize_objects
- def find_objects(*args)
- raise ArgumentError, "Missing query string" unless args.length >= 1
- options = args.last.is_a?(Hash) ? args.pop : {}
+ # Collect the pids reported by every configured Fedora connection.
+ #
+ # Positional arguments other than a trailing options Hash are not used; the
+ # search is always issued with an empty query string.
+ #
+ # @param [Array] args at least one argument is required; a trailing Hash is taken as options
+ # (its :limit value, when present, is sent as maxResults)
+ # @return [Array] the "pid" value of each objectFields entry, across all connections
+ # @raise [ArgumentError] when called with no arguments at all
+ def find_objects(*args)
+ raise ArgumentError, "Missing query string" unless args.length >= 1
+ options = args.last.is_a?(Hash) ? args.pop : {}
- params = {}
- params[:query] = ''
- params[:maxResults] = options[:limit] if options[:limit]
- params[:pid] = true
- connection = ActiveFedora::RubydoraConnection.instance.connection
- response = Hash.from_xml(connection.find_objects(params))
- response["result"]["resultList"]["objectFields"].map{|x| x["pid"]}
- end
+ params = {}
+ params[:query] = ''
+ params[:maxResults] = options[:limit] if options[:limit]
+ params[:pid] = true
+
+ pids = []
+ connections.each do |conn|
+ response = Hash.from_xml(conn.find_objects(params))
+ # Hash.from_xml gives nil for an empty <resultList>, a single Hash when there is
+ # exactly one <objectFields> element, and an Array when there are several.
+ # Normalize all three shapes to an Array before mapping.
+ result_list = response["result"]["resultList"] || {}
+ fields = result_list["objectFields"]
+ fields = [fields] if fields.is_a?(Hash)
+ pids.concat(Array(fields).map{|x| x["pid"]})
+ end
+ pids
+ end
+
+ private
+ # Build a fresh Rubydora connection for each configured credential set.
+ #
+ # When the ActiveFedora configuration reports itself as sharded, its credentials
+ # are treated as a collection (one connection per entry); otherwise the single
+ # credentials value yields exactly one connection.
+ #
+ # @return [Array] one connection object per credential set
+ def connections
+ sharded = ActiveFedora.config.sharded?
+ credential_sets = ActiveFedora.config.credentials
+ credential_sets = [credential_sets] unless sharded
+ credential_sets.map { |cred| ActiveFedora::RubydoraConnection.new(cred).connection }
+ end
+
-end #class
+ end #class
end #module