module Sunspot #:nodoc:
  module Rails #:nodoc:
    #
    # This module adds Sunspot functionality to ActiveRecord models. As well as
    # providing class and instance methods, it optionally adds lifecycle hooks
    # to automatically add and remove models from the Solr index as they are
    # created and destroyed.
    #
    module Searchable
      # NOTE(review): the text between "class <<" and the #searchable
      # documentation was lost in the copy this file was restored from. The
      # hook below and the module name +ActsAsMethods+ are a reconstruction;
      # confirm them against the project's canonical source.
      class << self
        def included(base) #:nodoc:
          base.module_eval do
            extend(ActsAsMethods)
          end
        end
      end

      module ActsAsMethods
        #
        # Makes a class searchable if it is not already, or adds search
        # configuration if it is. When called again on a class that is
        # already searchable, only the +:include+ option is merged into the
        # existing configuration; all other options are ignored.
        #
        # The block passed into this method is evaluated by the
        # <code>Sunspot.setup</code> method. See the Sunspot documentation for
        # complete information on the functionality provided by that method.
        #
        # ==== Options (+options+)
        #
        # :auto_index<Boolean>::
        #   Automatically index models in Solr when they are saved.
        #   Default: true
        # :auto_remove<Boolean>::
        #   Automatically remove models from the Solr index when they are
        #   destroyed. <b>Setting this option to +false+ is not recommended
        #   </b>(see the README).
        # :ignore_attribute_changes_of<Array>::
        #   Define attributes, that should not trigger a reindex of that
        #   object. Usual suspects are updated_at or counters.
        # :include<Mixed>::
        #   Define default ActiveRecord includes, set this to allow ActiveRecord
        #   to load required associations when indexing. See ActiveRecord's
        #   documentation on eager-loading for examples on how to set this
        #   Default: []
        # :paginate<Boolean>::
        #   Retrieve models using page numbers rather than database-style limit
        #   and offset. Assumes the total count of models cannot be determined
        #   without performing at least one request.
        #   Default: false
        # :max_batch_size<Integer>::
        #   Override the command line specified batch size that is used for
        #   indexing on a per-model basis.
        #
        # ==== Example
        #
        #   class Post < ActiveRecord::Base
        #     searchable do
        #       text :title, :body
        #       string :sort_title do
        #         title.downcase.sub(/^(an?|the)/, '')
        #       end
        #       integer :blog_id
        #       time :updated_at
        #     end
        #   end
        #
        def searchable(options = {}, &block)
          Sunspot.setup(self, &block)

          if searchable?
            sunspot_options[:include].concat(Util::Array(options[:include]))
          else
            extend ClassMethods
            include InstanceMethods

            class_inheritable_hash :sunspot_options

            unless options[:auto_index] == false
              before_save :maybe_mark_for_auto_indexing
              after_save :maybe_auto_index
            end

            unless options[:auto_remove] == false
              after_destroy do |searchable|
                searchable.remove_from_index
              end
            end

            # Store a normalized copy so the caller's hash is left untouched.
            self.sunspot_options =
              options.merge(:include => Util::Array(options[:include]))
          end
        end

        #
        # This method is defined on all ActiveRecord::Base subclasses. It
        # is false for classes on which #searchable has not been called, and
        # true for classes on which #searchable has been called.
        #
        # ==== Returns
        #
        # +false+
        #
        def searchable?
          false
        end
      end

      module ClassMethods
        # NOTE(review): the body of this hook was lost in the copy this file
        # was restored from. The alias list below is a reconstruction that
        # mirrors InstanceMethods.included (unprefixed names are only added
        # when the class does not already define them); confirm it against
        # the project's canonical source.
        def self.extended(base) #:nodoc:
          class << base
            alias_method :search, :solr_search unless method_defined? :search
            alias_method :search_ids, :solr_search_ids unless method_defined? :search_ids
            alias_method :remove_all_from_index, :solr_remove_all_from_index unless method_defined? :remove_all_from_index
            alias_method :remove_all_from_index!, :solr_remove_all_from_index! unless method_defined? :remove_all_from_index!
            alias_method :reindex, :solr_reindex unless method_defined? :reindex
            alias_method :index, :solr_index unless method_defined? :index
            alias_method :index_orphans, :solr_index_orphans unless method_defined? :index_orphans
            alias_method :clean_index_orphans, :solr_clean_index_orphans unless method_defined? :clean_index_orphans
          end
        end

        #
        # Search for instances of this class in Solr. The block is handed to
        # Sunspot.new_search - see the Sunspot documentation for the full
        # API.
        #
        # ==== Example
        #
        #   Post.search(:include => [:blog]) do
        #     keywords 'best pizza'
        #     with :blog_id, 1
        #     order :updated_at, :desc
        #     facet :category_ids
        #   end
        #
        # ==== Options
        #
        # :include:: Specify associations to eager load
        # :select:: Specify columns to select from database when loading results
        #
        # ==== Returns
        #
        # Sunspot::Search:: Object containing results, totals, facets, etc.
        #
        def solr_search(options = {}, &block)
          solr_execute_search(options) do
            Sunspot.new_search(self, &block)
          end
        end

        #
        # Get IDs of matching results without loading the result objects from
        # the database. This method may be useful if search is used as an
        # intermediate step in a larger find operation. The block is the same
        # as the block provided to the #search method.
        #
        # ==== Returns
        #
        # Array:: Array of IDs, in the order returned by the search
        #
        def solr_search_ids(&block)
          solr_execute_search_ids do
            solr_search(&block)
          end
        end

        #
        # Remove instances of this class from the Solr index.
        #
        def solr_remove_all_from_index
          Sunspot.remove_all(self)
        end

        #
        # Remove all instances of this class from the Solr index and immediately
        # commit.
        #
        def solr_remove_all_from_index!
          Sunspot.remove_all!(self)
        end

        #
        # Completely rebuild the index for this class. First removes all
        # instances from the index, then loads records and indexes them.
        #
        # See #index for information on options, etc.
        #
        def solr_reindex(options = {})
          solr_remove_all_from_index
          solr_index(options)
        end

        #
        # Add/update all existing records in the Solr index. The
        # +batch_size+ argument specifies how many records to load out of the
        # database at a time. The default batch size is 500; if nil is passed,
        # records will not be indexed in batches. By default, a commit is issued
        # after each batch; passing +false+ for +batch_commit+ will disable
        # this, and only issue a commit at the end of the process. If associated
        # objects need to be indexed also, you can specify +include+ in format
        # accepted by ActiveRecord to improve your sql select performance
        #
        # Dispatches to #solr_index_paged when the class was made searchable
        # with <code>:paginate => true</code>, and to #solr_index_batched
        # otherwise.
        #
        # ==== Options (passed as a hash)
        #
        # batch_size<Integer>:: Batch size with which to load records. Passing
        #                       'nil' will skip batches.  Default is 500.
        # batch_commit<Boolean>:: Flag signalling if a commit should be done
        #                         after each batch is indexed, default is 'true'
        # include<Mixed>:: include option to be passed to the ActiveRecord find,
        #                  used for including associated objects that need to be
        #                  indexed with the parent object, accepts all formats
        #                  ActiveRecord::Base.find does
        # first_id:: The lowest possible ID for this class. Defaults to 0, which
        #            is fine for integer IDs; string primary keys will need to
        #            specify something reasonable here.
        #
        # ==== Examples
        #
        #   # index in batches of 500, commit after each
        #   Post.index
        #
        #   # index all rows at once, then commit
        #   Post.index(:batch_size => nil)
        #
        #   # index in batches of 500, commit when all batches complete
        #   Post.index(:batch_commit => false)
        #
        #   # include the associated +author+ object when loading to index
        #   Post.index(:include => :author)
        #
        def solr_index(opts={})
          if self.sunspot_options[:paginate]
            solr_index_paged(opts)
          else
            solr_index_batched(opts)
          end
        end

        #
        # The default method of indexing records into the Solr index. Performs
        # batching using the ID of the last retrieved records and a limit. Ideal
        # for use with database/activerecord etc.
        #
        # Accepts the same options as #solr_index.
        #
        def solr_index_batched(opts={})
          options = {
            :batch_size => 500,
            :batch_commit => true,
            :include => self.sunspot_options[:include],
            :first_id => 0
          }.merge(opts)

          if options[:batch_size]
            offset = 0
            counter = 1
            record_count = count
            last_id = options[:first_id]

            while offset < record_count
              records = []
              solr_benchmark(options[:batch_size], counter) do
                records = find(:all,
                               :include => options[:include],
                               :conditions => ["#{table_name}.#{primary_key} > ?", last_id],
                               :limit => options[:batch_size],
                               :order => primary_key)
                Sunspot.index(records) unless records.empty?
              end

              # The loop bound comes from +count+, but batches are selected by
              # primary key above +last_id+. If :first_id skips rows, or rows
              # disappear while indexing, a batch can come back empty; stop
              # instead of dereferencing records.last.
              break if records.empty?

              last_id = records.last.id
              Sunspot.commit if options[:batch_commit]
              offset += options[:batch_size]
              counter += 1
            end

            Sunspot.commit unless options[:batch_commit]
          else
            Sunspot.index!(all(:include => options[:include]))
          end
        end

        #
        # Index records into Solr using paginated pattern. Ideal for when you
        # won't know the total number of records until after the first batch is
        # retrieved. Use with activeresource/pulling records from a webservice
        # etc.
        #
        # Accepts the same options as #solr_index. When the class was made
        # searchable with a +:max_batch_size+, the effective batch size is
        # capped at that value.
        #
        def solr_index_paged(opts={})
          options = {
            :batch_size => 500,
            :batch_commit => true,
            :include => self.sunspot_options[:include],
            :first_id => 0
          }.merge(opts)

          # Only cap when both values are present: :max_batch_size is optional
          # and :batch_size may legitimately be nil ("do not batch").
          max_batch_size = self.sunspot_options[:max_batch_size]
          if options[:batch_size] && max_batch_size && options[:batch_size] > max_batch_size
            options[:batch_size] = max_batch_size
          end

          if options[:batch_size]
            page = 0
            per_page = options[:batch_size]
            # The real total is unknown until the first page has been fetched;
            # seed it with one page's worth so the loop runs at least once.
            total_entries = options[:batch_size]

            while (page * per_page) < total_entries
              records = find(:all,
                             :include => options[:include],
                             :params => { :page => page + 1, :per_page => per_page })
              Sunspot.index(records)
              Sunspot.commit if options[:batch_commit]
              total_entries = records.total_entries
              page += 1
            end

            Sunspot.commit unless options[:batch_commit]
          else
            Sunspot.index!(all(:include => options[:include]))
          end
        end

        #
        # Return the IDs of records of this class that are indexed in Solr but
        # do not exist in the database. Under normal circumstances, this should
        # never happen, but this method is provided in case something goes
        # wrong. Usually you will want to rectify the situation by calling
        # #clean_index_orphans or #reindex
        #
        # The indexed IDs are collected page by page through #solr_search_ids
        # until a page comes back short or empty, so the result does not
        # depend on how many rows the database currently holds.
        #
        # ==== Options (passed as a hash)
        #
        # batch_size<Integer>:: Number of IDs requested from Solr per page.
        #                       Default is 1000.
        #
        # ==== Returns
        #
        # Array:: Collection of IDs that exist in Solr but not in the database
        def solr_index_orphans(opts={})
          batch_size = opts[:batch_size] || 1000
          indexed_ids = []
          page = 1

          loop do
            ids = solr_search_ids { paginate(:page => page, :per_page => batch_size) }.to_a
            indexed_ids.concat(ids)
            break if ids.empty? || ids.size < batch_size
            page += 1
          end

          database_ids = all(:select => 'id').map { |object| object.id }
          (indexed_ids - database_ids).uniq
        end

        #
        # Find IDs of records of this class that are indexed in Solr but do not
        # exist in the database, and remove them from Solr. Under normal
        # circumstances, this should not be necessary; this method is provided
        # in case something goes wrong.
        #
        def solr_clean_index_orphans
          solr_index_orphans.each do |id|
            # Build an unsaved stand-in that carries only the orphaned ID so
            # the regular instance-level removal can be reused.
            new do |fake_instance|
              fake_instance.id = id
            end.solr_remove_from_index
          end
        end

        #
        # Classes that have been defined as searchable return +true+ for this
        # method.
        #
        # ==== Returns
        #
        # +true+
        #
        def searchable?
          true
        end

        #
        # Yields to obtain a search object, applies the +:include+ and
        # +:select+ options (the only keys accepted) to this class's data
        # accessor, and executes the search.
        #
        def solr_execute_search(options = {})
          options.assert_valid_keys(:include, :select)
          search = yield
          unless options.empty?
            search.build do |query|
              if options[:include]
                query.data_accessor_for(self).include = options[:include]
              end
              if options[:select]
                query.data_accessor_for(self).select = options[:select]
              end
            end
          end
          search.execute
        end

        #
        # Yields to obtain an executed search and returns the primary keys of
        # its raw results, converted with +to_i+. The +options+ argument is
        # accepted but not used.
        #
        def solr_execute_search_ids(options = {})
          search = yield
          search.raw_results.map { |raw_result| raw_result.primary_key.to_i }
        end

        protected

        #
        # Does some logging for benchmarking indexing performance
        #
        def solr_benchmark(batch_size, counter, &block)
          start = Time.now
          logger.info("[#{Time.now}] Start Indexing")
          yield
          elapsed = Time.now-start
          logger.info("[#{Time.now}] Completed Indexing. Rows indexed #{counter * batch_size}. Rows/sec: #{batch_size/elapsed.to_f} (Elapsed: #{elapsed} sec.)")
        end
      end

      module InstanceMethods
        def self.included(base) #:nodoc:
          base.module_eval do
            alias_method :index, :solr_index unless method_defined? :index
            alias_method :index!, :solr_index! unless method_defined? :index!
            alias_method :remove_from_index, :solr_remove_from_index unless method_defined? :remove_from_index
            alias_method :remove_from_index!, :solr_remove_from_index! unless method_defined? :remove_from_index!
            alias_method :more_like_this, :solr_more_like_this unless method_defined? :more_like_this
            alias_method :more_like_this_ids, :solr_more_like_this_ids unless method_defined? :more_like_this_ids
          end
        end

        #
        # Index the model in Solr. If the model is already indexed, it will be
        # updated. Using the defaults, you will usually not need to call this
        # method, as models are indexed automatically when they are created or
        # updated. If you have disabled automatic indexing (see
        # ClassMethods#searchable), this method allows you to manage indexing
        # manually.
        #
        def solr_index
          Sunspot.index(self)
        end

        #
        # Index the model in Solr and immediately commit. See #index
        #
        def solr_index!
          Sunspot.index!(self)
        end

        #
        # Remove the model from the Solr index. Using the defaults, this should
        # not be necessary, as models will automatically be removed from the
        # index when they are destroyed. If you disable automatic removal
        # (which is not recommended!), you can use this method to manage removal
        # manually.
        #
        def solr_remove_from_index
          Sunspot.remove(self)
        end

        #
        # Remove the model from the Solr index and commit immediately. See
        # #remove_from_index
        #
        def solr_remove_from_index!
          Sunspot.remove!(self)
        end

        #
        # Build a search with Sunspot.new_more_like_this for this record and
        # execute it. A trailing options hash may carry +:include+ and
        # +:select+; remaining arguments and the block are passed through to
        # Sunspot.new_more_like_this.
        #
        def solr_more_like_this(*args, &block)
          options = args.extract_options!
          self.class.solr_execute_search(options) do
            Sunspot.new_more_like_this(self, *args, &block)
          end
        end

        #
        # Same as #solr_more_like_this, but returns only the primary keys of
        # the raw results.
        #
        def solr_more_like_this_ids(&block)
          self.class.solr_execute_search_ids do
            solr_more_like_this(&block)
          end
        end

        private

        #
        # before_save hook: decides whether the following after_save hook
        # should index this record. New records are always marked. Existing
        # records are marked unless +:ignore_attribute_changes_of+ is set and
        # every changed attribute is listed there (a single attribute name is
        # accepted as well as an array).
        #
        # Always returns +true+ - presumably so the callback never halts the
        # save; confirm against the callback semantics of the Rails version
        # in use.
        #
        def maybe_mark_for_auto_indexing
          ignored = self.class.sunspot_options[:ignore_attribute_changes_of]
          @marked_for_auto_indexing =
            if new_record? || !ignored
              true
            else
              relevant_changes = changed.map { |attr| attr.to_sym } - Array(ignored)
              !relevant_changes.empty?
            end
          true
        end

        #
        # after_save hook: indexes the record when it was marked by
        # #maybe_mark_for_auto_indexing, then clears the mark.
        #
        def maybe_auto_index
          if @marked_for_auto_indexing
            solr_index
            remove_instance_variable(:@marked_for_auto_indexing)
          end
        end
      end
    end
  end
end