lib/tanker.rb in tanker-0.5.6 vs lib/tanker.rb in tanker-1.0.0
- old
+ new
@@ -1,18 +1,25 @@
-require "rubygems"
-require "bundler"
-Bundler.setup :default
+begin
+ require "rubygems"
+ require "bundler"
+ Bundler.setup :default
+rescue => e
+ puts "Tanker: #{e.message}"
+end
require 'indextank_client'
require 'tanker/configuration'
require 'tanker/utilities'
require 'will_paginate/collection'
if defined? Rails
- require 'tanker/railtie'
+ begin
+ require 'tanker/railtie'
+ rescue LoadError
+ end
end
module Tanker
class NotConfigured < StandardError; end
@@ -31,107 +38,251 @@
def included(klass)
@included_in ||= []
@included_in << klass
@included_in.uniq!
- klass.instance_variable_set('@tanker_configuration', configuration)
- klass.instance_variable_set('@tanker_indexes', [])
+ configuration # raises error if not defined
klass.send :include, InstanceMethods
klass.extend ClassMethods
class << klass
define_method(:per_page) { 10 } unless respond_to?(:per_page)
end
end
+
+ def batch_update(records)
+ return false if records.empty?
+ data = records.map do |record|
+ options = record.tanker_index_options
+ options.merge!( :docid => record.it_doc_id, :fields => record.tanker_index_data )
+ options
+ end
+ records.first.class.tanker_index.add_documents(data)
+ end
+
+ def search(models, query, options = {})
+ ids = []
+ models = [models].flatten.uniq
+ page = (options.delete(:page) || 1).to_i
+ per_page = (options.delete(:per_page) || models.first.per_page).to_i
+ index = models.first.tanker_index
+ query = query.join(' ') if Array === query
+
+ if (index_names = models.map(&:tanker_config).map(&:index_name).uniq).size > 1
+ raise "You can't search across multiple indexes in one call (#{index_names.inspect})"
+ end
+
+
+ # move conditions into the query body
+ if conditions = options.delete(:conditions)
+ conditions.each do |field, value|
+ value = [value].flatten.compact
+ value.each do |item|
+ query += " #{field}:(#{item})"
+ end
+ end
+ end
+
+ # rephrase filter_functions
+ if filter_functions = options.delete(:filter_functions)
+ filter_functions.each do |function_number, ranges|
+ options[:"filter_function#{function_number}"] = ranges.map{|r|r.join(':')}.join(',')
+ end
+ end
+
+ # rephrase filter_docvars
+ if filter_docvars = options.delete(:filter_docvars)
+ filter_docvars.each do |var_number, ranges|
+ options[:"filter_docvar#{var_number}"] = ranges.map{|r|r.join(':')}.join(',')
+ end
+ end
+
+ query = "__any:(#{query.to_s}) __type:(#{models.map(&:name).join(' OR ')})"
+ options = { :start => per_page * (page - 1), :len => per_page }.merge(options)
+ results = index.search(query, options)
+
+ @entries = WillPaginate::Collection.create(page, per_page) do |pager|
+ # inject the result array into the paginated collection:
+ pager.replace instantiate_results(results)
+
+ unless pager.total_entries
+ # the pager didn't manage to guess the total count, do it manually
+ pager.total_entries = results["matches"]
+ end
+ end
+ end
+
+ protected
+
+ def instantiate_results(index_result)
+ results = index_result['results']
+ return [] if results.empty?
+
+ id_map = results.inject({}) do |acc, result|
+ model, id = result["docid"].split(" ", 2)
+ acc[model] ||= []
+ acc[model] << id.to_i
+ acc
+ end
+
+ if 1 == id_map.size # check for simple case, just one model involved
+ klass = constantize(id_map.keys.first)
+ # eager-load and return just this model's records
+ klass.find(id_map.values.flatten)
+ else # complex case, multiple models involved
+ id_map.each do |klass, ids|
+ # replace the id list with an eager-loaded list of records for this model
+ id_map[klass] = constantize(klass).find(ids)
+ end
+ # return them in order
+ results.map do |result|
+ model, id = result["docid"].split(" ", 2)
+ id_map[model].detect {|record| id.to_i == record.id }
+ end
+ end
+ end
+
+ def constantize(klass_name)
+ Object.const_defined?(klass_name) ?
+ Object.const_get(klass_name) :
+ Object.const_missing(klass_name)
+ end
end
# these are the class methods added when Tanker is included
+ # They're kept to a minimum to prevent namespace pollution
module ClassMethods
- attr_reader :tanker_indexes, :index_name
+ attr_accessor :tanker_config
def tankit(name, &block)
if block_given?
- @index_name = name
- self.instance_exec(&block)
+ self.tanker_config = ModelConfig.new(name, block)
else
raise(NoBlockGiven, 'Please provide a block')
end
end
- def indexes(field)
- @tanker_indexes << field
+ def search_tank(query, options = {})
+ Tanker.search([self], query, options)
end
- def index
- @index ||= Tanker.api.get_index(self.index_name)
+ def tanker_index
+ tanker_config.index
end
- def search_tank(query, options = {})
- ids = []
- page = options.delete(:page) || 1
- per_page = options.delete(:per_page) || self.per_page
+ def tanker_reindex(options = {})
+ puts "Indexing #{self} model"
- # transform fields in query
- if options.has_key? :conditions
- options[:conditions].each do |field,value|
- query += " #{field}:(#{value})"
- end
+ batches = []
+ options[:batch_size] ||= 200
+ records = options[:scope] ? send(options[:scope]).all : all
+ record_size = records.size
+
+ records.each_with_index do |model_instance, idx|
+ batch_num = idx / options[:batch_size]
+ (batches[batch_num] ||= []) << model_instance
end
- query = "__any:(#{query.to_s}) __type:#{self.name}"
- options = { :start => per_page * (page - 1), :len => per_page }.merge(options)
- results = index.search(query, options)
-
- ids = unless results["results"].empty?
- results["results"].map{|res| res["docid"].split(" ", 2)[1]}
- else
- []
+ timer = Time.now
+ batches.each_with_index do |batch, idx|
+ Tanker.batch_update(batch)
+ puts "Indexed #{batch.size} records #{(idx * options[:batch_size]) + batch.size}/#{record_size}"
end
+ puts "Indexed #{record_size} #{self} records in #{Time.now - timer} seconds"
+ end
+ end
- @entries = WillPaginate::Collection.create(page, per_page) do |pager|
- result = self.find(ids)
- # inject the result array into the paginated collection:
- pager.replace(result)
+ class ModelConfig
+ attr_reader :index_name
- unless pager.total_entries
- # the pager didn't manage to guess the total count, do it manually
- pager.total_entries = results["matches"]
- end
- end
+ def initialize(index_name, block)
+ @index_name = index_name
+ @indexes = []
+ @functions = {}
+ instance_exec &block
end
+ def indexes(field = nil, &block)
+ @indexes << [field, block] if field
+ @indexes
+ end
+
+ def variables(&block)
+ @variables = block if block
+ @variables
+ end
+
+ def functions(&block)
+ @functions = block.call if block
+ @functions
+ end
+
+ def index
+ @index ||= Tanker.api.get_index(index_name)
+ end
+
end
# these are the instance methods included
module InstanceMethods
+ def tanker_config
+ self.class.tanker_config || raise(NotConfigured, "Please configure Tanker for #{self.class.inspect} with the 'tankit' block")
+ end
+
def tanker_indexes
- self.class.tanker_indexes
+ tanker_config.indexes
end
+ def tanker_variables
+ tanker_config.variables
+ end
+
# update or create instance in index tank
def update_tank_indexes
+ tanker_config.index.add_document(
+ it_doc_id, tanker_index_data, tanker_index_options
+ )
+ end
+
+ # delete instance from index tank
+ def delete_tank_indexes
+ tanker_config.index.delete_document(it_doc_id)
+ end
+
+ def tanker_index_data
data = {}
- tanker_indexes.each do |field|
- val = self.instance_eval(field.to_s)
- data[field.to_s] = val.to_s unless val.nil?
+ # attempt to autodetect timestamp
+ if respond_to?(:created_at)
+ data[:timestamp] = created_at.to_i
end
- data[:__any] = data.values.join " . "
+ tanker_indexes.each do |field, block|
+ val = block ? instance_exec(&block) : send(field)
+ val = val.join(' ') if Array === val
+ data[field.to_sym] = val.to_s unless val.nil?
+ end
+
+ data[:__any] = data.values.sort_by{|v| v.to_s}.join " . "
data[:__type] = self.class.name
- self.class.index.add_document(it_doc_id, data)
+ data
end
- # delete instance from index tank
- def delete_tank_indexes
- self.class.index.delete_document(it_doc_id)
+ def tanker_index_options
+ options = {}
+
+ if tanker_variables
+ options[:variables] = instance_exec(&tanker_variables)
+ end
+
+ options
end
# create a unique document id based on the model name and record id
def it_doc_id
self.class.name + ' ' + self.id.to_s
end
end
-end
\ No newline at end of file
+end