lib/supernova/solr_indexer.rb in supernova-0.6.5 vs lib/supernova/solr_indexer.rb in supernova-0.6.6

- old
+ new

@@ -1,11 +1,12 @@
 require "json"
 require "fileutils"
 require "time"
+require "typhoeus"
 
 class Supernova::SolrIndexer
-  attr_accessor :options, :db, :ids, :max_rows_to_direct_index, :local_solr
+  attr_accessor :options, :db, :ids, :max_rows_to_direct_index, :local_solr, :current_json_string
   attr_writer :index_file_path, :debug
 
   MAX_ROWS_TO_DIRECT_INDEX = 100
 
   include Supernova::Solr
@@ -219,19 +220,23 @@
     debug "mapped %COUNT% rows to solr in %TIME%" do
       rows.map! { |r| map_for_solr(r) }
     end
     if self.max_rows_to_direct_index < rows.count
       debug "indexed #{rows.length} rows with json in %TIME%" do
-        index_with_json_file(rows)
+        index_with_json(rows)
       end
     else
       debug "indexed #{rows.length} rows directly in %TIME%" do
         index_directly(rows)
       end
     end
   end
 
+  def index_with_json(rows)
+    options && options[:use_json_file] ? index_with_json_file(rows) : index_with_json_string(rows)
+  end
+
   def solr_rows_to_index_for_query(query)
     query_db(query).map do |row|
       map_for_solr(row)
     end
   end
@@ -255,10 +260,40 @@
       end
     end
     finish
   end
 
+  def append_to_json_string(row)
+    if self.current_json_string.nil?
+      self.current_json_string = "\{\n"
+    else
+      self.current_json_string << ",\n"
+    end
+    self.current_json_string << %("add":#{{:doc => row}.to_json})
+  end
+
+  def finalize_json_string
+    self.current_json_string << "\n}"
+  end
+
+  def post_json_string
+    Typhoeus::Request.post("#{solr_update_url}?commit=true",
+      :body => self.current_json_string,
+      :headers => { "Content-type" => "application/json; charset=utf-8" }
+    ).tap do |response|
+      self.current_json_string = nil
+    end
+  end
+
+  def index_with_json_string(rows)
+    rows.each do |row|
+      append_to_json_string(row)
+    end
+    finalize_json_string
+    post_json_string
+  end
+
   def ids_given?
     self.ids.is_a?(Array)
   end
 
   def index_file_path
@@ -295,13 +330,17 @@
   def solr_url
     Supernova::Solr.url.present? ?
     Supernova::Solr.url.to_s.gsub(/\/$/, "") : nil
   end
+  def solr_update_url
+    "#{solr_url}/update/json"
+  end
+
   def do_index_file(options = {})
     raise "solr not configured" if solr_url.nil?
     cmd = if self.local_solr
-      %(curl -s '#{solr_url}/update/json?commit=true\\&stream.file=#{index_file_path}')
+      %(curl -s '#{solr_update_url}?commit=true\\&stream.file=#{index_file_path}')
     else
       %(cd #{File.dirname(index_file_path)} && curl -s '#{solr_url}/update/json?commit=true' --data-binary @#{File.basename(index_file_path)} -H 'Content-type:application/json')
     end
     debug "run command: #{cmd}"
     out = Kernel.send(:`, cmd)
\ No newline at end of file