lib/solrizer/extractor.rb in solrizer-1.0.0 vs lib/solrizer/extractor.rb in solrizer-1.0.1

- old
+ new

require "nokogiri"
require 'yaml'

module Solrizer
  #
  # Each of the Solrizer implementations provides its own Extractor module that extends the behaviors of Solrizer::Extractor
  # with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
  #
  class Extractor

    # Insert +field_value+ for +field_name+ into +solr_doc+.
    # Ensures that field values are always appended to arrays within the values hash.
    # Also ensures that values are run through format_node_value.
    #
    # @param [Hash] solr_doc the solr values hash; it is modified in place
    # @param [String] field_name the key to store the value under
    # @param [String, Array<String>] field_value a single value or an array of value fragments
    # @return [Hash] the same +solr_doc+ that was passed in
    def self.insert_solr_field_value(solr_doc, field_name, field_value)
      formatted_value = format_node_value(field_value)

      # `fetch` with an explicit fallback ignores any Hash default, so a missing key
      # always starts from an empty list. Kernel#Array hands back an existing Array
      # untouched (it is still appended to in place) and wraps a bare scalar, so a
      # String that was stored directly in the hash is not concatenated onto by `<<`.
      values = Array(solr_doc.fetch(field_name, nil))
      solr_doc[field_name] = values << formatted_value

      solr_doc
    end

    # Collapses every run of whitespace inside each value to a single space, strips
    # leading/trailing whitespace, and joins the results with a single blank delimiter.
    #
    # @param [Array<String>, String] values the value fragments to normalise; a single
    #   String is treated as a one-element array
    # @return [String] the normalised, space-joined value
    def self.format_node_value(values)
      # Array() lets callers pass one String (String has no #map on Ruby >= 1.9).
      Array(values).map { |value| value.to_s.gsub(/\s+/, ' ').strip }.join(' ')
    end

    # Instance Methods

    # Alias for Solrizer::Extractor.insert_solr_field_value
    def insert_solr_field_value(solr_doc, field_name, field_value)
      Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
    end

    # Alias for Solrizer::Extractor.format_node_value
    def format_node_value(values)
      Solrizer::Extractor.format_node_value(values)
    end

    # Merges +input_hash+ into +solr_hash+ (in place) after printing a deprecation warning.
    #
    # @deprecated Just pass values directly into your solr values hash.
    # @param [Hash] input_hash the input hash of values
    # @param [Hash] solr_hash the solr values hash to add the values into
    # @return [Hash] the populated Solr values hash
    def extract_hash(input_hash, solr_hash = {})
      warn "[DEPRECATION] `extract_hash` is deprecated. Just pass values directly into your solr values hash"
      solr_hash.merge!(input_hash)
    end
  end
end