lib/solrizer/extractor.rb in solrizer-1.0.0 vs lib/solrizer/extractor.rb in solrizer-1.0.1
- old
+ new
@@ -1,7 +1,5 @@
-require 'solr'
-require 'rexml/document'
require "nokogiri"
require 'yaml'
module Solrizer
@@ -10,39 +8,53 @@
#
# Each of the Solrizer implementations provides its own Extractor module that extends the behaviors of Solrizer::Extractor
# with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
#
class Extractor
-
- # Populates a solr doc with values from a hash.
- # Accepts two forms of hashes:
- # => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
- # or
- # => {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }
- #
- # Note that values for individual fields can be a single string or an array of strings.
- def extract_hash( input_hash, solr_doc=Solr::Document.new )
- facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
- facets.each_pair do |facet_name, value|
- case value.class.to_s
- when "String"
- solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
- when "Array"
- value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
- end
+
+ # Insert +field_value+ for +field_name+ into +solr_doc+, mutating +solr_doc+ in place and returning it.
+ # Values are always stored in arrays: appended when +field_name+ already exists, otherwise a new one-element array is created.
+ # The value is first run through format_node_value, so the stored entry is a single whitespace-normalized string.
+ # @param [Hash] solr_doc the solr values hash to update in place
+ # @param [String] field_name the key under which the formatted value is stored
+ # @param [String] field_value the raw value; NOTE(review): format_node_value calls #map on it, so it must respond to #map -- confirm String is intended
+ def self.insert_solr_field_value(solr_doc, field_name, field_value)
+ formatted_value = self.format_node_value(field_value)
+ if solr_doc.has_key?(field_name)
+ solr_doc[field_name] << formatted_value
+ else
+ solr_doc.merge!( {field_name => [formatted_value]} )
end
-
- if input_hash.has_key?(:symbols)
- input_hash[:symbols].each do |symbol_name, value|
- case value.class.to_s
- when "String"
- solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
- when "Array"
- value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
- end
- end
- end
return solr_doc
+ end
+
+ # Collapses every run of whitespace inside each value to a single space, strips leading/trailing whitespace, then joins the values with a single space delimiter into one String
+ # @param [Array] values Array of strings to normalize (each element must respond to #gsub)
+ def self.format_node_value values
+ values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
+ end
+
+ # Instance Methods
+
+ # Instance-level wrapper that delegates to the class method Solrizer::Extractor.insert_solr_field_value with the same arguments
+ def insert_solr_field_value(solr_doc, field_name, field_value)
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
+ end
+
+ # Instance-level wrapper that delegates to the class method Solrizer::Extractor.format_node_value with the same argument
+ def format_node_value values
+ Solrizer::Extractor.format_node_value(values)
+ end
+
+ # Deprecated. Emits a deprecation warning via warn on every call; pass values directly into your solr values hash instead.
+ # Merges input_hash into solr_hash in place (Hash#merge!), so values for keys already present in solr_hash are overwritten
+ # @param [Hash] input_hash the input hash of values
+ # @param [Hash] solr_hash the solr values hash to add the values into (modified in place; defaults to a new empty Hash)
+ # @return [Hash] the populated Solr values hash (the same object as solr_hash)
+ #
+ def extract_hash( input_hash, solr_hash=Hash.new )
+ warn "[DEPRECATION] `extract_hash` is deprecated. Just pass values directly into your solr values hash"
+ return solr_hash.merge!(input_hash)
end
end
end