lib/solrizer/xml/terminology_based_solrizer.rb in solrizer-2.0.0 vs lib/solrizer/xml/terminology_based_solrizer.rb in solrizer-2.1.0.rc1
- old
+ new
@@ -1,101 +1,104 @@
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
module Solrizer::XML::TerminologyBasedSolrizer
-
- def self.default_field_mapper
- @@default_field_mapper ||= Solrizer::FieldMapper::Default.new
+ def self.included(klass)
+ klass.send(:include, Solrizer::Common)
+ klass.send(:extend, ClassMethods)
end
# Module Methods
+ module ClassMethods
- # Build a solr document from +doc+ based on its terminology
- # @param [OM::XML::Document] doc
- # @param [Hash] (optional) solr_doc (values hash) to populate
- def self.solrize(doc, solr_doc=Hash.new, field_mapper = nil)
- unless doc.class.terminology.nil?
- doc.class.terminology.terms.each_pair do |term_name,term|
- doc.solrize_term(term, solr_doc, field_mapper)
+ # Build a solr document from +doc+ based on its terminology
+ # @param [OM::XML::Document] doc
+ # @param [Hash] (optional) solr_doc (values hash) to populate
+ def solrize(doc, solr_doc=Hash.new, field_mapper = nil)
+ unless doc.class.terminology.nil?
+ doc.class.terminology.terms.each_pair do |term_name,term|
+ doc.solrize_term(term, solr_doc, field_mapper)
+ end
end
- end
- return solr_doc
- end
-
- # Populate a solr document with fields based on nodes in +xml+
- # Values for a term are gathered by to +term_pointer+ using OM::XML::TermValueOperators.term_values
- # and are deserialized by OM according to :type, as determined in its terminology.
- # The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
- # rendered to a string.
- # @param [OM::XML::Document] doc xml document to extract values from
- # @param [OM::XML::Term] term corresponding to desired xml values
- # @param [Hash] (optional) solr_doc (values hash) to populate
- def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
- parents = opts.fetch(:parents, [])
- term_pointer = parents+[term.name]
- nodeset = doc.term_values(*term_pointer)
+ return solr_doc
+ end
- nodeset.each do |n|
+ # Populate a solr document with fields based on nodes in +xml+
+ # Values for a term are gathered by to +term_pointer+ using OM::XML::TermValueOperators.term_values
+ # and are deserialized by OM according to :type, as determined in its terminology.
+ # The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
+ # rendered to a string.
+ # @param [OM::XML::Document] doc xml document to extract values from
+ # @param [OM::XML::Term] term corresponding to desired xml values
+ # @param [Hash] (optional) solr_doc (values hash) to populate
+ def solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
+ parents = opts.fetch(:parents, [])
+ term_pointer = parents+[term.name]
+ nodeset = doc.term_values(*term_pointer)
- # TODO: Solrizer::FieldMapper::Default is supposed to translate dates into full ISO 8601 formatted strings.
- # However, there an integration issue with ActiveFedora using OM: it ignores the default field mapper given
- # in this gem that does this. So, the following is a workaround until it is fixed.
- node = n.is_a?(Date) ? DateTime.parse(n.to_s).to_time.utc.iso8601 : n.to_s
-
- doc.solrize_node(node.to_s, term_pointer, term, solr_doc, field_mapper)
- unless term.kind_of? OM::XML::NamedTermProxy
- term.children.each_pair do |child_term_name, child_term|
- doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node.to_s)}]})
+ nodeset.each do |n|
+
+ # TODO: Solrizer::FieldMapper::Default is supposed to translate dates into full ISO 8601 formatted strings.
+ # However, there an integration issue with ActiveFedora using OM: it ignores the default field mapper given
+ # in this gem that does this. So, the following is a workaround until it is fixed.
+ node = n.is_a?(Date) ? DateTime.parse(n.to_s).to_time.utc.iso8601 : n.to_s
+
+ doc.solrize_node(node.to_s, term_pointer, term, solr_doc, field_mapper)
+ unless term.kind_of? OM::XML::NamedTermProxy
+ term.children.each_pair do |child_term_name, child_term|
+ doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node.to_s)}]})
+ end
end
end
+ solr_doc
end
- solr_doc
- end
-
- # Populate a solr document with solr fields corresponding to the given xml node
- # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
- # If the supplied term does not have an index_as attribute, no indexing will be performed.
- # @param [Nokogiri::XML::Node] node to solrize
- # @param [OM::XML::Document] doc document the node came from
- # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
- # @param [Term] term the term to be solrized
- # @param [Hash] (optional) solr_doc (values hash) to populate
- # @return [Hash] the solr doc
- def self.solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
- return solr_doc unless term.index_as && !term.index_as.empty?
- field_mapper ||= self.default_field_mapper
-
- generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
-
- field_mapper.solr_names_and_values(generic_field_name_base, node_value, term.type, term.index_as).each do |field_name, field_value|
- unless field_value.join("").strip.empty?
- ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
+
+ # Populate a solr document with solr fields corresponding to the given xml node
+ # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
+ # If the supplied term does not have an index_as attribute, no indexing will be performed.
+ # @param [Nokogiri::XML::Node] node to solrize
+ # @param [OM::XML::Document] doc document the node came from
+ # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
+ # @param [Term] term the term to be solrized
+ # @param [Hash] (optional) solr_doc (values hash) to populate
+ # @return [Hash] the solr doc
+ def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
+ return solr_doc unless term.index_as && !term.index_as.empty?
+
+ directive = term_to_solrizer_directive(term)
+
+ generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
+ create_and_insert_terms(generic_field_name_base, node_value, directive, solr_doc)
+
+
+ if term_pointer.length > 1
+ hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
+ create_and_insert_terms(hierarchical_field_name_base, node_value, directive, solr_doc)
end
+ solr_doc
end
-
- if term_pointer.length > 1
- hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
- field_mapper.solr_names_and_values(hierarchical_field_name_base, node_value, term.type, term.index_as).each do |field_name, field_value|
- unless field_value.join("").strip.empty?
- ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
- end
- end
+
+ private
+
+ def term_to_solrizer_directive(term)
+ Solrizer::Directive.new(term.type, term.index_as)
end
- solr_doc
+
end
+
# Instance Methods
attr_accessor :field_mapper
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
- Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc, field_mapper)
+ self.class.solrize(self, solr_doc, field_mapper)
end
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
- Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
+ self.class.solrize_term(self, term, solr_doc, field_mapper, opts)
end
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
- Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
+ self.class.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
end
end