Sha256: 31ebc986d5d2fe4ede3baabd20aeaad5e13538a45505b83bd2e1f387bd75068b
Contents?: true
Size: 732 Bytes
Versions: 4
Compression:
Stored size: 732 Bytes
Contents
require 'solr'
require 'rexml/document'
require "nokogiri"
require 'yaml'

# Mixin with helpers that read simple XML (parsed with REXML) and turn it
# into tag hashes or Solr document fields.
module Solrizer::XML::Extractor
  # Collects the archivist and donor tag lists from an XML string.
  #
  # @param text [String] XML source handed to REXML::Document.new
  # @return [Hash] the merge of the 'archivist_tags' and 'donor_tags'
  #   results of #extract_tag; a key is absent when its element is absent
  def extract_tags(text)
    doc = REXML::Document.new(text)
    extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
  end

  # Reads the comma-separated text of the element at /fields/<type>.
  #
  # @param doc [REXML::Document] parsed document to search
  # @param type [String] element name looked up under /fields
  # @return [Hash] {} when the element is missing, otherwise
  #   { type => [whitespace-stripped tag strings] }
  def extract_tag(doc, type)
    tags = doc.elements["/fields/#{type}"]
    return {} unless tags

    # Element#text is nil for an element with no text node (e.g. <x/>);
    # to_s turns that into an empty tag list instead of a NoMethodError.
    { type => tags.text.to_s.split(/,/).map(&:strip) }
  end

  #
  # This method extracts solr fields from simple xml
  #
  # Every child element of the document root is appended to solr_doc as a
  # field named "<element name>_t" whose value is the element's text (an
  # empty string when the element has no text).
  #
  # @param text [String] XML source handed to REXML::Document.new
  # @param solr_doc [Solr::Document] document the fields are appended to;
  #   a new one is created per call when omitted
  # @return [Solr::Document] the same solr_doc that was passed in or created
  def xml_to_solr(text, solr_doc = Solr::Document.new)
    doc = REXML::Document.new(text)
    root = doc.root
    # Input without a root element (e.g. an empty string) has nothing to
    # extract; hand the document back untouched rather than crash on nil.
    return solr_doc unless root

    root.elements.each do |element|
      solr_doc << Solr::Field.new(:"#{element.name}_t" => "#{element.text}")
    end
    solr_doc
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
solrizer-1.0.0 | lib/solrizer/xml/extractor.rb |
solrizer-0.3.2 | lib/solrizer/xml/extractor.rb |
solrizer-0.3.1 | lib/solrizer/xml/extractor.rb |
solrizer-0.3.0 | lib/solrizer/xml/extractor.rb |