Sha256: 31ebc986d5d2fe4ede3baabd20aeaad5e13538a45505b83bd2e1f387bd75068b

Contents?: true

Size: 732 Bytes

Versions: 4

Compression:

Stored size: 732 Bytes

Contents

require 'solr'
require 'rexml/document'
require "nokogiri"
require 'yaml'

# Mixin with helpers that pull tag lists and Solr fields out of simple XML
# strings, parsed with REXML.
module Solrizer::XML::Extractor

  # Extracts the archivist and donor tag lists from an XML string.
  #
  # @param text [String] XML source; tags are looked up under a <fields> root
  # @return [Hash] 'archivist_tags' and/or 'donor_tags' mapped to arrays of
  #   stripped tag strings; a key is omitted when its element is missing
  def extract_tags(text)
    doc = REXML::Document.new(text)
    extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
  end

  # Reads one comma-separated tag element located at /fields/<type>.
  #
  # @param doc [REXML::Document] parsed document to search
  # @param type [String] element name, also used as the key of the result
  # @return [Hash] {} when the element is absent, otherwise
  #   { type => [tag, ...] } with surrounding whitespace stripped from each tag
  def extract_tag(doc, type)
    tags = doc.elements["/fields/#{type}"]
    return {} unless tags

    # Element#text is nil for an empty element (e.g. <donor_tags/>); to_s
    # turns that into "" so the result is an empty list instead of a
    # NoMethodError on nil.
    { type => tags.text.to_s.split(/,/).map { |t| t.strip } }
  end

  #
  # This method extracts solr fields from simple xml
  #
  # Every child element of the root becomes a field named "<element name>_t"
  # whose value is the element's text ("" when the element has no text).
  #
  # @param text [String] XML source
  # @param solr_doc [Solr::Document] document the fields are appended to
  # @return [Solr::Document] the same solr_doc that was passed in
  def xml_to_solr(text, solr_doc = Solr::Document.new)
    root = REXML::Document.new(text).root
    # Empty or element-less input parses to a document with no root; there is
    # nothing to add, so hand the document back untouched rather than crash.
    return solr_doc unless root

    root.elements.each do |element|
      solr_doc << Solr::Field.new(:"#{element.name}_t" => element.text.to_s)
    end

    solr_doc
  end

end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
solrizer-1.0.0 lib/solrizer/xml/extractor.rb
solrizer-0.3.2 lib/solrizer/xml/extractor.rb
solrizer-0.3.1 lib/solrizer/xml/extractor.rb
solrizer-0.3.0 lib/solrizer/xml/extractor.rb