Sha256: a7afd157ef5d9c235e35c089f0763b7eefc58e8134afac304d0200fe9bd3ea50
Contents?: true
Size: 1.79 KB
Versions: 4
Compression:
Stored size: 1.79 KB
Contents
require 'benchmark'
require 'rexml/document'

module ETL
  module Parser
    # Parser that reads rows out of XML files using REXML.
    #
    # The source definition must supply:
    # * <tt>:collection</tt>: XPath selecting the elements that become rows
    # * <tt>:fields</tt>: list of field definitions, each either a Symbol
    #   (used as both the field name and the XPath) or a Hash with
    #   <tt>:name</tt> and optional <tt>:xpath</tt> / <tt>:type</tt> keys
    class XmlParser < ETL::Parser::Parser
      # Initialize the parser
      # * <tt>source</tt>: The Source object
      # * <tt>options</tt>: Parser options Hash
      def initialize(source, options={})
        super
        configure
      end

      # Yields one Hash per element matched by the collection XPath, for every
      # file matched by the glob pattern returned from +file+ (a method not
      # defined in this class; presumably inherited -- confirm against the
      # base parser).
      #
      # Each row maps field name => converted text of the node found at the
      # field's XPath, relative to the row element.
      def each
        Dir.glob(file).each do |path|
          doc = nil
          elapsed = Benchmark.realtime do
            # Block form closes the handle once REXML has built the document;
            # the document is not read from the IO again after construction.
            doc = File.open(path) { |io| REXML::Document.new(io) }
          end
          Engine.logger.info "XML #{path} parsed in #{elapsed}s"

          doc.elements.each(@collection_xpath) do |element|
            row = {}
            fields.each do |f|
              value = element.text(f.xpath)
              # +convert+ is defined outside this file.
              row[f.name] = convert(f.name, value, f.type)
            end
            yield row
          end
        end
      end

      # Get an array of defined fields
      def fields
        @fields ||= []
      end

      private

      # Reads the collection XPath and field definitions from the source
      # definition and populates +fields+.
      #
      # Raises a RuntimeError when no collection XPath is given, and
      # DefinitionError when a field definition is neither a Symbol nor a Hash.
      def configure
        @collection_xpath = source.definition[:collection]
        raise "Collection XPath is required" if @collection_xpath.nil?

        source.definition[:fields].each do |field_def|
          case field_def
          when Symbol
            fields << Field.new(field_def, field_def.to_s)
          when Hash
            # Fall back to the field name as the XPath without writing the
            # default back into the caller-owned definition hash.
            xpath = field_def[:xpath] || field_def[:name]
            # NOTE(review): when :type is absent this passes nil explicitly, so
            # the Field's type is nil rather than the :string default -- confirm
            # that is what +convert+ expects.
            fields << Field.new(field_def[:name], xpath.to_s, field_def[:type])
          else
            raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
          end
        end
      end

      # Value object describing one field to extract from a row element.
      class Field
        attr_reader :name, :xpath, :type

        # * <tt>name</tt>: key used for this field in the yielded row
        # * <tt>xpath</tt>: XPath evaluated against the row element
        # * <tt>type</tt>: type passed through to +convert+ (defaults to :string)
        def initialize(name, xpath, type=:string)
          @name = name
          @xpath = xpath
          @type = type
        end
      end
    end
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems