Sha256: e0428c61d4f6c529fbc557418603adbc482760a0a85943ead6261255e9f42a9b
Contents?: true
Size: 1.76 KB
Versions: 3
Compression:
Stored size: 1.76 KB
Contents
require 'benchmark'
require 'rexml/document'

module ETL
  module Parser
    # Parser that turns XML documents into row hashes.
    #
    # The source definition must supply:
    # * <tt>:collection</tt>: XPath selecting the elements that become rows
    # * <tt>:fields</tt>: list of field definitions, each either a Symbol
    #   (used as both the field name and its XPath) or a Hash with
    #   <tt>:name</tt> and optional <tt>:xpath</tt> / <tt>:type</tt> keys
    class XmlParser < ETL::Parser::Parser
      include Enumerable

      # Initialize the parser
      # * <tt>source</tt>: The Source object
      #
      # Raises if the definition has no <tt>:collection</tt> XPath or contains
      # a field definition that is neither a Symbol nor a Hash.
      def initialize(source)
        super
        configure
      end

      # Yields one row Hash (field name => converted value) for every element
      # matched by the collection XPath, in every file matched by the glob
      # pattern returned by +file+.
      #
      # Returns an Enumerator when called without a block.
      def each
        return enum_for(:each) unless block_given?

        # +file+ and +convert+ are not defined in this class; presumably they
        # come from ETL::Parser::Parser -- verify against the superclass.
        Dir.glob(file).each do |path|
          doc = nil
          elapsed = Benchmark.realtime do
            # Block form closes the handle as soon as the document is built;
            # REXML::Document.new parses the whole IO before returning.
            doc = File.open(path) { |io| REXML::Document.new(io) }
          end
          Engine.logger.info "XML #{path} parsed in #{elapsed}s"

          doc.elements.each(@collection_xpath) do |element|
            row = {}
            fields.each do |f|
              # XPath is evaluated relative to the current collection element.
              value = element.text(f.xpath)
              row[f.name] = convert(f.name, value, f.type)
            end
            yield row
          end
        end
      end

      # Get an array of defined fields
      def fields
        @fields ||= []
      end

      private

      # Reads the collection XPath and the field list from the source
      # definition and populates +fields+.
      def configure
        @collection_xpath = source.definition[:collection]
        raise "Collection XPath is required" if @collection_xpath.nil?

        source.definition[:fields].each do |options|
          case options
          when Symbol
            fields << Field.new(options, options.to_s)
          when Hash
            # Fall back to the field name as XPath without writing the default
            # back into the caller's definition hash.
            xpath = options[:xpath] || options[:name]
            # NOTE(review): a missing :type is passed through as nil, which
            # overrides Field's :string default -- confirm convert accepts nil.
            fields << Field.new(options[:name], xpath.to_s, options[:type])
          else
            raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
          end
        end
      end

      # Value object describing one field to extract from each element.
      class Field
        # name:  key used in the row hash
        # xpath: XPath (relative to the collection element) of the field text
        # type:  type passed to the parser's convert step
        attr_reader :name, :xpath, :type

        def initialize(name, xpath, type=:string)
          @name = name
          @xpath = xpath
          @type = type
        end
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
activewarehouse-etl-0.2.0 | lib/etl/parser/xml_parser.rb |
activewarehouse-etl-0.3.0 | lib/etl/parser/xml_parser.rb |
activewarehouse-etl-0.4.0 | lib/etl/parser/xml_parser.rb |