Sha256: e0428c61d4f6c529fbc557418603adbc482760a0a85943ead6261255e9f42a9b

Contents?: true

Size: 1.76 KB

Versions: 3

Compression:

Stored size: 1.76 KB

Contents

require 'rexml/document'

module ETL
  module Parser
    # Parser that reads rows out of XML files with REXML.
    #
    # The source definition must provide:
    # * <tt>:collection</tt>: XPath selecting the elements that become rows
    # * <tt>:fields</tt>: list of field definitions, each either a Symbol
    #   (used as both the field name and its XPath) or a Hash with
    #   <tt>:name</tt> and optional <tt>:xpath</tt> and <tt>:type</tt> keys
    #
    # +file+, +source+ and +convert+ are not defined in this class; they are
    # expected to come from the parent parser.
    class XmlParser < ETL::Parser::Parser
      include Enumerable

      # Initialize the parser
      # * <tt>source</tt>: The Source object
      def initialize(source)
        super
        configure
      end

      # Yields each row as a Hash keyed by field name. One row is produced
      # for every element matched by the collection XPath, in every file
      # matched by the glob pattern returned by +file+.
      #
      # Returns an Enumerator when called without a block.
      def each
        return enum_for(:each) unless block_given?

        Dir.glob(file).each do |path|
          doc = nil
          elapsed = Benchmark.realtime do
            # The block form of File.open closes the handle as soon as REXML
            # has built the document, instead of leaking one handle per file.
            doc = File.open(path) { |io| REXML::Document.new(io) }
          end
          Engine.logger.info "XML #{path} parsed in #{elapsed}s"
          doc.elements.each(@collection_xpath) do |element|
            row = {}
            fields.each do |f|
              value = element.text(f.xpath)
              row[f.name] = convert(f.name, value, f.type)
            end
            yield row
          end
        end
      end

      # Get an array of defined fields
      def fields
        @fields ||= []
      end

      private

      # Reads the collection XPath and the field definitions from the source
      # definition and populates +fields+.
      #
      # Raises a RuntimeError when <tt>:collection</tt> is missing, and a
      # DefinitionError when <tt>:fields</tt> is missing or contains an entry
      # that is neither a Symbol nor a Hash.
      def configure
        @collection_xpath = source.definition[:collection]
        raise "Collection XPath is required" if @collection_xpath.nil?

        field_definitions = source.definition[:fields]
        if field_definitions.nil?
          raise DefinitionError, "Field definitions are required"
        end

        field_definitions.each do |options|
          case options
          when Symbol
            fields << Field.new(options, options.to_s)
          when Hash
            # Default the XPath to the field name without writing the default
            # back into the caller's definition hash.
            xpath = options[:xpath] || options[:name]
            # NOTE(review): a missing :type is passed through as nil, so the
            # :string default of Field#initialize does not apply here --
            # confirm that convert handles a nil type as intended.
            fields << Field.new(options[:name], xpath.to_s, options[:type])
          else
            raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
          end
        end
      end

      # Describes one field to extract from each collection element.
      class Field
        # name:: key under which the value is stored in the row
        # xpath:: XPath, relative to the collection element, of the value
        # type:: type passed to +convert+ for the extracted value
        attr_reader :name, :xpath, :type

        # +type+ defaults to <tt>:string</tt> only when the argument is
        # omitted; an explicit nil is stored as nil.
        def initialize(name, xpath, type=:string)
          @name = name
          @xpath = xpath
          @type = type
        end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
activewarehouse-etl-0.2.0 lib/etl/parser/xml_parser.rb
activewarehouse-etl-0.3.0 lib/etl/parser/xml_parser.rb
activewarehouse-etl-0.4.0 lib/etl/parser/xml_parser.rb