require 'open-uri'
require 'fastercsv'

module CSV

  # Extract and transform for CSV data supplied inline (as a string), from a
  # local file, or from a remote URI.  Uses FasterCSV and open-uri.
  class ET < ETL

    # Only sources that look like one of these URIs are handed to open-uri.
    # Kernel#open treats a leading "|" as a command to spawn, so arbitrary
    # strings (for example inline CSV contents) must never reach it.
    REMOTE_SOURCE = %r{\A(?:https?|ftp)://}i

    # The first parsed row, assigned after the transform stage when the
    # :extract_header option is truthy; left unset otherwise.
    attr_reader :header

    # NOTE(review): after_transform is presumably a callback macro supplied by
    # the ETL superclass (not visible in this file) -- confirm.
    after_transform :get_header_conditionally

    protected

      # Removes the first row from the parsed data and keeps it as the header
      # when the :extract_header option is set.
      def get_header_conditionally
        @header = @raw.shift if self.options[:extract_header]
      end

      # Attempts to get a string from a file, a uri, or a string, in that
      # order.  The first strategy that succeeds stores its result in @raw.
      #
      # Raises ArgumentError when none of the strategies produced any data,
      # including when no :source option was given.
      def extract
        obj = self.options.fetch(:source, nil)
        extract_locally(obj) || extract_remotely(obj) || extract_from_string(obj)
        raise ArgumentError, "Could not determine what #{obj.inspect} was.  CSV::ET cannot work with this data." unless @raw
      end

      # Handles local filename cases, reading the contents of the file.
      # Returns true when the file was read into @raw, false otherwise.
      def extract_locally(filename)
        # File.file? raises TypeError for nil and other non-path objects;
        # treat those as "not a local file" so the remaining strategies (and
        # the ArgumentError in #extract) still get their turn.
        return false unless filename.respond_to?(:to_str) || filename.respond_to?(:to_path)
        # File.file? rather than File.exist?: a directory exists but cannot be
        # read as CSV.
        return false unless File.file?(filename)

        @raw = File.read(filename)
        ET.logger.info "Extracted the data from the filesystem"
        true
      end

      # Handles remote uri cases, reading the remote resource with open-uri,
      # part of the Standard Library.  Returns true when the resource was read
      # into @raw, false when the source is not remote or could not be read.
      def extract_remotely(uri)
        return false unless remote_source?(uri)

        open(uri) { |io| @raw = io.read }
        ET.logger.info "Extracted the data from a remote location."
        true
      rescue
        ET.logger.info "Tested whether #{uri} was a remote resource.  Failed to read it."
        false
      end

      # True when the source is safe to pass to open-uri: either an object
      # that knows how to open itself (open-uri delegates to its #open), or a
      # string beginning with an http, https or ftp scheme.
      def remote_source?(source)
        return true if source.respond_to?(:open)
        return false unless source.respond_to?(:to_str)

        source.to_str =~ REMOTE_SOURCE ? true : false
      end

      # If this is a string, assumes that the contents of the string are CSV
      # contents.  Returns true when the string was accepted, false otherwise.
      def extract_from_string(string)
        return false unless string.is_a?(String)

        @raw = string
        true
      end

      # Parses the extracted text with FasterCSV.  The :parse_with option is
      # merged over default_csv_opts, so callers can override any default.
      #
      # NOTE(review): this reads @data while the extract step writes @raw;
      # presumably the ETL base class promotes @raw to @data between stages --
      # confirm against ETL.
      def transform
        csv_opts = default_csv_opts.merge(self.options.fetch(:parse_with, {}))
        ET.logger.info "Parsing the data with FasterCSV and #{csv_opts.inspect}"
        @raw = FCSV.parse(@data, csv_opts)
      end

      # Default FasterCSV options: apply all of its built-in converters.
      def default_csv_opts; {:converters => :all}; end
  end

  # Try this out for size:
  # file = CSV::ET.process(:source => 'http://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv')
  
end