require 'uri'
class RemoteTable
  # Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically.
  #
  # Every reader below looks at #current_options first and falls back to a
  # default (or to a guess based on the URI) when the option is absent.
  class Properties
    # Hosts whose URLs are always fetched as CSV (see #uri and #format).
    GOOGLE_DOCS_HOSTS = %w{ spreadsheets.google.com docs.google.com }.freeze

    # The object these properties describe. This class calls #options, #url
    # and #local_file on it.
    attr_reader :t

    # The option hash (symbol keys) that all readers consult.
    attr_reader :current_options

    def initialize(t)
      @t = t
      @current_options = t.options.symbolize_keys
    end

    # Merge +options+ into the current options and return the merged hash.
    #
    # Keys are symbolized first so that string keys behave the same way here
    # as they do when passed to the constructor.
    def update(options)
      normalized = {}
      options.each do |key, value|
        normalized[key.respond_to?(:to_sym) ? key.to_sym : key] = value
      end
      # Drop the memoized Errata so a newly supplied :errata takes effect.
      @errata = nil if normalized.key?(:errata)
      current_options.update normalized
    end

    # The parsed URI of the file to get.
    #
    # For Google Docs hosts the query string is rewritten so that it starts
    # with <tt>output=csv</tt>; any existing <tt>output</tt> parameter is
    # dropped and the remaining parameters keep their order.
    def uri
      return @uri if @uri.is_a?(::URI)
      parsed = ::URI.parse t.url
      if GOOGLE_DOCS_HOSTS.include?(parsed.host)
        # The query is nil when the URL has no "?" part at all.
        kept = (parsed.query || '').split('&').reject do |param|
          param.empty? || param == 'output' || param =~ /\Aoutput=/
        end
        parsed.query = (['output=csv'] + kept).join('&')
      end
      # Only memoize once the URI is fully prepared.
      @uri = parsed
    end

    # Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you...
    # * call []
    # * call each
    # Defaults to false.
    def streaming
      current_options[:streaming] || false
    end

    # Defaults to true; only an explicit +false+ turns it off.
    def warn_on_multiple_downloads
      current_options[:warn_on_multiple_downloads] != false
    end

    # The headers specified by the user
    #
    # Default: :first_row
    def headers
      # +false+ is a meaningful value here (see #output_class), so only nil
      # falls back to the default.
      current_options[:headers].nil? ? :first_row : current_options[:headers]
    end

    # True when the headers are taken from the first row.
    def use_first_row_as_header?
      headers == :first_row
    end

    # The class used for each row: Array when headers are disabled
    # (<tt>:headers => false</tt>), otherwise an ordered hash.
    def output_class
      headers == false ? ::Array : ::ActiveSupport::OrderedHash
    end

    # The sheet specified by the user as a number or a string
    #
    # Default: 0
    def sheet
      current_options[:sheet] || 0
    end

    # Whether to keep blank rows
    #
    # Default: false
    def keep_blank_rows
      current_options[:keep_blank_rows] || false
    end

    # Form data to send in with the download request
    def form_data
      current_options[:form_data]
    end

    # How many rows to skip
    #
    # Default: 0
    def skip
      current_options[:skip] || 0
    end

    # The encoding given by the :encoding option, upcased.
    #
    # Default: "UTF-8"
    def internal_encoding
      (current_options[:encoding] || 'UTF-8').upcase
    end

    # Always "UTF-8".
    def external_encoding
      'UTF-8'
    end

    # The external encoding with the iconv transliteration suffix.
    def external_encoding_iconv
      'UTF-8//TRANSLIT'
    end

    # The delimiter
    #
    # Default: ","
    def delimiter
      current_options[:delimiter] || ','
    end

    # The XPath used to find rows
    def row_xpath
      current_options[:row_xpath]
    end

    # The XPath used to find columns
    def column_xpath
      current_options[:column_xpath]
    end

    # The CSS selector used to find rows
    def row_css
      current_options[:row_css]
    end

    # The CSS selector used to find columns
    def column_css
      current_options[:column_css]
    end

    # The compression type.
    #
    # Default: guessed from the extension of the URI path; nil if nothing matches.
    #
    # Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)
    #
    # NOTE(review): this method returns :exe as-is; the "treated as :zip"
    # handling is presumably done by the caller -- confirm.
    def compression
      if current_options.has_key?(:compression)
        return current_options[:compression]
      end
      case ::File.extname(uri.path).downcase
      when /gz/, /gunzip/
        :gz
      when /zip/
        :zip
      when /bz2/, /bunzip2/
        :bz2
      when /exe/
        :exe
      end
    end

    # The packing type.
    #
    # Default: guessed from URI (:tar when the path contains ".tar" as an
    # extension component, e.g. "x.tar" or "x.tar.gz"); nil otherwise.
    #
    # Can be specified as: :tar
    def packing
      if current_options.has_key?(:packing)
        return current_options[:packing]
      end
      if uri.path =~ %r{\.tar(?:\.|$)}i
        :tar
      end
    end

    # The glob used to pick a file out of an archive.
    #
    # Example:
    #     RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
    def glob
      current_options[:glob]
    end

    # The filename, which can be used to pick a file out of an archive.
    #
    # Example:
    #     RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
    def filename
      current_options[:filename]
    end

    # Cut columns up to this character
    def cut
      current_options[:cut]
    end

    # Crop rows after this line
    def crop
      current_options[:crop]
    end

    # The fixed-width schema, given as an array
    #
    # Example:
    #     RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
    #                      :format => :fixed_width,
    #                      :skip => 1,
    #                      :schema => [[ 'header4', 10, { :type => :string }  ],
    #                                  [  'spacer',  1 ],
    #                                  [  'header5', 10, { :type => :string } ],
    #                                  [  'spacer',  12 ],
    #                                  [  'header6', 10, { :type => :string } ]])
    def schema
      current_options[:schema]
    end

    # The name of the fixed-width schema according to FixedWidth
    def schema_name
      current_options[:schema_name]
    end

    # A proc to call to decide whether to return a row.
    def select
      current_options[:select]
    end

    # A proc to call to decide whether to leave a row out.
    #
    # NOTE(review): the filtering itself is not done in this class;
    # presumably rows for which the proc returns true are dropped -- confirm.
    def reject
      current_options[:reject]
    end

    # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row.
    #
    # A non-Hash value is returned as-is. Returns nil when no :errata option
    # was given. The result is memoized until :errata is changed via #update.
    def errata
      return unless current_options.has_key? :errata
      @errata ||= if current_options[:errata].is_a? ::Hash
        ::Errata.new current_options[:errata]
      else
        current_options[:errata]
      end
    end

    # Get the format in the form of RemoteTable::Format::Excel, etc.
    #
    # Note: treats all spreadsheets.google.com and docs.google.com URLs as Format::Delimited (i.e., CSV), even if a :format option is given
    #
    # Default: guessed from file extension (which is usually the same as the URI, but sometimes not if you pick out a specific file from an archive)
    #
    # Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html
    def format
      return Format::Delimited if google_docs?
      clue = if current_options.has_key?(:format)
        current_options[:format]
      else
        t.local_file.path
      end
      # The patterns are tried in order against the whole clue (for a local
      # file that is the full path), so /xlsx/ must come before /xls/.
      case clue.to_s.downcase
      when /xlsx/, /excelx/
        Format::Excelx
      when /xls/, /excel/
        Format::Excel
      when /csv/, /tsv/, /delimited/
        Format::Delimited
      when /ods/, /open_?office/
        Format::OpenOffice
      when /fixed_?width/
        Format::FixedWidth
      when /htm/
        Format::HTML
      when /xml/
        Format::XML
      else
        Format::Delimited
      end
    end

    private

    # Whether the table's URI points at one of the Google Docs hosts.
    def google_docs?
      GOOGLE_DOCS_HOSTS.include?(uri.host)
    end
  end
end