lib/remote_table/properties.rb in remote_table-1.2.2 vs lib/remote_table/properties.rb in remote_table-1.2.3

- old
+ new

@@ -1,14 +1,21 @@ require 'uri' class RemoteTable # Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically. class Properties attr_reader :t + attr_reader :current_options + def initialize(t) @t = t + @current_options = t.options.dup end + def update(options) + current_options.update options + end + # The parsed URI of the file to get. def uri return @uri if @uri.is_a?(::URI) @uri = ::URI.parse t.url if @uri.host == 'spreadsheets.google.com' @@ -20,23 +27,23 @@ # Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you... # * call [] # * call each # Defaults to false. def streaming - t.options['streaming'] || false + current_options['streaming'] || false end # Defaults to true. def warn_on_multiple_downloads - t.options['warn_on_multiple_downloads'] != false + current_options['warn_on_multiple_downloads'] != false end # The headers specified by the user # # Default: :first_row def headers - t.options['headers'].nil? ? :first_row : t.options['headers'] + current_options['headers'].nil? ? :first_row : current_options['headers'] end def use_first_row_as_header? headers == :first_row end @@ -47,74 +54,79 @@ # The sheet specified by the user as a number or a string # # Default: 0 def sheet - t.options['sheet'] || 0 + current_options['sheet'] || 0 end # Whether to keep blank rows # # Default: false def keep_blank_rows - t.options['keep_blank_rows'] || false + current_options['keep_blank_rows'] || false end # Form data to send in with the download request def form_data - t.options['form_data'] + current_options['form_data'] end # How many rows to skip # # Default: 0 def skip - t.options['skip'].to_i + current_options['skip'].to_i end - # Likely external encoding - # - # Default: "UTF-8" - def encoding - @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'ISO-8859-1', 'US-ASCII', 'WINDOWS-1252', 'ASCII-8BIT', 'UTF-8' ]) + def internal_encoding + (current_options['encoding'] || 'UTF-8').upcase end + def external_encoding + 'UTF-8' + end + + def external_encoding_iconv + 'UTF-8//TRANSLIT' + end + # The delimiter # # Default: "," def delimiter - t.options['delimiter'] || ',' + current_options['delimiter'] || ',' end # The XPath used to find rows def row_xpath - t.options['row_xpath'] + current_options['row_xpath'] end # The XPath used to find columns def column_xpath - t.options['column_xpath'] + current_options['column_xpath'] end # The CSS selector used to find rows def row_css - t.options['row_css'] + current_options['row_css'] end # The CSS selector used to find columns def column_css - t.options['column_css'] + current_options['column_css'] end # The compression type. # # Default: guessed from URI. # # Can be specified as: "gz", "zip", "bz2", "exe" (treated as "zip") def compression - clue = if t.options['compression'] - t.options['compression'].to_s + clue = if current_options['compression'] + current_options['compression'].to_s else ::File.extname uri.path end case clue.downcase when /gz/, /gunzip/ @@ -132,12 +144,12 @@ # # Default: guessed from URI. # # Can be specified as: "tar" def packing - clue = if t.options['packing'] - t.options['packing'].to_s + clue = if current_options['packing'] + current_options['packing'].to_s else ::File.extname(uri.path.sub(/\.#{compression}\z/, '')) end case clue.downcase when /tar/ @@ -148,29 +160,29 @@ # The glob used to pick a file out of an archive. # # Example: # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'glob' => '/*.csv' def glob - t.options['glob'] + current_options['glob'] end # The filename, which can be used to pick a file out of an archive. # # Example: # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'filename' => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' def filename - t.options['filename'] + current_options['filename'] end # Cut columns up to this character def cut - t.options['cut'] + current_options['cut'] end # Crop rows after this line def crop - t.options['crop'] + current_options['crop'] end # The fixed-width schema, given as an array # # Example: @@ -181,35 +193,35 @@ # [ 'spacer', 1 ], # [ 'header5', 10, { :type => :string } ], # [ 'spacer', 12 ], # [ 'header6', 10, { :type => :string } ]]) def schema - t.options['schema'] + current_options['schema'] end - # The name of the fixed-width schema according to Slither + # The name of the fixed-width schema according to FixedWidth def schema_name - t.options['schema_name'] + current_options['schema_name'] end # A proc to call to decide whether to return a row. def select - t.options['select'] + current_options['select'] end # A proc to call to decide whether to return a row. def reject - t.options['reject'] + current_options['reject'] end # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row. def errata - return unless t.options.has_key? 'errata' - @errata ||= if t.options['errata'].is_a? ::Hash - ::Errata.new t.options['errata'] + return unless current_options.has_key? 'errata' + @errata ||= if current_options['errata'].is_a? ::Hash + ::Errata.new current_options['errata'] else - t.options['errata'] + current_options['errata'] end end # Get the format in the form of RemoteTable::Format::Excel, etc. # @@ -218,11 +230,11 @@ # Default: guessed from file extension (which is usually the same as the URI, but sometimes not if you pick out a specific file from an archive) # # Can be specified as: "xlsx", "xls", "csv", "ods", "fixed_width", "html" def format return Format::Delimited if uri.host == 'spreadsheets.google.com' - clue = if t.options['format'] - t.options['format'].to_s + clue = if current_options['format'] + current_options['format'].to_s else ::File.extname t.local_file.path end case clue.downcase when /xlsx/, /excelx/