lib/remote_table/file.rb in remote_table-0.2.11 vs lib/remote_table/file.rb in remote_table-0.2.12
- old
+ new
@@ -2,10 +2,12 @@
class File
# Read/write options describing the file to parse. In the visible part of
# initialize, filename, format, delimiter, headers, schema, schema_name and trap
# are assigned from the bus hash.
attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
# Assigned from bus[:encoding] in initialize, falling back to 'UTF-8'.
attr_accessor :encoding
# Assigned by tabulate.
attr_accessor :path
attr_accessor :keep_blank_rows
# Assigned from bus[:row_xpath] and bus[:column_xpath] in initialize.
+ attr_accessor :row_xpath
+ attr_accessor :column_xpath
def initialize(bus)
@filename = bus[:filename]
@format = bus[:format] || format_from_filename
@delimiter = bus[:delimiter]
@@ -17,26 +19,15 @@
@headers = bus[:headers]
@schema = bus[:schema]
@schema_name = bus[:schema_name]
@trap = bus[:trap]
@encoding = bus[:encoding] || 'UTF-8'
+ @row_xpath = bus[:row_xpath]
+ @column_xpath = bus[:column_xpath]
extend "RemoteTable::#{format.to_s.camelcase}".constantize
end
- class << self
- # http://santanatechnotes.blogspot.com/2005/12/matching-iso-8859-1-strings-with-ruby.html
- def convert_to_utf8(str, encoding)
- if encoding == 'UTF-8' or encoding == 'UTF8'
- str.toutf8 # just in case
- else
- @_iconv ||= Hash.new
- @_iconv[encoding] ||= Iconv.new 'UTF-8', encoding
- @_iconv[encoding].iconv(str).toutf8
- end
- end
- end
-
# Stores +path+ on the receiver and returns the receiver itself.
#
# Before storing the path, calls define_fixed_width_schema! when the format is
# :fixed_width and the schema was supplied as an Array.
def tabulate(path)
  # TODO move to generic subclass callback
  needs_fixed_width_schema = format == :fixed_width && schema.is_a?(Array)
  define_fixed_width_schema! if needs_fixed_width_schema
  self.path = path
  self
end
@@ -96,9 +87,10 @@
# Guesses the table format from the extension of +filename+.
#
# Returns :xls or :ods when the extension is exactly "xls" or "ods", :html when
# the extension begins with "htm", and :csv in every other case, including a
# filename with no extension.
#
# NOTE(review): blank? is not core Ruby; presumably provided by ActiveSupport - confirm.
# NOTE(review): the comparison is case-sensitive and filename is passed to
# ::File.extname unguarded; confirm whether "XLS" or a nil filename can reach here.
def format_from_filename
extname = ::File.extname(filename).gsub('.', '') # strip every "." so only the bare extension remains
return :csv if extname.blank?
# The local +format+ shadows the format accessor for the rest of this method.
format = [ :xls, :ods ].detect { |i| i == extname.to_sym } # nil unless the extension is exactly xls or ods
# Anchored at the start only, so any extension beginning with "htm" (htm, html, ...) matches.
+ format = :html if extname =~ /\Ahtm/
format = :csv if format.blank? # fall back to CSV for unrecognised extensions
format
end
end
end