lib/dullard/reader.rb in dullard-0.0.5 vs lib/dullard/reader.rb in dullard-0.1.0
- old
+ new
@@ -1,14 +1,88 @@
-require 'zip/zipfilesystem'
+require 'zip/filesystem'
require 'nokogiri'
module Dullard; end
class Dullard::Workbook
- def initialize(file)
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
+ # Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
+ FORMATS = {
+ 'general' => :float,
+ '0' => :float,
+ '0.00' => :float,
+ '#,##0' => :float,
+ '#,##0.00' => :float,
+ '0%' => :percentage,
+ '0.00%' => :percentage,
+ '0.00E+00' => :float,
+ '# ?/?' => :float, #??? TODO:
+ '# ??/??' => :float, #??? TODO:
+ 'mm-dd-yy' => :date,
+ 'd-mmm-yy' => :date,
+ 'd-mmm' => :date,
+ 'mmm-yy' => :date,
+ 'h:mm am/pm' => :date,
+ 'h:mm:ss am/pm' => :date,
+ 'h:mm' => :time,
+ 'h:mm:ss' => :time,
+ 'm/d/yy h:mm' => :date,
+ '#,##0 ;(#,##0)' => :float,
+ '#,##0 ;[red](#,##0)' => :float,
+ '#,##0.00;(#,##0.00)' => :float,
+ '#,##0.00;[red](#,##0.00)' => :float,
+ 'mm:ss' => :time,
+ '[h]:mm:ss' => :time,
+ 'mmss.0' => :time,
+ '##0.0e+0' => :float,
+ '@' => :float,
+ #-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
+ "yyyy\\-mm\\-dd" => :date,
+ 'dd/mm/yy' => :date,
+ 'hh:mm:ss' => :time,
+ "dd/mm/yy\\ hh:mm" => :datetime,
+ 'm/d/yy' => :date,
+ 'mm/dd/yy' => :date,
+ 'mm/dd/yyyy' => :date,
+ }
+
+ STANDARD_FORMATS = {
+ 0 => 'General',
+ 1 => '0',
+ 2 => '0.00',
+ 3 => '#,##0',
+ 4 => '#,##0.00',
+ 9 => '0%',
+ 10 => '0.00%',
+ 11 => '0.00E+00',
+ 12 => '# ?/?',
+ 13 => '# ??/??',
+ 14 => 'mm-dd-yy',
+ 15 => 'd-mmm-yy',
+ 16 => 'd-mmm',
+ 17 => 'mmm-yy',
+ 18 => 'h:mm AM/PM',
+ 19 => 'h:mm:ss AM/PM',
+ 20 => 'h:mm',
+ 21 => 'h:mm:ss',
+ 22 => 'm/d/yy h:mm',
+ 37 => '#,##0 ;(#,##0)',
+ 38 => '#,##0 ;[Red](#,##0)',
+ 39 => '#,##0.00;(#,##0.00)',
+ 40 => '#,##0.00;[Red](#,##0.00)',
+ 45 => 'mm:ss',
+ 46 => '[h]:mm:ss',
+ 47 => 'mmss.0',
+ 48 => '##0.0E+0',
+ 49 => '@',
+ }
+
+ def initialize(file, user_defined_formats = {})
@file = file
- @zipfs = Zip::ZipFile.open(@file)
+ @zipfs = Zip::File.open(@file)
+ @user_defined_formats = user_defined_formats
+ read_styles
end
def sheets
workbook = Nokogiri::XML::Document.parse(@zipfs.file.open("xl/workbook.xml"))
@sheets = workbook.css("sheet").each_with_index.map {|n,i| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId"), i+1) }
@@ -31,10 +105,55 @@
end
end
@string_table
end
+ def read_styles
+ doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
+
+ @num_formats = {}
+ @cell_xfs = []
+
+ doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
+ numFmtId = numFmt.attributes['numFmtId'].value.to_i
+ formatCode = numFmt.attributes['formatCode'].value
+ @num_formats[numFmtId] = formatCode
+ end
+
+ doc.css('/styleSheet/cellXfs/xf').each do |xf|
+ numFmtId = xf.attributes['numFmtId'].value.to_i
+ @cell_xfs << numFmtId
+ end
+ end
+
+
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
+ # convert internal excelx attribute to a format
+ def attribute2format(s)
+ id = @cell_xfs[s.to_i].to_i
+ result = @num_formats[id]
+
+ if result == nil
+ if STANDARD_FORMATS.has_key? id
+ result = STANDARD_FORMATS[id]
+ end
+ end
+
+ result.downcase
+ end
+
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
+ def format2type(format)
+ if FORMATS.has_key? format
+ FORMATS[format]
+ elsif @user_defined_formats.has_key? format
+ @user_defined_formats[format]
+ else
+ :float
+ end
+ end
+
def zipfs
@zipfs
end
def close
@@ -55,25 +174,31 @@
def string_lookup(i)
@workbook.string_table[i]
end
def rows
- Enumerator.new(rows_size) do |y|
+ Enumerator.new(row_count) do |y|
next unless @file
@file.rewind
shared = false
row = nil
column = nil
+ cell_type = nil
Nokogiri::XML::Reader(@file).each do |node|
case node.node_type
when Nokogiri::XML::Reader::TYPE_ELEMENT
case node.name
when "row"
row = []
column = 0
next
when "c"
+ if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
+ cell_format_index = node.attributes['s'].to_i
+ cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
+ end
+
rcolumn = node.attributes["r"]
if rcolumn
rcolumn.delete!("0-9")
while column < self.class.column_names.size and rcolumn != self.class.column_names[column]
row << nil
@@ -89,11 +214,25 @@
y << row
next
end
end
value = node.value
+
if value
+ case cell_type
+ when :datetime
+ when :time
+ when :date
+ value = (DateTime.new(1899,12,30) + value.to_f)
+ when :percentage # ? TODO
+ when :float
+ value = value.to_f
+ else
+ # leave as string
+ end
+ cell_type = nil
+
row << (shared ? string_lookup(value.to_i) : value)
end
end
end
end
@@ -111,30 +250,31 @@
z = y.map(&proc).flatten
@column_names = x + y + z
end
end
- private
- def path
- "xl/worksheets/sheet#{@index}.xml"
- end
-
- def rows_size
- if defined? @rows_size
- @rows_size
+ def row_count
+ if defined? @row_count
+ @row_count
elsif @file
@file.rewind
Nokogiri::XML::Reader(@file).each do |node|
if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
case node.name
when "dimension"
if ref = node.attributes["ref"]
- break @rows_size = ref.scan(/\d+$/).first.to_i
+ break @row_count = ref.scan(/\d+$/).first.to_i
end
when "sheetData"
- break @rows_size = nil
+ break @row_count = nil
end
end
end
end
end
+
+ private
+ def path
+ "xl/worksheets/sheet#{@index}.xml"
+ end
+
end