lib/dullard/reader.rb in dullard-0.1.0 vs lib/dullard/reader.rb in dullard-0.2.0
- old
+ new
@@ -1,21 +1,26 @@
require 'zip/filesystem'
require 'nokogiri'
-module Dullard; end
+module Dullard
+ class Error < StandardError; end
+ OOXMLEpoch = DateTime.new(1899,12,30)
+ SharedStringPath = 'xl/sharedStrings.xml'
+ StylesPath = 'xl/styles.xml'
+end
class Dullard::Workbook
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
# Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
FORMATS = {
'general' => :float,
'0' => :float,
'0.00' => :float,
'#,##0' => :float,
'#,##0.00' => :float,
- '0%' => :percentage,
- '0.00%' => :percentage,
+ '0%' => :float,
+ '0.00%' => :float,
'0.00E+00' => :float,
'# ?/?' => :float, #??? TODO:
'# ??/??' => :float, #??? TODO:
'mm-dd-yy' => :date,
'd-mmm-yy' => :date,
@@ -76,28 +81,46 @@
49 => '@',
}
def initialize(file, user_defined_formats = {})
@file = file
- @zipfs = Zip::File.open(@file)
+ begin
+ @zipfs = Zip::File.open(@file)
+ rescue Zip::Error => e
+ raise Dullard::Error, e.message
+ end
@user_defined_formats = user_defined_formats
read_styles
end
def sheets
- workbook = Nokogiri::XML::Document.parse(@zipfs.file.open("xl/workbook.xml"))
- @sheets = workbook.css("sheet").each_with_index.map {|n,i| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId"), i+1) }
+ begin
+ workbook = Nokogiri::XML::Document.parse(@zipfs.file.open('xl/workbook.xml'))
+ rescue Zip::Error
+ raise Dullard::Error, 'Invalid file, could not open xl/workbook.xml'
+ end
+ @sheets = workbook.css('sheet').each_with_index.map do |n, i|
+ Dullard::Sheet.new(self, n.attr('name'), n.attr('sheetId'), i+1)
+ end
end
def string_table
- @string_tabe ||= read_string_table
+ @string_table ||= read_string_table
end
def read_string_table
- @string_table = []
+ return [] unless @zipfs.file.exist? Dullard::SharedStringPath
+
+ begin
+ shared_string = @zipfs.file.open(Dullard::SharedStringPath)
+ rescue Zip::Error
+ raise Dullard::Error, 'Invalid file, could not open shared string file.'
+ end
+
entry = ''
- Nokogiri::XML::Reader(@zipfs.file.open("xl/sharedStrings.xml")).each do |node|
+ @string_table = []
+ Nokogiri::XML::Reader(shared_string).each do |node|
if node.name == "si" and node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
entry = ''
elsif node.name == "si" and node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT
@string_table << entry
elsif node.value?
@@ -106,24 +129,33 @@
end
@string_table
end
def read_styles
- doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
-
@num_formats = {}
@cell_xfs = []
+ return unless @zipfs.file.exist? Dullard::StylesPath
+
+ begin
+ doc = Nokogiri::XML(@zipfs.file.open(Dullard::StylesPath))
+ rescue Zip::Error
+ raise Dullard::Error, 'Invalid file, could not open styles'
+ end
doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
- numFmtId = numFmt.attributes['numFmtId'].value.to_i
- formatCode = numFmt.attributes['formatCode'].value
- @num_formats[numFmtId] = formatCode
+ if numFmt.attributes['numFmtId'] && numFmt.attributes['formatCode']
+ numFmtId = numFmt.attributes['numFmtId'].value.to_i
+ formatCode = numFmt.attributes['formatCode'].value
+ @num_formats[numFmtId] = formatCode
+ end
end
doc.css('/styleSheet/cellXfs/xf').each do |xf|
- numFmtId = xf.attributes['numFmtId'].value.to_i
- @cell_xfs << numFmtId
+ if xf.attributes['numFmtId']
+ numFmtId = xf.attributes['numFmtId'].value.to_i
+ @cell_xfs << numFmtId
+ end
end
end
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
@@ -166,79 +198,94 @@
def initialize(workbook, name, id, index)
@workbook = workbook
@name = name
@id = id
@index = index
- @file = @workbook.zipfs.file.open(path) if @workbook.zipfs.file.exist?(path)
+ begin
+ @file = @workbook.zipfs.file.open(path) if @workbook.zipfs.file.exist?(path)
+ rescue Zip::Error => e
+ raise Dullard::Error, "Couldn't open sheet #{index}: #{e.message}"
+ end
end
def string_lookup(i)
- @workbook.string_table[i]
+ @workbook.string_table[i] || (raise Dullard::Error, 'File invalid, invalid string table.')
end
def rows
Enumerator.new(row_count) do |y|
next unless @file
@file.rewind
shared = false
row = nil
+ cell_map = nil # Map of column letter to cell value for a row
column = nil
cell_type = nil
Nokogiri::XML::Reader(@file).each do |node|
case node.node_type
when Nokogiri::XML::Reader::TYPE_ELEMENT
case node.name
when "row"
- row = []
- column = 0
+ cell_map = {}
next
- when "c"
- if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
+ when 'c'
+ node_type = node.attributes['t']
+ cell_index = node.attributes['r']
+ if !cell_index
+ raise Dullard::Error, 'Invalid spreadsheet XML.'
+ end
+
+ if node_type != 's' && node_type != 'b'
cell_format_index = node.attributes['s'].to_i
cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
end
- rcolumn = node.attributes["r"]
- if rcolumn
- rcolumn.delete!("0-9")
- while column < self.class.column_names.size and rcolumn != self.class.column_names[column]
- row << nil
- column += 1
- end
- end
- shared = (node.attribute("t") == "s")
- column += 1
+ column = cell_index.delete('0-9')
+ shared = (node_type == 's')
next
end
when Nokogiri::XML::Reader::TYPE_END_ELEMENT
- if node.name == "row"
- y << row
- next
+ if node.name == 'row'
+ y << process_row(cell_map)
end
+ next
end
- value = node.value
- if value
+ if node.value
+ value = (shared ? string_lookup(value.to_i) : value)
case cell_type
when :datetime
when :time
when :date
- value = (DateTime.new(1899,12,30) + value.to_f)
- when :percentage # ? TODO
+ value = (Dullard::OOXMLEpoch + node.value.to_f)
when :float
- value = value.to_f
+ value = node.value.to_f
else
# leave as string
end
cell_type = nil
-
- row << (shared ? string_lookup(value.to_i) : value)
+ cell_map[column] = value
end
end
end
end
+ def process_row(cell_map)
+ max = cell_map.keys.map {|c| self.class.column_name_to_index c }.max
+ row = []
+ self.class.column_names[0..max].each do |col|
+ if self.class.column_name_to_index(col) > max
+ break
+ else
+ row << cell_map[col]
+ end
+ end
+ row
+ end
+
+
+
# Returns A to ZZZ.
def self.column_names
if @column_names
@column_names
else
@@ -250,22 +297,32 @@
z = y.map(&proc).flatten
@column_names = x + y + z
end
end
+ def self.column_name_to_index(name)
+ if not @column_names_to_indices
+ @column_names_to_indices = {}
+ self.column_names.each_with_index do |name, i|
+ @column_names_to_indices[name] = i
+ end
+ end
+ @column_names_to_indices[name]
+ end
+
def row_count
if defined? @row_count
@row_count
elsif @file
@file.rewind
Nokogiri::XML::Reader(@file).each do |node|
if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
case node.name
- when "dimension"
+ when 'dimension'
if ref = node.attributes["ref"]
break @row_count = ref.scan(/\d+$/).first.to_i
end
- when "sheetData"
+ when 'sheetData'
break @row_count = nil
end
end
end
end