# Surpass
#
# biff_record.rb

# Collects distinct strings, hands out a stable index for each one, and
# serialises them as one SST record followed by CONTINUE records for any
# data that does not fit into a single record.
class SharedStringTable
  SST_ID = 0x00FC
  # Largest payload, in bytes, that a single record may carry (the same
  # value as BiffRecord::BIFF_LIMIT).
  PIECE_LIMIT = 0x2020

  def initialize
    @sst_record = nil # payload of the SST record, set once the first piece is closed
    @continues = []   # fully serialised CONTINUE records, in output order
    # The first piece starts with 8 placeholder bytes; to_biff swaps them for
    # the real counters when it writes the SST header.
    @current_piece = [0, 0].pack('V2')

    @str_indexes = {} # TODO replace with array? or is hash more efficient?
    @add_calls = 0
  end

  # Registers one use of +s+ and returns its index in the table. A string
  # seen for the first time is appended to the table data.
  #
  # Raises RuntimeError ("very long string") when the packed string is larger
  # than 0xFFFF bytes; in that case no index is registered for +s+.
  def add_str(s)
    @add_calls += 1
    known = @str_indexes[s]
    return known unless known.nil?

    # Store the data first: if packing raises, the table must not keep an
    # index for a string that was never written.
    add_to_sst(s)
    position = @str_indexes.length
    @str_indexes[s] = position
    position
  end

  # Returns the index previously assigned to +s+, or nil if it was never added.
  def str_index(s)
    @str_indexes[s]
  end

  # Returns the serialised SST record plus any CONTINUE records.
  #
  # The internal state is left untouched, so the method may be called
  # repeatedly (and strings may still be added afterwards) without emitting
  # spurious empty CONTINUE records.
  def to_biff
    if @sst_record.nil?
      # Nothing has overflowed yet: the piece being filled is the SST record.
      sst_body = @current_piece
      continues = @continues
    else
      sst_body = @sst_record
      continues = @continues
      continues += [continue_record(@current_piece)] unless @current_piece.empty?
    end

    header = [SST_ID, sst_body.bytesize, @add_calls, @str_indexes.length].pack('v2V2')
    # The first 8 bytes of the SST piece are the placeholder for the two
    # counters that the header above already contains.
    header + sst_body.byteslice(8..-1) + continues.join
  end

  # Packs +s+ and appends it to the table data.
  #
  # The first 4 bytes are always kept together in one piece; the remainder
  # may be split across pieces.
  # NOTE(review): presumably those 4 bytes are the 2-byte length, the option
  # byte and the first character - confirm against mock_unicode_string.
  def add_to_sst(s)
    u_str = mock_unicode_string(s)
    raise "very long string" if u_str.bytesize > 0xFFFF
    save_atom(u_str.byteslice(0, 4))
    # byteslice returns nil when the packed string is shorter than 4 bytes;
    # treat that as "no remainder" instead of crashing.
    save_splitted(u_str.byteslice(4..-1).to_s, false)
  end

  # Store the @current_piece in @continues and initialize a new @current_piece
  def new_piece
    if @sst_record.nil?
      # The first piece to be closed becomes the body of the SST record.
      @sst_record = @current_piece
    else
      @continues << continue_record(@current_piece)
    end
    @current_piece = String.new
  end

  # Appends +atom+ without ever splitting it: when it does not fit into the
  # current piece, a new piece is started first.
  def save_atom(atom)
    free_space = PIECE_LIMIT - @current_piece.bytesize
    new_piece if free_space < atom.bytesize
    @current_piece << atom
  end

  # Appends +s+, spilling into new pieces whenever the current one is full.
  #
  # is_unicode_str - when true, chunks are cut on even byte counts and every
  #                  continuation piece starts with "\001" instead of "\000".
  def save_splitted(s, is_unicode_str)
    offset = 0
    total = s.bytesize
    while offset < total
      free_space = PIECE_LIMIT - @current_piece.bytesize
      remaining = total - offset
      overflow = free_space < remaining

      take = if !overflow
               remaining
             elsif is_unicode_str
               free_space & 0xFFFE # never cut a 2-byte character in half
             else
               free_space
             end
      @current_piece << s.byteslice(offset, take)

      if overflow
        new_piece
        @current_piece << (is_unicode_str ? "\001" : "\000")
      end

      offset += take
    end
  end

  private

  # Wraps +piece+ in a CONTINUE record header.
  def continue_record(piece)
    [BiffRecord::CONTINUE_RECORD_ID, piece.bytesize].pack('v2') + piece
  end
end

# Base class for BIFF records. A subclass defines a RECORD_ID constant and
# fills @record_data with the record's payload.
class BiffRecord
  attr_accessor :record_data

  BIFF_LIMIT = 0x2020 # limit for BIFF7/8
  CONTINUE_RECORD_ID = 0x003C

  # By default, initialize to ''.
  # May be overridden in subclass.
  def initialize
    @record_data = ''
  end

  # Returns the 4-byte record header: the subclass's RECORD_ID followed by
  # the payload size, both as 16-bit little-endian values.
  #
  # The size is counted in bytes (not characters), because that is what ends
  # up in the output stream when the payload holds multibyte text.
  def record_header
    [self.class::RECORD_ID, @record_data.bytesize].pack('v2')
  end

  # Returns the serialised record as a binary string.
  #
  # A payload of at most BIFF_LIMIT bytes is written as a single record. A
  # larger payload is cut into BIFF_LIMIT-byte chunks: the first chunk is
  # written under the record's own ID, each further chunk as a CONTINUE record.
  def to_biff
    # Work on a binary copy so that sizes and slices are byte-based and the
    # header can always be concatenated with the payload.
    payload = @record_data.dup.force_encoding(Encoding::BINARY)
    return record_header + payload if payload.bytesize <= BIFF_LIMIT

    pieces = (0...payload.bytesize).step(BIFF_LIMIT).map do |pos|
      payload.byteslice(pos, BIFF_LIMIT)
    end

    records = pieces.each_with_index.map do |piece, i|
      id = i.zero? ? self.class::RECORD_ID : CONTINUE_RECORD_ID
      [id, piece.bytesize].pack('v2') + piece
    end
    records.join
  end
end

# Offset Size Contents
# 0      2    Version, contains 0600H for BIFF8 and BIFF8X
# 2      2    Type of the following data:
#               0005H = Workbook globals
#               0006H = Visual Basic module
#               0010H = Worksheet
#               0020H = Chart
#               0040H = Macro sheet
#               0100H = Workspace file
# 4      2    Build identifier
# 6      2    Build year
# 8      4    File history flags
# 12     4    Lowest Excel version that can read all records in this file
class Biff8BOFRecord < BiffRecord
  RECORD_ID = 0x0809

  # Stream Types
  BOOK_GLOBAL = 0x0005
  VB_MODULE   = 0x0006
  WORKSHEET   = 0x0010
  CHART       = 0x0020
  MACROSHEET  = 0x0040
  WORKSPACE   = 0x0100

  # Builds the 16-byte payload for a stream of type +rec_type+ (one of the
  # stream type constants above). Every other field is a fixed value.
  def initialize(rec_type)
    # 16-bit fields: version, stream type, build identifier, build year
    words = [0x0600, rec_type, 0x0DBB, 0x07CC]
    # 32-bit fields: file history flags, lowest Excel version that can read the file
    dwords = [0x00, 0x06]

    @record_data = (words + dwords).pack('v4V2')
  end
end

# Record 0x00E1 with a fixed two-byte payload.
class InterfaceHeaderRecord < BiffRecord
  RECORD_ID = 0x00E1

  def initialize
    # Bytes 0xB0 0x04, i.e. the 16-bit little-endian value 0x04B0.
    # NOTE(review): presumably a code page identifier (0x04B0 = 1200) - confirm.
    @record_data = [0x04B0].pack('v')
  end
end

# Record 0x00E2. It defines no initializer of its own, so it keeps the empty
# record data set by BiffRecord#initialize.
class InterfaceEndRecord < BiffRecord
  RECORD_ID = 0x00E2
end

# Record 0x00C1 whose payload is always two zero bytes.
class MMSRecord < BiffRecord
  RECORD_ID = 0x00C1

  def initialize
    @record_data = [0x00, 0x00].pack('C2')
  end
end

# This record is part of the file protection. It contains the name of the
# user  that  has  saved  the  file. The user name is always stored as an
# equal-sized  string.  All  unused  characters after the name are filled
# with space characters. It is not required to write the mentioned string
# length. Every other length will be accepted too.
class WriteAccessRecord < BiffRecord
  RECORD_ID = 0x005C

  # owner - name of the user; only its first 0x30 characters are kept and the
  #         result is padded with spaces to 112 bytes.
  #
  # TODO Can we extend this to 0x70? I think 0x30 is a holdover from Biff7 but 112 chars is Biff8.
  def initialize(owner)
    shortened = owner.slice(0, 0x30)
    # The 'A112' directive space-pads the string to exactly 112 bytes.
    @record_data = [shortened].pack('A112')
  end
end

# This  record  specifies  if the file contains an additional BIFF5/BIFF7
# workbook stream.
# Record DSF, BIFF8:
# Offset Size Contents
# 0        2     0 = Only the BIFF8 Workbook stream is present
#                1 = Additional BIFF5/BIFF7 Book stream is in the file
# A  double  stream file can be read by Excel 5.0 and Excel 95, and still
# contains  all  new  features  added to BIFF8 (which are left out in the
# BIFF5/BIFF7 Book stream).
class DSFRecord < BiffRecord
  RECORD_ID = 0x0161

  def initialize
    # 0 = only the BIFF8 Workbook stream is present
    double_stream = 0x00
    @record_data = [double_stream].pack('v')
  end
end

# Record 0x013D whose payload lists the numbers 1..sheetcount, each as a
# 16-bit little-endian value.
class TabIDRecord < BiffRecord
  RECORD_ID = 0x013D

  # sheetcount - number of entries to write; 0 yields an empty payload.
  def initialize(sheetcount)
    # One pack call instead of appending per sheet: no intermediate strings,
    # and the result is always a binary string.
    @record_data = (1..sheetcount).to_a.pack('v*')
  end
end

# Record 0x009C with a fixed two-byte payload.
class FnGroupCountRecord < BiffRecord
  RECORD_ID = 0x009C

  def initialize
    # Bytes 0x0E 0x00, i.e. the 16-bit little-endian value 0x000E.
    @record_data = [0x000E].pack('v')
  end
end

# This record is part of the worksheet/workbook protection. It determines
# whether  the window configuration of this document is protected. Window
# protection is not active, if this record is omitted.
class WindowProtectRecord < BiffRecord
  RECORD_ID = 0x0019

  # protect - integer written as the record's single 16-bit little-endian field.
  def initialize(protect)
    fields = [protect]
    @record_data = fields.pack('v')
  end
end

# This record is part of the worksheet/workbook protection.
# It determines whether the objects of the current sheet are protected.
# Object protection is not active, if this record is omitted.
class ObjectProtectRecord < BiffRecord
  RECORD_ID = 0x0063

  # protect - integer written as the record's single 16-bit little-endian field.
  def initialize(protect)
    packed = [protect].pack('v')
    @record_data = packed
  end
end

# This record is part of the worksheet/workbook protection. It
# determines whether the scenarios of the current sheet are protected.
# Scenario protection is not active, if this record is omitted.
class ScenarioProtectRecord < BiffRecord
  RECORD_ID = 0x00DD

  # protect - integer written as the record's single 16-bit little-endian field.
  def initialize(protect)
    flag_field = [protect]
    @record_data = flag_field.pack('v')
  end
end

# This record is part of the worksheet/workbook protection. It
# determines whether a worksheet or a workbook is protected against
# modification. Protection is not active, if this record is omitted.
class ProtectRecord < BiffRecord
  RECORD_ID = 0x0012

  # protect - integer written as the record's single 16-bit little-endian field.
  def initialize(protect)
    payload = [protect].pack('v')
    @record_data = payload
  end
end

# This record is part of the worksheet/workbook protection. It
# stores a 16-bit hash value, calculated from the worksheet or workbook
# protection password.
class PasswordRecord < BiffRecord
  RECORD_ID = 0x0013

  # password - plain-text password; the default "" produces a hash of 0.
  def initialize(password = "")
    digest = password_hash(password)
    @record_data = [digest].pack('v')
  end

  # Based on the algorithm provided by Daniel Rentz of OpenOffice.
  #
  # Returns 0 for an empty string. Otherwise every byte of +plaintext+ is
  # shifted left by its 1-based position, folded back into 15 bits and XORed
  # into the result, which is finally XORed with the length and 0xCE4B.
  def password_hash(plaintext)
    return 0 if plaintext == ""

    folded = plaintext.unpack('C*').each_with_index.inject(0x0000) do |acc, (byte, pos)|
      shifted = byte << (pos + 1)
      lower = shifted & 0x7fff
      # Bits 15..29 of the shifted value wrap around to the low end.
      upper = (shifted & (0x7fff << 15)) >> 15
      acc ^ (lower | upper)
    end

    folded ^ plaintext.length ^ 0xCE4B
  end
end

# Record 0x01AF whose payload is always the 16-bit value 0.
class Prot4RevRecord < BiffRecord
  RECORD_ID = 0x01AF

  def initialize
    fixed_value = 0x00
    @record_data = [fixed_value].pack('v')
  end
end

# Record 0x01BC whose payload is always the 16-bit value 0.
class Prot4RevPassRecord < BiffRecord
  RECORD_ID = 0x01BC

  def initialize
    zero_word = [0x00].pack('v')
    @record_data = zero_word
  end
end

# This  record  contains  a Boolean value determining whether Excel makes
# a backup of the file while saving.
class BackupRecord < BiffRecord
  RECORD_ID = 0x0040

  # backup - integer written as the record's single 16-bit little-endian field.
  def initialize(backup)
    fields = [backup]
    @record_data = fields.pack('v')
  end
end

# This record specifies whether and how to show objects in the workbook.
# 
# Record HIDEOBJ, BIFF3-BIFF8:
# Offset  Size    Contents
# 0       2       Viewing mode for objects:
#                     0 = Show all objects
#                     1 = Show placeholders
#                     2 = Do not show objects
class HideObjRecord < BiffRecord
  RECORD_ID = 0x008D

  def initialize
    # Viewing mode 0 = show all objects
    viewing_mode = 0x00
    @record_data = [viewing_mode].pack('v')
  end
end

# Record 0x01B7 whose payload is always the 16-bit value 0.
class RefreshAllRecord < BiffRecord
  RECORD_ID = 0x01B7

  def initialize
    fields = [0x00]
    @record_data = fields.pack('v')
  end
end

# This record contains a Boolean value determining whether to save values
# linked  from external workbooks (CRN records and XCT records). In BIFF3
# and BIFF4 this option is stored in the WSBOOL record.
# 
# Record BOOKBOOL, BIFF5-BIFF8:
# 
# Offset  Size    Contents
# 0       2       0 = Save external linked values;
#                 1 = Do not save external linked values
class BookBoolRecord < BiffRecord
  RECORD_ID = 0x00DA

  def initialize
    # 0 = save external linked values
    no_save_external = 0x00
    @record_data = [no_save_external].pack('v')
  end
end

# This   record   stores  two  Windows  country  identifiers.  The  first
# represents  the  user  interface language of the Excel version that has
# saved  the file, and the second represents the system regional settings
# at the time the file was saved.
# 
# Record COUNTRY, BIFF3-BIFF8:
# 
# Offset  Size    Contents
# 0       2       Windows country identifier of the user interface language of Excel
# 2       2       Windows country identifier of the system regional settings
# 
# The  following  table  shows most of the used country identifiers. Most
# of  these  identifiers  are  equal to the international country calling
# codes.
# 
# 1   USA
# 2   Canada
# 7   Russia
class CountryRecord < BiffRecord
  # COUNTRY is record 0x008C. The previous value, 0x00DA, is the BOOKBOOL
  # identifier, so this record was written out under the wrong type.
  RECORD_ID = 0x008C

  # ui_id           - Windows country identifier of the user interface language
  # sys_settings_id - Windows country identifier of the system regional settings
  #
  # Both values are written as 16-bit little-endian fields, in that order.
  def initialize(ui_id, sys_settings_id)
    @record_data = [ui_id, sys_settings_id].pack('v2')
  end
end

# This  record  specifies if the formulas in the workbook can use natural