require 'ole/types/property_set' module Ole class Storage # # The MetaData class is designed to be high level interface to all the # underlying meta data stored within different sections, themselves within # different property set streams. # # With this class, you can simply get properties using their names, without # needing to know about the underlying guids, property ids etc. # # Example: # # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author } # # TODO: # # * add write support # * fix some of the missing type coercion (eg FileTime) # * maybe add back the ability to access individual property sets as a unit # directly. ie ole.summary_information. Is this useful? # * full key support, for unknown keys, like # ole.meta_data[myguid, myid]. probably needed for user-defined # properties too. # class MetaData include Enumerable FILE_MAP = { Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation", Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation" } FORMAT_MAP = { 'MSWordDoc' => :doc } CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}" CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}" CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}" CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}" CLSID_MAP = { CLSID_EXCEL97 => :xls, CLSID_EXCEL95 => :xls, CLSID_WORD97 => :doc, CLSID_WORD95 => :doc } MIME_TYPES = { :xls => 'application/vnd.ms-excel', :doc => 'application/msword', :ppt => 'application/vnd.ms-powerpoint', :msg => 'application/vnd.ms-outlook' # not registered at IANA, but sdeems most common usage } def initialize ole @ole = ole end # i'm thinking of making file_format and mime_type available through # #[], #each, and #to_h also, as calculated meta data (not assignable) def comp_obj return {} unless dirent = @ole.root["\001CompObj"] data = dirent.read # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html # compobj_version: 0x0001 # byte_order: 0xffe # windows_version: 0x00000a03 (win31 apparently) # marker: 0xffffffff compobj_version, byte_order, windows_version, marker, clsid = data.unpack("vvVVa#{Types::Clsid::SIZE}") strings = [] i = 28 while i < data.length len = data[i, 4].unpack('V').first i += 4 strings << data[i, len - 1] i += len end # in the unknown chunk, you usually see something like 'Word.Document.6' {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]} end private :comp_obj def file_format comp_obj[:file_format] end def mime_type # based on the CompObj stream contents type = FORMAT_MAP[file_format] return MIME_TYPES[type] if type # based on the root clsid type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)] return MIME_TYPES[type] if type # fallback to heuristics has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten] return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0'] return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document'] return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book'] end def [] key pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil file = FILE_MAP[pair.first] or return nil dirent = @ole.root[file] or return nil dirent.open { |io| return Types::PropertySet.new(io)[key] } end def []= key, value raise NotImplementedError, 'meta data writes not implemented' end def each(&block) FILE_MAP.values.each do |file| dirent = @ole.root[file] or next dirent.open { |io| Types::PropertySet.new(io).each(&block) } end end def to_h inject({}) { |hash, (name, value)| hash.update name.to_sym => value } end def method_missing name, *args, &block return super unless args.empty? pair = Types::PropertySet::PROPERTY_MAP[name.to_s] or return super self[name] end end def meta_data @meta_data ||= MetaData.new(self) end end end