Sha256: b3091baefcf0c946eb385900ce6d58557bf8a030983044697d606637ddabc344

Contents?: true

Size: 1.9 KB

Versions: 2

Compression:

Stored size: 1.9 KB

Contents

require 'rubygems'
require 'zip/zip'
# NOTE(review): this line only builds the absolute path of the sibling
# "version" file and discards the result; nothing is loaded here. Presumably
# a `require` of that path was intended -- confirm before changing.
File.expand_path("#{File.dirname(__FILE__)}/version")

# Pulls the text (and optionally the embedded thumbnail) out of Office Open
# XML files by reading the XML parts stored inside the zip container and
# replacing every markup tag with a space.
class Hypodermic

  # Lower-cased file extension => MIME type used to select an extractor.
  MIME_TYPES = {
    '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  }

  # Extracts the text of the file at +path+.
  #
  # @param path [String] path to a .docx, .xlsx or .pptx file
  # @param opts [Hash] optional settings:
  #   :mime_type -- overrides the MIME type otherwise derived from the
  #                 (case-insensitive) file extension
  #   :thumbnail -- when truthy, the embedded thumbnail is read as well
  # @return [Array] two elements: the extracted text, and the thumbnail data
  #   (nil unless :thumbnail was requested)
  # @raise [ArgumentError] if the MIME type is not one of MIME_TYPES' values
  def self.extract(path, opts = {})
    mime_type = opts[:mime_type] || MIME_TYPES[File.extname(path).downcase]
    text = document(path, mime_type)
    image = opts[:thumbnail] ? thumbnail(path) : nil

    [text, image]
  end

  # The helpers below are defined with `def self.`, which a bare `private`
  # keyword does not affect, so they are callable from outside the class.
  # They are kept public for backward compatibility.

  # Reads the XML for the given MIME type and replaces every tag with a space.
  #
  # @param path [String] path to the Office file
  # @param mime_type [String, nil] one of the values of MIME_TYPES
  # @return [String] the XML content with each tag replaced by one space
  # @raise [ArgumentError] if +mime_type+ is not supported
  def self.document(path, mime_type)
    doc_xml =
      case mime_type
      when 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        xml_from_word(path)
      when 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        xml_from_excel(path)
      when 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
        xml_from_powerpoint(path)
      else
        raise ArgumentError, "Invalid file!"
      end

    # [^>]* (unlike .*?) also crosses line breaks, so a tag whose attributes
    # are wrapped over several lines is still removed as a whole.
    doc_xml.gsub(/<[^>]*>/, ' ')
  end

  # @param path [String] path to a .docx file
  # @return [String] raw contents of the 'word/document.xml' entry
  def self.xml_from_word(path)
    Zip::ZipFile.open(path) { |z| z.read('word/document.xml') }
  end

  # @param path [String] path to an .xlsx file
  # @return [String] concatenated contents of every entry whose name matches
  #   the worksheet / shared-strings pattern
  def self.xml_from_excel(path)
    xml_from_entries(path, /(xl\/worksheets)|(sharedStrings.xml)/)
  end

  # @param path [String] path to a .pptx file
  # @return [String] concatenated contents of every entry whose name matches
  #   the slides / presentation pattern
  def self.xml_from_powerpoint(path)
    xml_from_entries(path, /(ppt\/slides)|(presentation.xml)/)
  end

  # Walks the archive entry by entry and concatenates the contents of those
  # whose name matches +pattern+, in archive order.
  #
  # @param path [String] path to the zip container
  # @param pattern [Regexp] matched against each entry name
  # @return [String] the concatenated entry contents ('' when nothing matches)
  def self.xml_from_entries(path, pattern)
    Zip::ZipInputStream.open(path) do |io|
      xml = ''
      while (entry = io.get_next_entry)
        # to_s keeps the append safe should the read yield nil -- presumably
        # possible for data-less entries such as explicit directories, whose
        # names can match the pattern too (TODO confirm against rubyzip).
        xml << io.read.to_s if entry.name =~ pattern
      end
      xml
    end
  end

  # @param path [String] path to the Office file
  # @return [String] raw contents of the 'docProps/thumbnail.jpeg' entry
  def self.thumbnail(path)
    Zip::ZipFile.open(path) { |z| z.read('docProps/thumbnail.jpeg') }
  end

end

Version data entries

2 entries across 2 versions & 2 rubygems

Version Path
loe-hypodermic-0.1.2 lib/hypodermic.rb
hypodermic-0.1.2 lib/hypodermic.rb