Sha256: 0afbd0dee722c2e524f827b8158e5848395248e0b6d9520adbd48b7318343c82

Contents?: true

Size: 1.46 KB

Versions: 1

Compression:

Stored size: 1.46 KB

Contents

#Powerpoint 97-2003

# Declares the extractor for legacy (97-2003) PowerPoint files.
# Text extraction is delegated to the external `catppt` command; the
# Windows entry is still a placeholder command string.
PlainTextExtractor.new do
  every(:ppt, :pps)
  as("application/powerpoint")
  aka("Microsoft Office Powerpoint document")
  extract_content_with(
    "catppt SOURCE"      => :on_linux_and_mac_os,
    "some other command" => :on_windows
  )
  # Sample expectation: this text should be found in the given fixture file.
  which_should_for_example_extract('unofficial written by OOo Impress', :from => 'one_page.ppt')
  #FIXME: it seems that catppt cannot open .pps files.
  #or_extract 'a lightweight ferret-powered search engine written in Ruby on rails.', :from => 'picolena.pps'
end

#Powerpoint 2007

require 'cgi'
require 'zip/zip'
# Declares the extractor for PowerPoint 2007 (.pptx) files.
# A .pptx file is read here as a zip archive: every slide part under
# ppt/slides/ is scanned and the content of its <a:t> (text run) elements
# is collected, one run per line.
PlainTextExtractor.new {
  every :pptx
  as 'application/vnd.openxmlformats-officedocument.presentationml.presentation' #could that mime BE any longer?
  aka "Microsoft Office 2007 Powerpoint document"
  extract_content_with {|source|
    Zip::ZipFile.open(source){|zipfile|
      # Keep only the slide parts themselves: the dot is escaped and the
      # name is anchored at both ends, so look-alike entries are ignored.
      slides=zipfile.entries.select{|entry| entry.name=~%r{\Appt/slides/slide\d+\.xml\z}}
      # Archive order is arbitrary (and lexical order puts slide10 before
      # slide2), so order the slides by their numeric suffix.
      # NOTE(review): the file number is only assumed to follow the
      # presentation order -- confirm if exact ordering matters.
      slides=slides.sort_by{|entry| entry.name[/\d+/].to_i}
      slides.collect{|entry|
        # Splitting on "<" yields fragments shaped like "tag attrs>text".
        # Only fragments whose tag is exactly a:t are kept; a bare /^a:t/
        # would also pick up a:tbl, a:tc, a:tr, a:tableStyleId, ... and
        # leak table-style GUIDs and blank lines into the output.
        zipfile.read(entry).split(/</).grep(/\Aa:t[\s\/>]/).collect{|fragment|
          # Drop the "a:t ...>" prefix, then decode XML entities
          # (&amp;, &lt;, ...) so the extracted text is plain text.
          CGI.unescapeHTML(fragment.sub(/\A[^>]+>/,''))
        }
      }.join("\n")
    }
  }
  # Sample expectation: this text should be found in the given fixture file.
  which_should_for_example_extract 'Welcome to Picolena (one more time!)', :from => 'office2007-powerpoint.pptx'
}

## Microsoft Powerpoint to text conversion:
##   Program: catppt
##   Version tested: Catdoc Version 0.94.2
##   Installation: Ubuntu catdoc package
##   Home page: http://www.wagner.pp.ru/~vitus/software/catdoc/

## MS OOXML powerpoint to text conversion:
## Ruby code written by Eric DUMINIL

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
picolena-0.2.2 lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb