lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb in picolena-0.2.0 vs lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb in picolena-0.2.2
- old
+ new
@@ -2,12 +2,12 @@
# Declares the plain-text extractor for legacy PowerPoint files (:ppt, :pps).
# NOTE(review): the DSL methods called below (every, as, aka, ...) are defined
# outside this file; their descriptions here are inferred from the arguments
# passed to them -- confirm against the PlainTextExtractor DSL.
PlainTextExtractor.new {
# File types handled by this extractor (presumably matched by extension).
every :ppt, :pps
# MIME type associated with these files.
as "application/powerpoint"
# Human-readable description of the format.
aka "Microsoft Office Powerpoint document"
# Extraction command, keyed to the platform it applies to. This change renames
# the DSL call from `with` to `extract_content_with`; the arguments are the same.
# NOTE(review): SOURCE looks like a placeholder for the input file path, and
# "some other command" looks like a stand-in for a Windows command that has
# not been chosen yet -- confirm both.
- with "catppt SOURCE" => :on_linux_and_mac_os,
- "some other command" => :on_windows
+ extract_content_with "catppt SOURCE" => :on_linux_and_mac_os,
+ "some other command" => :on_windows
# Sample text expected in the extracted output of the named file
# (presumably consumed by the extractor specs -- confirm).
which_should_for_example_extract 'unofficial written by OOo Impress', :from => 'one_page.ppt'
#FIXME: it seems that catppt cannot open .pps files.
#or_extract 'a lightweight ferret-powered search engine written in Ruby on rails.', :from => 'picolena.pps'
}
@@ -16,11 +16,11 @@
require 'zip/zip'
PlainTextExtractor.new {
every :pptx
as 'application/vnd.openxmlformats-officedocument.presentationml.presentation' #could that mime BE any longer?
aka "Microsoft Office 2007 Powerpoint document"
- with {|source|
+ extract_content_with {|source|
Zip::ZipFile.open(source){|zipfile|
slides=zipfile.entries.select{|l| l.name=~/^ppt\/slides\/slide\d+.xml/}
slides.collect{|entry|
zipfile.read(entry).split(/</).grep(/^a:t/).collect{|l|
l.sub(/^[^>]+>/,'')
@@ -36,6 +36,6 @@
## Version tested: Catdoc Version 0.94.2
## Installation: Ubuntu catdoc package
## Home page: http://www.wagner.pp.ru/~vitus/software/catdoc/
## MS OOXML powerpoint to text conversion:
-## Ruby code written by Eric DUMINIL
\ No newline at end of file
+## Ruby code written by Eric DUMINIL