lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb in picolena-0.2.0 vs lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb in picolena-0.2.2

- old
+ new

@@ -2,12 +2,12 @@ PlainTextExtractor.new { every :ppt, :pps as "application/powerpoint" aka "Microsoft Office Powerpoint document" - with "catppt SOURCE" => :on_linux_and_mac_os, - "some other command" => :on_windows + extract_content_with "catppt SOURCE" => :on_linux_and_mac_os, + "some other command" => :on_windows which_should_for_example_extract 'unofficial written by OOo Impress', :from => 'one_page.ppt' #FIXME: it seems that catppt cannot open .pps files. #or_extract 'a lightweight ferret-powered search engine written in Ruby on rails.', :from => 'picolena.pps' } @@ -16,11 +16,11 @@ require 'zip/zip' PlainTextExtractor.new { every :pptx as 'application/vnd.openxmlformats-officedocument.presentationml.presentation' #could that mime BE any longer? aka "Microsoft Office 2007 Powerpoint document" - with {|source| + extract_content_with {|source| Zip::ZipFile.open(source){|zipfile| slides=zipfile.entries.select{|l| l.name=~/^ppt\/slides\/slide\d+.xml/} slides.collect{|entry| zipfile.read(entry).split(/</).grep(/^a:t/).collect{|l| l.sub(/^[^>]+>/,'') @@ -36,6 +36,6 @@ ## Version tested: Catdoc Version 0.94.2 ## Installation: Ubuntu catdoc package ## Home page: http://www.wagner.pp.ru/~vitus/software/catdoc/ ## MS OOXML powerpoint to text conversion: -## Ruby code written by Eric DUMINIL \ No newline at end of file +## Ruby code written by Eric DUMINIL