Sha256: dde202be063d588325f22b96ec3207c0d0df4d633c04c8d5a94d8fb63e08bc02
Contents?: true
Size: 1.22 KB
Versions: 3
Compression:
Stored size: 1.22 KB
Contents
#Word 97-2003 PlainText.extract { from :doc, :dot as "application/msword" aka "Microsoft Office Word document" with "antiword SOURCE" => :on_linux, "some other command" => :on_windows which_should_for_example_extract 'district heating', :from => 'Types of malfunction in DH substations.doc' or_extract 'Basic Word template for Picolena specs', :from => 'office2003-word-template.dot' } #Word 2007 require 'zip/zip' PlainText.extract { from :docx, :dotx as 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' aka "Microsoft Office 2007 Word document" with {|source| Zip::ZipFile.open(source){|zipfile| zipfile.read("word/document.xml").split(/</).grep(/^w:t/).collect{|l| l.sub(/^[^>]+>/,'') }.join("\n") } } which_should_for_example_extract 'Can this office 2007 document be indexed\?', :from => 'office2007-word.docx' or_extract 'Basic Word 2007 template for Picolena specs', :from => 'office2007-word-template.dotx' } ## Microsoft Word to text conversion: ## Program: antiword ## Version tested: 0.37 ## Installation: Ubuntu antiword package ## Home page: http://www.winfield.demon.nl/ ## MS OOXML word to text conversion: ## Ruby code written by Eric DUMINIL
Version data entries
3 entries across 3 versions & 1 rubygems