lib/html2doc/mime.rb in html2doc-0.7.0 vs lib/html2doc/mime.rb in html2doc-0.7.1
- old
+ new
@@ -82,9 +82,30 @@
i["src"] = new_full_filename
end
docxml
end
# The header is deliberately handled as plain text rather than parsed with
# Nokogiri, because it can contain non-XML constructs such as
# <![if !supportFootnotes]>.
def self.header_image_cleanup(doc, dir, filename)
  image_tag = %r{(<img [^>]*>|<v:imagedata [^>]*>)}
  # the capture group makes split keep each tag, so the pieces alternate
  # text, tag, text, tag, ...; pairing them hands each tag to the helper
  # together with the text that precedes it
  pieces = doc.split(image_tag)
  rewritten = []
  pieces.each_slice(2) do |pair|
    rewritten << header_image_cleanup1(pair, dir, filename)
  end
  rewritten.join
end
+
# Rewrites the image reference in one [text, tag] pair produced by
# header_image_cleanup: the file named by the tag's src attribute is copied
# into dir under a fresh UUID-based name, and src is pointed at the
# file:///C:/Doc/<filename>_files/ location of that copy.
#
# @param a [Array<String>] either [text, image_tag], or [text] alone for a
#   trailing slice that has no tag after it
# @param dir [String] directory the image file is copied into
# @param filename [String] document base name used to build the new src URL
# @return [String] the elements of a joined, with src rewritten when the
#   tag carries a src attribute
def self.header_image_cleanup1(a, dir, filename)
  return a.join unless a.size == 2

  # the backreference ties the closing quote to the opening one, so a path
  # containing the other kind of quote is captured whole; \s also accepts
  # a newline or tab in front of the attribute
  src_attr = /\ssrc=(?<quote>['"])(?<src>.+?)\k<quote>/
  matched = src_attr.match(a[1])
  # a tag without a src attribute has nothing to copy; leave it untouched
  return a.join unless matched

  # File.extname looks only at the last path component, so dots in
  # directory names ("./img/a.png") do not leak into the suffix; it is
  # empty when the file has no extension
  suffix = File.extname(matched[:src])
  uuid = UUIDTools::UUID.random_create.to_s
  new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}#{suffix}"
  dest_filename = File.join(dir, "#{uuid}#{suffix}")
  # the argument-list form of system bypasses the shell, so spaces and
  # shell metacharacters in either path reach cp literally
  system("cp", matched[:src], dest_filename)
  # block form keeps backslashes in the replacement from being read as
  # backreferences
  a[1].sub!(src_attr) { " src='#{new_full_filename}'" }
  a.join
end
+
def self.generate_filelist(filename, dir)
File.open(File.join(dir, "filelist.xml"), "w") do |f|
f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
<o:MainFile HRef="../#{filename}.htm"/>}
Dir.entries(dir).sort.each do |item|