lib/html2doc/mime.rb in html2doc-0.7.0 vs lib/html2doc/mime.rb in html2doc-0.7.1

- old
+ new

# Rewrite the image references found in a header markup string.
#
# Do not parse the header through Nokogiri, since it will contain
# non-XML like <![if !supportFootnotes]>. The markup is instead split
# on <img ...> and <v:imagedata ...> tags with a regular expression.
#
# @param doc [String] header markup
# @param dir [String] directory the referenced image files are copied into
# @param filename [String] document base name used in the rewritten src URL
# @return [String] the markup with the src of each matched tag rewritten
def self.header_image_cleanup(doc, dir, filename)
  # Splitting on a capture group alternates text and tag, so every slice
  # is [text, tag], except for a possible trailing [text].
  doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
    header_image_cleanup1(a, dir, filename)
  end.join
end

# Process one [text, tag] slice (or a lone [text]) produced by
# header_image_cleanup: copy the file named by the tag's src attribute
# into dir under a fresh UUID-based name, and point src at that name.
#
# A tag without a src attribute is returned unchanged.
#
# @param a [Array<String>] one- or two-element slice: text, optional tag
# @param dir [String] destination directory for the copied image
# @param filename [String] document base name used in the rewritten src URL
# @return [String] the text followed by the (possibly rewritten) tag
def self.header_image_cleanup1(a, dir, filename)
  text, tag = a
  return text.to_s if tag.nil?

  src_attr = / src=['"](?<src>[^"']+)['"]/
  matched = src_attr.match(tag)
  return "#{text}#{tag}" unless matched

  uuid = UUIDTools::UUID.random_create.to_s
  # File.extname looks only at the last dot of the basename, so dotted
  # directories ("./img/a.png") do not leak into the generated name.
  new_name = "#{uuid}#{File.extname(matched[:src])}"
  new_full_filename = "file:///C:/Doc/#{filename}_files/#{new_name}"
  # Argument-list form: no shell is involved, so spaces and shell
  # metacharacters in either path are passed to cp verbatim.
  system "cp", matched[:src], File.join(dir, new_name)
  # Block form keeps backslashes in the replacement literal.
  "#{text}#{tag.sub(src_attr) { " src='#{new_full_filename}'" }}"
end