mime.rb in html2doc-0.7.1

- old
+ new
@@ -82,9 +82,30 @@
       i["src"] = new_full_filename
     end
     docxml
   end
 
+  # do not parse the header through Nokogiri, since it will contain 
+  # non-XML like <![if !supportFootnotes]>
+  def self.header_image_cleanup(doc, dir, filename)
+    doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
+      header_image_cleanup1(a, dir, filename)
+    end.join
+  end
+
+  def self.header_image_cleanup1(a, dir, filename)
+      if a.size == 2
+        matched = / src=['"](?<src>[^"']+)['"]/.match a[1]
+        matched2 = /\.(?<suffix>\S+)$/.match matched[:src]
+        uuid = UUIDTools::UUID.random_create.to_s
+        new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
+        dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
+        system "cp #{matched[:src]} #{dest_filename}"
+        a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
+      end
+      a.join
+  end
+
   def self.generate_filelist(filename, dir)
     File.open(File.join(dir, "filelist.xml"), "w") do |f|
       f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
         <o:MainFile HRef="../#{filename}.htm"/>}
       Dir.entries(dir).sort.each do |item|