module Avv2word class Document include XSLTHelper class << self include TemplatesHelper def extract_images_html(html) require "base64" require "fileutils" # create working dir FileUtils.mkdir_p("tmp_imgs") resize = 1 # future feature; half size: 0.5 doc = Nokogiri::HTML.fragment(html) doc.css("img").each_with_index do |img_elm,i| mime_type, img_data = img_elm.attributes["src"].value.split(",") # ^--data:image/jpeg;base64 ext = mime_type.match(/image\/(\w+?);base64/)[1] img_path = "tmp_imgs/image#{i+1}.#{ext}" File.open(img_path,"wb"){|f| f.write Base64.decode64(img_data)} img_elm.attributes["src"].value = img_path match_dimensions = %x( identify #{img_path} ).match(/(?\d+)x(?\d+)/) if match_dimensions img_elm["style"] = "width:#{match_dimensions[:x].to_i*resize}px;height:#{match_dimensions[:y].to_i*resize}px" else img_elm["style"] = "width:100px;height:100px" end # img_elm["style"] = "width:100.5px;height:100.5px" # alt: style="width:350px;height:150px" # alt: img_elm["data-width"] = "236px" end doc.to_html end def escape_footnotes(html) doc = Nokogiri::HTML.fragment(html) nodes_with_footnotes = doc.css("footnote") nodes_with_footnotes.each do |node| node.name = "footnote" node.content = node.attributes["data-value"] node.attributes.map{|k,v| node.attributes[k].remove } end doc.to_html end def create(content, template_name = nil, extras = false) content = extract_images_html(content) content = escape_footnotes(content) template_name += extension if template_name && !template_name.end_with?(extension) document = new(template_file(template_name)) document.replace_files(content, extras) document.generate end def create_and_save(content, file_path, template_name = nil, extras = false) File.open(file_path, 'wb') do |out| out << create(content, template_name, extras) end end def create_with_content(template, content, extras = false) content = extract_images_html(content) template += extension unless template.end_with?(extension) document = new(template_file(template)) 
document.replace_files(content, extras)
        document.generate
      end

      # File extension appended to template names that lack it.
      def extension
        '.docx'
      end

      # Archive entry name of the main document part; +generate+ compares
      # entry names against it.
      def doc_xml_file
        'word/document.xml'
      end

      # Archive entry name of the numbering part.
      def numbering_xml_file
        'word/numbering.xml'
      end

      # Archive entry name of the main document's relationships part.
      def relations_xml_file
        'word/_rels/document.xml.rels'
      end

      # Archive entry name of the footer part.
      def footer_xml_file
        'word/footer.xml'
      end

      # Archive entry name of the header part.
      def header_xml_file
        'word/header.xml'
      end

      # Archive entry name of the content-types manifest; +generate+ rewrites
      # this entry when image files are present.
      def content_types_xml_file
        '[Content_Types].xml'
      end

      # Archive entry name of the footnotes part.
      def footnotes_xml_file
        'word/footnotes.xml'
      end
    end

    # @param template_path [String] path of the template archive that
    #   +generate+ opens with Zip::File
    def initialize(template_path)
      # Replacement contents keyed by archive entry name (looked up by
      # entry.name in +generate+).
      @replaceable_files = {}
      @template_path = template_path
      # Hashes with :filename and :url keys, as read by +generate+.
      @image_files = []
    end

    #
    # Generate a string representing the contents of a docx file.
    #
    # Walks every entry of the template archive and writes it to an in-memory
    # zip: entries with a replacement in @replaceable_files get the
    # replacement, the content-types manifest is passed through
    # inject_image_content_types when there are images, and everything else
    # is copied unchanged. Image files are then appended under word/media/,
    # followed by the header/footer relationship parts when replacements for
    # them exist.
    #
    def generate
      Zip::File.open(@template_path) do |template_zip|
        buffer = Zip::OutputStream.write_buffer do |out|
          template_zip.each do |entry|
            # Entries whose name ends in "/" are skipped.
            next if entry.name =~ /\/$/
            out.put_next_entry entry.name
            if @replaceable_files[entry.name] && entry.name == Document.doc_xml_file
              source = entry.get_input_stream.read
              # Change only the body of document. TODO: Improve this...
              # NOTE(review): the statements below look damaged — the regex
              # and string literals appear to have lost their tag text and
              # the trailing `end` has no visible opener. Restore this region
              # from version control before relying on it.
              source = source.sub(/()((.|\n)*?)(-->','') if @header
              source.sub!('','') if @footer
                end
              out.write(source)
            elsif @replaceable_files[entry.name]
              out.write(@replaceable_files[entry.name])
            elsif entry.name == Document.content_types_xml_file
              raw_file = entry.get_input_stream.read
              content_types = @image_files.empty? ? raw_file : inject_image_content_types(raw_file)
              out.write(content_types)
            else
              out.write(template_zip.read(entry.name))
            end
          end
          unless @image_files.empty?
            #stream the image files into the media folder using open-uri
            # NOTE(review): passing a URL to Kernel#open depends on open-uri's
            # Kernel#open override, which Ruby 3.0 removed (URI.open is the
            # replacement) — confirm the target Ruby version and that
            # hash[:url] is trusted.
            @image_files.each do |hash|
              out.put_next_entry("word/media/#{hash[:filename]}")
              open(hash[:url], 'rb') do |f|
                out.write(f.read)
              end
            end
          end
          # These two parts are only written when a replacement exists.
          %w( word/_rels/header.xml.rels word/_rels/footer.xml.rels ).each do |f|
            if @replaceable_files[f]
              out.put_next_entry f
              out.write(@replaceable_files[f])
            end
          end
        end
        buffer.string
      end
    end

    def replace_files(html, extras = false)
      # nil or empty input is normalised to an empty string.
      html = '' if html.nil? || html.empty?
      header_html = (html =~ /(
.*?<\/header>)/m ? $1 : '') footer_html = (html =~ /(