module Htmltoword
class Document
include XSLTHelper
class << self
include TemplatesHelper
# Builds a document from +content+ and returns whatever Document#generate
# returns for it.
#
# content       - markup passed unchanged to Document#replace_files.
# template_name - optional template name; the .docx extension is appended
#                 when it is missing. nil is handed to template_file as-is.
# extras        - flag forwarded to Document#replace_files.
def create(content, template_name = nil, extras = false)
  needs_suffix = template_name && !template_name.end_with?(extension)
  # Build a new string rather than mutating the caller's argument.
  template_name = template_name + extension if needs_suffix
  new(template_file(template_name)).tap { |doc| doc.replace_files(content, extras) }.generate
end
# Builds a document with .create and writes it to +file_path+ in binary mode.
#
# The document is built *before* the file is opened: opening with 'wb'
# truncates the target, so building first means a failure inside .create
# cannot leave an empty or clobbered file behind.
#
# content       - markup forwarded to .create.
# file_path     - destination path; overwritten when it already exists.
# template_name - optional template name forwarded to .create.
# extras        - flag forwarded to .create.
#
# Returns the value of the File.open block (the file handle, closed once
# the block has finished).
def create_and_save(content, file_path, template_name = nil, extras = false)
  docx = create(content, template_name, extras)
  File.open(file_path, 'wb') { |out| out << docx }
end
# Same as .create, with the template given first.
#
# Delegates to .create instead of repeating its body, so the two entry
# points cannot drift apart. For any non-nil template the result is
# identical to the previous implementation; a nil template now follows
# .create's nil handling instead of raising NoMethodError.
#
# template - template name; the extension is appended by .create if missing.
# content  - markup forwarded to .create.
# extras   - flag forwarded to .create.
def create_with_content(template, content, extras = false)
  create(content, template, extras)
end
# File extension appended to template names that do not already end with it.
def extension
  '.docx'
end
# Archive entry name of the main document part.
def doc_xml_file
  'word/document.xml'
end
# Archive entry name of the numbering part.
def numbering_xml_file
  'word/numbering.xml'
end
# Archive entry name of the document relationships part.
def relations_xml_file
  'word/_rels/document.xml.rels'
end
# Archive entry name of the package content-types part.
def content_types_xml_file
  '[Content_Types].xml'
end
end
# Sets up an empty document bound to a template.
#
# template_path - path of the template archive that #generate opens.
def initialize(template_path)
  @template_path = template_path
  # archive entry name => replacement content, filled by transform_and_replace
  @replaceable_files = {}
  # one hash per <img> (:filename, :url, :ext), filled by local_images
  @image_files = []
end
#
# Generate a string representing the contents of a docx file.
#
# Every entry of the template archive is copied into a new in-memory
# archive, with these substitutions:
#   * the main document part: only the markup between <w:body> and the
#     first <w:sectPr is replaced by the generated body;
#   * any other entry with generated content: replaced wholesale;
#   * the content-types part: image extensions are registered through
#     inject_image_content_types when images were collected.
# Collected images are then appended under word/media/.
#
# NOTE(review): in the reviewed copy this method and #replace_files were
# fused together and partly erased (everything that looked like markup was
# stripped). The branches below are restored from the surviving fragments
# and from the helpers this class defines; confirm against version control.
def generate
  Zip::File.open(@template_path) do |template_zip|
    buffer = Zip::OutputStream.write_buffer do |out|
      template_zip.each do |entry|
        out.put_next_entry entry.name
        replacement = @replaceable_files[entry.name]
        if replacement && entry.name == Document.doc_xml_file
          source = entry.get_input_stream.read
          # Change only the body of document. TODO: Improve this...
          # Block form: the generated body is inserted literally, so a
          # backslash sequence inside it is never read as a back-reference.
          source = source.sub(/(<w:body>)(.*?)(<w:sectPr)/m) do
            "#{Regexp.last_match(1)}#{replacement}#{Regexp.last_match(3)}"
          end
          out.write(source)
        elsif replacement
          out.write(replacement)
        elsif entry.name == Document.content_types_xml_file
          raw_file = entry.get_input_stream.read
          out.write(@image_files.empty? ? raw_file : inject_image_content_types(raw_file))
        else
          out.write(template_zip.read(entry.name))
        end
      end
      unless @image_files.empty?
        # Loaded lazily: only documents with images need it.
        require 'open-uri'
        @image_files.each do |image|
          out.put_next_entry("word/media/#{image[:filename]}")
          # NOTE(review): the url comes straight from the img src of the
          # input HTML and is opened as given - make sure callers only
          # pass trusted markup. The block form closes the handle.
          out.write(URI.open(image[:url], 'rb', &:read))
        end
      end
    end
    buffer.string
  end
end

# Converts +html+ into the document parts that #generate will substitute.
#
# Whitespace between tags is removed, the markup is parsed and run through
# the 'cleanup' stylesheet, and the result feeds the numbering part, the
# relationships part, the main document part and the image list.
#
# html   - HTML markup; nil or empty is treated as an empty <body>.
# extras - flag forwarded to transform_doc_xml.
def replace_files(html, extras = false)
  html = '<body></body>' if html.nil? || html.empty?
  original_source = Nokogiri::HTML(html.gsub(/>\s+</, '><'))
  source = xslt(stylesheet_name: 'cleanup').transform(original_source)
  transform_and_replace(source, xslt_path('numbering'), Document.numbering_xml_file)
  transform_and_replace(source, xslt_path('relations'), Document.relations_xml_file)
  transform_doc_xml(source, extras)
  local_images(source)
end
# Runs +source+ through the 'cleanup' and 'inline_elements' stylesheets, in
# that order, then hands the result to transform_and_replace together with
# the stylesheet chosen by document_xslt(extras), targeting the main
# document part. +extras+ is also passed as the remove_ns argument.
def transform_doc_xml(source, extras = false)
  prepared = %w[cleanup inline_elements].reduce(source) do |markup, sheet_name|
    xslt(stylesheet_name: sheet_name).transform(markup)
  end
  transform_and_replace(prepared, document_xslt(extras), Document.doc_xml_file, extras)
end
private
# Applies the stylesheet at +stylesheet_path+ to +source+ and stores the
# result in @replaceable_files under +file+.
#
# remove_ns - when truthy, xmlns:prefix="..." declarations (and the
#             whitespace before them) are stripped from the result first.
#
# Returns the stored content.
def transform_and_replace(source, stylesheet_path, file, remove_ns = false)
  transformed = xslt(stylesheet_path: stylesheet_path).apply_to(source)
  if remove_ns
    transformed.gsub!(/\s*xmlns:(\w+)="(.*?)\s*"/, '')
  end
  @replaceable_files[file] = transformed
end
# Appends one hash per <img> element of +source+ to @image_files, holding
# the name the image gets inside the document (image1.ext, image2.ext, ...),
# its source url and its lower-cased extension.
# The extension is taken from the data-filename attribute when present,
# otherwise from the last path segment of src.
def local_images(source)
  source.css('img').each_with_index do |img, position|
    url = img['src']
    original_name = img['data-filename'] || url.split("/").last
    ext = File.extname(original_name).delete(".").downcase
    @image_files << { filename: "image#{position + 1}.#{ext}", url: url, ext: ext }
  end
end
# Maps a file extension to the name used in content types: "jpg" becomes
# "jpeg", anything else is returned unchanged.
def content_type_from_extension(ext)
  return "jpeg" if ext == "jpg"

  ext
end
# Registers the extensions of the images in @image_files in the
# [Content_Types].xml markup given as +source+.
#
# For every required extension that has no <Default> entry yet, a
# <Default Extension='ext' ContentType='image/...'/> element is appended to
# <Types>. (The reviewed copy appended an empty string here, so nothing was
# ever registered.)
#
# Returns the amended XML as a string, ready to be written into the archive.
def inject_image_content_types(source)
  doc = Nokogiri::XML(source)
  # Extensions already declared. Lower-cased because ours are lower-cased
  # too and extension matching in the package is not case-sensitive; nodes
  # without the attribute are ignored instead of raising.
  existing_exts = doc.css("Default").map { |node| node["Extension"] }.compact.map(&:downcase)
  # Extensions our images need; an image without extension has nothing to register.
  required_exts = @image_files.map { |image| image[:ext] }.reject { |ext| ext.to_s.empty? }
  missing_exts = (required_exts - existing_exts).uniq
  types = doc.at_css("Types")
  missing_exts.each do |ext|
    types.add_child("<Default Extension='#{ext}' ContentType='image/#{content_type_from_extension(ext)}'/>")
  end
  doc.to_s
end
end
end