lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.2.0 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.3.0
- old
+ new
@@ -9,50 +9,57 @@
# Build a merge object for the docx at path, forwarding the verbose flag and
# the caller's block to the constructor. The return value is the path that
# was passed in, not the merge object.
def self.open(path, verbose: false, &block)
path.tap { new(path, verbose: verbose, &block) }
end
+ def initialize(path, verbose: false, &block)
+ @verbose = verbose
+ @output = {}
+ @zip = Zip::File.open(path)
+ @documents = load_documents
+ block&.call self
+ end
+
+ # Helper method to display the contents of the document and the merge fields from the CLI
# Prints two lines to stdout: the archive entry names, then the merge field
# names, each as a comma-separated list. Both lists are built before
# anything is printed.
def examine
document_list, field_list = [document_names, field_names].map { |names| names.join(", ") }
puts "Documents: #{document_list}"
puts "Merge fields: #{field_list}"
end
+ # Read all documents within the docx and extract any merge fields
# Returns the distinct merge field names found across all loaded documents:
# every node from tag_nodes is passed to field_name_from, nil results (nodes
# without a complete {{ field }} match) are dropped and duplicates removed.
def field_names
- documents.collect do |name, document|
- (document / "//w:t").collect do |text_node|
- next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/))
- field_name = matches[1].strip
- puts "...field #{field_name} found in #{name}" if verbose
- field_name
- end.compact
- end.flatten
+ tag_nodes.collect do |tag_node|
+ field_name_from tag_node
+ end.compact.uniq
end
+ # List the documents stored within this docx
# Returns an array holding the name of every entry in the zip archive.
def document_names
@zip.entries.map(&:name)
end
- def data fields = {}
- fields = fields.transform_keys(&:to_s)
+ # Substitute the data from the merge fields with the values provided
+ def data mapping = {}
+ mapping = mapping.transform_keys(&:to_s)
if verbose
puts "...substitutions..."
fields.each do |key, value|
puts " #{key} => #{value}"
end
end
@documents.each do |name, document|
- result = document.dup
- result = substitute result, fields, name
+ output_document = substitute document.dup, mapping: mapping, document_name: name
- @output[name] = result.serialize save_with: 0
+ @output[name] = output_document.serialize save_with: 0
end
end
+ # Write the new document to the given path
# Opens an archive at path with Zip::File::CREATE and, for every entry of the
# source zip, opens an output stream of the same name in it.
# NOTE(review): what gets written to each stream is not visible here; the
# lines between opening the stream and the closing `end`s are elided by the
# hunk boundary below.
def write_to path
puts "...writing to #{path}" if verbose
Zip::File.open(path, Zip::File::CREATE) do |out|
zip.each do |entry|
out.get_output_stream(entry.name) do |o|
@@ -60,53 +67,73 @@
end
end
end
end
- protected
+ private
# Accessors for internal state, declared below the visibility marker.
# verbose - flag stored by initialize from its verbose: keyword
attr_reader :verbose
# zip - the Zip::File opened by initialize
attr_reader :zip
# xml - NOTE(review): nothing in the visible code assigns @xml; possibly unused
attr_reader :xml
# documents - hash built by load_documents (entry name => parsed XML document)
attr_reader :documents
# output - hash filled by data (entry name => serialized XML)
attr_accessor :output
- def initialize(path, verbose: false, &block)
- raise "No block given" unless block
- @verbose = verbose
- @output = {}
- @documents = {}
- begin
- @zip = Zip::File.open(path)
- @zip.entries.each do |entry|
- next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/)
- puts "...reading #{entry.name}" if verbose
- xml = @zip.get_input_stream entry
- @documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent }
- end
- yield self
- ensure
- @zip.close
# Builds a hash of entry name => parsed XML document for the entries of @zip
# whose name matches word/(document|header|footer|footnotes|endnotes), then
# at most one further character (the `.?`), then ".xml".
# Each matching entry is announced on stdout when verbose is set, and is
# parsed by Nokogiri with the noent option switched on.
# The ensure clause closes @zip once loading finishes, whether or not an
# error was raised.
+ def load_documents
+ @zip.entries.each_with_object({}) do |entry, documents|
+ next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/)
+ puts "...reading #{entry.name}" if verbose
+ xml = @zip.get_input_stream entry
+ documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent }
end
+ ensure
+ @zip.close
end
- def substitute document, fields, document_name
- (document / "//w:t").each do |text_node|
- next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/))
- field_name = matches[1].strip
- if fields.has_key? field_name
- field_value = fields[field_name]
- puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose
- text_node.content = text_node.content.gsub(matches[1], field_value).gsub("{{", "").gsub("}}", "")
- elsif verbose
- puts "...found #{field_name} but no replacement value"
- end
+ # Collect all the nodes that contain merge fields
+ def tag_nodes
+ documents.collect do |name, document|
+ tag_nodes_for document
+ end.flatten
+ end
+
+ # go through all w:t (Word Text???) nodes of the document
+ # find any nodes that contain "{{"
+ # then find the ancestor node that also includes the ending "}}"
+ # This collection of nodes contains all the merge fields for this document
+ def tag_nodes_for document
+ (document / "//w:t").collect do |node|
+ (node.children.any? { |child| child.content.include? "{{" }) ? enclosing_node_for_start_tag(node) : nil
+ end.compact
+ end
+
+ # Extract the merge field name from the node
+ def field_name_from(tag_node)
+ return nil unless (matches = tag_node.content.match(/{{\s*(\S+)\s*}}/))
+ field_name = matches[1].strip
+ puts "...field #{field_name} found in #{name}" if verbose
+ field_name.to_s
+ end
+
+ # Go through the given document, replacing any merge fields with the values provided
+ # and storing the results in a new document
+ def substitute document, document_name:, mapping: {}
+ tag_nodes_for(document).each do |tag_node|
+ field_name = field_name_from tag_node
+ next unless mapping.has_key? field_name
+ field_value = mapping[field_name]
+ puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose
+ tag_node.content = tag_node.content.gsub(field_name, field_value).gsub(/{{\s*/, "").gsub(/\s*}}/, "")
+ rescue => ex
+ # Have to mangle the exception message otherwise it outputs the entire document
+ puts ex.message.to_s[0..255]
end
document
end
- def close
- zip.close
+ # Find the ancestor node that contains both the start {{ text and the end }} text enclosing the merge field
+ def enclosing_node_for_start_tag(node)
+ return node if node.content.include? "}}"
+ node.parent.nil? ? nil : enclosing_node_for_start_tag(node.parent)
end
end
end
end