lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.2.0 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.3.0

- old
+ new

@@ -9,50 +9,57 @@ def self.open(path, verbose: false, &block) new(path, verbose: verbose, &block) path end + def initialize(path, verbose: false, &block) + @verbose = verbose + @output = {} + @zip = Zip::File.open(path) + @documents = load_documents + block&.call self + end + + # Helper method to display the contents of the document and the merge fields from the CLI def examine documents = document_names.join(", ") fields = field_names.join(", ") puts "Documents: #{documents}" puts "Merge fields: #{fields}" end + # Read all documents within the docx and extract any merge fields def field_names - documents.collect do |name, document| - (document / "//w:t").collect do |text_node| - next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/)) - field_name = matches[1].strip - puts "...field #{field_name} found in #{name}" if verbose - field_name - end.compact - end.flatten + tag_nodes.collect do |tag_node| + field_name_from tag_node + end.compact.uniq end + # List the documents stored within this docx def document_names @zip.entries.collect { |entry| entry.name } end - def data fields = {} - fields = fields.transform_keys(&:to_s) + # Substitute the data from the merge fields with the values provided + def data mapping = {} + mapping = mapping.transform_keys(&:to_s) if verbose puts "...substitutions..." fields.each do |key, value| puts " #{key} => #{value}" end end @documents.each do |name, document| - result = document.dup - result = substitute result, fields, name + output_document = substitute document.dup, mapping: mapping, document_name: name - @output[name] = result.serialize save_with: 0 + @output[name] = output_document.serialize save_with: 0 end end + # Write the new document to the given path def write_to path puts "...writing to #{path}" if verbose Zip::File.open(path, Zip::File::CREATE) do |out| zip.each do |entry| out.get_output_stream(entry.name) do |o| @@ -60,53 +67,73 @@ end end end end - protected + private attr_reader :verbose attr_reader :zip attr_reader :xml attr_reader :documents attr_accessor :output - def initialize(path, verbose: false, &block) - raise "No block given" unless block - @verbose = verbose - @output = {} - @documents = {} - begin - @zip = Zip::File.open(path) - @zip.entries.each do |entry| - next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/) - puts "...reading #{entry.name}" if verbose - xml = @zip.get_input_stream entry - @documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent } - end - yield self - ensure - @zip.close + def load_documents + @zip.entries.each_with_object({}) do |entry, documents| + next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/) + puts "...reading #{entry.name}" if verbose + xml = @zip.get_input_stream entry + documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent } end + ensure + @zip.close end - def substitute document, fields, document_name - (document / "//w:t").each do |text_node| - next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/)) - field_name = matches[1].strip - if fields.has_key? field_name - field_value = fields[field_name] - puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose - text_node.content = text_node.content.gsub(matches[1], field_value).gsub("{{", "").gsub("}}", "") - elsif verbose - puts "...found #{field_name} but no replacement value" - end + # Collect all the nodes that contain merge fields + def tag_nodes + documents.collect do |name, document| + tag_nodes_for document + end.flatten + end + + # go through all w:t (Word Text???) nodes of the document + # find any nodes that contain "{{" + # then find the ancestor node that also includes the ending "}}" + # This collection of nodes contains all the merge fields for this document + def tag_nodes_for document + (document / "//w:t").collect do |node| + (node.children.any? { |child| child.content.include? "{{" }) ? enclosing_node_for_start_tag(node) : nil + end.compact + end + + # Extract the merge field name from the node + def field_name_from(tag_node) + return nil unless (matches = tag_node.content.match(/{{\s*(\S+)\s*}}/)) + field_name = matches[1].strip + puts "...field #{field_name} found in #{name}" if verbose + field_name.to_s + end + + # Go through the given document, replacing any merge fields with the values provided + # and storing the results in a new document + def substitute document, document_name:, mapping: {} + tag_nodes_for(document).each do |tag_node| + field_name = field_name_from tag_node + next unless mapping.has_key? field_name + field_value = mapping[field_name] + puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose + tag_node.content = tag_node.content.gsub(field_name, field_value).gsub(/{{\s*/, "").gsub(/\s*}}/, "") + rescue => ex + # Have to mangle the exception message otherwise it outputs the entire document + puts ex.message.to_s[0..255] end document end - def close - zip.close + # Find the ancestor node that contains both the start {{ text and the end }} text enclosing the merge field + def enclosing_node_for_start_tag(node) + return node if node.content.include? "}}" + node.parent.nil? ? nil : enclosing_node_for_start_tag(node.parent) end end end end