lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.3.0 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.3.1

- old
+ new

@@ -28,12 +28,12 @@ end # Read all documents within the docx and extract any merge fields def field_names tag_nodes.collect do |tag_node| - field_name_from tag_node - end.compact.uniq + field_names_from tag_node + end.flatten.compact.uniq end # List the documents stored within this docx def document_names @zip.entries.collect { |entry| entry.name } @@ -43,11 +43,11 @@ def data mapping = {} mapping = mapping.transform_keys(&:to_s) if verbose puts "...substitutions..." - fields.each do |key, value| + mapping.each do |key, value| puts " #{key} => #{value}" end end @documents.each do |name, document| @@ -74,10 +74,11 @@ attr_reader :verbose attr_reader :zip attr_reader :xml attr_reader :documents attr_accessor :output + TAG = /\{\{\s*(\S+)\s*\}\}/ def load_documents @zip.entries.each_with_object({}) do |entry, documents| next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/) puts "...reading #{entry.name}" if verbose @@ -98,42 +99,54 @@ # go through all w:t (Word Text???) nodes of the document # find any nodes that contain "{{" # then find the ancestor node that also includes the ending "}}" # This collection of nodes contains all the merge fields for this document def tag_nodes_for document - (document / "//w:t").collect do |node| - (node.children.any? { |child| child.content.include? "{{" }) ? enclosing_node_for_start_tag(node) : nil - end.compact + (document / "//w:p").select do |paragraph| + paragraph.content.match(TAG) + end end # Extract the merge field name from the node - def field_name_from(tag_node) - return nil unless (matches = tag_node.content.match(/{{\s*(\S+)\s*}}/)) - field_name = matches[1].strip - puts "...field #{field_name} found in #{name}" if verbose - field_name.to_s + def field_names_from(tag_node) + matches = tag_node.content.scan(TAG) + matches.empty? ? nil : matches.flatten.map(&:strip) end # Go through the given document, replacing any merge fields with the values provided # and storing the results in a new document def substitute document, document_name:, mapping: {} tag_nodes_for(document).each do |tag_node| - field_name = field_name_from tag_node - next unless mapping.has_key? field_name - field_value = mapping[field_name] - puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose - tag_node.content = tag_node.content.gsub(field_name, field_value).gsub(/{{\s*/, "").gsub(/\s*}}/, "") + field_names = field_names_from tag_node + puts "Original Node for #{field_names} is #{tag_node}" if verbose + + # Extract the paragraph properties node if it exists + paragraph_properties = tag_node.search ".//w:pPr" + run_properties = tag_node.at_xpath ".//w:rPr" + + text = tag_node.content + field_names.each do |field_name| + field_value = mapping[field_name].to_s + puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose + text = text.gsub(/{{\s*#{field_name}\s*}}/, field_value) + end + + # Create a new text node with the substituted text + text_node = Nokogiri::XML::Node.new("w:t", tag_node.document) + text_node.content = text + + # Create a new run node to hold the substituted text and the paragraph properties + run_node = Nokogiri::XML::Node.new("w:r", tag_node.document) + run_node << run_properties if run_properties + run_node << text_node + tag_node.children = Nokogiri::XML::NodeSet.new(document, paragraph_properties.to_a + [run_node]) + + puts "TAG NODE FOR #{field_names} IS #{tag_node}" if verbose rescue => ex # Have to mangle the exception message otherwise it outputs the entire document puts ex.message.to_s[0..255] end document - end - - # Find the ancestor node that contains both the start {{ text and the end }} text enclosing the merge field - def enclosing_node_for_start_tag(node) - return node if node.content.include? "}}" - node.parent.nil? ? nil : enclosing_node_for_start_tag(node.parent) end end end end