lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.1.4 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.2.0

- old
+ new

@@ -10,27 +10,51 @@ new(path, verbose: verbose, &block) path end def examine - puts "Documents: #{extract_document_names}" - puts "Merge fields: #{extract_field_names}" + documents = document_names.join(", ") + fields = field_names.join(", ") + puts "Documents: #{documents}" + puts "Merge fields: #{fields}" end + def field_names + documents.collect do |name, document| + (document / "//w:t").collect do |text_node| + next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/)) + field_name = matches[1].strip + puts "...field #{field_name} found in #{name}" if verbose + field_name + end.compact + end.flatten + end + + def document_names + @zip.entries.collect { |entry| entry.name } + end + def data fields = {} fields = fields.transform_keys(&:to_s) + if verbose + puts "...substitutions..." + fields.each do |key, value| + puts " #{key} => #{value}" + end + end + @documents.each do |name, document| result = document.dup - result = substitute_style_one_with result, fields - result = substitute_style_two_with result, fields + result = substitute result, fields, name @output[name] = result.serialize save_with: 0 end end def write_to path + puts "...writing to #{path}" if verbose Zip::File.open(path, Zip::File::CREATE) do |out| zip.each do |entry| out.get_output_stream(entry.name) do |o| o.write(output[entry.name] || zip.read(entry.name)) end @@ -52,70 +76,30 @@ @output = {} @documents = {} begin @zip = Zip::File.open(path) @zip.entries.each do |entry| - next unless entry.name =~ /word\/(document|header|footer|footnotes|endnotes).?\.xml/ - puts "Reading #{entry.name}" if verbose + next unless entry.name.match?(/word\/(document|header|footer|footnotes|endnotes).?\.xml/) + puts "...reading #{entry.name}" if verbose xml = @zip.get_input_stream entry @documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent } end yield self ensure @zip.close end end - def extract_document_names - @zip.entries.collect { |entry| entry.name }.join(", ") - end - - def extract_field_names - (extract_style_one + extract_style_two).uniq.join(", ") - end - - def extract_style_one - documents.collect do |name, document| - (document / "//w:fldSimple").collect do |field| - value = field.attributes["instr"].value.strip - puts "...found #{value} (v1) in #{name}" if verbose - value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil - end.compact - end.flatten - end - - def extract_style_two - documents.collect do |name, document| - (document / "//w:instrText").collect do |instr| - value = instr.inner_text - puts "...found #{value} (v2) in #{name}" if verbose - value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil - end.compact - end.flatten - end - - def substitute_style_one_with document, fields - # Word's first way of doing things - (document / "//w:fldSimple").each do |field| - if field.attributes["instr"].value =~ /MERGEFIELD (\S+)/ - text_node = (field / ".//w:t").first - next unless text_node - puts "...substituting v1 #{field.attributes["instr"]} with #{fields[$1]}" if verbose - text_node.inner_html = fields[$1].to_s - end - end - document - end - - def substitute_style_two_with document, fields - # Word's second way of doing things - (document / "//w:instrText").each do |instr| - if instr.inner_text =~ /MERGEFIELD (\S+)/ - text_node = instr.parent.next_sibling.next_sibling.xpath(".//w:t").first - text_node ||= instr.parent.next_sibling.next_sibling.next_sibling.xpath(".//w:t").first - next unless text_node - puts "...substituting v2 #{instr.inner_text} with #{fields[$1]}" if verbose - text_node.inner_html = fields[$1].to_s + def substitute document, fields, document_name + (document / "//w:t").each do |text_node| + next unless (matches = text_node.content.match(/{{\s*(\S+)\s*}}/)) + field_name = matches[1].strip + if fields.has_key? field_name + field_value = fields[field_name] + puts "...substituting #{field_name} with #{field_value} in #{document_name}" if verbose + text_node.content = text_node.content.gsub(matches[1], field_value).gsub("{{", "").gsub("}}", "") + elsif verbose + puts "...found #{field_name} but no replacement value" end end document end