lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.1.2 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.1.3

- old
+ new

@@ -4,16 +4,18 @@ require "nokogiri" module Consolidate module Docx class Merge - def self.open(path, force_settings: true, &block) - new(path, force_settings: force_settings, &block) + def self.open(path, verbose: false, &block) + new(path, verbose: verbose, &block) + path end def examine - extract_field_names + puts "Documents: #{extract_document_names}" + puts "Merge fields: #{extract_field_names}" end def data fields = {} fields = fields.transform_keys(&:to_s) @@ -36,50 +38,60 @@ end end protected + attr_reader :verbose attr_reader :zip attr_reader :xml attr_reader :documents attr_accessor :output - def initialize(path, force_settings: true, &block) + EXCLUSIONS = %w{_rels/.rels [Content_Types].xml word/_rels/document.xml.rels word/theme/theme1.xml word/settings.xml word/_rels/settings.xml.rels word/styles.xml word/webSettings.xml word/fontTable.xml docProps/core.xml docProps/app.xml} + + def initialize(path, verbose: false, &block) raise "No block given" unless block + @verbose = verbose @output = {} @documents = {} - set_standard_settings if force_settings begin @zip = Zip::File.open(path) - ["word/document.xml", "word/header1.xml", "word/footer1.xml"].each do |document| - next unless @zip.find_entry(document) - xml = @zip.read document - @documents[document] = Nokogiri::XML(xml) { |x| x.noent } - yield self + @zip.entries.each do |entry| + next if EXCLUSIONS.include? entry.name + puts "Reading #{entry.name}" if verbose + xml = @zip.get_input_stream entry + @documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent } end + yield self ensure @zip.close end end + def extract_document_names + @zip.entries.collect { |entry| entry.name }.join(", ") + end + def extract_field_names - (extract_style_one + extract_style_two).uniq + (extract_style_one + extract_style_two).uniq.join(", ") end def extract_style_one documents.collect do |name, document| (document / "//w:fldSimple").collect do |field| value = field.attributes["instr"].value.strip + puts "...found #{value} (v1) in #{name}" if verbose value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil end.compact end.flatten end def extract_style_two documents.collect do |name, document| (document / "//w:instrText").collect do |instr| value = instr.inner_text + puts "...found #{value} (v2) in #{name}" if verbose value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil end.compact end.flatten end @@ -87,10 +99,11 @@ # Word's first way of doing things (document / "//w:fldSimple").each do |field| if field.attributes["instr"].value =~ /MERGEFIELD (\S+)/ text_node = (field / ".//w:t").first next unless text_node + puts "...substituting v1 #{field.attributes["instr"]} with #{fields[$1]}" if verbose text_node.inner_html = fields[$1].to_s end end document end @@ -100,18 +113,14 @@ (document / "//w:instrText").each do |instr| if instr.inner_text =~ /MERGEFIELD (\S+)/ text_node = instr.parent.next_sibling.next_sibling.xpath(".//w:t").first text_node ||= instr.parent.next_sibling.next_sibling.next_sibling.xpath(".//w:t").first next unless text_node + puts "...substituting v2 #{instr.inner_text} with #{fields[$1]}" if verbose text_node.inner_html = fields[$1].to_s end end document - end - - def set_standard_settings - output["word/settings.xml"] = %(<?xml version="1.0" encoding="UTF-8" standalone="yes"?> -<w:settings xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main"><w:zoom w:percent="100"/></w:settings>) end def close zip.close end