lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.1.2 vs lib/consolidate/docx/merge.rb in standard-procedure-consolidate-0.1.3
- old
+ new
@@ -4,16 +4,18 @@
require "nokogiri"
module Consolidate
module Docx
class Merge
- def self.open(path, force_settings: true, &block)
- new(path, force_settings: force_settings, &block)
+ def self.open(path, verbose: false, &block)
+ new(path, verbose: verbose, &block)
+ path
end
def examine
- extract_field_names
+ puts "Documents: #{extract_document_names}"
+ puts "Merge fields: #{extract_field_names}"
end
def data fields = {}
fields = fields.transform_keys(&:to_s)
@@ -36,50 +38,60 @@
end
end
protected
+ attr_reader :verbose
attr_reader :zip
attr_reader :xml
attr_reader :documents
attr_accessor :output
- def initialize(path, force_settings: true, &block)
+ EXCLUSIONS = %w{_rels/.rels [Content_Types].xml word/_rels/document.xml.rels word/theme/theme1.xml word/settings.xml word/_rels/settings.xml.rels word/styles.xml word/webSettings.xml word/fontTable.xml docProps/core.xml docProps/app.xml}
+
+ def initialize(path, verbose: false, &block)
raise "No block given" unless block
+ @verbose = verbose
@output = {}
@documents = {}
- set_standard_settings if force_settings
begin
@zip = Zip::File.open(path)
- ["word/document.xml", "word/header1.xml", "word/footer1.xml"].each do |document|
- next unless @zip.find_entry(document)
- xml = @zip.read document
- @documents[document] = Nokogiri::XML(xml) { |x| x.noent }
- yield self
+ @zip.entries.each do |entry|
+ next if EXCLUSIONS.include? entry.name
+ puts "Reading #{entry.name}" if verbose
+ xml = @zip.get_input_stream entry
+ @documents[entry.name] = Nokogiri::XML(xml) { |x| x.noent }
end
+ yield self
ensure
@zip.close
end
end
+ def extract_document_names
+ @zip.entries.collect { |entry| entry.name }.join(", ")
+ end
+
def extract_field_names
- (extract_style_one + extract_style_two).uniq
+ (extract_style_one + extract_style_two).uniq.join(", ")
end
def extract_style_one
documents.collect do |name, document|
(document / "//w:fldSimple").collect do |field|
value = field.attributes["instr"].value.strip
+ puts "...found #{value} (v1) in #{name}" if verbose
value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil
end.compact
end.flatten
end
def extract_style_two
documents.collect do |name, document|
(document / "//w:instrText").collect do |instr|
value = instr.inner_text
+ puts "...found #{value} (v2) in #{name}" if verbose
value.include?("MERGEFIELD") ? value.gsub("MERGEFIELD", "").strip : nil
end.compact
end.flatten
end
@@ -87,10 +99,11 @@
# Word's first way of doing things
(document / "//w:fldSimple").each do |field|
if field.attributes["instr"].value =~ /MERGEFIELD (\S+)/
text_node = (field / ".//w:t").first
next unless text_node
+ puts "...substituting v1 #{field.attributes["instr"]} with #{fields[$1]}" if verbose
text_node.inner_html = fields[$1].to_s
end
end
document
end
@@ -100,18 +113,14 @@
(document / "//w:instrText").each do |instr|
if instr.inner_text =~ /MERGEFIELD (\S+)/
text_node = instr.parent.next_sibling.next_sibling.xpath(".//w:t").first
text_node ||= instr.parent.next_sibling.next_sibling.next_sibling.xpath(".//w:t").first
next unless text_node
+ puts "...substituting v2 #{instr.inner_text} with #{fields[$1]}" if verbose
text_node.inner_html = fields[$1].to_s
end
end
document
- end
-
- def set_standard_settings
- output["word/settings.xml"] = %(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<w:settings xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main"><w:zoom w:percent="100"/></w:settings>)
end
def close
zip.close
end