require "date"
require "nokogiri"
require "htmlentities"
require "json"
require "pathname"
require "open-uri"
require "pp"

module Asciidoctor
  module ISO
    # Helper methods mixed into the ISO converter: handler dispatch,
    # post-processing ("cleanup") of the generated XML tree, XML fragment
    # building, and diagnostics helpers for Asciidoctor nodes.
    module Utils
      # Dispatch +node+ to the handler method named +transform+.
      #
      # @param node the node to convert; must respond to +node_name+ when
      #   +transform+ is not given
      # @param transform [String, Symbol, nil] handler method name;
      #   defaults to +node.node_name+
      # @param opts [Hash] forwarded to the handler only when non-empty
      # @return whatever the handler returns
      def convert(node, transform = nil, opts = {})
        transform ||= node.node_name
        opts.empty? ? send(transform, node) : send(transform, node, opts)
      end

      # Namespace attributes for the document root. Currently none: the
      # namespace declaration below is deliberately left disabled.
      #
      # @return [nil]
      def document_ns_attributes(_doc)
        # ' xmlns="http://riboseinc.com/isoxml"'
        nil
      end

      # Run every cleanup pass over +xmldoc+, in a fixed order.
      #
      # @param xmldoc the parsed XML document (mutated in place)
      # @return the value of the last pass (+ref_cleanup+), i.e. +xmldoc+
      def cleanup(xmldoc)
        intro_cleanup(xmldoc)
        termdef_cleanup(xmldoc)
        isotitle_cleanup(xmldoc)
        tablenote_cleanup(xmldoc)
        formula_cleanup(xmldoc)
        figure_cleanup(xmldoc)
        back_cleanup(xmldoc)
        ref_cleanup(xmldoc)
      end

      # Move the first <foreword> and then the first <introduction> to the
      # end of the first <front> element. Does nothing when <front> is absent.
      def intro_cleanup(xmldoc)
        front = xmldoc.at("//front")
        return if front.nil?

        # Order matters: foreword is appended before introduction.
        [xmldoc.at("//foreword"), xmldoc.at("//introduction")].each do |section|
          next if section.nil?
          section.remove
          front << section
        end
      end

      # Normalise the markup of term definitions.
      def termdef_cleanup(xmldoc)
        # release termdef tags from surrounding paras
        wrapped = "//p/admitted_term | //p/termsymbol | //p/deprecated_term"
        # Re-query after every replacement: unwrapping one paragraph changes
        # which nodes still match.
        while (hit = xmldoc.at(wrapped))
          para = hit.parent
          para.replace(para.children)
        end

        xmldoc.xpath("//termdef/p/stem").each do |stem|
          para = stem.parent
          # only a para containing just a stem expression is converted
          next unless para.elements.size == 1
          symbol = Nokogiri::XML::Element.new("termsymbol", xmldoc)
          stem.remove
          symbol.children = stem
          para.replace(symbol)
        end

        xmldoc.xpath("//p/termdomain").each do |domain|
          para = domain.parent
          domain.remove
          # Insert directly before the paragraph. This is the same position
          # as "after the paragraph's previous sibling", but does not fail
          # when the paragraph has no previous sibling.
          para.previous = domain
        end
      end

      # Remove italicised ISO titles: when an <isotitle> holds exactly one
      # element and that element is <em>, replace the title's children with
      # the children of that <em>.
      def isotitle_cleanup(xmldoc)
        xmldoc.xpath("//isotitle").each do |title|
          inner = title.elements
          next unless inner.size == 1 && inner[0].name == "em"
          title.children = inner[0].children
        end
      end

      # Move notes found in table footer cells to the end of the element
      # four levels up from the note (cell -> tr -> tfoot -> its parent).
      def tablenote_cleanup(xmldoc)
        xmldoc.xpath("//tfoot/tr/td/note | //tfoot/tr/th/note").each do |note|
          target = note.parent.parent.parent.parent
          note.remove
          target << note
        end
      end

      # Include the "where" definition list inside the formula: a <formula>
      # immediately followed by a <p> whose content is exactly "where" and
      # then a <dl> absorbs the <dl>; the "where" paragraph is dropped.
      def formula_cleanup(xmldoc)
        xmldoc.xpath("//formula").each do |formula|
          label = formula.next_element
          next unless label && label.name == "p" && label.content == "where"
          dl = label.next_element
          next unless dl && dl.name == "dl"
          dl.remove
          label.remove
          formula << dl
        end
      end

      # Include the key definition list inside the figure, then turn
      # examples that directly contain figures into figures themselves.
      def figure_cleanup(xmldoc)
        xmldoc.xpath("//figure").each do |figure|
          label = figure.next_element
          next unless label && label.name == "p" &&
              label.content =~ /^\s*Key\s*$/m
          dl = label.next_element
          next unless dl && dl.name == "dl"
          dl.remove
          label.remove
          figure << dl
        end

        # Every <example> with a direct <figure> child is renamed to
        # <figure>, making its figures subfigures. Re-query each time:
        # a rename can make an enclosing <example> match as well.
        while (subfigure = xmldoc.at("//example/figure"))
          subfigure.parent.name = "figure"
        end
      end

      # Move annexes, then bibliographies, into a new <back> element that
      # is appended to the document root. No <back> is created when the
      # document has neither.
      def back_cleanup(xmldoc)
        return if xmldoc.xpath("//annex | //bibliography").empty?

        back = Nokogiri::XML::Element.new("back", xmldoc)
        xmldoc.root << back
        %w[//annex //bibliography].each do |path|
          xmldoc.xpath(path).each do |section|
            section.remove
            back << section
          end
        end
      end

      # Move each <ref> that is a direct child of a <p> to just before
      # that paragraph.
      #
      # @return xmldoc
      def ref_cleanup(xmldoc)
        xmldoc.xpath("//p/ref").each do |ref|
          para = ref.parent
          ref.remove
          para.previous = ref
        end
        xmldoc
      end

      # block for processing XML document fragments as XHTML,
      # to allow for HTMLentities
      #
      # Runs +block+ against a Nokogiri::XML::Builder attached to an empty
      # fragment, serialises the fragment with encoding "US-ASCII", and
      # returns its lines with each whitespace-run-plus-newline removed.
      #
      # @return [Array<String>]
      def noko(&block)
        # fragment = ::Nokogiri::XML::DocumentFragment.parse("")
        # fragment.doc.create_internal_subset("xml", nil, "xhtml.dtd")
        # NOTE(review): the heredoc body was destroyed by markup stripping in
        # the copy under review (only "head = < HERE" survived). The text
        # below is a reconstruction of a minimal XHTML skeleton; confirm it
        # against version control before merging.
        head = <<~HERE
          <!DOCTYPE html SYSTEM
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
          <html xmlns="http://www.w3.org/1999/xhtml">
          <head> <title></title> <meta charset="UTF-8" /> </head>
          <body> </body> </html>
        HERE
        doc = ::Nokogiri::XML.parse(head)
        fragment = doc.fragment("")
        ::Nokogiri::XML::Builder.with fragment, &block
        fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
          l.gsub(/\s*\n/, "")
        end
      end

      # Drop nil-valued attributes and decode HTML entities in String values.
      #
      # @param attributes [Hash]
      # @return [Hash] new hash; non-String values are passed through as is
      def attr_code(attributes)
        decoder = HTMLEntities.new # one decoder for all values
        attributes.reject { |_, val| val.nil? }.map do |k, v|
          [k, v.is_a?(String) ? decoder.decode(v) : v]
        end.to_h
      end

      # Describe where +node+ is, for diagnostics. Tries, in order: the
      # node's line number, the node's id, the title of the nearest
      # enclosing node whose context is :section.
      #
      # @return [String] "Line N", "ID x", "Section: title", or "??"
      def current_location(node)
        if node.respond_to?(:lineno)
          lineno = node.lineno
          # lineno need not be a String; only call empty? on values that
          # support it, so a numeric line number no longer raises
          # NoMethodError.
          blank = lineno.nil? ||
            (lineno.respond_to?(:empty?) && lineno.empty?)
          return "Line #{lineno}" unless blank
        end
        return "ID #{node.id}" if node.respond_to?(:id) && !node.id.nil?

        # Walk up the parents until a section is found, or until a node
        # reports a non-positive level, or there is no parent left.
        while !node.nil? &&
            (!node.respond_to?(:level) || node.level.positive?) &&
            node.context != :section
          node = node.parent
          if !node.nil? && node.context == :section
            return "Section: #{node.title}"
          end
        end
        "??"
      end

      # if node contains blocks, flatten them into a single line;
      # and extract only raw text
      #
      # @return [Array] non-empty entries only; the result for each child
      #   block is appended as a nested array
      def flatten_rawtext(node)
        result = []
        if node.respond_to?(:blocks) && node.blocks?
          node.blocks.each { |b| result << flatten_rawtext(b) }
        elsif node.respond_to?(:lines)
          verbatim = node.respond_to?(:context) &&
            (node.context == :literal || node.context == :listing)
          node.lines.each do |x|
            if verbatim
              # NOTE(review): this expression was garbled by markup stripping
              # in the copy under review ('x.gsub(//, ">")'). Reconstructed
              # as escaping of angle brackets; confirm against version
              # control.
              result << x.gsub(/</, "&lt;").gsub(/>/, "&gt;")
            else
              # strip not only HTML tags <tag>,
              # but also Asciidoc crossreferences <<xref>>
              result << x.gsub(/<[^>]*>+/, "")
            end
          end
        elsif node.respond_to?(:text)
          result << node.text.gsub(/<[^>]*>+/, "")
        else
          result << node.content.gsub(/<[^>]*>+/, "")
        end
        result.reject(&:empty?)
      end
    end
  end
end