cleanup.rb in asciidoctor-iso-0.6.1

- old
+ new

@@ -1,31 +1,33 @@
 require "date"
 require "nokogiri"
-require "htmlentities"
-require "json"
 require "pathname"
 require "open-uri"
 require "pp"
 require_relative "./cleanup_block.rb"
+require_relative "./cleanup_ref.rb"
 
 module Asciidoctor
   module ISO
     module Cleanup
       def textcleanup(text)
         text.gsub(/\s+<fn /, "<fn ")
       end
 
       def cleanup(xmldoc)
+        sections_cleanup(xmldoc)
         termdef_cleanup(xmldoc)
         isotitle_cleanup(xmldoc)
         table_cleanup(xmldoc)
         formula_cleanup(xmldoc)
         figure_cleanup(xmldoc)
         ref_cleanup(xmldoc)
-        review_note_cleanup(xmldoc)
+        note_cleanup(xmldoc)
         normref_cleanup(xmldoc)
+        reference_names(xmldoc)
         xref_cleanup(xmldoc)
+        quotesource_cleanup(xmldoc)
         para_cleanup(xmldoc)
         callout_cleanup(xmldoc)
         origin_cleanup(xmldoc)
         element_name_cleanup(xmldoc)
         footnote_renumber(xmldoc)
@@ -49,41 +51,20 @@
             warn "#{x["id"]}: mismatch of callouts and annotations"
           end
         end
       end
 
-      def xref_cleanup(xmldoc)
-        reference_names(xmldoc)
-        xmldoc.xpath("//xref").each do |x|
-          #if InlineAnchor::is_refid? x["target"]
-          if is_refid? x["target"]
-            x.name = "eref"
-            x["bibitemid"] = x["target"]
-            x["citeas"] = @anchors[x["target"]][:xref]
-            x.delete("target")
-          else
-            x.delete("type")
-          end
-        end
-      end
+      # "<locality> <ref>[,:] <text>" | "whole[,:] <text>"; ref ends at [\s,:].
+      LOCALITY_REGEX_STR = <<~REGEXP.freeze
+        ^((?<locality>section|clause|part|paragraph|chapter|page)\\s+
+          (?<ref>[^\\s,:]+)|(?<locality>whole))[,:]?\\s*(?<text>.*)$
+      REGEXP
+      LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""),
+                               Regexp::IGNORECASE | Regexp::MULTILINE)
 
-      def origin_cleanup(xmldoc)
-        xmldoc.xpath("//origin").each do |x|
-          x["citeas"] = @anchors[x["bibitemid"]][:xref]
-          n = x.next_element
-          if !n.nil? && n.name == "isosection"
-            n.name = "locality"
-            n["type"] = "section"
-            n.parent = x
-          end
-        end
-      end
-
       def termdef_warn(text, re, term, msg)
-        if re.match? text
-          warn "ISO style: #{term}: #{msg}"
-        end
+        re.match? text and warn "ISO style: #{term}: #{msg}"
       end
 
       def termdef_style(xmldoc)
         xmldoc.xpath("//term").each do |t|
           para = t.at("./p") or return
@@ -118,13 +99,11 @@
         xmldoc.xpath("//term").each do |d|
           first_child = d.at("./p | ./figure | ./formula") or return
           t = Nokogiri::XML::Element.new("definition", xmldoc)
           first_child.replace(t)
           t << first_child.remove
-          d.xpath("./p | ./figure | ./formula").each do |n|
-            t << n.remove
-          end
+          d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove }
         end
       end
 
       def termdef_unnest_cleanup(xmldoc)
         # release termdef tags from surrounding paras
@@ -141,72 +120,21 @@
         termdomain_cleanup(xmldoc)
         termdefinition_cleanup(xmldoc)
         termdef_style(xmldoc)
       end
 
-      def isotitle_cleanup(xmldoc)
-        # Remove italicised ISO titles
-        xmldoc.xpath("//isotitle").each do |a|
-          if a.elements.size == 1 && a.elements[0].name == "em"
-            a.children = a.elements[0].children
-          end
-        end
-      end
+      # Blocks that note_cleanup may move a following note into.
+      ELEMS_ALLOW_NOTES = %w[p formula quote sourcecode example admonition ul ol dl figure].freeze
 
-      def dl_table_cleanup(xmldoc)
-        # move Key dl after table footer
-        q = "//table/following-sibling::*[1]"\
-          "[self::p and normalize-space() = 'Key']"
-        xmldoc.xpath(q).each do |s|
-          if !s.next_element.nil? && s.next_element.name == "dl"
-            s.previous_element << s.next_element.remove
-            s.remove
-          end
-        end
-      end
-
-      def ref_cleanup(xmldoc)
-        # move ref before p
-        xmldoc.xpath("//p/ref").each do |r|
-          parent = r.parent
-          parent.previous = r.remove
-        end
-        xmldoc
-      end
-
-      def review_note_cleanup(xmldoc)
-        xmldoc.xpath("//review").each do |n|
-          prev = n.previous_element
-          if !prev.nil? && prev.name == "p"
-            n.parent = prev
-          end
-        end
-      end
-
-      def normref_cleanup(xmldoc)
-        q = "//references[title = 'Normative References']"
-        r = xmldoc.at(q)
-        r.elements.each do |n|
-          unless ["title", "bibitem"].include? n.name
-            n.remove
-          end
-        end
-      end
-
-      def format_ref(ref, isopub)
-        return "ISO #{ref}" if isopub
-        return "[#{ref}]" if /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref) 
-        ref
-      end
-
-      def reference_names(xmldoc)
-        xmldoc.xpath("//bibitem").each do |ref|
-          isopub = ref.at(("./publisher/affiliation[name = 'ISO']"))
-          docid = ref.at(("./docidentifier"))
-          date = ref.at(("./publisherdate"))
-          reference = format_ref(docid.text, isopub)
-          reference += ": #{date.text}" if date && isopub
-          @anchors[ref["id"]] = { xref: reference }
+      # A note with a later non-note sibling is re-parented into the element
+      # just before it, provided that element is listed in ELEMS_ALLOW_NOTES.
+      # Notes with no such sibling, or anywhere inside a table, are left alone.
+      def note_cleanup(xmldoc)
+        movable = "//note[following-sibling::*[not(local-name() = 'note')]]"
+        xmldoc.xpath(movable).each do |note|
+          host = note.previous_element
+          next if host.nil? || !note.ancestors("table").empty?
+          note.parent = host if ELEMS_ALLOW_NOTES.include?(host.name)
         end
       end
 
     end
   end