cleanup.rb in asciidoctor-iso-0.7.0

- old
+ new

@@ -13,73 +13,82 @@
         text.gsub(/\s+<fn /, "<fn ")
       end
 
       # Runs every cleanup pass below on xmldoc, in the order listed, and
       # returns xmldoc itself as the last expression.
       def cleanup(xmldoc)
         sections_cleanup(xmldoc)
+        obligations_cleanup(xmldoc)
         termdef_cleanup(xmldoc)
-        isotitle_cleanup(xmldoc)
         table_cleanup(xmldoc)
         formula_cleanup(xmldoc)
         figure_cleanup(xmldoc)
         ref_cleanup(xmldoc)
         note_cleanup(xmldoc)
         normref_cleanup(xmldoc)
         reference_names(xmldoc)
         xref_cleanup(xmldoc)
+        bpart_cleanup(xmldoc)
         quotesource_cleanup(xmldoc)
         para_cleanup(xmldoc)
         callout_cleanup(xmldoc)
         origin_cleanup(xmldoc)
         element_name_cleanup(xmldoc)
         footnote_renumber(xmldoc)
+        empty_element_cleanup(xmldoc)
+        bookmark_cleanup(xmldoc)
         xmldoc
       end
 
+      TEXT_ELEMS =
         # element names that empty_element_cleanup deletes when childless
+        %w{status language script version author name callout phone
+           email street city state country postcode identifier referenceFrom
+           referenceTo docidentifier prefix initial addition surname forename
+           title draft secretariat title-main title-intro title-part}.freeze
+
+      def empty_element_cleanup(xmldoc)
         # Builds one XPath union ("//status | //language | ...") from
         # TEXT_ELEMS and removes each matched element that has no child
         # nodes at all.
+        xmldoc.xpath("//" + TEXT_ELEMS.join(" | //")).each do |x|
+          x.remove if x.children.empty?
+        end
+      end
+
       # Visits every node yielded by xmldoc.traverse and reassigns its name
       # with each underscore replaced by a hyphen.
       def element_name_cleanup(xmldoc)
         xmldoc.traverse do |node|
           node.name = node.name.tr("_", "-")
         end
       end
 
-      def callout_cleanup(xmldoc)
+      def link_callouts_to_annotations(callouts, annotations)
         # Pairs callouts[i] with annotations[i]: the callout's "target" is set
         # to "_" followed by a freshly generated random UUID, and the
         # annotation's "id" is set to that same value.
         # NOTE(review): String#+ needs its argument to convert implicitly to
         # String; confirm UUIDTools::UUID supports that.
         # annotations[i] is nil if annotations is shorter than callouts; the
         # caller below only calls this when both sizes are equal.
+        callouts.each_with_index do |c, i|
+          c["target"] = "_" + UUIDTools::UUID.random_create
+          annotations[i]["id"] = c["target"]
+        end
+      end
+
+      def align_callouts_to_annotations(xmldoc)
         # For each sourcecode element, picks out its child elements named
         # "callout" and "annotation". When the two counts match they are
         # linked pairwise; otherwise a warning prefixed with the sourcecode's
         # id is emitted and nothing is linked.
         xmldoc.xpath("//sourcecode").each do |x|
           callouts = x.elements.select { |e| e.name == "callout" }
           annotations = x.elements.select { |e| e.name == "annotation" }
           if callouts.size == annotations.size
-            callouts.each_with_index do |c, i|
-              c["target"] = UUIDTools::UUID.random_create
-              annotations[i]["id"] = c["id"]
-            end
+            link_callouts_to_annotations(callouts, annotations)
           else
-            warn "#{x["id"]}: mismatch of callouts and annotations"
+            warn "#{x['id']}: mismatch of callouts and annotations"
           end
         end
       end
 
-      LOCALITY_REGEX_STR = <<~REGEXP # whitespace is stripped before compiling
-        ^((?<locality>section|clause|part|paragraph|chapter|page)\\s+
-               (?<ref>\\S+?)|(?<locality>whole))[,:]?\\s*
-         (?<text>.*)$
-      REGEXP
-      LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""),
-                               Regexp::IGNORECASE|Regexp::MULTILINE)
-
-      def termdef_warn(text, re, term, msg)
-        re.match? text and warn "ISO style: #{term}: #{msg}"
+      def merge_annotations_into_sourcecode(xmldoc)
         # For each sourcecode element: while the element immediately after it
         # is named "annotation", that element's parent is set to the
         # sourcecode, so a run of trailing annotations ends up inside it.
+        xmldoc.xpath("//sourcecode").each do |x|
+          while x&.next_element&.name == "annotation"
+            x.next_element.parent = x
+          end
+        end
       end
 
-      def termdef_style(xmldoc)
-        xmldoc.xpath("//term").each do |t|
-          para = t.at("./p") or return
-          term = t.at("preferred").text
-          termdef_warn(para.text, /^(the|a)\b/i, term,
-                       "term definition starts with article")
-          termdef_warn(para.text, /\.$/i, term,
-                       "term definition ends with period")
-        end
+      def callout_cleanup(xmldoc)
         # Two passes, in this order: first pull annotations that follow a
         # sourcecode element inside it, then link each sourcecode's callouts
         # to its annotations.
+        merge_annotations_into_sourcecode(xmldoc)
+        align_callouts_to_annotations(xmldoc)
       end
 
       def termdef_stem_cleanup(xmldoc)
-        xmldoc.xpath("//termdef/p/stem").each do |a|
+        xmldoc.xpath("//term/p/stem").each do |a|
           if a.parent.elements.size == 1
             # para containing just a stem expression
             t = Nokogiri::XML::Element.new("admitted", xmldoc)
             parent = a.parent
             t.children = a.remove
@@ -95,11 +104,11 @@
         end
       end
 
       def termdefinition_cleanup(xmldoc)
         xmldoc.xpath("//term").each do |d|
-          first_child = d.at("./p | ./figure | ./formula") or return
+          first_child = d.at("./p | ./figure | ./formula") || return
           t = Nokogiri::XML::Element.new("definition", xmldoc)
           first_child.replace(t)
           t << first_child.remove
           d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove }
         end
@@ -112,31 +121,68 @@
           nodes[0].parent.replace(nodes[0].parent.children)
           nodes = xmldoc.xpath("//p/admitted | //p/deprecates")
         end
       end
 
+      def termdef_boilerplate_cleanup(xmldoc)
         # Removes every p and ul element that is a direct child of a terms
         # element.
+        xmldoc.xpath("//terms/p | //terms/ul").each do |a|
+          a.remove
+        end
+      end
+
       # Runs the term-related cleanup passes on xmldoc in the order listed.
       def termdef_cleanup(xmldoc)
         termdef_unnest_cleanup(xmldoc)
         termdef_stem_cleanup(xmldoc)
         termdomain_cleanup(xmldoc)
         termdefinition_cleanup(xmldoc)
-        termdef_style(xmldoc)
+        termdef_boilerplate_cleanup(xmldoc)
       end
 
-      ELEMS_ALLOW_NOTES = 
-        %w[p formula quote sourcecode example admonition ul ol dl figure]
+      ELEMS_ALLOW_NOTES =
         # element names that note_cleanup is allowed to move a note into
+        %w[p formula quote sourcecode example admonition ul ol dl figure].freeze
 
       # if a note is at the end of a section, it is left alone
-      # if a note is followed by a non-note block, 
+      # if a note is followed by a non-note block,
       # it is moved inside its preceding block
       def note_cleanup(xmldoc)
         # The XPath selects each note that has at least one later sibling
         # element not named "note" (any later sibling, not only the next one).
         q = "//note[following-sibling::*[not(local-name() = 'note')]]"
         xmldoc.xpath(q).each do |n|
           # notes with a table ancestor are left where they are
           next unless n.ancestors("table").empty?
           # a note with no preceding sibling element is skipped
-          prev = n.previous_element or next
+          prev = n.previous_element || next
           # re-parent only when the preceding element may contain notes
           n.parent = prev if ELEMS_ALLOW_NOTES.include? prev.name
         end
       end
 
+      def empty_text_before_first_element(x)
         # Walks x's children in order. Returns false at the first text node
         # that contains a non-whitespace character, true at the first element
         # child, and true when the loop ends without hitting either.
+        x.children.each do |c|
+          if c.text?
+            return false if /\S/.match?(c.text)
+          end
+          return true if c.element?
+        end
+        true
+      end
+
+      def strip_initial_space(x)
         # Acts only when x's first child is a text node: a whitespace-only
         # node is removed outright; otherwise one space is deleted from the
         # start of each line of its text (/^ / matches at every line start).
         # NOTE(review): x.children[0] is nil when x has no children, so
         # .text? would raise NoMethodError. bookmark_cleanup calls this right
         # after removing the leading bookmark, so a p that held only the
         # bookmark looks like it would hit this; confirm and guard if so.
+        if x.children[0].text?
+          if !/\S/.match?(x.children[0].text)
+            x.children[0].remove
+          else
+            x.children[0].content =  x.children[0].text.gsub(/^ /, "")
+          end
+        end
+      end
+
+      def bookmark_cleanup(xmldoc)
         # For each li that has a bookmark descendant: when the li's first
         # child element is a p whose first child element is a bookmark, and
         # no non-blank text precedes that bookmark inside the p, the bookmark
         # is removed, its "id" is copied onto the li, and the p's leading
         # space is stripped.
+        xmldoc.xpath("//li[descendant::bookmark]").each do |x|
+          if x&.elements[0]&.name == "p" &&
+              x&.elements[0]&.elements[0]&.name == "bookmark"
+            if empty_text_before_first_element(x.elements[0])
+              x["id"] = (x.elements[0].elements[0].remove)["id"]
+              strip_initial_space(x.elements[0])
+            end
+          end
+        end
+      end
     end
   end
 end