lib/asciidoctor/iso/cleanup.rb in asciidoctor-iso-0.6.1 vs lib/asciidoctor/iso/cleanup.rb in asciidoctor-iso-0.7.0

- old
+ new

@@ -13,73 +13,82 @@ text.gsub(/\s+<fn /, "<fn ") end def cleanup(xmldoc) sections_cleanup(xmldoc) + obligations_cleanup(xmldoc) termdef_cleanup(xmldoc) - isotitle_cleanup(xmldoc) table_cleanup(xmldoc) formula_cleanup(xmldoc) figure_cleanup(xmldoc) ref_cleanup(xmldoc) note_cleanup(xmldoc) normref_cleanup(xmldoc) reference_names(xmldoc) xref_cleanup(xmldoc) + bpart_cleanup(xmldoc) quotesource_cleanup(xmldoc) para_cleanup(xmldoc) callout_cleanup(xmldoc) origin_cleanup(xmldoc) element_name_cleanup(xmldoc) footnote_renumber(xmldoc) + empty_element_cleanup(xmldoc) + bookmark_cleanup(xmldoc) xmldoc end + TEXT_ELEMS = + %w{status language script version author name callout phone + email street city state country postcode identifier referenceFrom + referenceTo docidentifier prefix initial addition surname forename + title draft secretariat title-main title-intro title-part}.freeze + + def empty_element_cleanup(xmldoc) + xmldoc.xpath("//" + TEXT_ELEMS.join(" | //")).each do |x| + x.remove if x.children.empty? + end + end + def element_name_cleanup(xmldoc) xmldoc.traverse { |n| n.name = n.name.gsub(/_/, "-") } end - def callout_cleanup(xmldoc) + def link_callouts_to_annotations(callouts, annotations) + callouts.each_with_index do |c, i| + c["target"] = "_" + UUIDTools::UUID.random_create + annotations[i]["id"] = c["target"] + end + end + + def align_callouts_to_annotations(xmldoc) xmldoc.xpath("//sourcecode").each do |x| callouts = x.elements.select { |e| e.name == "callout" } annotations = x.elements.select { |e| e.name == "annotation" } if callouts.size == annotations.size - callouts.each_with_index do |c, i| - c["target"] = UUIDTools::UUID.random_create - annotations[i]["id"] = c["id"] - end + link_callouts_to_annotations(callouts, annotations) else - warn "#{x["id"]}: mismatch of callouts and annotations" + warn "#{x['id']}: mismatch of callouts and annotations" end end end - LOCALITY_REGEX_STR = <<~REGEXP - ^((?<locality>section|clause|part|paragraph|chapter|page)\\s+ - (?<ref>\\S+?)|(?<locality>whole))[,:]?\\s* - (?<text>.*)$ - REGEXP - LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""), - Regexp::IGNORECASE|Regexp::MULTILINE) - - def termdef_warn(text, re, term, msg) - re.match? text and warn "ISO style: #{term}: #{msg}" + def merge_annotations_into_sourcecode(xmldoc) + xmldoc.xpath("//sourcecode").each do |x| + while x&.next_element&.name == "annotation" + x.next_element.parent = x + end + end end - def termdef_style(xmldoc) - xmldoc.xpath("//term").each do |t| - para = t.at("./p") or return - term = t.at("preferred").text - termdef_warn(para.text, /^(the|a)\b/i, term, - "term definition starts with article") - termdef_warn(para.text, /\.$/i, term, - "term definition ends with period") - end + def callout_cleanup(xmldoc) + merge_annotations_into_sourcecode(xmldoc) + align_callouts_to_annotations(xmldoc) end def termdef_stem_cleanup(xmldoc) - xmldoc.xpath("//termdef/p/stem").each do |a| + xmldoc.xpath("//term/p/stem").each do |a| if a.parent.elements.size == 1 # para containing just a stem expression t = Nokogiri::XML::Element.new("admitted", xmldoc) parent = a.parent t.children = a.remove @@ -95,11 +104,11 @@ end end def termdefinition_cleanup(xmldoc) xmldoc.xpath("//term").each do |d| - first_child = d.at("./p | ./figure | ./formula") or return + first_child = d.at("./p | ./figure | ./formula") || return t = Nokogiri::XML::Element.new("definition", xmldoc) first_child.replace(t) t << first_child.remove d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove } end @@ -112,31 +121,68 @@ nodes[0].parent.replace(nodes[0].parent.children) nodes = xmldoc.xpath("//p/admitted | //p/deprecates") end end + def termdef_boilerplate_cleanup(xmldoc) + xmldoc.xpath("//terms/p | //terms/ul").each do |a| + a.remove + end + end + def termdef_cleanup(xmldoc) termdef_unnest_cleanup(xmldoc) termdef_stem_cleanup(xmldoc) termdomain_cleanup(xmldoc) termdefinition_cleanup(xmldoc) - termdef_style(xmldoc) + termdef_boilerplate_cleanup(xmldoc) end - ELEMS_ALLOW_NOTES = - %w[p formula quote sourcecode example admonition ul ol dl figure] + ELEMS_ALLOW_NOTES = + %w[p formula quote sourcecode example admonition ul ol dl figure].freeze # if a note is at the end of a section, it is left alone - # if a note is followed by a non-note block, + # if a note is followed by a non-note block, # it is moved inside its preceding block def note_cleanup(xmldoc) q = "//note[following-sibling::*[not(local-name() = 'note')]]" xmldoc.xpath(q).each do |n| next unless n.ancestors("table").empty? - prev = n.previous_element or next + prev = n.previous_element || next n.parent = prev if ELEMS_ALLOW_NOTES.include? prev.name end end + def empty_text_before_first_element(x) + x.children.each do |c| + if c.text? + return false if /\S/.match?(c.text) + end + return true if c.element? + end + true + end + + def strip_initial_space(x) + if x.children[0].text? + if !/\S/.match?(x.children[0].text) + x.children[0].remove + else + x.children[0].content = x.children[0].text.gsub(/^ /, "") + end + end + end + + def bookmark_cleanup(xmldoc) + xmldoc.xpath("//li[descendant::bookmark]").each do |x| + if x&.elements[0]&.name == "p" && + x&.elements[0]&.elements[0]&.name == "bookmark" + if empty_text_before_first_element(x.elements[0]) + x["id"] = (x.elements[0].elements[0].remove)["id"] + strip_initial_space(x.elements[0]) + end + end + end + end end end end