lib/asciidoctor/iso/cleanup.rb in asciidoctor-iso-0.6.1 vs lib/asciidoctor/iso/cleanup.rb in asciidoctor-iso-0.7.0
- old
+ new
@@ -13,73 +13,82 @@
text.gsub(/\s+<fn /, "<fn ")
end
# Run each cleanup pass on xmldoc in the order listed, discarding the
# passes' return values, and return xmldoc.
def cleanup(xmldoc)
sections_cleanup(xmldoc)
+ obligations_cleanup(xmldoc)
termdef_cleanup(xmldoc)
- isotitle_cleanup(xmldoc)
table_cleanup(xmldoc)
formula_cleanup(xmldoc)
figure_cleanup(xmldoc)
ref_cleanup(xmldoc)
note_cleanup(xmldoc)
normref_cleanup(xmldoc)
reference_names(xmldoc)
xref_cleanup(xmldoc)
+ bpart_cleanup(xmldoc)
quotesource_cleanup(xmldoc)
para_cleanup(xmldoc)
callout_cleanup(xmldoc)
origin_cleanup(xmldoc)
element_name_cleanup(xmldoc)
footnote_renumber(xmldoc)
# NOTE(review): presumably run after element_name_cleanup so that the
# hyphenated names in TEXT_ELEMS match the renamed elements -- confirm
+ empty_element_cleanup(xmldoc)
+ bookmark_cleanup(xmldoc)
xmldoc
end
# Names of the elements that empty_element_cleanup removes when they have
# no child nodes.
+ TEXT_ELEMS =
+ %w{status language script version author name callout phone
+ email street city state country postcode identifier referenceFrom
+ referenceTo docidentifier prefix initial addition surname forename
+ title draft secretariat title-main title-intro title-part}.freeze
+
+ def empty_element_cleanup(xmldoc)
+ xmldoc.xpath("//" + TEXT_ELEMS.join(" | //")).each do |x|
+ x.remove if x.children.empty?
+ end
+ end
+
# Rename every node yielded by xmldoc.traverse so that each underscore in its
# name becomes a hyphen.
def element_name_cleanup(xmldoc)
  xmldoc.traverse do |node|
    node.name = node.name.gsub(/_/, "-")
  end
end
- def callout_cleanup(xmldoc)
+ def link_callouts_to_annotations(callouts, annotations)
+ callouts.each_with_index do |c, i|
+ c["target"] = "_" + UUIDTools::UUID.random_create
+ annotations[i]["id"] = c["target"]
+ end
+ end
+
# For each sourcecode element, collect its callout children and its
# annotation children. When there are equally many of each they are linked
# pairwise by position; otherwise a warning naming the sourcecode id is
# emitted and the element is left unlinked.
+ def align_callouts_to_annotations(xmldoc)
xmldoc.xpath("//sourcecode").each do |x|
callouts = x.elements.select { |e| e.name == "callout" }
annotations = x.elements.select { |e| e.name == "annotation" }
if callouts.size == annotations.size
- callouts.each_with_index do |c, i|
- c["target"] = UUIDTools::UUID.random_create
- annotations[i]["id"] = c["id"]
- end
+ link_callouts_to_annotations(callouts, annotations)
else
- warn "#{x["id"]}: mismatch of callouts and annotations"
+ warn "#{x['id']}: mismatch of callouts and annotations"
end
end
end
- LOCALITY_REGEX_STR = <<~REGEXP
- ^((?<locality>section|clause|part|paragraph|chapter|page)\\s+
- (?<ref>\\S+?)|(?<locality>whole))[,:]?\\s*
- (?<text>.*)$
- REGEXP
- LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""),
- Regexp::IGNORECASE|Regexp::MULTILINE)
-
- def termdef_warn(text, re, term, msg)
- re.match? text and warn "ISO style: #{term}: #{msg}"
# For each sourcecode element, reparent the annotation elements that
# directly follow it so that they become children of the sourcecode element.
+ def merge_annotations_into_sourcecode(xmldoc)
+ xmldoc.xpath("//sourcecode").each do |x|
# each pass reparents the element currently following x; the loop stops as
# soon as the next element is missing or is not an annotation
+ while x&.next_element&.name == "annotation"
+ x.next_element.parent = x
+ end
+ end
end
- def termdef_style(xmldoc)
- xmldoc.xpath("//term").each do |t|
- para = t.at("./p") or return
- term = t.at("preferred").text
- termdef_warn(para.text, /^(the|a)\b/i, term,
- "term definition starts with article")
- termdef_warn(para.text, /\.$/i, term,
- "term definition ends with period")
- end
# Run merge_annotations_into_sourcecode and then
# align_callouts_to_annotations on xmldoc.
+ def callout_cleanup(xmldoc)
+ merge_annotations_into_sourcecode(xmldoc)
+ align_callouts_to_annotations(xmldoc)
end
def termdef_stem_cleanup(xmldoc)
- xmldoc.xpath("//termdef/p/stem").each do |a|
+ xmldoc.xpath("//term/p/stem").each do |a|
if a.parent.elements.size == 1
# para containing just a stem expression
t = Nokogiri::XML::Element.new("admitted", xmldoc)
parent = a.parent
t.children = a.remove
@@ -95,11 +104,11 @@
end
end
def termdefinition_cleanup(xmldoc)
xmldoc.xpath("//term").each do |d|
- first_child = d.at("./p | ./figure | ./formula") or return
+ first_child = d.at("./p | ./figure | ./formula") || return
t = Nokogiri::XML::Element.new("definition", xmldoc)
first_child.replace(t)
t << first_child.remove
d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove }
end
@@ -112,31 +121,68 @@
nodes[0].parent.replace(nodes[0].parent.children)
nodes = xmldoc.xpath("//p/admitted | //p/deprecates")
end
end
+ def termdef_boilerplate_cleanup(xmldoc)
+ xmldoc.xpath("//terms/p | //terms/ul").each do |a|
+ a.remove
+ end
+ end
+
# Run the term-definition cleanup passes on xmldoc in the order listed.
def termdef_cleanup(xmldoc)
termdef_unnest_cleanup(xmldoc)
termdef_stem_cleanup(xmldoc)
termdomain_cleanup(xmldoc)
termdefinition_cleanup(xmldoc)
- termdef_style(xmldoc)
+ termdef_boilerplate_cleanup(xmldoc)
end
# Names of the elements that note_cleanup will move a following note into.
- ELEMS_ALLOW_NOTES =
- %w[p formula quote sourcecode example admonition ul ol dl figure]
+ ELEMS_ALLOW_NOTES =
+ %w[p formula quote sourcecode example admonition ul ol dl figure].freeze
# if a note is at the end of a section, it is left alone
- # if a note is followed by a non-note block,
+ # if a note is followed by a non-note block,
# it is moved inside its preceding block
def note_cleanup(xmldoc)
# selects notes that have at least one later sibling element that is not
# itself a note
q = "//note[following-sibling::*[not(local-name() = 'note')]]"
xmldoc.xpath(q).each do |n|
# notes that have a table ancestor are left where they are
next unless n.ancestors("table").empty?
- prev = n.previous_element or next
# a note with no preceding sibling element is skipped
+ prev = n.previous_element || next
n.parent = prev if ELEMS_ALLOW_NOTES.include? prev.name
end
end
+ def empty_text_before_first_element(x)
+ x.children.each do |c|
+ if c.text?
+ return false if /\S/.match?(c.text)
+ end
+ return true if c.element?
+ end
+ true
+ end
+
+ def strip_initial_space(x)
+ if x.children[0].text?
+ if !/\S/.match?(x.children[0].text)
+ x.children[0].remove
+ else
+ x.children[0].content = x.children[0].text.gsub(/^ /, "")
+ end
+ end
+ end
+
+ def bookmark_cleanup(xmldoc)
+ xmldoc.xpath("//li[descendant::bookmark]").each do |x|
+ if x&.elements[0]&.name == "p" &&
+ x&.elements[0]&.elements[0]&.name == "bookmark"
+ if empty_text_before_first_element(x.elements[0])
+ x["id"] = (x.elements[0].elements[0].remove)["id"]
+ strip_initial_space(x.elements[0])
+ end
+ end
+ end
+ end
end
end
end