require "nokogiri"
require "pathname"
require "open-uri"
require "html2doc"
require_relative "./cleanup_block.rb"
require_relative "./cleanup_footnotes.rb"
require_relative "./cleanup_ref.rb"
require_relative "./cleanup_boilerplate.rb"
require_relative "./cleanup_section.rb"
require_relative "./cleanup_terms.rb"
require_relative "./cleanup_inline.rb"
require "relaton_iev"

module Asciidoctor
  module Standoc
    # Post-processing passes applied to the generated XML: one text-level
    # pass (textcleanup) and a series of passes over the parsed document
    # (cleanup). Most of the individual *_cleanup passes called from
    # `cleanup` are not defined in this part of the file (presumably they
    # come from the cleanup_*.rb files required above -- confirm).
    module Cleanup
      # Text-level cleanup of the raw converter output.
      #
      # result - nested array of strings; it is flattened, trailing
      #          whitespace is stripped from each entry, and the entries
      #          are joined with "\n".
      #
      # Unless @keepasciimath is set, the joined text is passed through
      # asciimath2mathml. The last statement decodes HTML entities in the
      # first capture group of each match of its pattern.
      #
      # NOTE(review): the regex literal in the last statement looks damaged
      # in this copy (unbalanced delimiters; markup appears to have been
      # stripped). Restore it from the canonical source before relying on it.
      # NOTE(review): HTMLEntities is not among the visible requires --
      # presumably loaded transitively; confirm.
      def textcleanup(result)
        text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
        # `and` is used for control flow: convert only when the flag is unset.
        !@keepasciimath and text = asciimath2mathml(text)
        text = text.gsub(/\s+([^<]*) }mx) { |m| HTMLEntities.new.decode($1) }
      end

      # Runs the document text through Html2Doc.asciimath_to_mathml and
      # returns the result re-serialised by Nokogiri.
      #
      # text - the document as a String.
      #
      # Steps, as written:
      #   1. each match of the pattern is replaced by its first capture
      #      group with HTML entities decoded;
      #   2. the text is handed to Html2Doc.asciimath_to_mathml together
      #      with a two-element delimiter array;
      #   3. the result is parsed as XML, and every element whose local
      #      name is `math` and whose parent is not `stem` is wrapped;
      #   4. the document is serialised with to_xml and returned.
      #
      # NOTE(review): the pattern `(.+?)`, the delimiters ["", ""] and the
      # wrap("") argument are all empty of markup in this copy, which looks
      # like tag text was stripped during extraction -- verify against the
      # canonical source.
      def asciimath2mathml(text)
        text = text.gsub(%r{(.+?)}m) do |m|
          "#{HTMLEntities.new.decode($1)}"
        end
        text = Html2Doc.
          asciimath_to_mathml(text, ["", ""])
        x = Nokogiri::XML(text)
        x.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |y|
          y.wrap("")
        end
        x.to_xml
      end

      # Applies every document-level cleanup pass to xmldoc, in the fixed
      # order listed below, and returns xmldoc.
      #
      # xmldoc - the parsed XML document; each pass receives this same
      #          object.
      #
      # element_name_cleanup is invoked twice: once first, and once again
      # right after RelatonIev::iev_cleanup (which also receives @bibdb).
      # NOTE(review): the passes presumably depend on this ordering; do not
      # reorder without checking each pass.
      def cleanup(xmldoc)
        element_name_cleanup(xmldoc)
        sections_cleanup(xmldoc)
        obligations_cleanup(xmldoc)
        table_cleanup(xmldoc)
        formula_cleanup(xmldoc)
        figure_cleanup(xmldoc)
        ref_cleanup(xmldoc)
        note_cleanup(xmldoc)
        clausebefore_cleanup(xmldoc)
        bibitem_cleanup(xmldoc)
        normref_cleanup(xmldoc)
        biblio_cleanup(xmldoc)
        reference_names(xmldoc)
        symbols_cleanup(xmldoc)
        xref_cleanup(xmldoc)
        concept_cleanup(xmldoc)
        origin_cleanup(xmldoc)
        termdef_cleanup(xmldoc)
        RelatonIev::iev_cleanup(xmldoc, @bibdb)
        element_name_cleanup(xmldoc)
        bpart_cleanup(xmldoc)
        quotesource_cleanup(xmldoc)
        callout_cleanup(xmldoc)
        footnote_cleanup(xmldoc)
        mathml_cleanup(xmldoc)
        script_cleanup(xmldoc)
        docidentifier_cleanup(xmldoc)
        bookmark_cleanup(xmldoc)
        requirement_cleanup(xmldoc)
        bibdata_cleanup(xmldoc)
        boilerplate_cleanup(xmldoc)
        smartquotes_cleanup(xmldoc)
        para_cleanup(xmldoc)
        empty_element_cleanup(xmldoc)
        img_cleanup(xmldoc)
        xmldoc
      end

      # Normalises quotation marks and dashes in the document's text nodes.
      #
      # xmldoc - the parsed XML document; nodes are replaced in place.
      #
      # Every `date` element is first passed to Utils::endash_date. Then each
      # text node is visited:
      #   * with @smartquotes set, the node is replaced by
      #     Utils::smartformat(text), but only if it contains one of the
      #     trigger patterns and has none of the listed ancestors;
      #   * otherwise, a U+2019 that sits between an alphanumeric and an
      #     alphabetic character is replaced by an ASCII apostrophe.
      def smartquotes_cleanup(xmldoc)
        xmldoc.xpath("//date").each { |d| Utils::endash_date(d) }
        xmldoc.traverse do |n|
          next unless n.text?
          if @smartquotes
            # Skip text with nothing to reformat: none of - ' " ( < >,
            # no "..", and no digit followed by "x".
            # NOTE(review): the node itself (not n.text) is handed to
            # Regexp#match, which relies on implicit String conversion of
            # the node -- confirm.
            next unless /[-'"(<>]|\.\.|\dx/.match(n)
            # Leave text alone when it sits under any of these elements.
            next unless n.ancestors("pre, tt, sourcecode, bibdata, on, "\
                                    "stem, figure[@class = 'pseudocode']").empty?
            n.replace(Utils::smartformat(n.text))
          else
            n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'"))
          end
        end
      end

      # Intentionally empty: this pass does nothing here. It is still called
      # from `cleanup`, so it presumably exists as an override point for
      # including classes -- confirm.
      def docidentifier_cleanup(xmldoc)
      end

      # Frozen list of element names. It is not referenced in the visible
      # part of the file; NOTE(review): presumably consumed by a later pass
      # (e.g. one dealing with text-only elements) -- confirm.
      TEXT_ELEMS =
        %w{status language script version author name callout phone email
           street city state country postcode identifier referenceFrom surname
           referenceTo docidentifier docnumber prefix initial addition forename
           title draft secretariat title-main title-intro title-part}.freeze

      # it seems Nokogiri::XML is treating the content of