require "nokogiri"
require "pathname"
require "open-uri"
require "html2doc"
require_relative "./cleanup_block.rb"
require_relative "./cleanup_footnotes.rb"
require_relative "./cleanup_ref.rb"
require_relative "./cleanup_boilerplate.rb"
require_relative "./cleanup_section.rb"
require_relative "./cleanup_inline.rb"
require "relaton_iev"

module Asciidoctor
  module Standoc
    module Cleanup
      # Builds a single text string from +result+ and post-processes it.
      #
      # +result+ is flattened, each line has its trailing whitespace removed,
      # and the lines are joined with "\n". Unless @keepasciimath is truthy,
      # the text is additionally run through HTMLEntities#decode (on the
      # captured group of each regex match) and Html2Doc.asciimath_to_mathml,
      # followed by two gsub substitutions.
      #
      # NOTE(review): as seen here, the delimiter strings, two of the regex
      # patterns and all replacement strings in this method are empty. That
      # looks like angle-bracketed markup lost from the text rather than the
      # intended code -- confirm against the canonical file.
      # NOTE(review): HTMLEntities is not among the requires visible above;
      # presumably it is loaded elsewhere -- verify.
      def textcleanup(result)
        # Array#* with a String argument joins the elements with that string.
        text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
        if !@keepasciimath
          # Replace every match with the entity-decoded first capture group.
          text = text.gsub(%r{(.+?)}m) do |m|
            "#{HTMLEntities.new.decode($1)}"
          end
          text = Html2Doc.
            asciimath_to_mathml(text, ["", ""]).
            gsub(%r{}, ""\
                 "").
            gsub(%r{}, %{})
        end
        # NOTE(review): the statement below is malformed as seen here. The
        # `text.gsub(` call is never closed and runs straight into a regex
        # `.match(n)` test, followed by `next` / `n.replace` statements, an
        # `else` and three `end`s whose opening `def`, block and `if` are not
        # visible. Presumably the end of this method and the start of one or
        # more later definitions were lost -- restore from the canonical file
        # before relying on any of this.
        text.gsub(/\s+]|\.\.|\dx/.match(n)
            # Skip +n+ unless it has no ancestors matching this selector list.
            next unless n.ancestors("pre, tt, sourcecode, bibdata, on, "\
                                    "stem, figure[@class = 'pseudocode']").empty?
            # Replace +n+ with the result of Utils::smartformat on its text.
            n.replace(Utils::smartformat(n.text))
          else
            # Replace U+2019 with a straight apostrophe when it is preceded
            # by an alphanumeric character and followed by a letter.
            n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'"))
          end
        end
      end

      # Takes +xmldoc+ and does nothing; the body is empty, so it returns nil.
      # NOTE(review): presumably a hook meant to be overridden elsewhere --
      # confirm; no override is visible in this chunk.
      def docidentifier_cleanup(xmldoc)
      end

      # Frozen list of element names.
      # NOTE(review): its use is not visible in this chunk; presumably these
      # are elements whose content is handled as plain text -- verify.
      TEXT_ELEMS =
        %w{status language script version author name callout phone email
           street city state country postcode identifier referenceFrom surname
           referenceTo docidentifier docnumber prefix initial addition forename
           title draft secretariat title-main title-intro title-part}.freeze

      # it seems Nokogiri::XML is treating the content of