converter.rb in metanorma-gb-1.5.0

- old
+ new

@@ -1,16 +1,16 @@
 require "asciidoctor"
 require "asciidoctor/iso/converter"
-require "metanorma/gb/version"
 require "isodoc/gb/common"
 require "isodoc/gb/word_convert"
 require "isodoc/gb/pdf_convert"
 require "isodoc/gb/presentation_xml_convert"
 require "gb_agencies"
 require_relative "./section_input.rb"
 require_relative "./front.rb"
 require_relative "./validate.rb"
+require_relative "cleanup.rb"
 require "fileutils"
 
 module Asciidoctor
   module Gb
     # A {Converter} implementation that generates GB output, and a document
@@ -72,68 +72,21 @@
       end
 
       def outputs(node, ret)
         File.open(@filename + ".xml", "w:UTF-8") { |f| f.write(ret) }
         presentation_xml_converter(node).convert(@filename + ".xml")
-        html_compliant_converter(node).convert(@filename + ".presentation.xml", nil, false, "#{@filename}_compliant.html")
-        html_converter(node).convert(@filename + ".presentation.xml", nil, false, "#{@filename}.html")
-        doc_converter(node).convert(@filename + ".presentation.xml", nil, false, "#{@filename}.doc")
-        pdf_converter(node)&.convert(@filename + ".presentation.xml", nil, false, "#{@filename}.pdf")
+        html_compliant_converter(node).
+          convert(@filename + ".presentation.xml", 
+                  nil, false, "#{@filename}_compliant.html")
+        html_converter(node).convert(@filename + ".presentation.xml", 
+                                     nil, false, "#{@filename}.html")
+        doc_converter(node).convert(@filename + ".presentation.xml", 
+                                    nil, false, "#{@filename}.doc")
+        pdf_converter(node)&.convert(@filename + ".presentation.xml", 
+                                     nil, false, "#{@filename}.pdf")
       end
 
-      def termdef_cleanup(xmldoc)
-        super
-        localisedstr(xmldoc)
-      end
-
-      ROMAN_TEXT = /\s*[a-z\u00c0-\u00d6\u00d8-\u00f0\u0100-\u0240]/i
-      HAN_TEXT = /\s*[\u4e00-\u9fff]+/
-
-      LOCALISED_ELEMS = "//admitted | //deprecates | //preferred | //prefix | "\
-        "//initial | //addition | //surname | //forename | //name | "\
-        "//abbreviation | //role/description | //affiliation/description | "\
-        "//bibdata/item | //bibitem/title | //bibdata/formattedref | "\
-        "//bibitem/formattedref | //bibdata/note | //bibitem/note | "\
-        "//bibdata/abstract | //bibitem/note ".freeze
-
-      MUST_LOCALISE_ELEMS = %w{admitted deprecates preferred}.freeze
-
-      def localisedstr(xmldoc)
-        xmldoc.xpath(LOCALISED_ELEMS).each do |zh|
-          if zh.at("./string")
-            extract_localisedstrings(zh)
-          elsif MUST_LOCALISE_ELEMS.include? zh.name
-            duplicate_localisedstrings(zh)
-          end
-        end
-      end
-
-      # element consists solely of localised strings, with no attributes
-      def extract_localisedstrings(elem)
-        elem.xpath("./string").each do |s|
-          s.name = elem.name
-        end
-        elem.replace(elem.children)
-      end
-
-      def text_clean(text)
-        text.gsub(/^\s*/, "").gsub(/</, "&lt;").gsub(/>/, "&gt;")
-      end
-
-      def duplicate_localisedstrings(zh)
-        en = zh.dup.remove
-        zh.after(en).after(" ")
-        zh["language"] = "zh"
-        en["language"] = "en"
-        en.traverse do |c|
-          c.text? && c.content = text_clean(c.text.gsub(HAN_TEXT, ""))
-        end
-        zh.traverse do |c|
-          c.text? && c.content = text_clean(c.text.gsub(ROMAN_TEXT, ""))
-        end
-      end
-
       def inline_quoted(node)
         ret = noko do |xml|
           case node.role
           when "en" then xml.string node.text, **{ language: "en" }
           when "zh" then xml.string node.text, **{ language: "zh" }
@@ -147,15 +100,10 @@
         end.join
         return ret unless ret.nil? or ret.empty?
         super
       end
 
-      def termdef_boilerplate_cleanup(xmldoc)
-        return if @keepboilerplate
-        super
-      end
-
       GBCODE = "((AQ|BB|CB|CH|CJ|CY|DA|DB|DL|DZ|EJ|FZ|GA|GH|GM|GY|HB|HG|"\
         "HJ|HS|HY|JB|JC|JG|JR|JT|JY|LB|LD|LS|LY|MH|MT|MZ|NY|QB|QC|QJ|"\
         "QZ|SB|SC|SH|SJ|SN|SY|TB|TD|TJ|TY|WB|WH|WJ|WM|WS|WW|XB|YB|YC|"\
         "YD|YS|YY|YZ|ZY|GB|GBZ|GJB|GBn|GHZB|GWKB|GWPB|JJF|JJG|Q|T)(/Z|/T)?)"
 
@@ -184,74 +132,15 @@
         code = "CN(#{code})" if !/^CN\(/.match(code) &&
           /^#{GBCODE}[^A-Za-z]/.match(code)
           super
       end
 
-      def cleanup(xmldoc)
-        lang = xmldoc.at("//language")&.text
-        @agencyclass = GbAgencies::Agencies.new(lang, {}, "")
+      def init(node)
+        node.attr("language") or node.set_attr("language", "zh")
+        node.attr("script") or
+          node.set_attr("script", node.attr("language") == "zh" ?
+                        "Hans" : "Latn")
         super
-        contributor_cleanup(xmldoc)
-        xmldoc
-      end
-
-      def docidentifier_cleanup(xmldoc)
-        id = xmldoc.at("//bibdata/docidentifier[@type = 'gb']") or return
-        scope = xmldoc.at("//gbscope")&.text
-        prefix = xmldoc.at("//gbprefix")&.text
-        mand = xmldoc.at("//gbmandate")&.text || "mandatory"
-        idtext = @agencyclass.docidentifier(scope, prefix, mand, nil, id.text)
-        id.content = idtext&.gsub(/\&#x2002;/, " ")
-        id = xmldoc.at("//bibdata/ext/structuredidentifier/"\
-                       "project-number") or return
-        idtext = @agencyclass.docidentifier(scope, prefix, mand, nil, id.text)
-        id.content = idtext&.gsub(/\&#x2002;/, " ")
-      end
-
-      def committee_cleanup(xmldoc)
-        xmldoc.xpath("//gbcommittee").each do |c|
-          xmldoc.at("//bibdata/contributor").next =
-            "<contributor><role type='technical-committee'/><organization>"\
-            "<name>#{c.text}</name></organization></contributor>"
-        end
-      end
-
-      def agency_value(issuer, scope, prefix, mandate)
-        agency = issuer.content
-        agency == "GB" and
-          agency = @agencyclass.standard_agency1(scope, prefix, mandate)
-        agency = "GB" if agency.nil? || agency.empty?
-        agency
-      end
-
-      def contributor_cleanup(xmldoc)
-        issuer = xmldoc.at("//bibdata/contributor[role/@type = 'issuer']/"\
-                           "organization/name")
-        scope = xmldoc.at("//gbscope")&.text
-        prefix = xmldoc.at("//gbprefix")&.text
-        mandate = xmldoc.at("//gbmandate")&.text || "mandatory"
-        agency = agency_value(issuer, scope, prefix, mandate)
-        owner = xmldoc.at("//copyright/owner/organization/name")
-        owner.content = agency
-        issuer.content = agency
-        committee_cleanup(xmldoc)
-      end
-
-      def omit_docid_prefix(prefix)
-        IsoDoc::Gb::HtmlConvert.new({}).omit_docid_prefix(prefix)
-      end
-
-      def boilerplate_cleanup(xmldoc)
-        isodoc = boilerplate_isodoc(xmldoc)
-        initial_boilerplate(xmldoc, isodoc)
-        return if @keepboilerplate
-        xmldoc.xpath(self.class::TERM_CLAUSE).each do |f|
-          term_defs_boilerplate(f.at("./title"),
-                                xmldoc.xpath(".//termdocsource"),
-                                f.at(".//term"), f.at(".//p"), isodoc)
-        end
-        f = xmldoc.at(self.class::NORM_REF) and
-          norm_ref_preface(f)
       end
     end
   end
 end