module Asciidoctor
  module Gb
    # XML cleanup hooks for the GB flavour; overrides and extends the
    # cleanup methods inherited from ISO::Converter.
    class Converter < ISO::Converter
      # Runs the inherited term-definition cleanup, then post-processes
      # localisable elements.
      #
      # @param xmldoc the XML document being cleaned up (Nokogiri-style API)
      def termdef_cleanup(xmldoc)
        super
        # TODO this should become variant tag
        localisedstr(xmldoc)
      end

      # Optional leading whitespace followed by ONE Latin letter (basic or
      # accented); used with gsub, so every such letter is removed.
      ROMAN_TEXT = /\s*[a-z\u00c0-\u00d6\u00d8-\u00f0\u0100-\u0240]/i
      # Optional leading whitespace followed by a run of CJK ideographs.
      HAN_TEXT = /\s*[\u4e00-\u9fff]+/

      # XPath union of the elements that may carry localised strings.
      # The final entry is //bibitem/abstract, the counterpart of
      # //bibdata/abstract; listing //bibitem/note a second time would be a
      # no-op in an XPath union.
      # NOTE(review): //bibdata/item breaks the bibdata/bibitem pairing used
      # by the other entries -- confirm it is not meant to be //bibdata/title.
      LOCALISED_ELEMS = %w[
        //admitted //deprecates //preferred //prefix
        //initial //addition //surname //forename //name
        //abbreviation //role/description //affiliation/description
        //bibdata/item //bibitem/title //bibdata/formattedref
        //bibitem/formattedref //bibdata/note //bibitem/note
        //bibdata/abstract //bibitem/abstract
      ].join(" | ").freeze

      # Element names that are always split into a zh and an en copy when
      # they contain no explicit <string> children.
      MUST_LOCALISE_ELEMS = %w[admitted deprecates preferred].freeze

      # For every element matched by LOCALISED_ELEMS: unwrap explicit
      # <string> children if present, otherwise duplicate the element per
      # language when its name is in MUST_LOCALISE_ELEMS.
      def localisedstr(xmldoc)
        xmldoc.xpath(LOCALISED_ELEMS).each do |zh|
          if zh.at("./string")
            extract_localisedstrings(zh)
          elsif MUST_LOCALISE_ELEMS.include?(zh.name)
            duplicate_localisedstrings(zh)
          end
        end
      end

      # element consists solely of localised strings, with no attributes
      #
      # Renames each <string> child to the parent's element name, then
      # replaces the parent with its children.
      def extract_localisedstrings(elem)
        elem.xpath("./string").each { |s| s.name = elem.name }
        elem.replace(elem.children)
      end

      # Strips leading whitespace from every line of +text+ (^ matches at
      # each line start) and escapes angle brackets.
      #
      # NOTE(review): the escaping step was damaged in this file (it read as
      # an empty-pattern gsub that inserted ">" between every character).
      # It is restored here as "<" -> "&lt;" and ">" -> "&gt;"; confirm
      # against version control.
      #
      # @param text [String]
      # @return [String]
      def text_clean(text)
        text.gsub(/^\s*/, "").gsub("<", "&lt;").gsub(">", "&gt;")
      end

      # Splits one mixed-language element into two siblings: the original,
      # tagged language="zh" with Latin letters removed, followed by a
      # space and a copy tagged language="en" with Han text removed.
      def duplicate_localisedstrings(zh)
        en = zh.dup.remove
        # The chained call is made on the result of the first `after`;
        # presumably that is `zh` itself, so the space lands between the
        # two elements -- see the Nokogiri Node#after documentation.
        zh.after(en).after(" ")
        zh["language"] = "zh"
        en["language"] = "en"
        strip_from_text_nodes(en, HAN_TEXT)
        strip_from_text_nodes(zh, ROMAN_TEXT)
      end

      # Skips the inherited term-definition boilerplate cleanup when
      # @keepboilerplate is set.
      def termdef_boilerplate_cleanup(xmldoc)
        return if @keepboilerplate

        super
      end

      # Top-level cleanup: builds the agency helper from the document
      # language, runs the inherited cleanup, then fixes up contributors.
      #
      # @return the same xmldoc that was passed in
      def cleanup(xmldoc)
        lang = xmldoc.at("//language")&.text
        @agencyclass = GbAgencies::Agencies.new(lang, {}, "")
        super
        contributor_cleanup(xmldoc)
        xmldoc
      end

      # Identifier nodes rewritten by docidentifier_cleanup, in order.
      DOCID_XPATHS = [
        "//bibdata/docidentifier[@type = 'gb']",
        "//bibdata/ext/structuredidentifier/project-number",
      ].freeze

      # En space, either as the literal character or as the text
      # "&#x2002;"; replaced by a plain space in identifiers.
      # NOTE(review): the original pattern appeared character-damaged in
      # this file; both spellings are matched so either reading is covered.
      EN_SPACE = /&#x2002;|\u2002/

      # Rewrites each identifier node in DOCID_XPATHS with the identifier
      # computed by the agency helper. Stops at the first node that is
      # absent, so the project number is only touched when the GB
      # docidentifier exists.
      def docidentifier_cleanup(xmldoc)
        scope = xmldoc.at("//gbscope")&.text
        prefix = xmldoc.at("//gbprefix")&.text
        mand = xmldoc.at("//gbmandate")&.text || "mandatory"
        DOCID_XPATHS.each do |path|
          id = xmldoc.at(path)
          return unless id

          idtext = @agencyclass.docidentifier(scope, prefix, mand, nil,
                                              id.text)
          id.content = idtext&.gsub(EN_SPACE, " ")
        end
      end

      # Inserts the text of each //gbcommittee element immediately after
      # the first //bibdata/contributor. Does nothing for a committee when
      # no contributor exists (previously a NoMethodError on nil).
      #
      # NOTE(review): the inserted literal appears to have lost its
      # surrounding markup in this file -- only the interpolated committee
      # name remains. Restore the intended element from version control.
      def committee_cleanup(xmldoc)
        xmldoc.xpath("//gbcommittee").each do |c|
          anchor = xmldoc.at("//bibdata/contributor")
          next unless anchor

          anchor.next = "#{c.text}"
        end
      end

      # Resolves the issuing agency name.
      #
      # @param issuer the issuer <name> node, or nil when absent
      # @return [String] the issuer's content, replaced by the agency
      #   helper's value when it is exactly "GB"; "GB" when the result is
      #   nil or empty
      def agency_value(issuer, scope, prefix, mandate)
        agency = issuer&.content
        if agency == "GB"
          agency = @agencyclass.standard_agency1(scope, prefix, mandate)
        end
        agency.nil? || agency.empty? ? "GB" : agency
      end

      # Writes the resolved agency name into the copyright owner and the
      # issuer (whichever of the two exist), then adds committees.
      def contributor_cleanup(xmldoc)
        issuer = xmldoc.at("//bibdata/contributor[role/@type = 'issuer']/"\
                           "organization/name")
        scope = xmldoc.at("//gbscope")&.text
        prefix = xmldoc.at("//gbprefix")&.text
        mandate = xmldoc.at("//gbmandate")&.text || "mandatory"
        agency = agency_value(issuer, scope, prefix, mandate)
        owner = xmldoc.at("//copyright/owner/organization/name")
        owner.content = agency if owner
        issuer.content = agency if issuer
        committee_cleanup(xmldoc)
      end

      # Delegates to a fresh IsoDoc::Gb::HtmlConvert instance.
      def omit_docid_prefix(prefix)
        IsoDoc::Gb::HtmlConvert.new({}).omit_docid_prefix(prefix)
      end

      # Applies the initial boilerplate, then (unless @keepboilerplate is
      # set) the terms-and-definitions boilerplate for every term clause
      # and the normative-references preface when that clause exists.
      def boilerplate_cleanup(xmldoc)
        isodoc = boilerplate_isodoc(xmldoc)
        initial_boilerplate(xmldoc, isodoc)
        return if @keepboilerplate

        xmldoc.xpath(self.class::TERM_CLAUSE).each do |f|
          # termdocsource is deliberately re-queried on every iteration,
          # against the whole document, exactly as the original did.
          term_defs_boilerplate(f.at("./title"),
                                xmldoc.xpath(".//termdocsource"),
                                f.at(".//term"), f.at(".//p"), isodoc)
        end
        norm_ref = xmldoc.at(self.class::NORM_REF)
        norm_ref_preface(norm_ref) if norm_ref
      end

      private

      # Removes every match of +pattern+ from each text node under +elem+
      # and passes the result through text_clean.
      def strip_from_text_nodes(elem, pattern)
        elem.traverse do |node|
          next unless node.text?

          node.content = text_clean(node.text.gsub(pattern, ""))
        end
      end
    end
  end
end