require "asciidoctor"
require "asciidoctor/iso/converter"
require "metanorma/gb/version"
require "isodoc/gb/gbconvert"
require "isodoc/gb/gbwordconvert"
require "gb_agencies"
require_relative "./section_input.rb"
require_relative "./front.rb"
require_relative "./validate.rb"
require "fileutils"
module Asciidoctor
module Gb
# A {Converter} implementation that generates GB output, and a document
# schema encapsulation of the document for validation
class Converter < ISO::Converter
# Root tag name and namespace URI for this flavour. Their consumers are
# outside this section of the file (presumably the parent converter reads
# them when building the XML document; confirm there).
XML_ROOT_TAG = "gb-standard".freeze
XML_NAMESPACE = "https://www.metanorma.org/ns/gb".freeze
# Register this converter under the "gb" backend name.
register_for "gb"
# Record whether the document declares the "draft" and "keep-boilerplate"
# attributes, then hand over to the parent implementation.
#
# @param node the document node; its #attributes is queried for the keys
# @return whatever the parent implementation returns
def makexml(node)
  declared = node.attributes
  @draft = declared.has_key?("draft")
  @keepboilerplate = declared.has_key?("keep-boilerplate")
  super
end
# Collect the GB-specific document attributes into an options hash.
#
# @param node the document node; each value is read through node.attr
# @return [Hash] symbol keys mapped to the attribute values (nil if unset)
def gb_attributes(node)
  attribute_names = {
    standardlogoimg: "standard-logo-img",
    standardclassimg: "standard-class-img",
    standardissuerimg: "standard-issuer-img",
    titlefont: "title-font",
  }
  attribute_names.each_with_object({}) do |(option, attr_name), options|
    options[option] = node.attr(attr_name)
  end
end
# Options for the HTML converter: the parent's options with the GB-specific
# attributes merged over them.
#
# @param node the document node
# @return [Hash] merged options
def html_extract_attributes(node)
  inherited = super
  inherited.merge(gb_attributes(node))
end
# Options for the Word converter: the parent's options with the GB-specific
# attributes merged over them.
#
# @param node the document node
# @return [Hash] merged options
def doc_extract_attributes(node)
  inherited = super
  inherited.merge(gb_attributes(node))
end
# Build the GB HTML converter.
#
# @param node the document node, or nil to build a converter with no options
# @return [IsoDoc::Gb::HtmlConvert]
def html_converter(node)
  options = node.nil? ? {} : html_extract_attributes(node)
  IsoDoc::Gb::HtmlConvert.new(options)
end
# Build the GB HTML converter with the :compliant option switched on.
#
# @param node the document node, or nil to build a converter with no options
#   (in which case :compliant is not set either)
# @return [IsoDoc::Gb::HtmlConvert]
def html_compliant_converter(node)
  return IsoDoc::Gb::HtmlConvert.new({}) if node.nil?

  options = html_extract_attributes(node).merge(compliant: true)
  IsoDoc::Gb::HtmlConvert.new(options)
end
# Build the GB Word converter.
#
# @param node the document node, or nil to build a converter with no options
# @return [IsoDoc::Gb::WordConvert]
def doc_converter(node)
  if node.nil?
    IsoDoc::Gb::WordConvert.new({})
  else
    IsoDoc::Gb::WordConvert.new(doc_extract_attributes(node))
  end
end
# Convert the document to XML and, unless suppressed, write the XML to disk
# and run the compliant-HTML, HTML and Word converters over it.
#
# Output files are only produced when the "nodoc" attribute is not set and a
# "docfile" attribute is present. The output base name is the docfile with a
# trailing ".adoc" and any leading directory path removed, so files are
# written relative to the current working directory.
#
# @param node the document node
# @return [String] the serialised XML
def document(node)
  init(node)
  xml = makexml(node).to_xml(indent: 2)
  if !node.attr("nodoc") && node.attr("docfile")
    basename = node.attr("docfile").gsub(/\.adoc$/, "").gsub(%r{^.*/}, "")
    File.open("#{basename}.xml", "w:utf-8") { |out| out.write(xml) }
    html_compliant_converter(node).convert("#{basename}.xml")
    # The compliant run writes <basename>.html; move it aside before the
    # regular HTML run produces a file of the same name.
    FileUtils.mv "#{basename}.html", "#{basename}_compliant.html"
    html_converter(node).convert("#{basename}.xml")
    doc_converter(node).convert("#{basename}.xml")
  end
  @files_to_delete.each { |path| FileUtils.rm path }
  xml
end
# Run the parent's term-definition cleanup, then split or duplicate the
# localisable elements of the document via localisedstr.
#
# @param xmldoc the XML document being cleaned up
# @return the result of localisedstr
def termdef_cleanup(xmldoc)
  super(xmldoc)
  localisedstr(xmldoc)
end
# Optional whitespace followed by ONE Latin letter: ASCII a-z or a code
# point in the listed accented ranges, matched case-insensitively. Used with
# gsub in duplicate_localisedstrings to delete Latin letters.
ROMAN_TEXT = /\s*[a-z\u00c0-\u00d6\u00d8-\u00f0\u0100-\u0240]/i
# Optional whitespace followed by a run of characters in U+4E00..U+9FFF.
# Used with gsub in duplicate_localisedstrings to delete those characters.
HAN_TEXT = /\s*[\u4e00-\u9fff]+/
# XPath union of the elements that localisedstr visits.
# NOTE(review): "//bibitem/note" is listed twice (harmless in a union); the
# final entry may have been meant to pair with "//bibdata/abstract" -- confirm.
LOCALISED_ELEMS = "//admitted | //deprecates | //preferred | //prefix | "\
"//initial | //addition | //surname | //forename | //name | "\
"//abbreviation | //role/description | //affiliation/description | "\
"//bibdata/item | //bibitem/title | //bibdata/formattedref | "\
"//bibitem/formattedref | //bibdata/note | //bibitem/note | "\
"//bibdata/abstract | //bibitem/note ".freeze
# Element names that localisedstr duplicates into zh/en copies when they
# carry no <string> child.
MUST_LOCALISE_ELEMS = %w{admitted deprecates preferred}.freeze
# Visit every element selected by LOCALISED_ELEMS. Elements that contain a
# direct <string> child are expanded by extract_localisedstrings; otherwise
# elements named in MUST_LOCALISE_ELEMS are duplicated per language by
# duplicate_localisedstrings. All other elements are left untouched.
#
# @param xmldoc the XML document being cleaned up
# @return the collection returned by xmldoc.xpath
def localisedstr(xmldoc)
  xmldoc.xpath(LOCALISED_ELEMS).each do |elem|
    next extract_localisedstrings(elem) if elem.at("./string")

    duplicate_localisedstrings(elem) if MUST_LOCALISE_ELEMS.include?(elem.name)
  end
end
# Expand an element made up solely of localised <string> children (and no
# attributes): each <string> child is renamed to the parent's own name, and
# the parent is then replaced by its children.
#
# @param elem the element to expand
# @return the result of elem.replace
def extract_localisedstrings(elem)
  elem.xpath("./string").each { |localised| localised.name = elem.name }
  elem.replace(elem.children)
end
# Normalise a text fragment before it is written back into the document:
# strip the leading whitespace of every line, then escape angle brackets as
# the entities &lt; and &gt;.
#
# The previous body was not valid Ruby: the "<" pattern of the second gsub
# was missing and the replacements were bare "<" / ">" characters, which
# would have made both substitutions no-ops.
#
# @param text [String] the raw text
# @return [String] a new string; the argument is not modified
def text_clean(text)
  # "^" anchors at every line start, so indentation is removed on each line,
  # not just at the beginning of the string.
  text.gsub(/^\s*/, "").gsub(/</, "&lt;").gsub(/>/, "&gt;")
end
# Split one mixed-language element into a "zh" element followed by a space
# and an "en" copy. The copy is inserted after the original; then the
# characters matched by HAN_TEXT are removed from the text nodes of the "en"
# copy, and the letters matched by ROMAN_TEXT are removed from the text nodes
# of the original, each result being passed through text_clean.
#
# @param zh the element to split; it is modified in place and becomes the
#   language="zh" variant
def duplicate_localisedstrings(zh)
  en = zh.dup.remove
  # Both insertions go directly after zh, so the resulting order is:
  # zh, " ", en.
  zh.after(en).after(" ")
  zh["language"] = "zh"
  en["language"] = "en"

  strip_script = lambda do |tree, unwanted|
    tree.traverse do |n|
      n.text? && (n.content = text_clean(n.text.gsub(unwanted, "")))
    end
  end
  strip_script.call(en, HAN_TEXT)
  strip_script.call(zh, ROMAN_TEXT)
end
# Render inline text whose role is a language tag ("en", "zh", "zh-Hans",
# "zh-Hant") as a <string> element carrying language (and, for the two
# script-specific roles, script) attributes. Any other role falls through to
# the parent implementation.
#
# @param node the inline node; #role and #text are read
# @return [String] the rendered markup, or the parent's result
def inline_quoted(node)
  attrs_by_role = {
    "en" => { language: "en" },
    "zh" => { language: "zh" },
    "zh-Hans" => { language: "zh", script: "Hans" },
    "zh-Hant" => { language: "zh", script: "Hant" },
  }
  rendered = noko do |xml|
    attrs = attrs_by_role[node.role]
    xml.string(node.text, **attrs) if attrs
  end.join
  # Nothing rendered means the role was not a language tag.
  return super if rendered.nil? || rendered.empty?

  rendered
end
# Delegate to the parent's boilerplate cleanup for terms and definitions,
# unless the document asked to keep its boilerplate (@keepboilerplate).
#
# @param xmldoc the XML document being cleaned up
# @return nil when boilerplate is kept, otherwise the parent's result
def termdef_boilerplate_cleanup(xmldoc)
  super unless @keepboilerplate
end
# Regex source (a String, not a Regexp) matching one of the listed GB
# standard-type prefixes, optionally followed by "/Z" or "/T". It is
# interpolated into the reference patterns below and into the pattern built
# in fetch_ref.
GBCODE = "((AQ|BB|CB|CH|CJ|CY|DA|DB|DL|DZ|EJ|FZ|GA|GH|GM|GY|HB|HG|"\
"HJ|HS|HY|JB|JC|JG|JR|JT|JY|LB|LD|LS|LY|MH|MT|MZ|NY|QB|QC|QJ|"\
"QZ|SB|SC|SH|SJ|SN|SY|TB|TD|TJ|TY|WB|WH|WJ|WM|WS|WW|XB|YB|YC|"\
"YD|YS|YY|YZ|ZY|GB|GBZ|GJB|GBn|GHZB|GWKB|GWPB|JJF|JJG|Q|T)(/Z|/T)?)"
# Extended-mode, multi-line pattern for a bracketed ISO/IEC/GB reference
# code with an optional 19xx/20xx year after ":" or "-".
# NOTE(review): this literal and the two below look damaged -- the group
# names after "(?" and the literal markup between the fragments appear to be
# missing -- so they may not compile as written. Restore them from version
# control; do not hand-edit on guesswork.
ISO_REF = %r{^[[^"]+)">
\[(?\([^)]+\))?(?](ISO|IEC|#{GBCODE})[^0-9]*\s[0-9-]+?)
([:-](?(19|20)[0-9][0-9]))?\]
,?\s
(?.*)$}xm
# Variant for undated references, whose bracketed code ends in ":--".
ISO_REF_NO_YEAR = %r{^[[^"]+)">
\[(?\([^)]+\))?(?](ISO|IEC|#{GBCODE})[^0-9]*\s[0-9-]+):--\]
,?\s?
]*>\s*(?[^\]]+)
\s*,?\s?(?.*)$}xm
# Variant for references whose bracketed code is followed by "(all parts)".
ISO_REF_ALL_PARTS = %r{^[[^"]+)">
\[(?\([^)]+\))?(?](ISO|IEC|#{GBCODE})[^0-9]*\s[0-9]+)\s
\(all\sparts\)\]
()?,?\s?
(?.*)(
)?$}xm
# Try the three reference patterns against a bibliography item.
#
# @param item [String] the item text
# @return [Array] three entries (MatchData or nil), in the order ISO_REF,
#   ISO_REF_NO_YEAR, ISO_REF_ALL_PARTS
def reference1_matches(item)
  [ISO_REF, ISO_REF_NO_YEAR, ISO_REF_ALL_PARTS].map do |pattern|
    pattern.match(item)
  end
end
# Wrap a GB reference code as "CN(<code>)" before delegating to the parent.
# The wrapping applies when the code starts with a GBCODE prefix followed by
# a non-letter and is not already wrapped in "CN(".
#
# The parent is called with implicit arguments, so it receives the
# (possibly rewritten) code together with xml, year and opts.
def fetch_ref(xml, code, year, **opts)
  already_wrapped = /^CN\(/.match(code)
  if !already_wrapped && /^#{GBCODE}[^A-Za-z]/.match(code)
    code = "CN(#{code})"
  end
  super
end
# Document-level cleanup: build the agency helper for the document language,
# run the parent's cleanup, then fix up the contributors.
#
# @param xmldoc the XML document being cleaned up
# @return xmldoc itself
def cleanup(xmldoc)
  language_node = xmldoc.at("//language")
  # Must be set before the parent's cleanup runs: docidentifier_cleanup and
  # agency_value in this class read @agencyclass.
  @agencyclass = GbAgencies::Agencies.new(language_node&.text, {}, "")
  super
  xmldoc.tap { |doc| contributor_cleanup(doc) }
end
# Rewrite the GB document identifier, and then the structured identifier's
# project number, with the value computed by @agencyclass.docidentifier.
# Returns early when the GB docidentifier is absent (the project number is
# then not processed either), and again when the project number is absent.
def docidentifier_cleanup(xmldoc)
id = xmldoc.at("//bibdata/docidentifier[@type = 'gb']") or return
# Scope, prefix and mandate are looked up anywhere in the document; the
# mandate falls back to "mandatory" when there is no gbmandate element.
scope = xmldoc.at("//gbscope")&.text
prefix = xmldoc.at("//gbprefix")&.text
mand = xmldoc.at("//gbmandate")&.text || "mandatory"
idtext = @agencyclass.docidentifier(scope, prefix, mand, nil, id.text)
# NOTE(review): the pattern is a single escaped whitespace character that is
# replaced by a plain space. It may originally have targeted an en-space
# (U+2002) or its entity text -- confirm which character is intended.
id.content = idtext&.gsub(/\ /, " ")
# From here on, id refers to the project-number element.
id = xmldoc.at("//bibdata/ext/structuredidentifier/"\
"project-number") or return
idtext = @agencyclass.docidentifier(scope, prefix, mand, nil, id.text)
id.content = idtext&.gsub(/\ /, " ")
end
# For every gbcommittee element, assign a string built from the committee's
# text as the node following the first bibdata/contributor element.
# NOTE(review): the assigned string is an empty literal concatenated with the
# committee text, which suggests the surrounding markup (presumably a
# contributor wrapper) was lost -- confirm against version control.
# NOTE(review): raises NoMethodError if a gbcommittee exists but there is no
# bibdata/contributor; confirm that cannot happen.
def committee_cleanup(xmldoc)
xmldoc.xpath("//gbcommittee").each do |c|
xmldoc.at("//bibdata/contributor").next =
""\
"#{c.text}"
end
end
# Work out the agency name to record for the document.
#
# The issuer's current content is used as-is unless it is the placeholder
# "GB", in which case @agencyclass.standard_agency1 is consulted. A nil or
# empty result falls back to "GB".
#
# @param issuer an element whose #content holds the issuer name
# @param scope, prefix, mandate values forwarded to standard_agency1
# @return [String] the agency name, never nil or empty
def agency_value(issuer, scope, prefix, mandate)
  agency = issuer.content
  if agency == "GB"
    agency = @agencyclass.standard_agency1(scope, prefix, mandate)
  end
  return "GB" if agency.nil? || agency.empty?

  agency
end
# Replace the issuer organisation name and the copyright owner name with the
# agency computed by agency_value, then run committee_cleanup.
#
# @param xmldoc the XML document being cleaned up
# @return the result of committee_cleanup
def contributor_cleanup(xmldoc)
  issuer_path = "//bibdata/contributor[role/@type = 'issuer']/"\
                "organization/name"
  issuer = xmldoc.at(issuer_path)
  scope, prefix = %w[//gbscope //gbprefix].map { |q| xmldoc.at(q)&.text }
  mandate = xmldoc.at("//gbmandate")&.text || "mandatory"
  agency = agency_value(issuer, scope, prefix, mandate)
  # The owner is updated before the issuer, as agency_value has already read
  # the issuer's previous content.
  [xmldoc.at("//copyright/owner/organization/name"), issuer].each do |name|
    name.content = agency
  end
  committee_cleanup(xmldoc)
end
# Ask a default-configured GB HTML converter whether the given document
# identifier prefix should be omitted.
#
# @param prefix the identifier prefix to check
# @return whatever IsoDoc::Gb::HtmlConvert#omit_docid_prefix returns
def omit_docid_prefix(prefix)
  converter = IsoDoc::Gb::HtmlConvert.new({})
  converter.omit_docid_prefix(prefix)
end
# Insert boilerplate into the document. The initial boilerplate is always
# added; the terms-and-definitions and normative-references boilerplate are
# skipped when @keepboilerplate is set, and each is applied only if the
# corresponding clause exists.
#
# @param xmldoc the XML document being cleaned up
# @return nil, or the result of norm_ref_preface when that clause exists
def boilerplate_cleanup(xmldoc)
  isodoc = boilerplate_isodoc(xmldoc)
  initial_boilerplate(xmldoc, isodoc)
  return if @keepboilerplate

  terms = xmldoc.at(self.class::TERM_CLAUSE)
  if terms
    term_defs_boilerplate(terms.at("./title"),
                          xmldoc.xpath(".//termdocsource"),
                          terms.at(".//term"), terms.at(".//p"), isodoc)
  end
  norm_refs = xmldoc.at(self.class::NORM_REF)
  norm_ref_preface(norm_refs) if norm_refs
end
end
end
end