require "metanorma/standoc/utils"
require_relative "validate_section"
require_relative "validate_table"
require_relative "validate_term"
require "nokogiri"
require "jing"
require "iev"
require "pngcheck"
require "tempfile"

module Metanorma
  module Standoc
    # Content and schema validation of a generated Metanorma XML document.
    #
    # The methods here read and write instance state owned by the including
    # object: @log (message collector), @fatalerror (array of fatal messages),
    # @localdir (base directory for relative image paths), @doc_ids and
    # @doc_xrefs (populated below).
    module Validate
      # Runs every content-level check against the document, then aborts via
      # clean_abort if any check recorded a fatal error.
      #
      # The order of the first three calls matters: each one populates state
      # that the next one reads.
      #
      # @param doc [Nokogiri::XML::Document] document to validate
      def content_validate(doc)
        @doctype = doc.at("//bibdata/ext/doctype")&.text
        repeat_id_validate(doc.root) # feeds xref_validate
        xref_validate(doc) # feeds nested_asset_validate
        nested_asset_validate(doc)
        section_validate(doc)
        norm_ref_validate(doc)
        iev_validate(doc.root)
        concept_validate(doc, "concept", "refterm")
        concept_validate(doc, "related", "preferred//name")
        preferred_validate(doc)
        table_validate(doc)
        @fatalerror += requirement_validate(doc)
        image_validate(doc)
        math_validate(doc)
        @fatalerror.empty? or clean_abort(@fatalerror.join("\n"), doc)
      end

      MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze

      # Round-trips every MathML element through Plurimath; any exception
      # raised for an element is reported through math_validate_error.
      #
      # @param doc [Nokogiri::XML::Document]
      def math_validate(doc)
        doc.xpath("//m:math", "m" => MATHML_NS).each do |m|
          # Work on a copy so sanitising never touches the real document.
          math = mathml_sanitise(m.dup)
          Plurimath::Math.parse(math, "mathml").to_mathml
        rescue StandardError => e
          math_validate_error(math, m, e)
        end
      end

      # Serialises a MathML node as US-ASCII XML, drops default-namespace
      # declarations, and strips namespace prefixes from tag names.
      #
      # @param math [Nokogiri::XML::Node]
      # @return [String] prefix-free MathML markup
      def mathml_sanitise(math)
        # NOTE(review): the closing-tag substitution was truncated in the
        # text under review; it is restored here as the mirror image of the
        # opening-tag substitution. Confirm against version control.
        math.to_xml(encoding: "US-ASCII")
          .gsub(/ xmlns=["'][^"']+["']/, "")
          .gsub(%r{<[^:/>]+:}, "<")
          .gsub(%r{</[^:/>]+:}, "</")
      end

      # Records a MathML element that could not be processed.
      #
      # NOTE(review): the definition of this method was lost from the text
      # under review; only its call site in math_validate survives. This is a
      # minimal reconstruction that follows the logging pattern of the other
      # validators in this module (log entry plus fatal error). Confirm the
      # message wording and severity against version control.
      #
      # @param math [String, nil] sanitised MathML (nil if sanitising failed)
      # @param elem [Nokogiri::XML::Node] offending element in the document
      # @param error [StandardError] exception that was raised
      def math_validate_error(math, elem, error)
        @log.add("Mathematics", elem, "Invalid MathML: #{math}\n #{error}")
        @fatalerror << "Invalid MathML: #{math}"
      end

      # Validates the document against a RelaxNG schema using Jing, working
      # from a temporary copy of the serialised XML.
      #
      # NOTE(review): the head of this method was lost from the text under
      # review. It is reconstructed from the surviving tail (the Jing rescue
      # and the `ensure f.close!`) and from the signature of
      # schema_validate1. Confirm the Tempfile arguments against version
      # control.
      #
      # @param doc [Nokogiri::XML::Document]
      # @param schema [String] path to the RelaxNG schema
      def schema_validate(doc, schema)
        Tempfile.open(["tmp", ".xml"], encoding: "UTF-8") do |f|
          schema_validate1(f, doc, schema)
        rescue Jing::Error => e
          clean_abort("Jing failed with error: #{e}", doc)
        ensure
          # Close and unlink the temporary file whatever happened above.
          f.close!
        end
      end

      # Writes the document to the given file, runs Jing over it, and logs
      # one entry per schema error.
      #
      # @param file [Tempfile] open, writable temporary file
      # @param doc [Nokogiri::XML::Document]
      # @param schema [String] path to the RelaxNG schema
      def schema_validate1(file, doc, schema)
        file.write(to_xml(doc))
        # Jing reads the file by path, so the content must be flushed first.
        file.close
        errors = Jing.new(schema, encoding: "UTF-8").validate(file.path)
        warn "Syntax Valid!" if errors.none?
        errors.each do |e|
          # Zero-padded line numbers keep the log entries sortable as text.
          location = "XML Line #{format('%06d', e[:line])}:#{e[:column]}"
          @log.add("Metanorma XML Syntax", location, e[:message])
        end
      end

      SVG_NS = "http://www.w3.org/2000/svg".freeze

      WILDCARD_ATTRS = "//*[@format] | //stem | //bibdata//description | " \
                       "//formattedref | //bibdata//note | " \
                       "//bibdata/abstract | //bibitem/abstract | " \
                       "//bibitem/note | //metanorma-extension".freeze

      # RelaxNG cannot cope well with wildcard attributes. So we strip
      # any attributes from FormattedString instances (which can contain
      # xs:any markup, and are signalled with @format) before validation.
      #
      # Mutates and returns the document it is given; callers pass a copy.
      #
      # @param doc [Nokogiri::XML::Document]
      # @return [Nokogiri::XML::Document] the same document, stripped
      def formattedstr_strip(doc)
        doc.xpath(WILDCARD_ATTRS, "m" => SVG_NS).each do |n|
          n.elements.each do |e|
            e.traverse do |e1|
              e1.element? and e1.each { |k, _v| e1.delete(k) }
            end
          end
        end
        # NOTE(review): the text under review had `n.replace("")`. The same
        # text shows other markup-like spans stripped out, so the literal is
        # restored here as an empty <svg/> placeholder. Confirm against
        # version control.
        doc.xpath("//m:svg", "m" => SVG_NS).each { |n| n.replace("<svg/>") }
        doc
      end

      # Runs all image checks.
      #
      # @param doc [Nokogiri::XML::Document]
      def image_validate(doc)
        image_exists(doc)
        png_validate(doc)
      end

      # Records a fatal error for every image whose src is neither a URL nor
      # a data URI and cannot be found on disk.
      #
      # @param doc [Nokogiri::XML::Document]
      def image_exists(doc)
        doc.xpath("//image").each do |i|
          Metanorma::Utils::url?(i["src"]) and next
          Metanorma::Utils::datauri?(i["src"]) and next
          expand_path(i["src"]) and next
          @log.add("Images", i.parent, "Image not found: #{i['src']}")
          @fatalerror << "Image not found: #{i['src']}"
        end
      end

      # Resolves a file location, trying it as given and then relative to
      # @localdir.
      #
      # @param loc [String, nil] path as written in the document
      # @return [String, nil] first candidate that exists, or nil
      def expand_path(loc)
        # A missing src would raise in File.join, and an empty one would
        # resolve to the @localdir directory itself and count as "found".
        return nil if loc.nil? || loc.empty?

        relative_path = File.join(@localdir, loc)
        [loc, relative_path].detect { |p| File.exist?(p) }
      end

      # Checks the integrity of every PNG image that is available locally,
      # either embedded as a data URI or as a file on disk. Remote images
      # and files that cannot be found are skipped.
      #
      # @param doc [Nokogiri::XML::Document]
      def png_validate(doc)
        doc.xpath("//image[@mimetype = 'image/png']").each do |i|
          Metanorma::Utils::url?(i["src"]) and next
          decoded = if Metanorma::Utils::datauri?(i["src"])
                      Metanorma::Utils::decode_datauri(i["src"])[:data]
                    else
                      path = expand_path(i["src"]) or next
                      File.binread(path)
                    end
          png_validate1(i, decoded)
        end
      end

      # Runs PngCheck over raw image bytes and logs a corrupt image. The
      # problem is logged only; nothing is added to @fatalerror.
      #
      # @param img [Nokogiri::XML::Node] the image element
      # @param buffer [String] binary PNG content
      def png_validate1(img, buffer)
        PngCheck.check_buffer(buffer)
      rescue PngCheck::CorruptPngError => e
        @log.add("Images", img.parent,
                 "Corrupt PNG image detected: #{e.message}")
      end

      # Entry point: content validation followed by schema validation of a
      # stripped copy of the document.
      #
      # @param doc [Nokogiri::XML::Document]
      def validate(doc)
        content_validate(doc)
        # Strip a copy, so the caller's document keeps its attributes.
        schema_validate(formattedstr_strip(doc.dup),
                        File.join(File.dirname(__FILE__),
                                  "isodoc-compile.rng"))
      end

      # Records one element's id in @doc_ids, reporting a fatal error if the
      # id has been seen before.
      #
      # @param elem [Nokogiri::XML::Node] element carrying an id attribute
      def repeat_id_validate1(elem)
        if @doc_ids[elem["id"]]
          @log.add("Anchors", elem,
                   "Anchor #{elem['id']} has already been " \
                   "used at line #{@doc_ids[elem['id']]}")
          @fatalerror << "Multiple instances of same ID: #{elem['id']}"
        end
        # The most recent occurrence wins as the recorded line number.
        @doc_ids[elem["id"]] = elem.line
      end

      # Rebuilds @doc_ids (id => line number) from every element with an id,
      # flagging duplicates along the way.
      #
      # @param doc [Nokogiri::XML::Node] node to search under
      def repeat_id_validate(doc)
        @doc_ids = {}
        doc.xpath("//*[@id]").each do |x|
          repeat_id_validate1(x)
        end
      end

      # manually check for xref/@target, xref/@to integrity
      #
      # Builds @doc_xrefs (attribute value => attribute node) and logs every
      # value that is not a key of @doc_ids, so repeat_id_validate must have
      # run first.
      #
      # @param doc [Nokogiri::XML::Document]
      def xref_validate(doc)
        @doc_xrefs = doc.xpath("//xref/@target | //xref/@to")
          .each_with_object({}) do |x, m|
          m[x.text] = x
          @doc_ids[x.text] and next
          @log.add("Anchors", x.parent,
                   "Crossreference target #{x} is undefined")
        end
      end
    end
  end
end