# frozen_string_literal: true. require "metanorma/standoc/utils" module Metanorma module Standoc # Intelligent term lookup xml modifier class TermLookupCleanup AUTOMATIC_GENERATED_ID_REGEXP = /\A_/.freeze EXISTING_TERM_REGEXP = /\Aterm-/.freeze EXISTING_SYMBOL_REGEXP = /\Asymbol-/.freeze attr_reader :xmldoc, :termlookup, :log def initialize(xmldoc, log) @xmldoc = xmldoc @log = log @termlookup = { term: {}, symbol: {}, secondary2primary: {} } @idhash = {} end def call @idhash = populate_idhash @termlookup = replace_automatic_generated_ids_terms set_termxref_tags_target concept_cleanup related_cleanup end private def concept_cleanup xmldoc.xpath("//concept").each do |n| n.delete("type") refterm = n.at("./refterm") or next p = @termlookup[:secondary2primary][refterm.text] and refterm.children = p end end def related_cleanup xmldoc.xpath("//related").each do |n| refterm = n.at("./refterm") or next p = @termlookup[:secondary2primary][refterm.text] and refterm.children = p refterm.replace("#{refterm.children.to_xml}"\ "") end end def populate_idhash xmldoc.xpath("//*[@id]").each_with_object({}) do |n, mem| next unless /^(term|symbol)-/.match?(n["id"]) mem[n["id"]] = true end end def set_termxref_tags_target xmldoc.xpath("//termxref").each do |node| target = normalize_ref_id(node) if termlookup[:term][target].nil? && termlookup[:symbol][target].nil? remove_missing_ref(node, target) next end x = node.at("../xrefrender") and modify_ref_node(x, target) node.name = "refterm" end end def remove_missing_ref(node, target) if node.at("../concept[@type = 'symbol']") remove_missing_ref_symbol(node, target) else remove_missing_ref_term(node, target) end end def remove_missing_ref_term(node, target) log.add("AsciiDoc Input", node, %(Error: Term reference to `#{target}` missing: \ "#{target}" is not defined in document)) node.name = "strong" node&.at("../xrefrender")&.remove display = node&.at("../renderterm")&.remove&.children display = [] if display.nil? || display&.to_xml == node.text d = display.empty? ? "" : ", display #{display.to_xml}" node.children = "term #{node.text}#{d} "\ "not resolved via ID #{target}" end def remove_missing_ref_symbol(node, target) log.add("AsciiDoc Input", node, %(Error: Symbol reference in `symbol[#{target}]` missing: \ "#{target}" is not defined in document)) node.name = "strong" node&.at("../xrefrender")&.remove display = node&.at("../renderterm")&.remove&.children display = [] if display.nil? || display&.to_xml == node.text d = display.empty? ? "" : ", display #{display.to_xml}" node.children = "symbol #{node.text}#{d} "\ "not resolved via ID #{target}" end def modify_ref_node(node, target) node.name = "xref" s = termlookup[:symbol][target] t = termlookup[:term][target] type = node.parent["type"] if type == "term" || ((!type || node.parent.name == "related") && t) node["target"] = t elsif type == "symbol" || ((!type || node.parent.name == "related") && s) node["target"] = s end end def replace_automatic_generated_ids_terms r = xmldoc.xpath("//term").each.with_object({}) do |n, res| normalize_id_and_memorize(n, res, "./preferred//name", "term") normalize_id_and_memorize(n, res, "./admitted//name", "term") end s = xmldoc.xpath("//definitions//dt").each.with_object({}) do |n, res| normalize_id_and_memorize(n, res, ".", "symbol") end { term: r, symbol: s, secondary2primary: pref_secondary2primary } end def pref_secondary2primary term = "" xmldoc.xpath("//term").each.with_object({}) do |n, res| n.xpath("./preferred//name").each_with_index do |p, i| i.zero? and term = p.text i.positive? and res[p.text] = term end n.xpath("./admitted//name").each { |p| res[p.text] = term } end end def normalize_id_and_memorize(node, res_table, text_selector, prefix) normalize_id_and_memorize_init(node, res_table, text_selector, prefix) memorize_other_pref_terms(node, res_table, text_selector) end def normalize_id_and_memorize_init(node, res_table, text_selector, prefix) term_text = normalize_ref_id(node.at(text_selector)) or return unless AUTOMATIC_GENERATED_ID_REGEXP.match(node["id"]).nil? && !node["id"].nil? id = unique_text_id(term_text, prefix) node["id"] = id @idhash[id] = true end res_table[term_text] = node["id"] end def memorize_other_pref_terms(node, res_table, text_selector) node.xpath(text_selector).each_with_index do |p, i| next unless i.positive? res_table[normalize_ref_id(p)] = node["id"] end end def normalize_ref_id(term) return nil if term.nil? t = term.dup t.xpath(".//index").map(&:remove) Metanorma::Utils::to_ncname(t.text.strip.downcase .gsub(/[[:space:]]+/, "-")) end def unique_text_id(text, prefix) unless @idhash["#{prefix}-#{text}"] return "#{prefix}-#{text}" end (1..Float::INFINITY).lazy.each do |index| unless @idhash["#{prefix}-#{text}-#{index}"] break("#{prefix}-#{text}-#{index}") end end end end end end