require "isodoc"
require "htmlentities"
require "metanorma-utils"
require_relative "filelookup_sectionsplit"

module Metanorma
  class Collection
    # Lookup table of the files in a collection, keyed by normalised
    # document identifier (see #key).
    class FileLookup
      attr_accessor :files_to_delete, :parent

      # hash for each document in collection of document identifier to:
      # document reference (fileref or id), type of document reference,
      # and bibdata entry for that file
      # @param path [String] path to collection
      # @param parent [Object] owner of this lookup; its xml, isodoc,
      #   compile, documents and manifest are read here
      def initialize(path, parent)
        @c = HTMLEntities.new
        @files = {}
        @parent = parent
        @xml = parent.xml
        @isodoc = parent.isodoc
        @path = path
        @compile = parent.compile
        @documents = parent.documents
        @files_to_delete = []
        @disambig = Util::DisambigFiles.new
        @manifest = parent.manifest
        read_files(@manifest.entry)
      end

      # Walk the manifest entries recursively, registering every entry
      # that has a file.
      # @param entries [Array, Object, nil] one manifest entry, a list of
      #   them, or nil
      def read_files(entries)
        Array(entries).each do |e|
          e.file and read_file(e)
          read_files(e.entry)
        end
      end

      # Build the lookup entry for one manifest entry and store it under
      # the normalised form of the manifest identifier. Nothing is stored
      # if no entry can be built (the manifest entry has no file).
      # @param manifest [Object] manifest entry
      def read_file(manifest)
        i, k = read_file_idents(manifest)
        entry = file_entry(manifest, k) or return
        bibdata_process(entry, i)
        bibitem_process(entry)
        @files[key(i)] = entry
      end

      # @param manifest [Object] manifest entry
      # @return [Array] the manifest identifier as given, and the
      #   identifier after docid_prefix processing and #key normalisation
      def read_file_idents(manifest)
        id = manifest.identifier
        sanitised_id = key(@isodoc.docid_prefix("", manifest.identifier.dup))
        #       if manifest.bibdata and # NO, DO NOT FISH FOR THE GENUINE IDENTIFIER IN BIBDATA
        #         d = manifest.bibdata.docidentifier.detect { |x| x.primary } ||
        #           manifest.bibdata.docidentifier.first
        #         k = d.id
        #         i = key(@isodoc.docid_prefix(d.type, d.id.dup))
        #       end
        [id, sanitised_id]
      end

      # Populate entry[:bibdata]. For attachments a bibitem is generated
      # from the identifier; otherwise the source file is read and parsed,
      # the document suffix is applied to it, and its anchors, ids, bibdata
      # and document suffix are merged into the entry.
      # @param entry [Hash] lookup entry, modified in place
      # @param ident [String] document identifier
      def bibdata_process(entry, ident)
        if entry[:attachment]
          entry[:bibdata] =
            Metanorma::Collection::Document.attachment_bibitem(ident).root
        else
          file, _filename = targetfile(entry, read: true)
          xml = Nokogiri::XML(file, &:huge)
          add_document_suffix(ident, xml)
          entry.merge!(anchors: read_anchors(xml), ids: read_ids(xml),
                       bibdata: xml.at(ns("//bibdata")),
                       document_suffix: xml.root["document_suffix"])
        end
      end

      # Derive entry[:bibitem] from a copy of entry[:bibdata]: renamed to
      # "bibitem", marked hidden, and with its ext child (if any) removed.
      # @param entry [Hash] lookup entry, modified in place
      def bibitem_process(entry)
        entry[:bibitem] = entry[:bibdata].dup
        entry[:bibitem].name = "bibitem"
        entry[:bibitem]["hidden"] = "true"
        entry[:bibitem].at("./*[local-name() = 'ext']")&.remove
      end

      # ref is the absolute source file address
      # rel_path is the relative source file address, relative to the YAML
      # location
      # out_path is the destination file address, with any references outside
      # the working directory (../../...) truncated, and based on relative path
      # identifier is the id with only spaces, no nbsp
      # @param ref [Object] manifest entry
      # @param identifier [String] sanitised document identifier
      # @return [Hash, nil] lookup entry with nil values dropped, or nil if
      #   the manifest entry has no file
      def file_entry(ref, identifier)
        # Entries without a file are never registered, so every entry built
        # here is of type "fileref".
        ref.file or return
        abs = @documents[Util.key(identifier)].file
        ret = { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
                out_path: output_file_path(ref) }
        file_entry_copy(ref, ret)
        ret.compact
      end

      # TODO make the output file location reflect source location universally,
      # not just for attachments: no File.basename
      # @param ref [Object] manifest entry
      # @return the destination filename given by the disambiguator
      def output_file_path(ref)
        f = File.basename(ref.file)
        ref.attachment and f = ref.file
        @disambig.source2dest_filename(f)
      end

      # Copy the listed attributes from the manifest entry into the lookup
      # entry, for each one the manifest entry responds to; hyphens are
      # stripped from the name to form the hash key.
      # @param ref [Object] manifest entry
      # @param ret [Hash] lookup entry, modified in place
      def file_entry_copy(ref, ret)
        %w(attachment sectionsplit index presentation-xml url
           bare-after-first).each do |s|
          ref.respond_to?(s.to_sym) and
            ret[s.gsub("-", "").to_sym] = ref.send(s)
        end
      end

      # Add the document suffix, derived from the identifier, to every
      # anchor attribute listed by Metanorma::Utils.anchor_attributes and to
      # url(#...) references in style attributes, then append it to the
      # document_suffix attribute of the root element.
      # @param identifier [String] document identifier
      # @param doc [Nokogiri::XML::Document] document, modified in place
      def add_document_suffix(identifier, doc)
        document_suffix = Metanorma::Utils.to_ncname(identifier)
        Metanorma::Utils.anchor_attributes.each do |(tag_name, attr_name)|
          Util.add_suffix_to_attributes(doc, document_suffix,
                                        tag_name, attr_name, @isodoc)
        end
        url_in_css_styles(doc, document_suffix)
        doc.root["document_suffix"] ||= ""
        doc.root["document_suffix"] += document_suffix
      end

      # update relative URLs, url(#...), in CSS in @style attrs (including SVG)
      def url_in_css_styles(doc, document_suffix)
        doc.xpath("//*[@style]").each do |s|
          s["style"] = s["style"]
            .gsub(%r{url\(#([^()]+)\)}, "url(#\\1_#{document_suffix})")
        end
      end

      # return citation url for file
      # @param ident [String] document identifier
      # @param options [Hash] options passed on to #targetfile; of these,
      #   doc [Boolean] means I am a Metanorma document,
      #   so my URL should end with html or pdf or whatever
      # @return the entry's :url if set, else its output filename
      def url(ident, options)
        data = get(ident)
        data[:url] || targetfile(data, options)[1]
      end

      # are references to the file to be linked to a file in the collection,
      # or externally? Determines whether file suffix anchors are to be used
      # @return false if the identifier is not registered, else the
      #   entry's :url value
      def url?(ident)
        data = get(ident) or return false
        data[:url]
      end

      # return file contents + output filename for each file in the collection,
      # given a docref entry
      # @param data [Hash] docref entry
      # @param options [Hash] recognised keys, with defaults
      #   read: false, doc: true, relative: false
      #   read [Boolean] read the file in and return it
      #   doc [Boolean] I am a Metanorma document,
      #   so my URL should end with html or pdf or whatever
      #   relative [Boolean] Return output path,
      #   formed relative to YAML file, not input path, relative to calling
      #   function
      # @return [Array<String>]
      def targetfile(data, options)
        options = { read: false, doc: true, relative: false }.merge(options)
        path = options[:relative] ? data[:rel_path] : data[:ref]
        if data[:type] == "fileref"
          ref_file path, data[:out_path], options[:read], options[:doc]
        else
          # NOTE(review): nothing in this file sets :id on an entry;
          # presumably this branch serves entries registered elsewhere.
          # Confirm before relying on it.
          xml_file data[:id], options[:read]
        end
      end

      # #targetfile for the entry registered under ident
      def targetfile_id(ident, options)
        targetfile(get(ident), options)
      end

      # @param ref [String] source file path
      # @param out [String] output filename
      # @param read [Boolean] read the source file in as UTF-8
      # @param doc [Boolean] replace a trailing .xml in the output filename
      #   with .html
      # @return [Array] file contents (nil unless read) and output filename
      def ref_file(ref, out, read, doc)
        file = File.read(ref, encoding: "utf-8") if read
        filename = out.dup
        filename.sub!(/\.xml$/, ".html") if doc
        [file, filename]
      end

      # @param id [String] id of a doc-container in the collection XML
      # @param read [Boolean] serialise that doc-container and return it
      # @return [Array] XML (nil unless read) and "<id>.html"
      def xml_file(id, read)
        file = @xml.at(ns("//doc-container[@id = '#{id}']")).to_xml if read
        filename = "#{id}.html"
        [file, filename]
      end

      # map locality type and label (e.g. "clause" "1") to id = anchor for
      # a document
      # Note: will only key clauses, which have unambiguous reference label in
      # locality. Notes, examples etc with containers are just plunked against
      # UUIDs, so that their IDs can at least be registered to be tracked
      # as existing.
      def read_anchors(xml)
        xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
                                  { locale: @locale })
        xrefs.parse xml
        xrefs.get.each_with_object({}) do |(k, v), ret|
          read_anchors1(k, v, ret)
        end
      end

      # Register one anchor in ret, under its type (defaulting to "clause").
      # The index is the anchor's label, or a random UUID when the anchor
      # has a container or no label; the anchor is also registered under
      # its value, if it has one.
      # @param key [String] anchor id
      # @param val [Hash] anchor data; :type is defaulted in place
      # @param ret [Hash] accumulator, modified in place
      def read_anchors1(key, val, ret)
        val[:type] ||= "clause"
        ret[val[:type]] ||= {}
        index = if val[:container] || val[:label].nil? || val[:label].empty?
                  UUIDTools::UUID.random_create.to_s
                else val[:label]
                end
        ret[val[:type]][index] = key
        ret[val[:type]][val[:value]] = key if val[:value]
      end

      # Also parse all ids in doc (including ones which won't be xref targets)
      # Text nodes and elements whose name starts with semantic__ are skipped.
      # @return [Hash] id => true
      def read_ids(xml)
        ret = {}
        xml.traverse do |x|
          x.text? and next
          /^semantic__/.match?(x.name) and next
          x["id"] and ret[x["id"]] = true
        end
        ret
      end

      # Normalise an identifier for use as a lookup key: decode HTML
      # entities, collapse each run of space separators to a single space,
      # and strip a leading "metanorma-collection " prefix.
      def key(ident)
        @c.decode(ident).gsub(/(\p{Zs})+/, " ")
          .sub(/^metanorma-collection /, "")
      end

      # @return [Array<String>] normalised identifiers of all registered files
      def keys
        @files.keys
      end

      # @param ident [String] document identifier
      # @param attr [Symbol, nil] attribute of the entry to return
      # @return the entry for ident, or the given attribute of it
      def get(ident, attr = nil)
        if attr then @files[key(ident)][attr]
        else @files[key(ident)]
        end
      end

      # Set one attribute on the entry registered under ident
      def set(ident, attr, value)
        @files[key(ident)][attr] = value
      end

      # Iterate over (identifier, entry) pairs. The caller's block is
      # forwarded to the underlying hash; without a block an Enumerator is
      # returned.
      def each(&block)
        @files.each(&block)
      end

      # Iterate over ((identifier, entry), index). The caller's block is
      # forwarded to the underlying hash; without a block an Enumerator is
      # returned.
      def each_with_index(&block)
        @files.each_with_index(&block)
      end

      # Namespace-qualify an XPath expression via the isodoc instance
      def ns(xpath)
        @isodoc.ns(xpath)
      end
    end
  end
end