lib/metanorma/collection_fileprocess.rb in metanorma-1.2.8 vs lib/metanorma/collection_fileprocess.rb in metanorma-1.3.0
- old
+ new
@@ -1,9 +1,10 @@
# frozen_string_literal: true
require "isodoc"
require "metanorma-utils"
+require_relative "collection_fileparse"
module Metanorma
# XML collection renderer
class CollectionRenderer
# hash for each document in collection of document identifier to:
@@ -13,279 +14,107 @@
# @return [Hash{String=>Hash}]
def read_files(path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
files = {}
@xml.xpath(ns("//docref")).each do |d|
identifier = d.at(ns("./identifier")).text
- files[identifier] = if d["fileref"]
- { type: "fileref",
- ref: File.join(path, d["fileref"]) }
- else { type: "id", ref: d["id"] }
- end
+ files[identifier] = file_entry(d, path)
+ next if files[identifier][:attachment]
+
file, _filename = targetfile(files[identifier], true)
xml = Nokogiri::XML(file)
add_document_suffix(identifier, xml)
files[identifier][:anchors] = read_anchors(xml)
files[identifier][:bibdata] = xml.at(ns("//bibdata"))
end
files
end
+ def file_entry(docref, path)
+ ret = if docref["fileref"]
+ { type: "fileref", ref: File.join(path, docref["fileref"]),
+ rel_path: docref["fileref"] }
+ else
+ { type: "id", ref: docref["id"] }
+ end
+ ret[:attachment] = docref["attachment"] if docref["attachment"]
+ ret
+ end
+
def add_suffix_to_attributes(doc, suffix, tag_name, attribute_name)
doc.xpath(ns("//#{tag_name}[@#{attribute_name}]")).each do |elem|
elem.attributes[attribute_name].value =
"#{elem.attributes[attribute_name].value}_#{suffix}"
end
end
def add_document_suffix(identifier, doc)
document_suffix = Metanorma::Utils::to_ncname(identifier)
- [%w[* id],
- %w[* bibitemid],
- %w[review from],
- %w[review to],
- %w[index to],
- %w[xref target],
- %w[callout target]]
- .each do |(tag_name, attribute_name)|
+ [%w[* id], %w[* bibitemid], %w[review from],
+ %w[review to], %w[index to], %w[xref target],
+ %w[callout target]]
+ .each do |(tag_name, attribute_name)|
add_suffix_to_attributes(doc, document_suffix, tag_name, attribute_name)
end
end
- # map locality type and label (e.g. "clause" "1") to id = anchor for
- # a document
- def read_anchors(xml)
- ret = {}
- xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n, {})
- xrefs.parse xml
- xrefs.get.each do |k, v|
- ret[v[:type]] ||= {}
- index = v[:container] || v[:label].nil? || v[:label].empty? ?
- UUIDTools::UUID.random_create.to_s : v[:label]
- # Note: will only key clauses, which have unambiguous reference label in locality.
- # Notes, examples etc with containers are just plunked agaisnt UUIDs, so that their
- # IDs can at least be registered to be tracked as existing.
- ret[v[:type]][index] = k
- end
- ret
- end
-
# return file contents + output filename for each file in the collection,
# given a docref entry
# @param data [Hash]
# @param read [Boolean]
# @return [Array<String, nil>]
- def targetfile(data, read = false)
- if data[:type] == "fileref" then ref_file data[:ref], read
+ def targetfile(data, read = false, doc = true)
+ if data[:type] == "fileref" then ref_file data[:ref], read, doc
else xml_file data[:id], read
end
end
# @param ref [String]
# @param read [Boolean]
+ # @param doc [Boolean]
# @return [Array<String, nil>]
- def ref_file(ref, read)
+ def ref_file(ref, read, doc)
file = File.read(ref, encoding: "utf-8") if read
- filename = ref.sub(/\.xml$/, ".html")
+ filename = ref.dup
+ filename.sub!(/\.xml$/, ".html") if doc
[file, filename]
end
- # @param id [String]
- # @param read [Boolean]
- # @return [Array<String, nil>]
- def xml_file(id, read)
- file = @xml.at(ns("//doc-container[@id = '#{id}']")).to_xml if read
- filename = id + ".html"
- [file, filename]
- end
-
- # @param bib [Nokogiri::XML::Element]
- # @param identifier [String]
- def update_bibitem(bib, identifier) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
- docid = bib&.at(ns("./docidentifier"))&.text
- unless @files[docid]
- error = "[metanorma] Cannot find crossreference to document #{docid} in document #{identifier}."
- @log.add("Cross-References", nil, error)
- Util.log(error, :warning)
- return
- end
- id = bib["id"]
- newbib = bib.replace(@files[docid][:bibdata])
- newbib.name = "bibitem"
- newbib["id"] = id
- newbib["hidden"] = "true"
- newbib&.at(ns("./ext"))&.remove
- _file, url = targetfile(@files[docid], false)
- uri_node = Nokogiri::XML::Node.new "uri", newbib
- uri_node[:type] = "citation"
- uri_node.content = url
- newbib.at(ns("./docidentifier")).previous = uri_node
- end
-
- # Resolves direct links to other files in collection (repo(current-metanorma-collection/x),
- # and indirect links to other files in collection (bibitem[@type = 'internal'] pointing to a file anchor
- # in another file in the collection)
- # @param file [String] XML content
- # @param identifier [String] docid
- # @param internal_refs [Hash{String=>Hash{String=>String}] schema name to anchor to filename
- # @return [String] XML content
- def update_xrefs(file, identifier, internal_refs)
- docxml = Nokogiri::XML(file)
- update_indirect_refs_to_docs(docxml, internal_refs)
- add_document_suffix(identifier, docxml)
- update_direct_refs_to_docs(docxml, identifier)
- svgmap_resolve(datauri_encode(docxml))
- docxml.xpath(ns("//references[not(./bibitem[not(@hidden) or @hidden = 'false'])]")).each do |f|
- f["hidden"] = "true"
- end
- docxml.to_xml
- end
-
- def datauri_encode(docxml)
- docxml.xpath(ns("//image")).each { |i| i["src"] = Metanorma::Utils::datauri(i["src"]) }
- docxml
- end
-
- def svgmap_resolve(docxml)
- isodoc = IsoDoc::Convert.new({})
- docxml.xpath(ns("//svgmap//eref")).each do |e|
- href = isodoc.eref_target(e)
- next if href == "#" + e["bibitemid"]
- if href.match(/^#/)
- next unless docxml.at("//*[@id = '#{href.sub(/^#/, '')}']")
- end
- e["target"] = href.strip
- e.name = "link"
- e&.elements&.remove
- end
- Metanorma::Utils::svgmap_rewrite(docxml, "")
- end
-
- # repo(current-metanorma-collection/ISO 17301-1:2016)
- # replaced by bibdata of "ISO 17301-1:2016" in situ as bibitem.
- # Any erefs to that bibitem id are replaced with relative URL
- # Preferably with anchor, and is a job to realise dynamic lookup of localities.
- def update_direct_refs_to_docs(docxml, identifier)
- docxml.xpath(ns("//bibitem[not(ancestor::bibitem)]")).each do |b|
- docid = b&.at(ns("./docidentifier[@type = 'repository']"))&.text
- next unless docid && %r{^current-metanorma-collection/}.match(docid)
- update_bibitem(b, identifier)
- update_anchors(b, docxml, docid)
- end
- end
-
- # Resolve erefs to a container of ids in another doc, to an anchor eref (direct link)
- def update_indirect_refs_to_docs(docxml, internal_refs)
- internal_refs.each do |schema, ids|
- ids.each do |id, file|
- update_indirect_refs_to_docs1(docxml, schema, id, file)
- end
- end
- end
-
- def update_indirect_refs_to_docs1(docxml, schema, id, file)
- docxml.xpath(ns("//eref[@bibitemid = '#{schema}_#{id}']")).each do |e|
- e["citeas"] = file
- end
- docid = docxml.at(ns("//bibitem[@id = '#{schema}_#{id}']/docidentifier[@type = 'repository']")) or return
- docid.children = "current-metanorma-collection/#{file}"
- docid.previous = "<docidentifier type='X'>#{file}</docidentifier>"
- end
-
- # update crossrefences to other documents, to include disambiguating document suffix on id
- def update_anchors(bib, docxml, _id) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
- docid = bib&.at(ns("./docidentifier"))&.text
- docxml.xpath("//xmlns:eref[@citeas = '#{docid}']").each do |e|
- if @files[docid]
- update_anchor_loc(bib, e, docid)
- else
- e << "<strong>** Unresolved reference to document #{docid}, id #{e['bibitemid']}</strong>"
- end
- end
- end
-
- def update_anchor_loc(bib, e, docid)
- loc = e.at(ns(".//locality[@type = 'anchor']")) or return update_anchor_create_loc(bib, e, docid)
- document_suffix = Metanorma::Utils::to_ncname(docid)
- ref = loc.at(ns("./referenceFrom")) || return
- anchor = "#{ref.text}_#{document_suffix}"
- return unless @files[docid][:anchors].inject([]) { |m, (_, x)| m+= x.values }.include?(anchor)
- ref.content = anchor
- end
-
- # if there is a crossref to another document, with no anchor, retrieve the
- # anchor given the locality, and insert it into the crossref
- def update_anchor_create_loc(bib, e, docid)
- ins = e.at(ns("./localityStack")) || return
- type = ins&.at(ns("./locality/@type"))&.text
- ref = ins&.at(ns("./locality/referenceFrom"))&.text
- (anchor = @files[docid][:anchors][type][ref]) || return
- ref_from = Nokogiri::XML::Node.new "referenceFrom", bib
- ref_from.content = anchor.sub(/^_/, "")
- locality = Nokogiri::XML::Node.new "locality", bib
- locality[:type] = "anchor"
- locality.add_child ref_from
- ins << locality
- end
-
# compile and output individual file in collection
def file_compile(f, filename, identifier)
# warn "metanorma compile -x html #{f.path}"
c = Compile.new
- c.compile f.path, { format: :asciidoc, extension_keys: @format }.merge(@compile_options)
+ c.compile f.path, { format: :asciidoc,
+ extension_keys: @format }.merge(@compile_options)
@files[identifier][:outputs] = {}
@format.each do |e|
ext = c.processor.output_formats[e]
fn = File.basename(filename).sub(/(?<=\.)[^\.]+$/, ext.to_s)
FileUtils.mv f.path.sub(/\.xml$/, ".#{ext}"), File.join(@outdir, fn)
@files[identifier][:outputs][e] = File.join(@outdir, fn)
end
end
- # gather internal bibitem references
- def gather_internal_refs
- @files.each_with_object({}) do |(identifier, x), refs|
- file, _ = targetfile(x, true)
- Nokogiri::XML(file).xpath(ns("//bibitem[@type = 'internal']/docidentifier[@type = 'repository']")).each do |d|
- a = d.text.split(%r{/}, 2)
- a.size > 1 or next
- refs[a[0]] ||= {}
- refs[a[0]][a[1]] = true
- end
- end
+ def copy_file_to_dest(fileref)
+ _file, filename = targetfile(fileref, true, false)
+ dest = File.join(@outdir, fileref[:rel_path])
+ FileUtils.mkdir_p(File.dirname(dest))
+ FileUtils.cp filename, dest
end
- # resolve file location for the target of each internal reference
- def locate_internal_refs
- refs = gather_internal_refs
- @files.each do |identifier, x|
- file, filename = targetfile(x, true)
- docxml = Nokogiri::XML(file)
- refs.each do |schema, ids|
- ids.keys.each do |id|
- n = docxml.at("//*[@id = '#{id}']") and n.at("./ancestor-or-self::*[@type = '#{schema}']") and
- refs[schema][id] = identifier
- end
- end
- end
- refs.each do |schema, ids|
- ids.each do |id, key|
- key == true and refs[schema][id] = "Missing:#{schema}:#{id}"
- end
- end
- refs
- end
-
# process each file in the collection
# files are held in memory, and altered as postprocessing
def files # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
internal_refs = locate_internal_refs
@files.each do |identifier, x|
- file, filename = targetfile(x, true)
- file = update_xrefs(file, identifier, internal_refs)
- Tempfile.open(["collection", ".xml"], encoding: "utf-8") do |f|
- f.write(file)
- f.close
- file_compile(f, filename, identifier)
+ if x[:attachment] then copy_file_to_dest(x)
+ else
+ file, filename = targetfile(x, true)
+ file = update_xrefs(file, identifier, internal_refs)
+ Tempfile.open(["collection", ".xml"], encoding: "utf-8") do |f|
+ f.write(file)
+ f.close
+ file_compile(f, filename, identifier)
+ end
end
end
end
end
end