lib/metanorma/sectionsplit.rb in metanorma-1.6.2 vs lib/metanorma/sectionsplit.rb in metanorma-1.6.3

- old
+ new

@@ -1,33 +1,37 @@ require "yaml" +require_relative "util" +require_relative "sectionsplit_links" module Metanorma - class Compile - # assume we pass in Presentation XML, but we want to recover Semantic XML - def sectionsplit_convert(input_filename, file, output_filename = nil, - opts = {}) - @isodoc = IsoDoc::Convert.new({}) - input_filename += ".xml" unless input_filename.match?(/\.xml$/) - File.exist?(input_filename) or - File.open(input_filename, "w:UTF-8") { |f| f.write(file) } - presxml = File.read(input_filename, encoding: "utf-8") - @openmathdelim, @closemathdelim = @isodoc.extract_delims(presxml) - xml, filename, dir = @isodoc.convert_init(presxml, input_filename, false) - build_collection(xml, presxml, output_filename || filename, dir, opts) + class Sectionsplit + attr_accessor :filecache + + def initialize(opts) + @input_filename = opts[:input] + @base = opts[:base] + @output_filename = opts[:output] + @xml = opts[:xml] + @dir = opts[:dir] + @compile_opts = opts[:compile_opts] || {} + @fileslookup = opts[:fileslookup] + @ident = opts[:ident] + @isodoc = opts[:isodoc] end def ns(xpath) @isodoc.ns(xpath) end - def build_collection(xml, presxml, filename, dir, opts = {}) - base = File.basename(filename) - collection_setup(base, dir) - files = sectionsplit(xml, base, dir) - collection_manifest(base, files, xml, presxml, dir).render( - { format: %i(html), output_folder: "#{filename}_collection", - coverpage: File.join(dir, "cover.html") }.merge(opts), + def build_collection + collection_setup(@base, @dir) + files = sectionsplit #(@input_filename, @base, @dir, @compile_opts) + input_xml = Nokogiri::XML(File.read(@input_filename, + encoding: "UTF-8"), &:huge) + collection_manifest(@base, files, input_xml, @xml, @dir).render( + { format: %i(html), output_folder: "#{@output_filename}_collection", + coverpage: File.join(@dir, "cover.html") }.merge(@compile_opts), ) end def collection_manifest(filename, files, origxml, _presxml, dir) File.open(File.join(dir, 
"#{filename}.html.yaml"), "w:UTF-8") do |f| @@ -44,184 +48,112 @@ end end def coll_cover <<~COVER - <html><head/> - <body> + <html><head/><body> <h1>{{ doctitle }}</h1> <h2>{{ docnumber }}</h2> <nav>{{ navigation }}</nav> - </body> - </html> + </body></html> COVER end SPLITSECTIONS = [["//preface/*", "preface"], ["//sections/*", "sections"], ["//annex", nil], ["//bibliography/*[not(@hidden = 'true')]", "bibliography"], ["//indexsect", nil], ["//colophon", nil]].freeze - def sectionsplit(xml, filename, dir) - @key = xref_preprocess(xml) - @splitdir = dir - out = emptydoc(xml) + # Input XML is Semantic + # def sectionsplit(filename, basename, dir, compile_options, fileslookup = nil, ident = nil) + def sectionsplit + xml = sectionsplit_prep(File.read(@input_filename), @base, @dir) + @key = xref_preprocess(xml, @fileslookup, @ident) SPLITSECTIONS.each_with_object([]) do |n, ret| - xml.xpath(ns(n[0])).each do |s| - ret << sectionfile(xml, out, "#{filename}.#{ret.size}", s, n[1]) + conflate_floatingtitles(xml.xpath(ns(n[0]))).each do |s| + ret << sectionfile(xml, emptydoc(xml), "#{@base}.#{ret.size}", s, n[1]) end end end - def emptydoc(xml) - out = xml.dup - out.xpath( - ns("//preface | //sections | //annex | //bibliography/clause | " \ - "//bibliography/references[not(@hidden = 'true')] | //indexsect" \ - "//colophon"), - ).each(&:remove) - out + def block?(node) + %w(p table formula admonition ol ul dl figure quote sourcecode example + pre note pagebrreak hr bookmark requirement recommendation permission + svgmap inputform toc passthrough review imagemap).include?(node.name) end - def sectionfile(fulldoc, xml, file, chunk, parentnode) - fname = create_sectionfile(fulldoc, xml.dup, file, chunk, parentnode) - { order: chunk["displayorder"].to_i, url: fname, - title: titlerender(chunk) } - end - - def create_sectionfile(xml, out, file, chunk, parentnode) - ins = out.at(ns("//misccontainer")) || out.at(ns("//bibdata")) - if parentnode - ins.next = "<#{parentnode}/>" - 
ins.next.add_child(chunk.dup) - else ins.next = chunk.dup - end - xref_process(out, xml, @key) - outname = "#{file}.xml" - File.open(File.join(@splitdir, outname), "w:UTF-8") { |f| f.write(out) } - outname - end - - def xref_preprocess(xml) - svg_preprocess(xml) - key = (0...8).map { rand(65..90).chr }.join # random string - xml.root["type"] = key # to force recognition of internal refs - key - end - - def xref_process(section, xml, key) - refs = eref_to_internal_eref(section, xml, key) - refs += xref_to_internal_eref(section, key) - ins = new_hidden_ref(section) - copied_refs = copy_repo_items_biblio(ins, section, xml) - insert_indirect_biblio(ins, refs - copied_refs, key) - end - - def svg_preprocess(xml) - xml.xpath("//m:svg", "m" => "http://www.w3.org/2000/svg").each do |s| - m = svgmap_wrap(s) - s.xpath(".//m:a", "m" => "http://www.w3.org/2000/svg").each do |a| - next unless /^#/.match? a["href"] - - a["href"] = a["href"].sub(/^#/, "") - m << "<target href='#{a['href']}'>" \ - "<xref target='#{a['href']}'/></target>" + def conflate_floatingtitles(nodes) + holdover = false + nodes.each_with_object([]) do |x, m| + if holdover then m.last << x + else m << [x] end + holdover = block?(x) end end - def svgmap_wrap(svg) - ret = svg.at("./ancestor::xmlns:svgmap") and return ret - ret = svg.at("./ancestor::xmlns:figure") - ret.wrap("<svgmap/>") - svg.at("./ancestor::xmlns:svgmap") + def sectionsplit_prep(file, filename, dir) + @splitdir = dir + xml1filename, type = sectionsplit_preprocess_semxml(file, filename) + Compile.new.compile( + xml1filename, + { format: :asciidoc, extension_keys: [:presentation], type: type } + .merge(@compile_opts), + ) + Nokogiri::XML(File.read(xml1filename.sub(/\.xml$/, ".presentation.xml"), + encoding: "utf-8"), &:huge) end - def make_anchor(anchor) - "<localityStack><locality type='anchor'><referenceFrom>" \ - "#{anchor}</referenceFrom></locality></localityStack>" - end - - def xref_to_internal_eref(xml, key) - 
xml.xpath(ns("//xref")).each_with_object({}) do |x, m| - x["bibitemid"] = "#{key}_#{x['target']}" - x << make_anchor(x["target"]) - m[x["bibitemid"]] = true - x.delete("target") - x["type"] = key - x.name = "eref" - end.keys - end - - def eref_to_internal_eref(section, xml, key) - eref_to_internal_eref_select(section, xml).each_with_object([]) do |x, m| - url = xml.at(ns("//bibitem[@id = '#{x}']/uri[@type = 'citation']")) - section.xpath("//*[@bibitemid = '#{x}']").each do |e| - id = eref_to_internal_eref1(e, key, url) - id and m << id - end + def sectionsplit_preprocess_semxml(file, filename) + xml = Nokogiri::XML(file, &:huge) + type = xml.root.name.sub("-standard", "").to_sym + @fileslookup&.parent&.update_xrefs(xml, @ident, {}) + xml1 = Tempfile.open([filename, ".xml"], encoding: "utf-8") do |f| + f.write(@isodoc.to_xml(svg_preprocess(xml))) + f end + @filecache ||= [] + @filecache << xml1 + [xml1.path, type] end - def eref_to_internal_eref1(elem, key, url) - if url - elem.name = "link" - elem["target"] = url - nil - else - elem["bibitemid"] = "#{key}_#{elem['bibitemid']}" - elem << make_anchor(elem["bibitemid"]) - elem["type"] = key - elem["bibitemid"] - end + def emptydoc(xml) + out = xml.dup + out.xpath( + ns("//preface | //sections | //annex | //bibliography/clause | " \ + "//bibliography/references[not(@hidden = 'true')] | //indexsect | " \ + "//colophon"), + ).each(&:remove) + out end - def eref_to_internal_eref_select(section, xml) - refs = section.xpath("//*/@bibitemid").map { |x| x.text } # rubocop:disable Style/SymbolProc - refs.uniq.reject do |x| - xml.at(ns("//bibitem[@id = '#{x}'][@type = 'internal']")) || - xml.at(ns("//bibitem[@id = '#{x}']" \ - "[docidentifier/@type = 'repository']")) - end + def sectionfile(fulldoc, xml, file, chunks, parentnode) + fname = create_sectionfile(fulldoc, xml.dup, file, chunks, parentnode) + { order: chunks.last["displayorder"].to_i, url: fname, + title: titlerender(chunks.last) } end - # from standoc - def 
new_hidden_ref(xmldoc) - ins = xmldoc.at("bibliography") or - xmldoc.root << "<bibliography/>" and ins = xmldoc.at("bibliography") - ins.add_child("<references hidden='true' normative='false'/>").first - end - - def copy_repo_items_biblio(ins, section, xml) - xml.xpath(ns("//references/bibitem[docidentifier/@type = 'repository']")) - .each_with_object([]) do |b, m| - section.at("//*[@bibitemid = '#{b['id']}']") or next - ins << b.dup - m << b["id"] + def create_sectionfile(xml, out, file, chunks, parentnode) + ins = out.at(ns("//metanorma-extension")) || out.at(ns("//bibdata")) + sectionfile_insert(ins, chunks, parentnode) + xref_process(out, xml, @key) + outname = "#{file}.xml" + File.open(File.join(@splitdir, outname), "w:UTF-8") do |f| + f.write(out) end + outname end - def insert_indirect_biblio(ins, refs, prefix) - refs.each do |x| - ins << <<~BIBENTRY - <bibitem id="#{x}" type="internal"> - <docidentifier type="repository">#{x.sub(/^#{prefix}_/, "#{prefix}/")}</docidentifier> - </bibitem> - BIBENTRY + def sectionfile_insert(ins, chunks, parentnode) + if parentnode + ins.next = "<#{parentnode}/>" + chunks.each { |c| ins.next.add_child(c.dup) } + else chunks.each { |c| ins.next = c.dup } end end - def recursive_string_keys(hash) - case hash - when Hash then hash.map { |k, v| [k.to_s, recursive_string_keys(v)] }.to_h - when Enumerable then hash.map { |v| recursive_string_keys(v) } - else - hash - end - end - def titlerender(section) title = section.at(ns("./title")) or return "[Untitled]" t = title.dup t.xpath(ns(".//tab | .//br")).each { |x| x.replace(" ") } t.xpath(ns(".//strong")).each { |x| x.replace(x.children) } @@ -247,9 +179,22 @@ docref: files.sort_by { |f| f[:order] }.each.map do |f| { fileref: f[:url], identifier: f[:title] } end }, } - recursive_string_keys(ret).to_yaml + Util::recursive_string_keys(ret).to_yaml + end + + def section_split_cover(col, ident) + dir = File.dirname(col.file) + collection_setup(nil, dir) + CollectionRenderer.new(col, dir, 
+ output_folder: "#{ident}_collection", + format: %i(html), + coverpage: File.join(dir, "cover.html")).coverpage + FileUtils.mv "#{ident}_collection/index.html", + File.join(dir, "#{ident}_index.html") + FileUtils.rm_rf "#{ident}_collection" + "#{ident}_index.html" end end end