lib/asciidoctor/iso/ref.rb in asciidoctor-iso-0.8.1 vs lib/asciidoctor/iso/ref.rb in asciidoctor-iso-0.9.0

- old
+ new

@@ -69,84 +69,151 @@ def isorefmatches3(xml, m) ref = fetch_ref xml, m[:code], m[:year], all_parts: true return use_my_anchor(ref, m[:anchor]) if ref xml.bibitem(**attr_code(ref_attributes(m))) do |t| t.title(**plaintxt) { |i| i << ref_normalise(m[:text]) } - t.docidentifier "#{m[:code]}:All Parts" + t.docidentifier "#{m[:code]}" if m.named_captures.has_key?("year") t.date(**{ type: "published" }) { |d| set_date_range(d, m[:year]) } end iso_publisher(t, m[:code]) + t.allParts "true" end end - def fetch_year_check(hit, code, year, opts) - ret =nil - if year.nil? || year.to_i == hit.hit["year"] - ret = hit.to_xml opts - @bibliodb[code] = ret if @bibliodb - @local_bibliodb[code] = ret if @local_bibliodb +# --- ISOBIB + def fetch_ref_err(code, year, missed_years) + id = year ? "#{code}:#{year}" : code + warn "WARNING: no match found on the ISO website for #{id}. "\ + "The code must be exactly like it is on the website." + warn "(There was no match for #{year}, though there were matches "\ + "found for #{missed_years.join(', ')}.)" unless missed_years.empty? + if /\d-\d/.match? code + warn "The provided document part may not exist, or the document "\ + "may no longer be published in parts." else - warn "WARNING: cited year #{year} does not match year "\ - "#{hit.hit['year']} found on the ISO website for #{code}" + warn "If you wanted to cite all document parts for the reference, "\ + "use \"#{code} (all parts)\".\nIf the document is not a standard, "\ + "use its document type abbreviation (TS, TR, PAS, Guide)." end - ret + nil end - def first_with_title(result) - result.first.each do |x| - next unless x.hit["title"] - return x - end - return nil + def fetch_pages(s, n) + workers = WorkersPool.new n + workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } } + s.each_with_index { |hit, i| workers << { i: i, hit: hit } } + workers.end + workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] } end - def first_year_match_hit(result, code, year) - return first_with_title(result) if year.nil? - return nil unless result.first && result.first.is_a?(Array) - coderegex = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+} - result.first.each do |x| - next unless x.hit["title"] - return x if x.hit["title"].match(coderegex).to_s == code && - year.to_i == x.hit["year"] - end - return first_with_title(result) + def isobib_search_filter(code) + docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+} + corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/} + warn "fetching #{code}..." + result = Isobib::IsoBibliography.search(code) + result.first.select do |i| + i.hit["title"] && + i.hit["title"].match(docidrx).to_s == code && + !corrigrx.match?(i.hit["title"]) + end end - def fetch_ref_err(code) - warn "WARNING: no match found on the ISO website for #{code}." - if /\d-\d/.match? code - warn "The provided document part may not exist, or the document may no longer be published in parts." - else - warn "If you wanted to cite all document parts for the reference, use #{code}:All Parts" - end + def iev + Nokogiri::XML.fragment(<<~"END") + <bibitem type="international-standard" id="IEV"> + <title format="text/plain" language="en" script="Latn">Electropedia: + The World's Online Electrotechnical Vocabulary</title> + <source type="src">http://www.electropedia.org</source> + <docidentifier>IEV</docidentifier> + <date type="published"> <on>#{Date.today.year}</on> </date> + <contributor> + <role type="publisher"/> + <organization> + <name>International Electrotechnical Commission</name> + <abbreviation>IEC</abbreviation> + <uri>www.iec.ch</uri> + </organization> + </contributor> + <language>en</language> <language>fr</language> + <script>Latn</script> + <copyright> + <from>#{Date.today.year}</from> + <owner> + <organization> + <name>International Electrotechnical Commission</name> + <abbreviation>IEC</abbreviation> + <uri>www.iec.ch</uri> + </organization> + </owner> + </copyright> + <relation type="updates"> + <bibitem> + <formattedref>IEC 60050</formattedref> + </bibitem> + </relation> +</bibitem> + END end - def fetch_ref2(code, year, opts) - result = Isobib::IsoBibliography.search(code) - ret = nil - hit = first_year_match_hit(result, code, year) - coderegex = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+} - if hit && hit.hit["title"]&.match(coderegex)&.to_s == code - ret = fetch_year_check(hit, code, year, opts) - else - fetch_ref_err(code) + # Sort through the results from Isobib, fetching them three at a time, + # and return the first result that matches the code, + # matches the year (if provided), and which # has a title (amendments do not). + # Only expects the first page of results to be populated. + # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007) + # If no match, returns any years which caused mismatch, for error reporting + def isobib_results_filter(result, year) + missed_years = [] + result.each_slice(3) do |s| # ISO website only allows 3 connections + fetch_pages(s, 3).each_with_index do |r, i| + return { ret: r } if !year + r.dates.select { |d| d.type == "published" }.each do |d| + return { ret: r } if year.to_i == d.on.year + missed_years << d.on.year + end + end end + { years: missed_years } + end + + def isobib_get1(code, year, opts) + return iev if code.casecmp? "IEV" + result = isobib_search_filter(code) or return nil + ret = isobib_results_filter(result, year) + return ret[:ret] if ret[:ret] + fetch_ref_err(code, year, ret[:years]) + end + + def isobib_get(code, year, opts) + code += "-1" if opts[:all_parts] + ret = isobib_get1(code, year, opts) + return nil if ret.nil? + ret.to_most_recent_reference if !year + ret.to_all_parts if opts[:all_parts] + ret.to_xml + end + + # --- ISOBIB + + def iso_id(code, year, all_parts) + ret = code + ret += ":#{year}" if year + ret += " (all parts)" if all_parts ret end def fetch_ref1(code, year, opts) + id = iso_id(code, year, opts[:all_parts]) return nil if @bibliodb.nil? # signals we will not be using isobib - @bibliodb[code] = fetch_ref2(code, year, opts) unless @bibliodb[code] - @local_bibliodb[code] = @bibliodb[code] if !@local_bibliodb.nil? && - !@local_bibliodb[code] - return @local_bibliodb[code] unless @local_bibliodb.nil? - @bibliodb[code] + @bibliodb[id] = isobib_get(code, year, opts) unless @bibliodb[id] + @local_bibliodb[id] = @bibliodb[id] if !@local_bibliodb.nil? && + !@local_bibliodb[id] + return @local_bibliodb[id] unless @local_bibliodb.nil? + @bibliodb[id] end def fetch_ref(xml, code, year, **opts) - warn "fetching #{code}..." hit = fetch_ref1(code, year, opts) return nil if hit.nil? xml.parent.add_child(hit) xml rescue Algolia::AlgoliaProtocolError @@ -179,11 +246,12 @@ # gsub(/&#8201;&#8212;&#8201;/, " -- "). gsub(/&amp;amp;/, "&amp;") end ISO_REF = %r{^<ref\sid="(?<anchor>[^"]+)"> - \[(?<code>(ISO|IEC)[^0-9]*\s[0-9-]+)(:(?<year>[0-9][0-9-]+))?\]</ref>,?\s + \[(?<code>(ISO|IEC)[^0-9]*\s[0-9-]+|IEV) + (:(?<year>[0-9][0-9-]+))?\]</ref>,?\s (?<text>.*)$}xm ISO_REF_NO_YEAR = %r{^<ref\sid="(?<anchor>[^"]+)"> \[(?<code>(ISO|IEC)[^0-9]*\s[0-9-]+):--\]</ref>,?\s? <fn[^>]*>\s*<p>(?<fn>[^\]]+)</p>\s*</fn>,?\s?(?<text>.*)$}xm @@ -233,23 +301,25 @@ "#{@filename}.relaton.json" end # if returns nil, then biblio caching is disabled, and so is use of isobib def open_cache_biblio(node, global) - return nil # disabling for now + # return nil # disabling for now + return nil if node.attr("no-isobib") + return {} if @no_isobib_cache filename = bibliocache_name(global) - system("rm -f #{filename}") if node.attr("flush-caches") == "true" + system("rm -f #{filename}") if node.attr("flush-caches") biblio = {} if Pathname.new(filename).file? File.open(filename, "r") do |f| biblio = JSON.parse(f.read) end end biblio end def save_cache_biblio(biblio, global) - return if biblio.nil? + return if biblio.nil? || @no_isobib_cache filename = bibliocache_name(global) File.open(filename, "w") do |b| b << biblio.to_json end end