require "date"
require "yaml"

module Relaton
  # Flat record of the bibliographic fields listed in ATTRIBS.
  #
  # Instances are built either from an options hash (see #initialize) or from
  # a parsed XML node (see .from_xml), and can be rendered with #to_xml, #to_h
  # and #to_yaml.
  class Bibdata
    # Every attribute the record carries; each one gets a reader and a writer.
    ATTRIBS = %i[
      docidentifier
      doctype
      title
      stage
      relation
      xml
      pdf
      doc
      html
      uri
      relaton
      revdate
      abstract
      technical_committee
      copyright_from
      copyright_owner
      contributor_author_role
      contributor_author_organization
      contributor_publisher_role
      contributor_publisher_organization
      language
      script
      edition
      datetype
    ].freeze

    attr_accessor(*ATTRIBS)

    # Rewrites an XPath so that element names carry the "xmlns:" prefix.
    #
    # Four textual substitutions are applied in order: names following "/",
    # names following an axis separator "::", names opening a predicate that
    # is followed by "=", and names that are the whole of a predicate.
    # Attribute steps ("@type") are left alone because "@" is not a name start.
    #
    # The name-start class is [a-zA-Z_]. It used to be spelled [a-zA-z], a
    # range that also covers "[", "\", "]", "^" and "`", so punctuation after
    # a slash was being prefixed as well. Underscore (the one legitimate name
    # start character inside that accidental range) is kept.
    #
    # @param xpath [String] XPath expression without namespace prefixes
    # @return [String] the same expression with "xmlns:" inserted
    def self.ns(xpath)
      xpath
        .gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1")
        .gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1")
        .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/]* ?=)}, "[xmlns:\\1")
        .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/]*\])}, "[xmlns:\\1")
    end

    # Assigns each key/value pair of +options+ through the matching writer.
    #
    # @param options [Hash] attribute name (Symbol or String) => value
    # @raise [NoMethodError] if a key has no public "<key>=" writer
    def initialize(options)
      options.each_pair do |key, value|
        public_send("#{key}=", value)
      end
    end

    # From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
    FILENAME_BAD_CHARS = [
      "/", "\\", "?", "%", "*", ":", "|", '"', "<", ">", ".", " "
    ].freeze

    # Single pattern matching any one of FILENAME_BAD_CHARS.
    FILENAME_BAD_CHARS_PATTERN = Regexp.union(FILENAME_BAD_CHARS).freeze

    # Lower-cased docidentifier with every character from FILENAME_BAD_CHARS
    # replaced by "-".
    #
    # @return [String] the sanitised identifier, or "" when docidentifier is nil
    def docidentifier_code
      return "" if docidentifier.nil?

      docidentifier.downcase.gsub(FILENAME_BAD_CHARS_PATTERN, "-")
    end

    # Captures: (1) a run of word characters and slashes, (2) the digits that
    # follow after whitespace, (3) optional digits after a ":".
    DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/

    # Integer taken from the second capture of DOC_NUMBER_REGEX.
    #
    # @return [Integer] the captured number, or 999999 when docidentifier is
    #   nil or does not match
    def doc_number
      found = docidentifier&.match(DOC_NUMBER_REGEX)
      found ? found[2].to_i : 999_999
    end

    # Builds a Bibdata from a parsed XML node.
    #
    # +source+ must respond to #at(xpath); the nodes it returns must respond
    # to #text, #[] (attribute lookup) and #parent. Every XPath is passed
    # through .ns first, so the input is expected to use a default namespace.
    #
    # The date is the first hit among date[@type = 'published'],
    # date[@type = 'circulated'] and any date. datetype is that node's "type"
    # attribute, or "circulated" when there is no date node at all.
    #
    # @param source [#at] element holding the bibliographic child elements
    # @return [Bibdata]
    # @raise [ArgumentError] propagated from Date.parse when the date text
    #   cannot be parsed
    def self.from_xml(source)
      # Text of the first node matching +xpath+ under +node+, or nil.
      text_at = ->(xpath, node = source) { node&.at(ns(xpath))&.text }

      date_node = source.at(ns("./date[@type = 'published']")) ||
                  source.at(ns("./date[@type = 'circulated']")) ||
                  source.at(ns("./date"))
      date_kind = date_node ? date_node["type"] : "circulated"

      # Each role node is fetched once: its own text is the role, and the
      # organization name is looked up from its parent element.
      author_role = source.at(ns("./contributor/role[@type='author']"))
      publisher_role = source.at(ns("./contributor/role[@type='publisher']"))

      new({
        uri: text_at.call("./uri[not(@type)]"),
        xml: text_at.call("./uri[@type='xml']"),
        pdf: text_at.call("./uri[@type='pdf']"),
        html: text_at.call("./uri[@type='html']"),
        relaton: text_at.call("./uri[@type='relaton']"),
        doc: text_at.call("./uri[@type='doc']"),
        docidentifier: text_at.call("./docidentifier"),
        title: text_at.call("./title"),
        doctype: text_at.call("./@type"),
        stage: text_at.call("./status"),
        technical_committee: text_at.call("./editorialgroup/technical-committee"),
        abstract: text_at.call("./abstract"),
        revdate: date_node ? Date.parse(date_node.text) : nil,
        language: text_at.call("./language"),
        script: text_at.call("./script"),
        edition: text_at.call("./edition"),
        copyright_from: text_at.call("./copyright/from"),
        copyright_owner: text_at.call("./copyright/owner/organization/name"),
        contributor_author_role: author_role&.text,
        contributor_author_organization: text_at.call("./organization/name", author_role&.parent),
        contributor_publisher_role: publisher_role&.text,
        contributor_publisher_organization: text_at.call("./organization/name", publisher_role&.parent),
        datetype: date_kind
      })
    end

    # Renders the record as newline-terminated text, one value per line, in a
    # fixed order. Today's date is always the second line; title and the first
    # language line are always written (as an empty line when nil); the other
    # values are written only when set.
    #
    # NOTE(review): none of the strings emitted here contain XML markup, even
    # though the method is named to_xml and .from_xml reads element-structured
    # input. It looks as if the element tags were lost from these literals at
    # some point -- confirm against the intended schema before relying on this
    # output. The emitted text is deliberately left exactly as it was.
    #
    # @return [String]
    def to_xml
      out = +"\n"
      out << "#{Date.today}\n"
      out << "#{title}\n"

      [docidentifier, uri, xml, html, pdf, doc, relaton].each do |value|
        out << "#{value}\n" if value
      end

      out << "#{language}\n"
      out << "\n"

      if copyright_from
        out << "#{copyright_from}\n"
        out << "#{copyright_owner}\n" if copyright_owner
      end

      out << contributor_section(contributor_author_organization) if contributor_author_role
      out << contributor_section(contributor_publisher_organization) if contributor_publisher_role

      out << "#{revdate}\n" if revdate
      out << "#{edition}\n" if edition
      out << "#{language}\n" if language
      out << "\n" if script
      out << "#{abstract}\n" if abstract
      out << "#{stage}\n" if stage
      out << "#{technical_committee}\n" if technical_committee
      out << "\n"
    end

    # Hash of the attributes that are not nil, in ATTRIBS order.
    #
    # @return [Hash{String => Object}] attribute name (as a String) => value
    def to_h
      ATTRIBS.each_with_object({}) do |name, result|
        value = public_send(name)
        result[name.to_s] = value unless value.nil?
      end
    end

    # YAML serialisation of #to_h.
    #
    # @return [String]
    def to_yaml
      to_h.to_yaml
    end

    private

    # Text written by #to_xml for one contributor: two empty lines, the
    # organization value, then one more empty line.
    def contributor_section(organization)
      "\n\n#{organization}\n\n"
    end
  end
end