lib/relaton/bibdata.rb in relaton-cli-0.1.2 vs lib/relaton/bibdata.rb in relaton-cli-0.1.4

- old
+ new

@@ -1,10 +1,11 @@ +require "date" module Relaton class Bibdata ATTRIBS = %i[ - docid + docidentifier doctype title stage relation xml @@ -14,10 +15,20 @@ uri relaton revdate abstract technical_committee + copyright_from + copyright_owner + contributor_author_role + contributor_author_organization + contributor_publisher_role + contributor_publisher_organization + language + script + edition + datetype ] attr_accessor *ATTRIBS def self.ns(xpath) @@ -29,70 +40,122 @@ def initialize(options) options.each_pair do |k,v| send("#{k.to_s}=", v) end + self + end - puts "*+"*30 - puts self.inspect + # From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/ + FILENAME_BAD_CHARS = [ '/', '\\', '?', '%', '*', ':', '|', '"', '<', '>', '.', ' ' ] - self + def docidentifier_code + return "" if docidentifier.nil? + a = FILENAME_BAD_CHARS.inject(docidentifier.downcase) do |result, bad_char| + result.gsub(bad_char, '-') + end end - def docid_code - docid.downcase.gsub(/[\s\/]/, "-") || "" + DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/ + def doc_number + docidentifier&.match(DOC_NUMBER_REGEX) ? $2.to_i : 999999 end def self.from_xml(source) # bib.relaton_xml_path = URI.escape("#{relaton_root}/#{id_code}.xml") + revdate = source.at(ns("./date[@type = 'published']")) || + source.at(ns("./date[@type = 'circulated']")) || source.at(ns("./date")) + datetype = "circulated" + datetype = revdate["type"] if revdate - datetype = source.at(ns("./date[@type]")).text - revdate = source.at(ns("./date/on")).text - new({ - uri: source.at(ns("./uri"))&.text, + uri: source.at(ns("./uri[not(@type)]"))&.text, xml: source.at(ns("./uri[@type='xml']"))&.text, pdf: source.at(ns("./uri[@type='pdf']"))&.text, html: source.at(ns("./uri[@type='html']"))&.text, relaton: source.at(ns("./uri[@type='relaton']"))&.text, doc: source.at(ns("./uri[@type='doc']"))&.text, - docid: source.at(ns("./docidentifier"))&.text, + docidentifier: source.at(ns("./docidentifier"))&.text, title: source.at(ns("./title"))&.text, doctype: source.at(ns("./@type"))&.text, stage: source.at(ns("./status"))&.text, - technical_committee: source.at(ns("./technical-committee"))&.text, + technical_committee: source.at(ns("./editorialgroup/technical-committee"))&.text, abstract: source.at(ns("./abstract"))&.text, - revdate: Date.parse(revdate) - # revdate TODO + revdate: revdate ? Date.parse(revdate.text) : nil, + language: source.at(ns("./language"))&.text, + script: source.at(ns("./script"))&.text, + edition: source.at(ns("./edition"))&.text, + copyright_from: source.at(ns("./copyright/from"))&.text, + copyright_owner: source.at(ns("./copyright/owner/organization/name"))&.text, + contributor_author_role: source.at(ns("./contributor/role[@type='author']"))&.text, + contributor_author_organization: source.at(ns("./contributor/role[@type='author']"))&.parent&.at(ns("./organization/name"))&.text, + contributor_publisher_role: source.at(ns("./contributor/role[@type='publisher']"))&.text, + contributor_publisher_organization: source.at(ns("./contributor/role[@type='publisher']"))&.parent&.at(ns("./organization/name"))&.text, + datetype: datetype }) end def to_xml - datetype = stage.casecmp("published") == 0 ? "published" : "updated" + #datetype = stage&.casecmp("published") == 0 ? "published" : "circulated" ret = "<bibdata type='#{doctype}'>\n" + ret += "<fetched>#{Date.today.to_s}</fetched>\n" ret += "<title>#{title}</title>\n" + ret += "<docidentifier>#{docidentifier}</docidentifier>\n" if docidentifier ret += "<uri>#{uri}</uri>\n" if uri ret += "<uri type='xml'>#{xml}</uri>\n" if xml ret += "<uri type='html'>#{html}</uri>\n" if html ret += "<uri type='pdf'>#{pdf}</uri>\n" if pdf ret += "<uri type='doc'>#{doc}</uri>\n" if doc ret += "<uri type='relaton'>#{relaton}</uri>\n" if relaton - ret += "<docidentifier>#{docid}</docidentifier>\n" + + ret += "<language>#{language}</language>\n" + ret += "<script>#{script}</script>\n" + + if copyright_from + ret += "<copyright>" + ret += "<from>#{copyright_from}</from>\n" if copyright_from + ret += "<owner><organization><name>#{copyright_owner}</name></organization></owner>\n" if copyright_owner + ret += "</copyright>" + end + + if contributor_author_role + ret += "<contributor>\n" + ret += "<role type='author'/>\n" + ret += "<organization><name>#{contributor_author_organization}</name></organization>\n" + ret += "</contributor>\n" + end + + if contributor_publisher_role + ret += "<contributor>\n" + ret += "<role type='publisher'/>\n" + ret += "<organization><name>#{contributor_publisher_organization}</name></organization>\n" + ret += "</contributor>\n" + end + ret += "<date type='#{datetype}'><on>#{revdate}</on></date>\n" if revdate + # ret += "<contributor><role type='author'/><organization><name>#{agency}</name></organization></contributor>" if agency + # ret += "<contributor><role type='publisher'/><organization><name>#{agency}</name></organization></contributor>" if agency + ret += "<edition>#{edition}</edition>\n" if edition + ret += "<language>#{language}</language>\n" if language + ret += "<script>#{script}</script>\n" if script ret += "<abstract>#{abstract}</abstract>\n" if abstract ret += "<status>#{stage}</status>\n" if stage - ret += "<technical-committee>#{technical_committee}</technical-committee>\n" if technical_committee + ret += "<editorialgroup><technical-committee>#{technical_committee}</technical-committee></editorialgroup>\n" if technical_committee ret += "</bibdata>\n" end def to_h ATTRIBS.inject({}) do |acc, k| value = send(k) - acc[k] = value unless value.nil? + acc[k.to_s] = value unless value.nil? acc end + end + + def to_yaml + to_h.to_yaml end end end