lib/relaton/bibdata.rb in relaton-cli-0.1.2 vs lib/relaton/bibdata.rb in relaton-cli-0.1.4
- old
+ new
@@ -1,10 +1,11 @@
+require "date"
module Relaton
class Bibdata
ATTRIBS = %i[
- docid
+ docidentifier
doctype
title
stage
relation
xml
@@ -14,10 +15,20 @@
uri
relaton
revdate
abstract
technical_committee
+ copyright_from
+ copyright_owner
+ contributor_author_role
+ contributor_author_organization
+ contributor_publisher_role
+ contributor_publisher_organization
+ language
+ script
+ edition
+ datetype
]
attr_accessor *ATTRIBS
def self.ns(xpath)
@@ -29,70 +40,122 @@
# Populates a new instance from a hash of attribute values.
#
# Every key is dispatched to the writer named "<key>=", so a key without a
# matching public writer raises NoMethodError.
#
# @param options [Hash] attribute name (Symbol or String) => value
def initialize(options)
  options.each_pair do |name, value|
    # public_send keeps caller-supplied keys from reaching private writers.
    public_send("#{name}=", value)
  end
end
- puts "*+"*30
- puts self.inspect
# From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
# Characters that are replaced with "-" when deriving a code from the
# document identifier.
FILENAME_BAD_CHARS = ['/', '\\', '?', '%', '*', ':', '|', '"', '<', '>', '.', ' '].freeze

# Derives a sanitized, lower-case code from the document identifier.
#
# Each character listed in FILENAME_BAD_CHARS is replaced by a hyphen.
#
# @return [String] the sanitized identifier, or "" when docidentifier is nil
def docidentifier_code
  return "" if docidentifier.nil?

  docidentifier.downcase.gsub(Regexp.union(FILENAME_BAD_CHARS), "-")
end
- def docid_code
- docid.downcase.gsub(/[\s\/]/, "-") || ""
# Matches "<prefix> <number>[:<digits>]"; capture 2 is the document number.
DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/

# Extracts the numeric part of the document identifier.
#
# @return [Integer] capture 2 of DOC_NUMBER_REGEX as an integer, or 999999
#   when the identifier is nil or does not match.
#   NOTE(review): the fallback presumably pushes unnumbered documents to the
#   end of a sort — confirm against callers.
def doc_number
  # String#[] with a capture index avoids the $2 global.
  number = docidentifier&.[](DOC_NUMBER_REGEX, 2)
  number ? number.to_i : 999_999
end
# Builds an instance from a <bibdata> XML node.
#
# The revision date comes from the first element found among: the date typed
# "published", the date typed "circulated", then any date element. datetype
# is that element's "type" attribute, or "circulated" when no date element
# exists at all.
#
# @param source [#at] node answering #at(xpath).
#   NOTE(review): presumably a Nokogiri element — confirm against callers.
# @return [Bibdata]
def self.from_xml(source)
  # bib.relaton_xml_path = URI.escape("#{relaton_root}/#{id_code}.xml")
  text_at = ->(xpath) { source.at(ns(xpath))&.text }
  # Name of the organization that sits next to the given role element.
  organization_beside = lambda do |role_xpath|
    source.at(ns(role_xpath))&.parent&.at(ns("./organization/name"))&.text
  end

  date_node = nil
  [
    "./date[@type = 'published']",
    "./date[@type = 'circulated']",
    "./date",
  ].each do |xpath|
    date_node = source.at(ns(xpath))
    break if date_node
  end
  date_kind = date_node ? date_node["type"] : "circulated"

  attributes = {
    uri: text_at.call("./uri[not(@type)]"),
    xml: text_at.call("./uri[@type='xml']"),
    pdf: text_at.call("./uri[@type='pdf']"),
    html: text_at.call("./uri[@type='html']"),
    relaton: text_at.call("./uri[@type='relaton']"),
    doc: text_at.call("./uri[@type='doc']"),
    docidentifier: text_at.call("./docidentifier"),
    title: text_at.call("./title"),
    doctype: text_at.call("./@type"),
    stage: text_at.call("./status"),
    technical_committee: text_at.call("./editorialgroup/technical-committee"),
    abstract: text_at.call("./abstract"),
    revdate: date_node && Date.parse(date_node.text),
    language: text_at.call("./language"),
    script: text_at.call("./script"),
    edition: text_at.call("./edition"),
    copyright_from: text_at.call("./copyright/from"),
    copyright_owner: text_at.call("./copyright/owner/organization/name"),
    contributor_author_role: text_at.call("./contributor/role[@type='author']"),
    contributor_author_organization: organization_beside.call("./contributor/role[@type='author']"),
    contributor_publisher_role: text_at.call("./contributor/role[@type='publisher']"),
    contributor_publisher_organization: organization_beside.call("./contributor/role[@type='publisher']"),
    datetype: date_kind,
  }
  new(attributes)
end
# Serializes this record as a <bibdata> XML fragment.
#
# Optional elements are written only when the matching attribute is set;
# <fetched> always carries today's date. Interpolated values are escaped so
# that "&", "<" and ">" in titles, abstracts, etc. still yield well-formed
# XML. <language> and <script> are written once each, after <edition>.
#
# @return [String] the XML fragment, ending with a newline
def to_xml
  esc = ->(value) { value.to_s.encode(xml: :text) }
  # Attribute values sit inside single quotes, so those are escaped as well.
  esc_attr = ->(value) { esc.call(value).gsub("'", "&apos;") }
  organization = lambda do |name|
    "<organization><name>#{esc.call(name)}</name></organization>"
  end

  parts = []
  parts << "<bibdata type='#{esc_attr.call(doctype)}'>\n"
  parts << "<fetched>#{Date.today}</fetched>\n"
  parts << "<title>#{esc.call(title)}</title>\n"
  parts << "<docidentifier>#{esc.call(docidentifier)}</docidentifier>\n" if docidentifier
  parts << "<uri>#{esc.call(uri)}</uri>\n" if uri
  parts << "<uri type='xml'>#{esc.call(xml)}</uri>\n" if xml
  parts << "<uri type='html'>#{esc.call(html)}</uri>\n" if html
  parts << "<uri type='pdf'>#{esc.call(pdf)}</uri>\n" if pdf
  parts << "<uri type='doc'>#{esc.call(doc)}</uri>\n" if doc
  parts << "<uri type='relaton'>#{esc.call(relaton)}</uri>\n" if relaton

  if copyright_from
    parts << "<copyright>"
    parts << "<from>#{esc.call(copyright_from)}</from>\n"
    parts << "<owner>#{organization.call(copyright_owner)}</owner>\n" if copyright_owner
    parts << "</copyright>"
  end

  if contributor_author_role
    parts << "<contributor>\n"
    parts << "<role type='author'/>\n"
    parts << "#{organization.call(contributor_author_organization)}\n"
    parts << "</contributor>\n"
  end

  if contributor_publisher_role
    parts << "<contributor>\n"
    parts << "<role type='publisher'/>\n"
    parts << "#{organization.call(contributor_publisher_organization)}\n"
    parts << "</contributor>\n"
  end

  if revdate
    parts << "<date type='#{esc_attr.call(datetype)}'><on>#{esc.call(revdate)}</on></date>\n"
  end
  parts << "<edition>#{esc.call(edition)}</edition>\n" if edition
  parts << "<language>#{esc.call(language)}</language>\n" if language
  parts << "<script>#{esc.call(script)}</script>\n" if script
  parts << "<abstract>#{esc.call(abstract)}</abstract>\n" if abstract
  parts << "<status>#{esc.call(stage)}</status>\n" if stage
  if technical_committee
    parts << "<editorialgroup><technical-committee>#{esc.call(technical_committee)}" \
             "</technical-committee></editorialgroup>\n"
  end
  parts << "</bibdata>\n"
  parts.join
end
# Collects the attributes listed in ATTRIBS into a hash.
#
# Keys are the attribute names as Strings; attributes whose value is nil are
# left out (false and empty values are kept).
#
# @return [Hash{String => Object}]
def to_h
  ATTRIBS.each_with_object({}) do |name, result|
    value = send(name)
    result[name.to_s] = value unless value.nil?
  end
end
+
# Serializes the hash produced by #to_h as YAML.
#
# NOTE(review): relies on Hash#to_yaml, i.e. on the YAML library having been
# loaded; no such require is visible in this part of the file — confirm.
def to_yaml
  attributes = to_h
  attributes.to_yaml
end
end
end