lib/relaton_nist/data_fetcher.rb in relaton-nist-1.9.4 vs lib/relaton_nist/data_fetcher.rb in relaton-nist-1.9.6
- old
+ new
@@ -9,32 +9,49 @@
"isVersionOf" => "editionOf",
"hasTranslation" => "hasTranslation",
"isTranslationOf" => "translatedFrom",
"hasPreprint" => "hasReprint",
"isSupplementTo" => "complements",
+ "isPartOf" => "partOf",
+ "hasPart" => "hasPart",
}.freeze
# Raw-content GitHub URL of xml/allrecords.xml at the nist-pages ref of the
# usnistgov/NIST-Tech-Pubs repository.
# NOTE(review): presumably the dataset the fetcher downloads — confirm against the caller.
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
# Remember the output target and format, and derive the extension from the format.
#
# @param output [String] stored as @output (presumably an output directory — confirm)
# @param format [String] stored as @format; @ext is the same value with a
#   leading "bib" removed (e.g. "bibxml" gives "xml", "yaml" stays "yaml")
def initialize(output, format)
@ext = format.sub(/^bib/, "")
@format = format
@output = output
end
# Build the raw identifier hashes for one publication record.
#
# Old (1.9.4) body: reads the NIST id from publisher_item/item_number or
# publisher_item/identifier, strips a leading "/", patches it for five specific
# DOIs, and returns a NIST entry plus a DOI entry.
# New (1.9.6) body: derives every value from the DOI through the pub_id, doi and
# anchor helpers, and adds a third NIST entry carrying scope: "anchor". The
# former special cases survive only as commented-out code.
#
# NOTE(review): the visible part of fetch_docid forwards only :type and :id, so
# the :scope value may be dropped there — confirm against the full method.
#
# @param doc [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with :type and :id (plus :scope in the new version)
- def parse_docid(doc)
- doi = doc.at("doi_data/doi").text
- id = doc.at("publisher_item/item_number", "publisher_item/identifier").text.sub(%r{^/}, "")
- case doi
- when "10.6028/NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
- when "10.6028/NBS.CIRC.36e2" then id.sub!("46e", "36e")
- when "10.6028/NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
- when "10.6028/NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
- when "10.6028/NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
- end
- [{ type: "NIST", id: id }, { type: "DOI", id: doi }]
+ def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+ # case doi
+ # when "10.6028/NBS.CIRC.12e2revjune" then doi.sub!("13e", "12e")
+ # when "10.6028/NBS.CIRC.36e2" then doi.sub!("46e", "36e")
+ # when "10.6028/NBS.HB.67suppJune1967" then doi.sub!("1965", "1967")
+ # when "10.6028/NBS.HB.105-1r1990" then doi.sub!("105-1-1990", "105-1r1990")
+ # when "10.6028/NIST.HB.150-10-1995" then doi.sub!(/150-10$/, "150-10-1995")
+ # end
+ # anchor = doi.split("/")[1..-1].join "/"
+ [
+ { type: "NIST", id: pub_id(doc) },
+ { type: "DOI", id: doi(doc) },
+ { type: "NIST", id: anchor(doc), scope: "anchor" },
+ ]
end
+ # Publication id: the anchor with every "." replaced by a space.
+ #
+ # @param doc [Nokogiri::XML::Element]
+ # @return [String]
+ def pub_id(doc)
+ anchor(doc).tr(".", " ")
+ end
+
+ # Text of the record's doi_data/doi element.
+ #
+ # @param doc [Nokogiri::XML::Element]
+ # @return [String]
+ def doi(doc)
+ node = doc.at("doi_data/doi")
+ node.text
+ end
+
+ # The DOI with its first "/"-separated segment removed, e.g.
+ # "10.6028/NIST.HB.150-10-1995" gives "NIST.HB.150-10-1995".
+ #
+ # @param doc [Nokogiri::XML::Element]
+ # @return [String]
+ def anchor(doc)
+ segments = doi(doc).split("/")
+ segments[1..-1].join("/")
+ end
+
# @param doc [Nokogiri::XML::Element]
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_docid(doc)
parse_docid(doc).map do |id|
RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
@@ -45,11 +62,11 @@
# @return [RelatonBib::TypedTitleStringCollection, Array]
def fetch_title(doc)
# Select title and subtitle elements with a single XPath union.
t = doc.xpath("titles/title|titles/subtitle")
# No title or subtitle elements: return a plain empty array.
return [] unless t.any?
# The old version joined the element texts with a single space; the new
# version concatenates them with no separator.
# NOTE(review): presumably the source text already carries its own spacing — confirm.
- RelatonBib::TypedTitleString.from_string t.map(&:text).join(" "), "en", "Latn"
+ RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
end
# @param doc [Nokogiri::XML::Element]
# @return [Array<RelatonBib::BibliographicDate>]
def fetch_date(doc)
@@ -74,16 +91,15 @@
# @param doc [Nokogiri::XML::Element]
# @return [Array<Hash>]
def fetch_relation(doc)
# Namespace bound to the "ns" prefix in the XPath queries below.
ns = "http://www.crossref.org/relations.xsd"
# One result hash per program/related_item element.
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
# Old version: the relation element was held in a local named `doi`.
- doi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
- # ref = doi_to_id doi.text
- # ref, = parse_docid doc
- fref = RelatonBib::FormattedRef.new content: doi.text
# New version: the same element is held in `rdoi`; its text becomes the
# formatted reference of the related item.
# NOTE(review): if neither relation child is present the lookup presumably
# yields nil and rdoi.text would raise — confirm the data always has one.
+ rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
+ fref = RelatonBib::FormattedRef.new content: rdoi.text
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
# Map the element's relationship-type attribute through RELATION_TYPES.
- type = RELATION_TYPES[doi["relationship-type"]]
+ type = RELATION_TYPES[rdoi["relationship-type"]]
# New version warns on an unmapped relationship type; the hash below is
# still returned with a nil :type in that case.
+ warn "Relation type #{rdoi['relationship-type']} not found" unless type
{ type: type, bibitem: bibitem }
end
end
# @param doc [Nokogiri::XML::Element]
@@ -121,26 +137,46 @@
RelatonBib::PersonIdentifier.new "orcid", id.text
end
fullname = RelatonBib::FullName.new(
surname: surname, forename: forename, initial: initial, identifier: ident,
)
- person = RelatonBib::Person.new name: fullname
+ person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
{ entity: person, role: [{ type: p["contributor_role"] }] }
end
contribs + doc.xpath("publisher").map do |p|
abbr = p.at("../institution/institution_acronym")&.text
- org = RelatonBib::Organization.new(name: p.at("publisher_name").text, abbreviation: abbr)
+ place = p.at("./publisher_place")
+ cont = []
+ if place
+ city, state = place.text.split(", ")
+ cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
+ end
+ org = RelatonBib::Organization.new(
+ name: p.at("publisher_name").text, abbreviation: abbr, contact: cont,
+ )
{ entity: org, role: [{ type: "publisher" }] }
end
end
+ # Build one affiliation per ./institution/institution_department element,
+ # each wrapping an organization named after the element's text.
+ #
+ # @param doc [Nokogiri::XML::Element]
+ # @return [Array<RelatonBib::Affiliation>]
+ def affiliation(doc)
+ departments = doc.xpath("./institution/institution_department")
+ departments.map do |dept|
+ department_org = RelatonBib::Organization.new(name: dept.text)
+ RelatonBib::Affiliation.new(organization: department_org)
+ end
+ end
+
# Collect the text of every institution/institution_place element under doc.
#
# @param doc [Nokogiri::XML::Element]
# @return [Array<String>]
def fetch_place(doc)
places = doc.xpath("institution/institution_place")
places.map { |place| place.text }
end
+ # Build a single-element series list: the series title is "NIST" and its
+ # number is the publication id.
+ #
+ # @param doc [Nokogiri::XML::Element]
+ # @return [Array<RelatonBib::Series>]
+ def fetch_series(doc)
+ series_title = RelatonBib::TypedTitleString.new(content: "NIST")
+ series = RelatonBib::Series.new(title: series_title, number: pub_id(doc))
+ [series]
+ end
+
#
# Save document
#
# @param bib [RelatonNist::NistBibliographicItem]
#
@@ -172,18 +208,19 @@
item = RelatonNist::NistBibliographicItem.new(
type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
link: fetch_link(doc), abstract: fetch_abstract(doc),
date: fetch_date(doc), edition: fetch_edition(doc),
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
- place: fetch_place(doc),
+ place: fetch_place(doc), series: fetch_series(doc),
language: [doc["language"]], script: ["Latn"], doctype: "standard"
)
write_file item
rescue StandardError => e
warn "Document: #{doc.at('doi').text}"
warn e.message
- raise e
+ warn e.backtrace[0..5].join("\n")
+ # raise e
end
#
# Fetch all the documents from the dataset
#
@@ -200,9 +237,10 @@
t2 = Time.now
puts "Stopped at: #{t2}"
puts "Done in: #{(t2 - t1).round} sec."
rescue StandardError => e
warn e.message
+ warn e.backtrace[0..5].join("\n")
end
#
# Fetch all the documents from the dataset
#