lib/bolognese/readers/datacite_reader.rb in bolognese-0.15.9 vs lib/bolognese/readers/datacite_reader.rb in bolognese-1.0
- old
+ new
@@ -52,11 +52,11 @@
doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
ns = doc.collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
schema_version = Array.wrap(ns).last || "http://datacite.org/schema/kernel-4"
doc.remove_namespaces!
string = doc.to_xml(:indent => 2)
-
+
meta = Maremma.from_xml(string).to_h.fetch("resource", {})
# validate only when option is set, as this step is expensive and
# not needed if XML comes from DataCite MDS
if options[:validate]
@@ -80,19 +80,17 @@
else
{ "title_type" => r["titleType"], "lang" => r["lang"], "text" => sanitize(r["__content__"]) }.compact
end
end.unwrap
- container_title = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
-
- alternate_identifier = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
+ alternate_identifiers = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
{ "type" => r["alternateIdentifierType"], "name" => r["__content__"] }
end.unwrap
description = Array.wrap(meta.dig("descriptions", "description")).select { |r| r["descriptionType"] != "SeriesInformation" }.map do |r|
{ "type" => r["descriptionType"], "text" => sanitize(r["__content__"]) }.compact
end.unwrap
- license = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
+ rights = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
{ "id" => normalize_url(r["rightsURI"]), "name" => r["__content__"] }.compact
end.unwrap
keywords = Array.wrap(meta.dig("subjects", "subject")).map do |k|
if k.nil?
nil
@@ -100,67 +98,113 @@
sanitize(k)
else
{ "subject_scheme" => k["subjectScheme"], "scheme_uri" => k["schemeURI"], "text" => sanitize(k["__content__"]) }.compact
end
end.compact
- dates = Array.wrap(meta.dig("dates", "date"))
+ dates = Array.wrap(meta.dig("dates", "date")).map do |d|
+ {
+ "date" => parse_attributes(d),
+ "date_type" => parse_attributes(d, content: "dateType")
+ }
+ end
sizes = Array.wrap(meta.dig("sizes", "size")).unwrap
formats = Array.wrap(meta.dig("formats", "format")).unwrap
- funding = begin
- f = datacite_funder_contributor(meta) + datacite_funding_reference(meta)
- f.length > 1 ? f : f.first
+ funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
+ {
+ "funder_name" => fr["funderName"],
+ "funder_identifier" => normalize_id(parse_attributes(fr["funderIdentifier"])),
+ "funder_identifier_type" => parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType"),
+ "award_number" => parse_attributes(fr["awardNumber"]),
+ "award_uri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
+ "award_title" => fr["awardTitle"] }.compact
end
+ related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
+ if ri["relatedIdentifierType"] == "DOI"
+ rid = ri["__content__"].downcase
+ else
+ rid = ri["__content__"]
+ end
+
+ {
+ "id" => rid,
+ "related_identifier_type" => ri["relatedIdentifierType"],
+ "relation_type" => ri["relationType"],
+ "resource_type_general" => ri["resourceTypeGeneral"]
+ }.compact
+ end
+ geo_location = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
+ if gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String)
+ nil
+ else
+ {
+ "geo_location_place" => gl["geoLocationPlace"],
+ "geo_location_point" => {
+ "point_latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
+ "point_longitude" => gl.dig("geoLocationPoint", "pointLongitude")
+ }.compact.presence,
+ "geo_location_box" => {
+ "west_bound_longitude" => gl.dig("geoLocationBox", "westBoundLongitude"),
+ "east_bound_longitude" => gl.dig("geoLocationBox", "eastBoundLongitude"),
+ "south_bound_latitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
+ "north_bound_latitude" => gl.dig("geoLocationBox", "northBoundLatitude")
+ }.compact.presence
+ }.compact
+ end
+ end
+ periodical = set_periodical(meta)
state = doi.present? ? "findable" : "not_found"
{ "id" => id,
"type" => type,
"additional_type" => additional_type,
"citeproc_type" => Bolognese::Utils::CR_TO_CP_TRANSLATIONS[additional_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_CP_TRANSLATIONS[type] || "article",
"bibtex_type" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[additional_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[type] || "misc",
"ris_type" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[additional_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN",
"resource_type_general" => resource_type_general,
"doi" => doi,
- "alternate_identifier" => alternate_identifier,
+ "alternate_identifiers" => alternate_identifiers,
"url" => options.fetch(:url, nil),
"title" => title,
- "author" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
- "editor" => get_authors(Array.wrap(meta.dig("contributors", "contributor")).select { |r| r["contributorType"] == "Editor" }),
- "container_title" => container_title,
- "publisher" => meta.fetch("publisher", nil),
+ "creator" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
+ "periodical" => periodical,
+ "publisher" => meta.fetch("publisher", "").strip.presence,
"service_provider" => "DataCite",
- "funding" => funding,
- "is_identical_to" => datacite_is_identical_to(meta),
- "is_part_of" => datacite_is_part_of(meta),
- "has_part" => datacite_has_part(meta),
- "references" => datacite_references(meta),
- "is_referenced_by" => datacite_is_referenced_by(meta),
- "is_supplement_to" => datacite_is_supplement_to(meta),
- "is_supplemented_by" => datacite_is_supplemented_by(meta),
- "date_created" => datacite_date(dates, "Created"),
- "date_accepted" => datacite_date(dates, "Accepted"),
- "date_available" => datacite_date(dates, "Available"),
- "date_copyrighted" => datacite_date(dates, "Copyrights"),
- "date_collected" => datacite_date(dates, "Collected"),
- "date_submitted" => datacite_date(dates, "Submitted"),
- "date_valid" => datacite_date(dates, "Valid"),
+ "funding_references" => funding_references,
+ "dates" => dates,
"date_published" => datacite_date(dates, "Issued") || meta.fetch("publicationYear", nil),
"date_modified" => datacite_date(dates, "Updated"),
"description" => description,
- "license" => license,
+ "rights" => rights,
"b_version" => meta.fetch("version", nil),
"keywords" => keywords,
"language" => meta.fetch("language", nil),
- "content_format" => formats,
- "content_size" => sizes,
+ "geo_location" => geo_location,
+ "related_identifiers" => related_identifiers,
+ "b_format" => formats,
+ "size" => sizes,
"schema_version" => schema_version,
"state" => state
}
end
# Added on the 1.0 side of this diff (every code line below is a "+" line).
# Builds the value stored under "periodical" in the returned metadata hash.
#
# meta - Hash-like object read here via dig for "descriptions",
#        "relatedIdentifiers" and "resourceType".
#
# Returns a Hash with "type", "id", "title" and "issn" (nil-valued keys are
# dropped by compact), or nil when neither lookup below finds anything.
+ def set_periodical(meta)
# Text of the first description typed "SeriesInformation"; nil if there is none
# (nil.to_h gives {} and fetch falls back to nil).
+ container_title = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
# First related identifier with relationType "IsPartOf", or {} if there is none.
+ is_part_of = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).find { |ri| ri["relationType"] == "IsPartOf" }.to_h
+
+ if container_title.present? || is_part_of.present?
+ {
# "DataCatalog" for resourceTypeGeneral "Dataset", "Periodical" for anything else.
+ "type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataCatalog" : "Periodical",
# DOIs go through normalize_doi; any other identifier type is passed through unchanged.
+ "id" => is_part_of["relatedIdentifierType"] == "DOI" ? normalize_doi(is_part_of["__content__"]) : is_part_of["__content__"],
+ "title" => container_title,
# Only filled in when the IsPartOf identifier is an ISSN.
+ "issn" => is_part_of["relatedIdentifierType"] == "ISSN" ? is_part_of["__content__"] : nil
+ }.compact
+ end
+ end
+
# Looks up one date by type in `dates`.
#
# dates     - collection of Hashes that responds to find.
# date_type - value to match, e.g. "Issued" or "Updated" as passed by the
#             caller shown in this diff.
#
# Returns the stored date of the first matching entry, or nil if none matches.
#
# The "-" lines are the 0.15.9 body (match on "dateType", read "__content__");
# the "+" lines are the 1.0 body (match on "date_type", read "date").
def datacite_date(dates, date_type)
- dd = dates.find { |d| d["dateType"] == date_type } || {}
- dd.fetch("__content__", nil)
+ dd = dates.find { |d| d["date_type"] == date_type } || {}
+ dd.fetch("date", nil)
end
def datacite_funding_reference(meta)
Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |f|
funder_id = parse_attributes(f["funderIdentifier"])
@@ -197,68 +241,9 @@
end
else
sum
end
end
- end
-
# Removed in 1.0 (every code line below is a "-" line).
# Collects related identifiers from `meta` as CreativeWork references.
#
# meta          - Hash-like object; "relatedIdentifiers" / "relatedIdentifier"
#                 is read via dig.
# relation_type - optional String of one or more relation names separated by
#                 spaces; matching against "relationType" is case-sensitive.
#
# Returns the mapped entries after `.unwrap`.
# NOTE(review): unwrap is not defined in this diff - presumably it collapses a
# one-element array to its element; confirm against the Array extension.
- def datacite_related_identifier(meta, relation_type: nil)
# Only identifiers typed DOI or URL are considered.
- arr = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).select { |r| %w(DOI URL).include?(r["relatedIdentifierType"]) }
# Narrow to the requested relation types when any were given.
- arr = arr.select { |r| relation_type.split(" ").include?(r["relationType"]) } if relation_type.present?
-
- arr.map { |work| { "type" => "CreativeWork", "id" => normalize_id(work["__content__"]) } }.unwrap
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsIdenticalTo".
- def datacite_is_identical_to(meta)
- datacite_related_identifier(meta, relation_type: "IsIdenticalTo")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsPartOf".
- def datacite_is_part_of(meta)
- datacite_related_identifier(meta, relation_type: "IsPartOf")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "HasPart".
- def datacite_has_part(meta)
- datacite_related_identifier(meta, relation_type: "HasPart")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsPreviousVersionOf".
- def datacite_is_previous_version_of(meta)
- datacite_related_identifier(meta, relation_type: "IsPreviousVersionOf")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsNewVersionOf".
- def datacite_is_new_version_of(meta)
- datacite_related_identifier(meta, relation_type: "IsNewVersionOf")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsVariantFormOf".
- def datacite_is_variant_form_of(meta)
- datacite_related_identifier(meta, relation_type: "IsVariantFormOf")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsOriginalFormOf".
- def datacite_is_original_form_of(meta)
- datacite_related_identifier(meta, relation_type: "IsOriginalFormOf")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier, accepting either
# "References" or "Cites" as the relation type; `.presence` turns a blank
# result into nil.
- def datacite_references(meta)
- datacite_related_identifier(meta, relation_type: "References Cites").presence
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier, accepting either
# "IsCitedBy" or "IsReferencedBy" as the relation type; `.presence` turns a
# blank result into nil.
- def datacite_is_referenced_by(meta)
- datacite_related_identifier(meta, relation_type: "IsCitedBy IsReferencedBy").presence
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsSupplementTo".
- def datacite_is_supplement_to(meta)
- datacite_related_identifier(meta, relation_type: "IsSupplementTo")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "isSupplementedBy".
# NOTE(review): this is the only relation name here that starts with a
# lower-case letter, and datacite_related_identifier compares relation types
# case-sensitively - looks like a typo for "IsSupplementedBy"; confirm against
# the DataCite relationType vocabulary.
- def datacite_is_supplemented_by(meta)
- datacite_related_identifier(meta, relation_type: "isSupplementedBy")
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "Reviews"; `.presence` turns a blank result into nil.
- def datacite_reviews(meta)
- datacite_related_identifier(meta, relation_type: "Reviews").presence
- end
-
# Removed in 1.0. Delegates to datacite_related_identifier with
# relation_type "IsReviewedBy"; `.presence` turns a blank result into nil.
- def datacite_is_reviewed_by(meta)
- datacite_related_identifier(meta, relation_type: "IsReviewedBy").presence
end
end
end
end