lib/bolognese/readers/datacite_reader.rb in bolognese-0.9.36 vs lib/bolognese/readers/datacite_reader.rb in bolognese-0.9.37
- old
+ new
@@ -27,11 +27,22 @@
"data_center_id" => attributes.fetch("datacentre_symbol", nil),
"url" => url }
end
def read_datacite(string: nil, **options)
- meta = string.present? ? Maremma.from_xml(string).fetch("resource", {}) : {}
+ return { "errors" => "no content" } unless string.present?
+
+ meta = Maremma.from_xml(string).fetch("resource", {})
+ schema_version = meta.fetch("xmlns", nil)
+
+ # validate only when option is set, as this step is expensive and
+ # not needed if XML comes from DataCite MDS
+ if options[:validate]
+ errors = datacite_errors(xml: string, schema_version: schema_version)
+ return { "errors" => errors } if errors.present?
+ end
+
id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox])
doi = doi_from_url(id)
resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
type = Bolognese::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] || "CreativeWork"
title = Array.wrap(meta.dig("titles", "title")).map do |r|
@@ -102,10 +113,10 @@
"license" => license,
"version" => meta.fetch("version", nil),
"keywords" => keywords,
"language" => meta.fetch("language", nil),
"content_size" => meta.fetch("size", nil),
- "schema_version" => meta.fetch("xmlns", nil)
+ "schema_version" => schema_version
}
end
def datacite_date(dates, date_type)
dd = dates.find { |d| d["dateType"] == date_type } || {}