lib/bolognese/readers/datacite_reader.rb in bolognese-0.9.36 vs lib/bolognese/readers/datacite_reader.rb in bolognese-0.9.37

- old
+ new

@@ -27,11 +27,22 @@ "data_center_id" => attributes.fetch("datacentre_symbol", nil), "url" => url } end def read_datacite(string: nil, **options) - meta = string.present? ? Maremma.from_xml(string).fetch("resource", {}) : {} + return { "errors" => "no content" } unless string.present? + + meta = Maremma.from_xml(string).fetch("resource", {}) + schema_version = meta.fetch("xmlns", nil) + + # validate only when option is set, as this step is expensive and + # not needed if XML comes from DataCite MDS + if options[:validate] + errors = datacite_errors(xml: string, schema_version: schema_version) + return { "errors" => errors } if errors.present? + end + id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox]) doi = doi_from_url(id) resource_type_general = meta.dig("resourceType", "resourceTypeGeneral") type = Bolognese::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] || "CreativeWork" title = Array.wrap(meta.dig("titles", "title")).map do |r| @@ -102,10 +113,10 @@ "license" => license, "version" => meta.fetch("version", nil), "keywords" => keywords, "language" => meta.fetch("language", nil), "content_size" => meta.fetch("size", nil), - "schema_version" => meta.fetch("xmlns", nil) + "schema_version" => schema_version } end def datacite_date(dates, date_type) dd = dates.find { |d| d["dateType"] == date_type } || {}