lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.29 vs lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.30
- old
+ new
@@ -35,25 +35,23 @@
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate))
meta = string.present? ? Maremma.from_json(string) : {}
- identifier = Array.wrap(meta.fetch("identifier", nil))
- if identifier.length > 1
- alternate_identifiers = identifier[1..-1].map do |r|
- if r.is_a?(String)
- { "alternateIdentifierType" => "URL", "alternateIdentifier" => r }
- elsif r.is_a?(Hash)
- { "alternateIdentifierType" => r["propertyID"], "alternateIdentifier" => r["value"] }
- end
+ identifiers = ([options[:doi] || meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
+ r = normalize_id(r) if r.is_a?(String)
+ if r.is_a?(String) && r.start_with?("https://doi.org")
+ { "identifierType" => "DOI", "identifier" => r }
+ elsif r.is_a?(String)
+ { "identifierType" => "URL", "identifier" => r }
+ elsif r.is_a?(Hash)
+ { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
end
- else
- alternate_identifiers = nil
- end
- identifier = identifier.first
+ end.compact.uniq
- id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
+
schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
types = {
"resourceTypeGeneral" => resource_type_general,
"resourceType" => meta.fetch("additionalType", nil),
@@ -61,23 +59,30 @@
"citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
"bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
}.compact
authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
- creators = get_authors(from_schema_org(Array.wrap(authors)))
- contributors = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
+ creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
+ contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
- periodical = if meta.fetch(ct, nil).present?
+ container = if meta.fetch(ct, nil).present?
+ url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
+
{
- "type" => (schema_org == "Dataset") ? "DataCatalog" : "Periodical",
+ "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
"title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
- "url" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
+ "identifier" => url,
+ "identifierType" => url.present? ? "URL" : nil,
+ "volume" => meta.fetch("volumeNumber", nil),
+ "issue" => meta.fetch("issueNumber", nil),
+ "firstPage" => meta.fetch("pageStart", nil),
+ "lastPage" => meta.fetch("pageEnd", nil)
}.compact
else
- nil
+ {}
end
related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
Array.wrap(schema_org_is_part_of(meta)) +
Array.wrap(schema_org_has_part(meta)) +
@@ -136,21 +141,20 @@
end
{ "id" => id,
"types" => types,
"doi" => validate_doi(id),
- "identifier" => identifier,
- "alternate_identifiers" => alternate_identifiers,
+ "identifiers" => identifiers,
"url" => normalize_id(meta.fetch("url", nil)),
"content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
"sizes" => Array.wrap(meta.fetch("contenSize", nil)).presence,
"formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)),
"titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : nil,
"creators" => creators,
"contributors" => contributors,
"publisher" => publisher,
"agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
- "periodical" => periodical,
+ "container" => container,
"related_identifiers" => related_identifiers,
"publication_year" => publication_year,
"dates" => dates,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
"rights_list" => rights_list,