lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.29 vs lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.30

- old
+ new

@@ -35,25 +35,23 @@ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate)) meta = string.present? ? Maremma.from_json(string) : {} - identifier = Array.wrap(meta.fetch("identifier", nil)) - if identifier.length > 1 - alternate_identifiers = identifier[1..-1].map do |r| - if r.is_a?(String) - { "alternateIdentifierType" => "URL", "alternateIdentifier" => r } - elsif r.is_a?(Hash) - { "alternateIdentifierType" => r["propertyID"], "alternateIdentifier" => r["value"] } - end + identifiers = ([options[:doi] || meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r| + r = normalize_id(r) if r.is_a?(String) + if r.is_a?(String) && r.start_with?("https://doi.org") + { "identifierType" => "DOI", "identifier" => r } + elsif r.is_a?(String) + { "identifierType" => "URL", "identifier" => r } + elsif r.is_a?(Hash) + { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] } end - else - alternate_identifiers = nil - end - identifier = identifier.first + end.compact.uniq - id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil)) + id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil) + schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org] types = { "resourceTypeGeneral" => resource_type_general, "resourceType" => meta.fetch("additionalType", nil), @@ -61,23 +59,30 @@ "citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal", "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc", "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN" }.compact authors = meta.fetch("author", nil) || meta.fetch("creator", nil) - creators = get_authors(from_schema_org(Array.wrap(authors))) - contributors = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil)))) + creators = get_authors(from_schema_org_creators(Array.wrap(authors))) + contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil)))) publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true) ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical" - periodical = if meta.fetch(ct, nil).present? + container = if meta.fetch(ct, nil).present? + url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true) + { - "type" => (schema_org == "Dataset") ? "DataCatalog" : "Periodical", + "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical", "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true), - "url" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true) + "identifier" => url, + "identifierType" => url.present? ? "URL" : nil, + "volume" => meta.fetch("volumeNumber", nil), + "issue" => meta.fetch("issueNumber", nil), + "firstPage" => meta.fetch("pageStart", nil), + "lastPage" => meta.fetch("pageEnd", nil) }.compact else - nil + {} end related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) + Array.wrap(schema_org_is_part_of(meta)) + Array.wrap(schema_org_has_part(meta)) + @@ -136,21 +141,20 @@ end { "id" => id, "types" => types, "doi" => validate_doi(id), - "identifier" => identifier, - "alternate_identifiers" => alternate_identifiers, + "identifiers" => identifiers, "url" => normalize_id(meta.fetch("url", nil)), "content_url" => Array.wrap(meta.fetch("contentUrl", nil)), "sizes" => Array.wrap(meta.fetch("contenSize", nil)).presence, "formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)), "titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : nil, "creators" => creators, "contributors" => contributors, "publisher" => publisher, "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true), - "periodical" => periodical, + "container" => container, "related_identifiers" => related_identifiers, "publication_year" => publication_year, "dates" => dates, "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil, "rights_list" => rights_list,