lib/bolognese/readers/schema_org_reader.rb in bolognese-0.12.2 vs lib/bolognese/readers/schema_org_reader.rb in bolognese-0.12.3

- old
+ new

@@ -33,10 +33,24 @@ return { "errors" => errors } if errors.present? end meta = string.present? ? Maremma.from_json(string) : {} + identifier = Array.wrap(meta.fetch("identifier", nil)) + if identifier.length > 1 + alternate_identifier = identifier[1..-1].map do |r| + if r.is_a?(String) + { "type" => "URL", "name" => r } + elsif r.is_a?(Hash) + { "type" => r["propertyID"], "name" => r["value"] } + end + end.unwrap + else + alternate_identifier = nil + end + identifier = identifier.first + id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil)) type = meta.fetch("@type", nil) && meta.fetch("@type").camelcase resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[type] authors = meta.fetch("author", nil) || meta.fetch("creator", nil) author = get_authors(from_schema_org(Array.wrap(authors))) @@ -46,36 +60,46 @@ elsif publisher.is_a?(String) meta.dig("publisher") end included_in_data_catalog = from_schema_org(Array.wrap(meta.fetch("includedInDataCatalog", nil))) - included_in_data_catalog = Array.wrap(included_in_data_catalog).map { |dc| { "title" => dc["name"], "url" => dc["url"] } } + included_in_data_catalog = Array.wrap(included_in_data_catalog).reduce([]) do |sum, dc| + sum << { "title" => dc["name"], "url" => dc["url"] } if dc["url"].present? + sum + end.unwrap is_part_of = schema_org_is_part_of(meta) || included_in_data_catalog license = { "id" => parse_attributes(meta.fetch("license", nil), content: "id", first: true), "name" => parse_attributes(meta.fetch("license", nil), content: "name", first: true) } + funding = from_schema_org(Array.wrap(meta.fetch("funding", nil))) date_published = meta.fetch("datePublished", nil) state = meta.present? ? "findable" : "not_found" + + ct = (type == "Dataset") ? "includedInDataCatalog" : "Periodical" + container_title = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true) { "id" => id, "type" => type, "additional_type" => meta.fetch("additionalType", nil), "citeproc_type" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[type] || "article-journal", "bibtex_type" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[type] || "misc", "ris_type" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN", "resource_type_general" => resource_type_general, "doi" => validate_doi(id), - "identifier" => meta.fetch("identifier", nil), + "identifier" => identifier, + "alternate_identifier" => alternate_identifier, "b_url" => normalize_id(meta.fetch("url", nil)), + "content_url" => Array.wrap(meta.fetch("contentUrl", nil)).unwrap, "title" => meta.fetch("name", nil), "author" => author, "editor" => editor, "publisher" => publisher, - "service_provider" => meta.fetch("provider", nil), + "service_provider" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true), + "container_title" => container_title, "is_identical_to" => schema_org_is_identical_to(meta), "is_part_of" => is_part_of, "has_part" => schema_org_has_part(meta), "references" => schema_org_references(meta), "is_referenced_by" => schema_org_is_referenced_by(meta), @@ -86,10 +110,12 @@ "date_modified" => meta.fetch("dateModified", nil), "description" => meta.fetch("description", nil).present? ? { "text" => sanitize(meta.fetch("description")) } : nil, "license" => license, "b_version" => meta.fetch("version", nil), "keywords" => meta.fetch("keywords", nil).to_s.split(", "), - "state" => state + "state" => state, + "schema_version" => meta.fetch("schemaVersion", nil), + "funding" => funding } end def schema_org_related_identifier(meta, relation_type: nil) normalize_ids(ids: meta.fetch(relation_type, nil))