# frozen_string_literal: true

module Commonmeta
  module Readers
    # Reader that fetches Crossref REST API JSON and maps it onto the
    # Commonmeta metadata hash.
    module CrossrefReader
      # Fetch the raw Crossref record for +id+.
      #
      # @param id [String, nil] identifier passed to +crossref_api_url+
      # @param options [Hash] forwarded unchanged to +crossref_api_url+
      # @return [Hash] +{ 'string' => body }+ on a successful HTTP response,
      #   otherwise +{ 'string' => nil, 'state' => 'not_found' }+ (also
      #   returned when +id+ is blank)
      def get_crossref(id: nil, **options)
        return { 'string' => nil, 'state' => 'not_found' } unless id.present?

        api_url = crossref_api_url(id, options)
        response = HTTP.get(api_url)
        return { 'string' => nil, 'state' => 'not_found' } unless response.status.success?

        { 'string' => response.body.to_s }
      end

      # Parse a Crossref JSON document into a Commonmeta hash.
      #
      # @param string [String, nil] JSON text, with or without the API's
      #   top-level +message+ wrapper
      # @param options [Hash] +:doi+ / +:id+ override the DOI found in the
      #   record; every option other than +:doi+, +:id+, +:url+, +:sandbox+,
      #   +:validate+ and +:ra+ is merged over the result
      # @return [Hash] the metadata hash, or +{ 'errors' => errors }+ when
      #   +jsonlint+ reports problems with +string+
      def read_crossref(string: nil, **options)
        if string.present?
          errors = jsonlint(string)
          return { 'errors' => errors } if errors.present?
        end

        read_options = ActiveSupport::HashWithIndifferentAccess.new(
          options.except(:doi, :id, :url, :sandbox, :validate, :ra)
        )

        meta = string.present? ? JSON.parse(string) : {}

        # optionally strip out the message wrapper from API
        meta = meta.dig('message') if meta.dig('message').present?

        resource_type = meta.fetch('type', nil)
        resource_type = resource_type.present? ? resource_type.underscore.camelcase : nil
        type = Commonmeta::Utils::CR_TO_CM_TRANSLATIONS.fetch(resource_type, 'Other')

        member_id = meta.fetch('member', nil)
        # TODO: getting the publisher from member_id almost always returns the
        # publisher name, but sometimes does not
        publisher = if member_id.present?
                      get_crossref_member(member_id)
                    else
                      meta.fetch('publisher', nil)
                    end

        creators = if meta.fetch('author', nil).present?
                     get_authors(from_csl(Array.wrap(meta.fetch('author', nil))))
                   else
                     []
                   end

        # Tag every editor entry before converting it like an author.
        editors = Array.wrap(meta.fetch('editor', nil)).each { |e| e['contributorType'] = 'Editor' }
        contributors = get_authors(from_csl(editors))

        # Prefer the explicit date-time values, then fall back to date-parts.
        date = {
          'submitted' => nil,
          'accepted' => meta.dig('accepted', 'date-time'),
          'published' => meta.dig('issued', 'date-time') ||
                         get_date_from_date_parts(meta.fetch('issued', nil)) ||
                         get_date_from_date_parts(meta.fetch('created', nil)),
          'updated' => meta.dig('updated', 'date-time') ||
                       meta.dig('deposited', 'date-time') ||
                       get_date_from_date_parts(meta.fetch('deposited', nil))
        }

        # TODO: fix timestamp. Until then, remove time as this is not always
        # stable with Crossref (different server timezones)
        date['published'] = get_iso8601_date(date['published']) if date['published'].present?
        date['updated'] = get_iso8601_date(date['updated']) if date['updated'].present?

        # Only the first license entry is considered.
        license = if meta.fetch('license', nil)
                    hsh_to_spdx('rightsURI' => meta.dig('license', 0, 'URL'))
                  end

        # Electronic ISSN wins over print ISSN.
        issn_types = Array.wrap(meta.fetch('issn-type', nil))
        issn = issn_types.find { |i| i['type'] == 'electronic' } ||
               issn_types.find { |i| i['type'] == 'print' } ||
               {}
        issn = issn.fetch('value', nil) if issn.present?

        references = Array.wrap(meta.fetch('reference', nil)).map { |r| get_reference(r) }

        funding_references = Array.wrap(meta.fetch('funder', nil)).each_with_object([]) do |funding, refs|
          funding_reference = {
            'funderName' => funding['name'],
            'funderIdentifier' => funding['DOI'] ? doi_as_url(funding['DOI']) : nil,
            'funderIdentifierType' => funding['DOI'].to_s.start_with?('10.13039') ? 'Crossref Funder ID' : nil
          }.compact

          # NOTE: when a funder lists several awards only the last one is kept
          # (same result as assigning each award in turn).
          if funding['name'].present? && funding['award'].present?
            funding_reference['awardNumber'] = Array.wrap(funding['award']).last
          end

          refs << funding_reference if funding_reference.present?
        end

        container_type = case resource_type
                         when 'JournalArticle', 'JournalIssue'
                           'Journal'
                         when 'BookChapter'
                           'Book'
                         when 'Monograph'
                           'BookSeries'
                         end

        # 'page' looks like "first-last"; split it once and pick both ends.
        page = meta.fetch('page', nil)
        page_parts = page.present? ? page.split('-').map(&:strip) : []
        first_page = page_parts[0]
        last_page = page_parts[1]

        container = {
          'type' => container_type,
          'title' => parse_attributes(meta.fetch('container-title', nil), first: true).to_s.squish.presence,
          'identifier' => issn.present? ? issn : nil,
          'identifierType' => issn.present? ? 'ISSN' : nil,
          'volume' => meta.fetch('volume', nil),
          'issue' => meta.fetch('issue', nil),
          'firstPage' => first_page,
          'lastPage' => last_page
        }.compact

        # A DOI passed in via options takes precedence over the record's DOI.
        id = normalize_doi(options[:doi] || options[:id] || meta.fetch('DOI', nil))

        title = if meta.fetch('title', nil).is_a?(Array)
                  meta.fetch('title', nil)[0]
                else
                  meta.fetch('title', nil)
                end
        title = title.blank? ? ':(unav)' : title.squish

        state = (meta.present? || read_options.present?) ? 'findable' : 'not_found'

        subjects = Array.wrap(meta.fetch('categories', nil)).flat_map { |subject| name_to_fos(subject) }

        abstract = meta.fetch('abstract', nil)
        descriptions = if abstract.present?
                         [{ 'description' => sanitize(abstract), 'descriptionType' => 'Abstract' }]
                       else
                         []
                       end

        provider = get_doi_ra(id)

        {
          'id' => id,
          'type' => type,
          'url' => normalize_id(meta.dig('resource', 'primary', 'URL')),
          'titles' => [{ 'title' => title }],
          'creators' => creators,
          'contributors' => contributors,
          'container' => container,
          'publisher' => publisher,
          'references' => references,
          'date' => date.compact,
          'descriptions' => descriptions,
          'license' => license,
          'alternate_identifiers' => [],
          'funding_references' => funding_references,
          'version' => meta.fetch('version', nil),
          'subjects' => subjects,
          'provider' => provider,
          'schema_version' => 'http://datacite.org/schema/kernel-4',
          'state' => state
        }.compact.merge(read_options)
      end

      # Convert one entry of Crossref's +reference+ list.
      #
      # @param reference [Hash, nil] a single reference entry
      # @return [Hash, nil] the mapped reference with nil values removed, or
      #   +nil+ when +reference+ is blank or not a Hash
      def get_reference(reference)
        # Both conditions must hold; anything else cannot be dug into below.
        return nil unless reference.present? && reference.is_a?(Hash)

        doi = reference.dig('DOI')
        {
          'key' => reference.dig('key'),
          'doi' => doi ? normalize_doi(doi) : nil,
          'creator' => reference.dig('author'),
          'title' => reference.dig('article-title'),
          'publisher' => reference.dig('publisher'),
          'publicationYear' => reference.dig('year'),
          'volume' => reference.dig('volume'),
          'issue' => reference.dig('issue'),
          'firstPage' => reference.dig('first-page'),
          'lastPage' => reference.dig('last-page'),
          'containerTitle' => reference.dig('journal-title'),
          'edition' => nil,
          'contributor' => nil,
          # The free-text citation is only kept when there is no DOI.
          'unstructured' => doi.nil? ? reference.dig('unstructured') : nil
        }.compact
      end
    end
  end
end