lib/commonmeta/author_utils.rb in commonmeta-ruby-3.4.5 vs lib/commonmeta/author_utils.rb in commonmeta-ruby-3.5

- old
+ new

@@ -1,79 +1,108 @@ # frozen_string_literal: true -require 'namae' +require "namae" module Commonmeta module AuthorUtils + # mapping of DataCite contributorType to commonmeta contributorRoles + def datacite_contributor_roles = { + "ContactPerson" => "ContactPerson", + "DataCurator" => "DataCuration", + "DataManager" => "Other", + "Distributor" => "Other", + "Editor" => "Editor", + "HostingInstitution" => "Other", + "Other" => "Other", + "Producer" => "Other", + "ProjectLeader" => "Other", + "ProjectManager" => "Other", + "ProjectMember" => "Other", + "RegistrationAgency" => "Other", + "RegistrationAuthority" => "Other", + "RelatedPerson" => "Other", + "ResearchGroup" => "Other", + "RightsHolder" => "Other", + "Researcher" => "Other", + "Sponsor" => "Other", + "Supervisor" => "Supervision", + "WorkPackageLeader" => "Other" + } + def get_one_author(author) # basic sanity checks return nil if author.blank? # author is a string - author = { 'name' => author } if author.is_a?(String) + author = { "name" => author } if author.is_a?(String) # malformed XML - return nil if author.fetch('name', nil).is_a?(Array) + return nil if author.fetch("name", nil).is_a?(Array) # parse author name attributes - name = parse_attributes(author.fetch('name', nil)) || - parse_attributes(author.fetch('creatorName', nil)) || - parse_attributes(author.fetch('contributorName', nil)) + name = parse_attributes(author.fetch("name", nil)) || + parse_attributes(author.fetch("creatorName", nil)) || + parse_attributes(author.fetch("contributorName", nil)) - given_name = parse_attributes(author.fetch('givenName', nil)) || - parse_attributes(author.fetch('given', nil)) - family_name = parse_attributes(author.fetch('familyName', nil)) || - parse_attributes(author.fetch('family', nil)) + given_name = parse_attributes(author.fetch("givenName", nil)) || + parse_attributes(author.fetch("given", nil)) + family_name = parse_attributes(author.fetch("familyName", nil)) || + parse_attributes(author.fetch("family", nil)) name = cleanup_author(name) # parse author identifier - id = parse_attributes(author.fetch('id', nil), first: true) || - parse_attributes(author.fetch('identifier', nil), first: true) || - parse_attributes(author.fetch('sameAs', nil), first: true) + id = parse_attributes(author.fetch("id", nil), first: true) || + parse_attributes(author.fetch("identifier", nil), first: true) || + parse_attributes(author.fetch("sameAs", nil), first: true) # DataCite metadata - if id.nil? && author['nameIdentifiers'].present? - id = Array.wrap(author.dig('nameIdentifiers')).find do |ni| - ni['nameIdentifierScheme'] == 'ORCID' + if id.nil? && author["nameIdentifiers"].present? + id = Array.wrap(author.dig("nameIdentifiers")).find do |ni| + ni["nameIdentifierScheme"] == "ORCID" end - id = id['nameIdentifier'] if id.present? - # Crossref metadata - elsif id.nil? && author['ORCID'].present? - id = author.fetch('ORCID') + id = id["nameIdentifier"] if id.present? + # Crossref metadata + elsif id.nil? && author["ORCID"].present? + id = author.fetch("ORCID") end id = normalize_orcid(id) || normalize_ror(id) # parse author type, i.e. "Person", "Organization" or not specified - type = author.fetch('type', nil) + type = author.fetch("type", nil) type = type.first if type.is_a?(Array) # DataCite metadata - type = type[0..-3] if type.is_a?(String) && type.end_with?('al') + type = type[0..-3] if type.is_a?(String) && type.end_with?("al") - if type.blank? && name.blank? && id.is_a?(String) && URI.parse(id).host == 'ror.org' - type = 'Person' - author['affiliation'] = { 'affiliationIdentifier' => id } + if type.blank? && name.blank? && id.is_a?(String) && URI.parse(id).host == "ror.org" + type = "Person" + author["affiliation"] = { "affiliationIdentifier" => id } id = nil - elsif type.blank? && id.is_a?(String) && URI.parse(id).host == 'ror.org' - type = 'Organization' - elsif type.blank? && author['type'] == 'Organization' - type = 'Organization' - elsif type.blank? && id.is_a?(String) && URI.parse(id).host == 'orcid.org' - type = 'Person' + elsif type.blank? && id.is_a?(String) && URI.parse(id).host == "ror.org" + type = "Organization" + elsif type.blank? && author["type"] == "Organization" + type = "Organization" + elsif type.blank? && id.is_a?(String) && URI.parse(id).host == "orcid.org" + type = "Person" elsif type.blank? && (given_name.present? || family_name.present?) - type = 'Person' - elsif type.blank? && is_personal_name?(name: name) && name.to_s.exclude?(';') - type = 'Person' + type = "Person" + elsif type.blank? && is_personal_name?(name: name) && name.to_s.exclude?(";") + type = "Person" elsif type.blank? && name.present? && !is_personal_name?(name: name) - type = 'Organization' + type = "Organization" end # parse author contributor role - contributor_type = parse_attributes(author.fetch('contributorType', nil)) + contributor_roles = parse_attributes(author.fetch("contributorType", nil)) + if contributor_roles + contributor_roles = [datacite_contributor_roles[contributor_roles]] + else + contributor_roles = ["Author"] + end # split name for type Person into given/family name if not already provided - if type == 'Person' && name.present? && given_name.blank? && family_name.blank? + if type == "Person" && name.present? && given_name.blank? && family_name.blank? Namae.options[:include_particle_in_family] = true names = Namae.parse(name) parsed_name = names.first if parsed_name.present? @@ -85,65 +114,68 @@ end end # return author in commonmeta format, using name vs. given/family name # depending on type - { 'id' => id, - 'type' => type, - 'name' => type == 'Person' ? nil : name, - 'givenName' => type == 'Organization' ? nil : given_name, - 'familyName' => type == 'Organization' ? nil : family_name, - 'affiliation' => get_affiliations(author.fetch('affiliation', nil)), - 'contributorType' => contributor_type }.compact + { "id" => id, + "type" => type, + "name" => type == "Person" ? nil : name, + "contributorRoles" => contributor_roles, + "givenName" => type == "Organization" ? nil : given_name, + "familyName" => type == "Organization" ? nil : family_name, + "affiliation" => get_affiliations(author.fetch("affiliation", nil)) }.compact end def cleanup_author(author) return nil unless author.present? # detect pattern "Smith J.", but not "Smith, John K." - unless author.include?(',') + unless author.include?(",") author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2') end # strip suffixes, e.g. "John Smith, MD" as the named parser doesn't handle them - author = author.split(',').first if %w[MD PhD].include? author.split(', ').last + author = author.split(",").first if %w[MD PhD].include? author.split(", ").last # remove email addresses email = validate_email(author) - author = author.gsub(email, '') if email.present? + author = author.gsub(email, "") if email.present? # strip spaces at the beginning and end of string author = author.strip # remove parentheses around names - author = author[1..-2] if author[0] == '(' && author[-1] == ')' + author = author[1..-2] if author[0] == "(" && author[-1] == ")" # remove spaces around hyphens - author = author.gsub(' - ', '-') + author = author.gsub(" - ", "-") # remove non-standard space characters - author.gsub(/[[:space:]]/, ' ') + author.gsub(/[[:space:]]/, " ") end # check if given name is in the database of known given names: # https://github.com/bmuller/gender_detector def is_personal_name?(name: nil) - return true if name_exists?(name.to_s.split.first) || name_exists?(name.to_s.split(', ').last) + # personal names are not allowed to contain semicolons + return false if name.to_s.include?(";") + return true if name_exists?(name.to_s.split.first) || name_exists?(name.to_s.split(", ").last) + # check if a name has only one word, e.g. "FamousOrganization", not including commas - return false if name.to_s.split(' ').size == 1 && name.to_s.exclude?(',') + return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",") # check for suffixes, e.g. "John Smith, MD" - return true if %w[MD PhD].include? name.split(', ').last + return true if %w[MD PhD].include? name.split(", ").last # check of name can be parsed into given/family name Namae.options[:include_particle_in_family] = true names = Namae.parse(name) parsed_name = names.first return true if parsed_name && parsed_name.given - + false end # recognize given name if we have loaded ::NameDetector data, e.g. in a Rails initializer def name_exists?(name) @@ -157,48 +189,48 @@ Array.wrap(authors).map { |author| get_one_author(author) }.compact end def authors_as_string(authors) Array.wrap(authors).map do |a| - if a['familyName'].present? - [a['familyName'], a['givenName']].join(', ') - elsif a['type'] == 'Person' - a['name'] - elsif a['name'].present? - "{#{a['name']}}" + if a["familyName"].present? + [a["familyName"], a["givenName"]].join(", ") + elsif a["type"] == "Person" + a["name"] + elsif a["name"].present? + "{#{a["name"]}}" end - end.join(' and ').presence + end.join(" and ").presence end def get_affiliations(affiliations) return nil unless affiliations.present? Array.wrap(affiliations).map do |a| affiliation_identifier = nil if a.is_a?(String) name = a.squish elsif a.is_a?(Hash) - if a['affiliationIdentifier'].present? - affiliation_identifier = a['affiliationIdentifier'] - if a['schemeURI'].present? - schemeURI = a['schemeURI'].end_with?('/') ? a['schemeURI'] : "#{a['schemeURI']}/" + if a["affiliationIdentifier"].present? + affiliation_identifier = a["affiliationIdentifier"] + if a["schemeURI"].present? + schemeURI = a["schemeURI"].end_with?("/") ? a["schemeURI"] : "#{a["schemeURI"]}/" end - affiliation_identifier = !affiliation_identifier.to_s.start_with?('https://') && schemeURI.present? ? normalize_id(schemeURI + affiliation_identifier) : normalize_id(affiliation_identifier) + affiliation_identifier = !affiliation_identifier.to_s.start_with?("https://") && schemeURI.present? ? normalize_id(schemeURI + affiliation_identifier) : normalize_id(affiliation_identifier) end - name = (a['name'] || a['__content__']).to_s.squish.presence + name = (a["name"] || a["__content__"]).to_s.squish.presence end - { 'id' => affiliation_identifier, 'name' => name }.compact.presence + { "id" => affiliation_identifier, "name" => name }.compact.presence end.compact.presence end def author_name_identifiers(id) return nil unless id.present? Array.wrap(id).map do |i| - { 'nameIdentifier' => i, - 'nameIdentifierScheme' => 'ORCID', - 'schemeUri' => 'https://orcid.org' }.compact + { "nameIdentifier" => i, + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org" }.compact end.compact.presence end end end