# frozen_string_literal: true
require 'spec_helper'
describe Briard::Metadata, vcr: true do
# Object under test: an instance of the described class built from `input`
# and read with from: 'crossref'. The examples call its helper methods.
subject { described_class.new(input: input, from: 'crossref') }
# Default identifier handed to the subject.
let(:input) { 'https://doi.org/10.1101/097196' }
# validate_url: a DOI URL, an https URL and an ISSN string are each labelled
# with their type; a bare string yields nil.
context 'validate url' do
  it 'DOI' do
    expect(subject.validate_url('https://doi.org/10.5438/0000-00ss')).to eq('DOI')
  end

  it 'URL' do
    expect(subject.validate_url('https://blog.datacite.org/eating-your-own-dog-food')).to eq('URL')
  end

  it 'ISSN' do
    expect(subject.validate_url('ISSN 2050-084X')).to eq('ISSN')
  end

  it 'string' do
    expect(subject.validate_url('eating-your-own-dog-food')).to be_nil
  end
end
# validate_orcid: the bare, hyphenated ORCID iD is expected back for every
# accepted spelling (http/https, www, sandbox, space-separated); an iD that is
# one character short yields nil.
context 'validate_orcid' do
  it 'validate_orcid' do
    result = subject.validate_orcid('http://orcid.org/0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid https' do
    result = subject.validate_orcid('https://orcid.org/0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid id' do
    result = subject.validate_orcid('0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid www' do
    result = subject.validate_orcid('http://www.orcid.org/0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid with spaces' do
    result = subject.validate_orcid('0000 0002 1394 3097')
    expect(result).to eq('0000-0002-1394-3097')
  end

  it 'validate_orcid sandbox' do
    result = subject.validate_orcid('http://sandbox.orcid.org/0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid sandbox https' do
    result = subject.validate_orcid('https://sandbox.orcid.org/0000-0002-2590-225X')
    expect(result).to eq('0000-0002-2590-225X')
  end

  it 'validate_orcid wrong id' do
    result = subject.validate_orcid('0000-0002-1394-309')
    expect(result).to be_nil
  end
end
# validate_orcid_scheme: every accepted spelling of the ORCID scheme URI is
# expected to come back as 'orcid.org'.
context 'validate_orcid_scheme' do
  it 'validate_orcid_scheme' do
    expect(subject.validate_orcid_scheme('http://orcid.org')).to eq('orcid.org')
  end

  it 'validate_orcid_scheme trailing slash' do
    expect(subject.validate_orcid_scheme('http://orcid.org/')).to eq('orcid.org')
  end

  it 'validate_orcid_scheme https' do
    expect(subject.validate_orcid_scheme('https://orcid.org')).to eq('orcid.org')
  end

  it 'validate_orcid_scheme www' do
    expect(subject.validate_orcid_scheme('http://www.orcid.org')).to eq('orcid.org')
  end
end
# parse_attributes: exercised with a string, a '__content__' hash, arrays of
# hashes and of strings, nil, and the first: true option.
context 'parse attributes' do
  it 'string' do
    expect(subject.parse_attributes('10.5061/DRYAD.8515')).to eq('10.5061/DRYAD.8515')
  end

  it 'hash' do
    node = { '__content__' => '10.5061/DRYAD.8515' }
    expect(subject.parse_attributes(node)).to eq('10.5061/DRYAD.8515')
  end

  it 'array' do
    nodes = [{ '__content__' => '10.5061/DRYAD.8515' }]
    expect(subject.parse_attributes(nodes)).to eq('10.5061/DRYAD.8515')
  end

  it 'array of strings' do
    words = %w[datacite doi metadata featured]
    expect(subject.parse_attributes(words)).to eq(%w[datacite doi metadata featured])
  end

  it 'nil' do
    expect(subject.parse_attributes(nil)).to be_nil
  end

  it 'first' do
    nodes = [
      { '__content__' => '10.5061/DRYAD.8515/1' },
      { '__content__' => '10.5061/DRYAD.8515/2' }
    ]
    expect(subject.parse_attributes(nodes, first: true)).to eq('10.5061/DRYAD.8515/1')
  end
end
# normalize_id: DOIs are expected as lower-cased https://doi.org URLs, http(s)
# URLs without their trailing slash; ftp URLs, an empty http URL, bare strings
# and filenames are expected to yield nil.
context 'normalize id' do
  it 'doi' do
    normalized = subject.normalize_id('10.5061/DRYAD.8515')
    expect(normalized).to eq('https://doi.org/10.5061/dryad.8515')
  end

  it 'doi as url' do
    normalized = subject.normalize_id('http://dx.doi.org/10.5061/DRYAD.8515')
    expect(normalized).to eq('https://doi.org/10.5061/dryad.8515')
  end

  it 'url' do
    normalized = subject.normalize_id('https://blog.datacite.org/eating-your-own-dog-food/')
    expect(normalized).to eq('https://blog.datacite.org/eating-your-own-dog-food')
  end

  it 'url with utf-8' do
    normalized = subject.normalize_id('http://www.詹姆斯.com/eating-your-own-dog-food/')
    expect(normalized).to eq('http://www.xn--8ws00zhy3a.com/eating-your-own-dog-food')
  end

  it 'ftp' do
    expect(subject.normalize_id('ftp://blog.datacite.org/eating-your-own-dog-food/')).to be_nil
  end

  it 'invalid url' do
    expect(subject.normalize_id('http://')).to be_nil
  end

  it 'string' do
    expect(subject.normalize_id('eating-your-own-dog-food')).to be_nil
  end

  it 'filename' do
    expect(subject.normalize_id('crossref.xml')).to be_nil
  end

  it 'sandbox via url' do
    normalized = subject.normalize_id('https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7')
    expect(normalized).to eq('https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7')
  end

  it 'sandbox via options' do
    normalized = subject.normalize_id('10.20375/0000-0001-ddb8-7', sandbox: true)
    expect(normalized).to eq('https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7')
  end
end
# normalize_ids: schema.org-style '@id'/'@type' entries are expected to become
# relatedIdentifier hashes. Note the single-URL example expects a bare hash,
# not a one-element array.
context 'normalize ids' do
  it 'doi' do
    ids = [
      { '@type' => 'CreativeWork', '@id' => 'https://doi.org/10.5438/0012' },
      { '@type' => 'CreativeWork', '@id' => 'https://doi.org/10.5438/55E5-T5C0' }
    ]
    expected = [
      { 'relatedIdentifier' => '10.5438/0012',
        'relatedIdentifierType' => 'DOI',
        'resourceTypeGeneral' => 'Text' },
      { 'relatedIdentifier' => '10.5438/55e5-t5c0',
        'relatedIdentifierType' => 'DOI',
        'resourceTypeGeneral' => 'Text' }
    ]
    expect(subject.normalize_ids(ids: ids)).to eq(expected)
  end

  it 'url' do
    ids = [
      { '@type' => 'CreativeWork',
        '@id' => 'https://blog.datacite.org/eating-your-own-dog-food/' }
    ]
    expected = {
      'relatedIdentifier' => 'https://blog.datacite.org/eating-your-own-dog-food',
      'relatedIdentifierType' => 'URL',
      'resourceTypeGeneral' => 'Text'
    }
    expect(subject.normalize_ids(ids: ids)).to eq(expected)
  end
end
# normalize_url: the trailing slash is expected to be dropped, https: true to
# upgrade the scheme, and an info: URI to pass through unchanged.
context 'normalize url' do
  it 'with trailing slash' do
    normalized = subject.normalize_url('http://creativecommons.org/publicdomain/zero/1.0/')
    expect(normalized).to eq('http://creativecommons.org/publicdomain/zero/1.0')
  end

  it 'with trailing slash and to https' do
    normalized = subject.normalize_url('http://creativecommons.org/publicdomain/zero/1.0/', https: true)
    expect(normalized).to eq('https://creativecommons.org/publicdomain/zero/1.0')
  end

  it 'uri' do
    normalized = subject.normalize_url('info:eu-repo/semantics/openAccess')
    expect(normalized).to eq('info:eu-repo/semantics/openAccess')
  end
end
# normalize_cc_url: a known Creative Commons URL is expected as its https
# ".../legalcode" form; an unknown version only gets https and loses the slash.
# NOTE(review): the first two examples use the same input and expectation;
# they differ only in description.
context 'normalize cc url' do
  it 'with trailing slash' do
    normalized = subject.normalize_cc_url('http://creativecommons.org/publicdomain/zero/1.0/')
    expect(normalized).to eq('https://creativecommons.org/publicdomain/zero/1.0/legalcode')
  end

  it 'with trailing slash and to https' do
    normalized = subject.normalize_cc_url('http://creativecommons.org/publicdomain/zero/1.0/')
    expect(normalized).to eq('https://creativecommons.org/publicdomain/zero/1.0/legalcode')
  end

  it 'not found' do
    normalized = subject.normalize_cc_url('http://creativecommons.org/publicdomain/zero/2.0/')
    expect(normalized).to eq('https://creativecommons.org/publicdomain/zero/2.0')
  end
end
# normalize_issn: exercised with an array of media-typed hashes, an empty
# array, a single hash and an already hyphenated string.
context 'normalize issn' do
  it 'from array' do
    issns = [
      { 'media_type' => 'print', '__content__' => '13040855' },
      { 'media_type' => 'electronic', '__content__' => '21468427' }
    ]
    expect(subject.normalize_issn(issns)).to eq('2146-8427')
  end

  it 'from empty array' do
    expect(subject.normalize_issn([])).to be_nil
  end

  it 'from hash' do
    issn = { 'media_type' => 'electronic', '__content__' => '21468427' }
    expect(subject.normalize_issn(issn)).to eq('2146-8427')
  end

  it 'from string' do
    expect(subject.normalize_issn('2146-8427')).to eq('2146-8427')
  end
end
# to_schema_org: the 'type' and 'id' keys are expected to come back as '@type'
# and '@id', with the remaining keys untouched.
context 'to_schema_org' do
  it 'with id' do
    person = {
      'type' => 'Person',
      'id' => 'http://orcid.org/0000-0003-1419-2405',
      'givenName' => 'Martin',
      'familyName' => 'Fenner',
      'name' => 'Martin Fenner'
    }
    expected = {
      'givenName' => 'Martin',
      'familyName' => 'Fenner',
      'name' => 'Martin Fenner',
      '@type' => 'Person',
      '@id' => 'http://orcid.org/0000-0003-1419-2405'
    }
    expect(subject.to_schema_org(person)).to eq(expected)
  end
end
# from_schema_org: the '@type' and '@id' keys are expected to come back as
# 'type' and 'id', with the remaining keys untouched.
context 'from_schema_org' do
  it 'with @id' do
    person = {
      '@type' => 'Person',
      '@id' => 'http://orcid.org/0000-0003-1419-2405',
      'givenName' => 'Martin',
      'familyName' => 'Fenner',
      'name' => 'Martin Fenner'
    }
    expected = {
      'givenName' => 'Martin',
      'familyName' => 'Fenner',
      'name' => 'Martin Fenner',
      'type' => 'Person',
      'id' => 'http://orcid.org/0000-0003-1419-2405'
    }
    expect(subject.from_schema_org(person)).to eq(expected)
  end
end
# from_schema_org_creators: a schema.org Person (with or without an
# Organization affiliation) is expected to become a creator hash with
# creatorName, nameIdentifier and, when present, affiliation entries.
context 'from_schema_org_creators' do
  it 'with affiliation' do
    people = [
      { '@type' => 'Person',
        '@id' => 'http://orcid.org/0000-0003-1419-2405',
        'givenName' => 'Martin',
        'familyName' => 'Fenner',
        'name' => 'Martin Fenner',
        'affiliation' => {
          '@id' => 'https://ror.org/04wxnsj81',
          'name' => 'DataCite',
          '@type' => 'Organization'
        } }
    ]
    expected = [
      { 'affiliation' => {
          'affiliationIdentifier' => 'https://ror.org/04wxnsj81',
          'affiliationIdentifierScheme' => 'ROR',
          '__content__' => 'DataCite',
          'schemeUri' => 'https://ror.org/'
        },
        'creatorName' => { '__content__' => 'Martin Fenner', 'nameType' => 'Personal' },
        'familyName' => 'Fenner',
        'givenName' => 'Martin',
        'nameIdentifier' => [
          { '__content__' => 'http://orcid.org/0000-0003-1419-2405',
            'nameIdentifierScheme' => 'ORCID',
            'schemeUri' => 'https://orcid.org' }
        ] }
    ]
    expect(subject.from_schema_org_creators(people)).to eq(expected)
  end

  it 'without affiliation' do
    people = [
      { '@type' => 'Person',
        '@id' => 'http://orcid.org/0000-0003-1419-2405',
        'givenName' => 'Martin',
        'familyName' => 'Fenner',
        'name' => 'Martin Fenner' }
    ]
    expected = [
      { 'creatorName' => { '__content__' => 'Martin Fenner', 'nameType' => 'Personal' },
        'familyName' => 'Fenner',
        'givenName' => 'Martin',
        'nameIdentifier' => [
          { '__content__' => 'http://orcid.org/0000-0003-1419-2405',
            'nameIdentifierScheme' => 'ORCID',
            'schemeUri' => 'https://orcid.org' }
        ] }
    ]
    expect(subject.from_schema_org_creators(people)).to eq(expected)
  end
end
# to_schema_org_identifiers: each identifier/identifierType pair is expected
# to become a schema.org PropertyValue hash.
context 'to_schema_org_identifiers' do
  it 'with identifiers' do
    identifiers = [
      { 'identifier' => 'https://doi.org/10.23725/8na3-9s47', 'identifierType' => 'DOI' },
      { 'identifierType' => 'md5', 'identifier' => '3b33f6b9338fccab0901b7d317577ea3' },
      { 'identifierType' => 'minid', 'identifier' => 'ark:/99999/fk41CrU4eszeLUDe' },
      { 'identifierType' => 'dataguid',
        'identifier' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7' }
    ]
    expected = [
      { '@type' => 'PropertyValue',
        'propertyID' => 'DOI',
        'value' => 'https://doi.org/10.23725/8na3-9s47' },
      { '@type' => 'PropertyValue',
        'propertyID' => 'md5',
        'value' => '3b33f6b9338fccab0901b7d317577ea3' },
      { '@type' => 'PropertyValue',
        'propertyID' => 'minid',
        'value' => 'ark:/99999/fk41CrU4eszeLUDe' },
      { '@type' => 'PropertyValue',
        'propertyID' => 'dataguid',
        'value' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7' }
    ]
    expect(subject.to_schema_org_identifiers(identifiers, type: 'Dataset')).to eq(expected)
  end
end
# sanitize: called with the default options and with tags: ['a'].
# NOTE(review): the input text contains no markup, so both examples only show
# that plain text passes through unchanged; presumably the original fixtures
# carried HTML tags that were lost - confirm and restore.
# NOTE(review): the description 'onlies keep specific tags' looks like a typo,
# but it is left as is because the enclosing describe is tagged vcr: true and
# the description may double as a cassette name - confirm before renaming.
context 'sanitize' do
  it 'removes a tags' do
    plain = 'In 1998 Tim Berners-Lee coined the term cool URIs'
    expect(subject.sanitize(plain)).to eq('In 1998 Tim Berners-Lee coined the term cool URIs')
  end

  it 'onlies keep specific tags' do
    plain = 'In 1998 Tim Berners-Lee coined the term cool URIs'
    expect(subject.sanitize(plain, tags: ['a'])).to eq('In 1998 Tim Berners-Lee coined the term cool URIs')
  end
end
# get_datetime_from_time: 14-digit timestamps are expected as ISO 8601 UTC
# strings; an over-long digit string and nil are expected to yield nil.
context 'get_datetime_from_time' do
  it 'present' do
    expect(subject.get_datetime_from_time('20200226071709')).to eq('2020-02-26T07:17:09Z')
  end

  it 'past' do
    expect(subject.get_datetime_from_time('18770312071709')).to eq('1877-03-12T07:17:09Z')
  end

  it 'future' do
    expect(subject.get_datetime_from_time('20970114071709')).to eq('2097-01-14T07:17:09Z')
  end

  it 'invalid' do
    expect(subject.get_datetime_from_time('20201587168864794')).to be_nil
  end

  it 'nil' do
    expect(subject.get_datetime_from_time(nil)).to be_nil
  end
end
# get_date_parts: an ISO date string is expected as a 'date-parts' hash with
# as many integer components as the string provides.
context 'get_date_parts' do
  it 'date' do
    expect(subject.get_date_parts('2016-12-20')).to eq({ 'date-parts' => [[2016, 12, 20]] })
  end

  it 'year-month' do
    expect(subject.get_date_parts('2016-12')).to eq({ 'date-parts' => [[2016, 12]] })
  end

  it 'year' do
    expect(subject.get_date_parts('2016')).to eq({ 'date-parts' => [[2016]] })
  end
end
# get_date_from_parts: year, month and day integers are expected to be joined
# into an ISO date string of matching precision.
context 'get_date_from_parts' do
  it 'date' do
    expect(subject.get_date_from_parts(2016, 12, 20)).to eq('2016-12-20')
  end

  it 'year-month' do
    expect(subject.get_date_from_parts(2016, 12)).to eq('2016-12')
  end

  it 'year' do
    expect(subject.get_date_from_parts(2016)).to eq('2016')
  end
end
# get_date_from_date_parts: a 'date-parts' hash is expected to be turned back
# into an ISO date string of matching precision.
context 'get_date_from_date_parts' do
  it 'date' do
    parts = { 'date-parts' => [[2016, 12, 20]] }
    expect(subject.get_date_from_date_parts(parts)).to eq('2016-12-20')
  end

  it 'year-month' do
    parts = { 'date-parts' => [[2016, 12]] }
    expect(subject.get_date_from_date_parts(parts)).to eq('2016-12')
  end

  it 'year' do
    parts = { 'date-parts' => [[2016]] }
    expect(subject.get_date_from_date_parts(parts)).to eq('2016')
  end
end
# get_date: the date whose dateType matches the requested type is expected.
context 'get_date' do
  it 'publication date' do
    entries = [{ 'date' => '2016-12-20', 'dateType' => 'Issued' }]
    expect(subject.get_date(entries, 'Issued')).to eq('2016-12-20')
  end
end
# get_series_information: a "title, volume(issue), first-last" string is
# expected to be split into title, volume, issue and page keys.
# NOTE(review): the first two examples share the description 'only title',
# although the first one passes nil. Left unchanged because the enclosing
# describe is tagged vcr: true and descriptions may double as cassette names -
# confirm before renaming.
context 'get_series_information' do
  it 'only title' do
    expect(subject.get_series_information(nil)).to eq({})
  end

  it 'only title' do
    info = subject.get_series_information('DataCite Blog')
    expect(info).to eq({ 'title' => 'DataCite Blog' })
  end

  it 'title and pages' do
    info = subject.get_series_information('DataCite Blog, 1-3')
    expect(info).to eq({ 'firstPage' => '1', 'lastPage' => '3', 'title' => 'DataCite Blog' })
  end

  it 'title, volume and pages' do
    info = subject.get_series_information('DataCite Blog, 7, 1-3')
    expect(info).to eq({ 'firstPage' => '1',
                         'lastPage' => '3',
                         'title' => 'DataCite Blog',
                         'volume' => '7' })
  end

  it 'title, volume, issue and pages' do
    info = subject.get_series_information('DataCite Blog, 7(11), 1-3')
    expect(info).to eq({ 'firstPage' => '1',
                         'issue' => '11',
                         'lastPage' => '3',
                         'title' => 'DataCite Blog',
                         'volume' => '7' })
  end
end
# GitHub helpers: github_from_url splits a repository or blob URL into
# owner/repo/release/path; github_as_codemeta_url and github_as_cff_url are
# expected to return raw.githubusercontent.com URLs.
# NOTE(review): 'github_from_url file' is used as a description twice; the
# second one actually calls github_as_codemeta_url. Left unchanged because the
# enclosing describe is tagged vcr: true and descriptions may double as
# cassette names - confirm before renaming.
context 'github' do
  it 'github_from_url' do
    parts = subject.github_from_url('https://github.com/datacite/bolognese')
    expect(parts).to eq({ owner: 'datacite', repo: 'bolognese' })
  end

  it 'github_from_url file' do
    parts = subject.github_from_url(
      'https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json'
    )
    expect(parts).to eq({ owner: 'datacite',
                          repo: 'metadata-reports',
                          release: 'master',
                          path: 'software/codemeta.json' })
  end

  it 'github_from_url cff file' do
    parts = subject.github_from_url(
      'https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff'
    )
    expect(parts).to eq({ owner: 'citation-file-format',
                          path: 'CITATION.cff',
                          release: 'main',
                          repo: 'ruby-cff' })
  end

  it 'github_as_codemeta_url' do
    raw_url = subject.github_as_codemeta_url('https://github.com/datacite/bolognese')
    expect(raw_url).to eq('https://raw.githubusercontent.com/datacite/bolognese/master/codemeta.json')
  end

  it 'github_as_cff_url' do
    raw_url = subject.github_as_cff_url('https://github.com/citation-file-format/ruby-cff')
    expect(raw_url).to eq('https://raw.githubusercontent.com/citation-file-format/ruby-cff/main/CITATION.cff')
  end

  it 'github_from_url file' do
    raw_url = subject.github_as_codemeta_url(
      'https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json'
    )
    expect(raw_url).to eq('https://raw.githubusercontent.com/datacite/metadata-reports/master/software/codemeta.json')
  end
end
# SPDX helpers: name_to_spdx and hsh_to_spdx are expected to expand a licence
# name, identifier or URL into a full rights hash; an unknown rightsURI only
# comes back under 'rightsUri'.
context 'spdx' do
  # Rights hash expected for every CC-BY-4.0 lookup below.
  let(:cc_by_4_0) do
    {
      'rights' => 'Creative Commons Attribution 4.0 International',
      'rightsUri' => 'https://creativecommons.org/licenses/by/4.0/legalcode',
      'rightsIdentifier' => 'cc-by-4.0',
      'rightsIdentifierScheme' => 'SPDX',
      'schemeUri' => 'https://spdx.org/licenses/'
    }
  end

  it 'name_to_spdx exists' do
    rights = subject.name_to_spdx('Creative Commons Attribution 4.0 International')
    expect(rights).to eq(cc_by_4_0)
  end

  it 'name_to_spdx id' do
    rights = subject.name_to_spdx('CC-BY-4.0')
    expect(rights).to eq(cc_by_4_0)
  end

  it 'hsh_to_spdx id' do
    rights = subject.hsh_to_spdx({ 'rightsIdentifier' => 'cc-by-4.0' })
    expect(rights).to eq(cc_by_4_0)
  end

  it 'hsh_to_spdx url' do
    rights = subject.hsh_to_spdx(
      { 'rightsURI' => 'http://creativecommons.org/licenses/by-nc/4.0/legalcode' }
    )
    expect(rights).to eq({
                           'rights' => 'Creative Commons Attribution Non Commercial 4.0 International',
                           'rightsUri' => 'https://creativecommons.org/licenses/by-nc/4.0/legalcode',
                           'rightsIdentifier' => 'cc-by-nc-4.0',
                           'rightsIdentifierScheme' => 'SPDX',
                           'schemeUri' => 'https://spdx.org/licenses/'
                         })
  end

  it 'hsh_to_spdx not found' do
    rights = subject.hsh_to_spdx({ 'rightsURI' => 'info:eu-repo/semantics/openAccess' })
    expect(rights).to eq({ 'rightsUri' => 'info:eu-repo/semantics/openAccess' })
  end
end
# Fields of Science (FOS) helpers: name_to_fos and hsh_to_fos are expected to
# return the plain subject plus, on a match, a FOS subject entry. The
# name_to_fos examples expect the plain subject lower-cased; the hsh_to_fos
# examples expect it as given.
context 'fos' do
  it 'name_to_fos match' do
    subjects = subject.name_to_fos('Biological sciences')
    expect(subjects).to eq([
                             { 'subject' => 'biological sciences' },
                             { 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf',
                               'subject' => 'FOS: Biological sciences',
                               'subjectScheme' => 'Fields of Science and Technology (FOS)' }
                           ])
  end

  it 'name_to_fos for match' do
    subjects = subject.name_to_fos('Statistics')
    expect(subjects).to eq([
                             { 'subject' => 'statistics' },
                             { 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf',
                               'subject' => 'FOS: Mathematics',
                               'subjectScheme' => 'Fields of Science and Technology (FOS)' }
                           ])
  end

  it 'name_to_fos no match' do
    subjects = subject.name_to_fos('Random tag')
    expect(subjects).to eq([{ 'subject' => 'random tag' }])
  end

  it 'hsh_to_fos match' do
    subjects = subject.hsh_to_fos({ '__content__' => 'Biological sciences' })
    expect(subjects).to eq([
                             { 'subject' => 'Biological sciences' },
                             { 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf',
                               'subject' => 'FOS: Biological sciences',
                               'subjectScheme' => 'Fields of Science and Technology (FOS)' }
                           ])
  end

  it 'hsh_to_fos for match' do
    subjects = subject.hsh_to_fos({ '__content__' => 'Statistics' })
    expect(subjects).to eq([
                             { 'subject' => 'Statistics' },
                             { 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf',
                               'subject' => 'FOS: Mathematics',
                               'subjectScheme' => 'Fields of Science and Technology (FOS)' }
                           ])
  end

  it 'hsh_to_fos for with schemeUri in hash' do
    fos_entry = {
      'subject' => 'FOS: Computer and information sciences',
      'subjectScheme' => 'Fields of Science and Technology (FOS)',
      'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
    }
    expect(subject.hsh_to_fos(fos_entry)).to eq([
                                                  {
                                                    'subject' => 'FOS: Computer and information sciences',
                                                    'subjectScheme' => 'Fields of Science and Technology (FOS)',
                                                    'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
                                                  }
                                                ])
  end

  it 'hsh_to_fos no match' do
    subjects = subject.hsh_to_fos({ '__content__' => 'Random tag' })
    expect(subjects).to eq([{ 'subject' => 'Random tag' }])
  end
end
# DOI encoding helpers: encode_doi is expected to return a 40-character string
# containing "<prefix>/<suffix>"; decode_doi is expected to turn a DOI back
# into the integer shown.
# NOTE(review): the description 'decode anothe doi' looks like a typo, but it
# is left as is because the enclosing describe is tagged vcr: true and the
# description may double as a cassette name - confirm before renaming.
context 'random doi' do
  it 'encode doi' do
    prefix = '10.53731'
    response = subject.encode_doi(prefix)
    # Escape the prefix before interpolating it: unescaped, its "." is a regex
    # wildcard and the pattern would also accept e.g. "10x53731/...".
    expect(response).to match(%r{#{Regexp.escape(prefix)}/[-._;()/:A-Za-z0-9]+})
    expect(response.length).to eq(40)
  end

  it 'decode doi' do
    doi = 'https://doi.org/10.53731/revzwnv-rpd913d-8drwz'
    response = subject.decode_doi(doi)
    expect(response).to eq(30_286_005_717_401_267_192_153_432_991)
  end

  it 'decode anothe doi' do
    doi = 'https://doi.org/10.53731/rckvde5-tzg61kj-7zvc1'
    response = subject.decode_doi(doi)
    expect(response).to eq(30_198_793_950_250_854_133_601_922_433)
  end
end
end