# frozen_string_literal: true
require 'spec_helper'
describe Briard::Metadata, vcr: true do
let(:input) { "https://doi.org/10.1101/097196" }
subject { Briard::Metadata.new(input: input, from: "crossref") }
# validate_url classifies an identifier string as "DOI", "URL" or "ISSN";
# the last example expects nil for a string matching none of them.
context "validate url" do
  it "DOI" do
    expect(subject.validate_url("https://doi.org/10.5438/0000-00ss")).to eq("DOI")
  end

  it "URL" do
    expect(subject.validate_url("https://blog.datacite.org/eating-your-own-dog-food")).to eq("URL")
  end

  it "ISSN" do
    expect(subject.validate_url("ISSN 2050-084X")).to eq("ISSN")
  end

  it "string" do
    expect(subject.validate_url("eating-your-own-dog-food")).to be_nil
  end
end
# validate_orcid is expected to reduce every accepted spelling of an ORCID
# (http/https, www, sandbox, bare id, space-separated) to the bare
# hyphenated id, and to return nil for a malformed id.
context "validate_orcid" do
  # [example description, input, expected bare id]
  [
    ["validate_orcid", "http://orcid.org/0000-0002-2590-225X", "0000-0002-2590-225X"],
    ["validate_orcid https", "https://orcid.org/0000-0002-2590-225X", "0000-0002-2590-225X"],
    ["validate_orcid id", "0000-0002-2590-225X", "0000-0002-2590-225X"],
    ["validate_orcid www", "http://www.orcid.org/0000-0002-2590-225X", "0000-0002-2590-225X"],
    ["validate_orcid with spaces", "0000 0002 1394 3097", "0000-0002-1394-3097"],
    ["validate_orcid sandbox", "http://sandbox.orcid.org/0000-0002-2590-225X", "0000-0002-2590-225X"],
    ["validate_orcid sandbox https", "https://sandbox.orcid.org/0000-0002-2590-225X", "0000-0002-2590-225X"]
  ].each do |description, orcid, bare_id|
    it description do
      expect(subject.validate_orcid(orcid)).to eq(bare_id)
    end
  end

  # The final group is one character short, so no id is extracted.
  it "validate_orcid wrong id" do
    expect(subject.validate_orcid("0000-0002-1394-309")).to be_nil
  end
end
# Every scheme URI variant below is expected to resolve to "orcid.org".
context "validate_orcid_scheme" do
  {
    "validate_orcid_scheme" => "http://orcid.org",
    "validate_orcid_scheme trailing slash" => "http://orcid.org/",
    "validate_orcid_scheme https" => "https://orcid.org",
    "validate_orcid_scheme www" => "http://www.orcid.org"
  }.each do |description, scheme_uri|
    it description do
      expect(subject.validate_orcid_scheme(scheme_uri)).to eq("orcid.org")
    end
  end
end
# parse_attributes is exercised with a string, a hash and arrays; hashes
# carry their value under the "__content__" key.
context "parse attributes" do
  it "string" do
    expect(subject.parse_attributes("10.5061/DRYAD.8515")).to eq("10.5061/DRYAD.8515")
  end

  it "hash" do
    node = { "__content__" => "10.5061/DRYAD.8515" }
    expect(subject.parse_attributes(node)).to eq("10.5061/DRYAD.8515")
  end

  # A one-element array is expected to collapse to its single value.
  it "array" do
    nodes = [{ "__content__" => "10.5061/DRYAD.8515" }]
    expect(subject.parse_attributes(nodes)).to eq("10.5061/DRYAD.8515")
  end

  it "array of strings" do
    tags = ["datacite", "doi", "metadata", "featured"]
    expect(subject.parse_attributes(tags)).to eq(["datacite", "doi", "metadata", "featured"])
  end

  it "nil" do
    expect(subject.parse_attributes(nil)).to be_nil
  end

  # With first: true only the leading element's content is returned.
  it "first" do
    nodes = [
      { "__content__" => "10.5061/DRYAD.8515/1" },
      { "__content__" => "10.5061/DRYAD.8515/2" }
    ]
    expect(subject.parse_attributes(nodes, first: true)).to eq("10.5061/DRYAD.8515/1")
  end
end
# normalize_id: DOIs are expected as lower-cased https://doi.org URLs, http(s)
# URLs lose their trailing slash, and anything else yields nil.
context "normalize id" do
  it "doi" do
    expect(subject.normalize_id("10.5061/DRYAD.8515")).to eq("https://doi.org/10.5061/dryad.8515")
  end

  it "doi as url" do
    normalized = subject.normalize_id("http://dx.doi.org/10.5061/DRYAD.8515")
    expect(normalized).to eq("https://doi.org/10.5061/dryad.8515")
  end

  it "url" do
    normalized = subject.normalize_id("https://blog.datacite.org/eating-your-own-dog-food/")
    expect(normalized).to eq("https://blog.datacite.org/eating-your-own-dog-food")
  end

  # The non-ASCII host is expected back in its punycode form.
  it "url with utf-8" do
    normalized = subject.normalize_id("http://www.詹姆斯.com/eating-your-own-dog-food/")
    expect(normalized).to eq("http://www.xn--8ws00zhy3a.com/eating-your-own-dog-food")
  end

  it "ftp" do
    expect(subject.normalize_id("ftp://blog.datacite.org/eating-your-own-dog-food/")).to be_nil
  end

  it "invalid url" do
    expect(subject.normalize_id("http://")).to be_nil
  end

  it "string" do
    expect(subject.normalize_id("eating-your-own-dog-food")).to be_nil
  end

  it "sandbox via url" do
    normalized = subject.normalize_id("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
    expect(normalized).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
  end

  # sandbox: true selects the staging resolver for a bare DOI.
  it "sandbox via options" do
    normalized = subject.normalize_id("10.20375/0000-0001-ddb8-7", sandbox: true)
    expect(normalized).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
  end
end
# normalize_ids turns schema.org "@id"/"@type" entries into related-identifier
# hashes.
context "normalize ids" do
  it "doi" do
    works = [
      { "@type" => "CreativeWork", "@id" => "https://doi.org/10.5438/0012" },
      { "@type" => "CreativeWork", "@id" => "https://doi.org/10.5438/55E5-T5C0" }
    ]
    expected = [
      { "relatedIdentifier" => "10.5438/0012",
        "relatedIdentifierType" => "DOI",
        "resourceTypeGeneral" => "Text" },
      { "relatedIdentifier" => "10.5438/55e5-t5c0",
        "relatedIdentifierType" => "DOI",
        "resourceTypeGeneral" => "Text" }
    ]
    expect(subject.normalize_ids(ids: works)).to eq(expected)
  end

  # A single input entry is expected back as a bare hash, not a one-element array.
  it "url" do
    works = [{ "@type" => "CreativeWork", "@id" => "https://blog.datacite.org/eating-your-own-dog-food/" }]
    expected = {
      "relatedIdentifier" => "https://blog.datacite.org/eating-your-own-dog-food",
      "relatedIdentifierType" => "URL",
      "resourceTypeGeneral" => "Text"
    }
    expect(subject.normalize_ids(ids: works)).to eq(expected)
  end
end
# normalize_url: trailing slash removal, the optional https upgrade, and
# pass-through of a non-http URI.
context "normalize url" do
  it "with trailing slash" do
    normalized = subject.normalize_url("http://creativecommons.org/publicdomain/zero/1.0/")
    expect(normalized).to eq("http://creativecommons.org/publicdomain/zero/1.0")
  end

  it "with trailing slash and to https" do
    normalized = subject.normalize_url("http://creativecommons.org/publicdomain/zero/1.0/", https: true)
    expect(normalized).to eq("https://creativecommons.org/publicdomain/zero/1.0")
  end

  it "uri" do
    expect(subject.normalize_url("info:eu-repo/semantics/openAccess")).to eq("info:eu-repo/semantics/openAccess")
  end
end
# normalize_cc_url: a known Creative Commons URL is expected as its https
# "legalcode" form; an unknown one is only upgraded to https and stripped of
# its trailing slash.
context "normalize cc url" do
  it "with trailing slash" do
    normalized = subject.normalize_cc_url("http://creativecommons.org/publicdomain/zero/1.0/")
    expect(normalized).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
  end

  # Same input and expectation as the example above.
  it "with trailing slash and to https" do
    normalized = subject.normalize_cc_url("http://creativecommons.org/publicdomain/zero/1.0/")
    expect(normalized).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
  end

  it "not found" do
    normalized = subject.normalize_cc_url("http://creativecommons.org/publicdomain/zero/2.0/")
    expect(normalized).to eq("https://creativecommons.org/publicdomain/zero/2.0")
  end
end
# normalize_issn accepts an array, a hash or a string and is expected to
# return the hyphenated ISSN (the electronic one when several are given).
context "normalize issn" do
  it "from array" do
    issns = [
      { "media_type" => "print", "__content__" => "13040855" },
      { "media_type" => "electronic", "__content__" => "21468427" }
    ]
    expect(subject.normalize_issn(issns)).to eq("2146-8427")
  end

  it "from empty array" do
    expect(subject.normalize_issn([])).to be_nil
  end

  it "from hash" do
    issn = { "media_type" => "electronic", "__content__" => "21468427" }
    expect(subject.normalize_issn(issn)).to eq("2146-8427")
  end

  it "from string" do
    expect(subject.normalize_issn("2146-8427")).to eq("2146-8427")
  end
end
# to_schema_org is expected to rename "type"/"id" to "@type"/"@id" and leave
# the other keys untouched.
context "to_schema_org" do
  it "with id" do
    person = {
      "type" => "Person",
      "id" => "http://orcid.org/0000-0003-1419-2405",
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner"
    }
    expected = {
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner",
      "@type" => "Person",
      "@id" => "http://orcid.org/0000-0003-1419-2405"
    }
    expect(subject.to_schema_org(person)).to eq(expected)
  end
end
# from_schema_org is expected to rename "@type"/"@id" to "type"/"id" and
# leave the other keys untouched.
context "from_schema_org" do
  it "with @id" do
    person = {
      "@type" => "Person",
      "@id" => "http://orcid.org/0000-0003-1419-2405",
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner"
    }
    expected = {
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner",
      "type" => "Person",
      "id" => "http://orcid.org/0000-0003-1419-2405"
    }
    expect(subject.from_schema_org(person)).to eq(expected)
  end
end
# from_schema_org_creators converts schema.org person hashes into creator
# hashes with "creatorName", "nameIdentifier" and, when given, "affiliation".
context "from_schema_org_creators" do
  it "with affiliation" do
    people = [
      {
        "@type" => "Person",
        "@id" => "http://orcid.org/0000-0003-1419-2405",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner",
        "affiliation" => {
          "@id" => "https://ror.org/04wxnsj81",
          "name" => "DataCite",
          "@type" => "Organization"
        }
      }
    ]
    expected = [
      {
        "affiliation" => {
          "affiliationIdentifier" => "https://ror.org/04wxnsj81",
          "affiliationIdentifierScheme" => "ROR",
          "__content__" => "DataCite",
          "schemeUri" => "https://ror.org/"
        },
        "creatorName" => { "__content__" => "Martin Fenner", "nameType" => "Personal" },
        "familyName" => "Fenner",
        "givenName" => "Martin",
        "nameIdentifier" => [
          { "__content__" => "http://orcid.org/0000-0003-1419-2405",
            "nameIdentifierScheme" => "ORCID",
            "schemeUri" => "https://orcid.org" }
        ]
      }
    ]
    expect(subject.from_schema_org_creators(people)).to eq(expected)
  end

  it "without affiliation" do
    people = [
      {
        "@type" => "Person",
        "@id" => "http://orcid.org/0000-0003-1419-2405",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner"
      }
    ]
    expected = [
      {
        "creatorName" => { "__content__" => "Martin Fenner", "nameType" => "Personal" },
        "familyName" => "Fenner",
        "givenName" => "Martin",
        "nameIdentifier" => [
          { "__content__" => "http://orcid.org/0000-0003-1419-2405",
            "nameIdentifierScheme" => "ORCID",
            "schemeUri" => "https://orcid.org" }
        ]
      }
    ]
    expect(subject.from_schema_org_creators(people)).to eq(expected)
  end
end
# Each identifier hash is expected to become a schema.org PropertyValue whose
# propertyID is the identifier type and whose value is the identifier.
context "to_schema_org_identifiers" do
  it "with identifiers" do
    source = [
      { "identifier" => "https://doi.org/10.23725/8na3-9s47", "identifierType" => "DOI" },
      { "identifierType" => "md5", "identifier" => "3b33f6b9338fccab0901b7d317577ea3" },
      { "identifierType" => "minid", "identifier" => "ark:/99999/fk41CrU4eszeLUDe" },
      { "identifierType" => "dataguid", "identifier" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
    ]
    expected = [
      { "@type" => "PropertyValue", "propertyID" => "DOI", "value" => "https://doi.org/10.23725/8na3-9s47" },
      { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "3b33f6b9338fccab0901b7d317577ea3" },
      { "@type" => "PropertyValue", "propertyID" => "minid", "value" => "ark:/99999/fk41CrU4eszeLUDe" },
      { "@type" => "PropertyValue", "propertyID" => "dataguid",
        "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
    ]
    expect(subject.to_schema_org_identifiers(source, type: "Dataset")).to eq(expected)
  end
end
# Exercises sanitize with its default options and with an explicit tags: list.
# NOTE(review): neither input string below contains any markup, and each
# expected value equals its input, so these examples do not demonstrate any tag
# being removed or kept. The fixtures presumably carried HTML tags at some
# point -- TODO confirm against the upstream spec and restore them.
context "sanitize" do
# Default call: no tags option is passed.
it 'should remove a tags' do
text = "In 1998 Tim Berners-Lee coined the term cool URIs"
content = subject.sanitize(text)
expect(content).to eq("In 1998 Tim Berners-Lee coined the term cool URIs")
end
# Passes tags: ["a"] explicitly.
it 'should only keep specific tags' do
text = "In 1998 Tim Berners-Lee coined the term cool URIs"
content = subject.sanitize(text, tags: ["a"])
expect(content).to eq("In 1998 Tim Berners-Lee coined the term cool URIs")
end
end
# get_datetime_from_time turns a compact "YYYYMMDDhhmmss" string into an
# ISO 8601 UTC timestamp; unparseable or missing input is expected to give nil.
context "get_datetime_from_time" do
  it "present" do
    expect(subject.get_datetime_from_time("20200226071709")).to eq("2020-02-26T07:17:09Z")
  end

  it "past" do
    expect(subject.get_datetime_from_time("18770312071709")).to eq("1877-03-12T07:17:09Z")
  end

  it "future" do
    expect(subject.get_datetime_from_time("20970114071709")).to eq("2097-01-14T07:17:09Z")
  end

  it "invalid" do
    expect(subject.get_datetime_from_time("20201587168864794")).to be_nil
  end

  it "nil" do
    expect(subject.get_datetime_from_time(nil)).to be_nil
  end
end
# get_date_parts splits an ISO date string into a "date-parts" structure with
# as many integer components as the input provides.
context "get_date_parts" do
  it "date" do
    expect(subject.get_date_parts("2016-12-20")).to eq({ "date-parts" => [[2016, 12, 20]] })
  end

  it "year-month" do
    expect(subject.get_date_parts("2016-12")).to eq({ "date-parts" => [[2016, 12]] })
  end

  it "year" do
    expect(subject.get_date_parts("2016")).to eq({ "date-parts" => [[2016]] })
  end
end
# get_date_from_parts joins year, month and day integers into an ISO date
# string; month and day are optional.
context "get_date_from_parts" do
  it "date" do
    iso_date = subject.get_date_from_parts(2016, 12, 20)
    expect(iso_date).to eq("2016-12-20")
  end

  it "year-month" do
    iso_date = subject.get_date_from_parts(2016, 12)
    expect(iso_date).to eq("2016-12")
  end

  it "year" do
    iso_date = subject.get_date_from_parts(2016)
    expect(iso_date).to eq("2016")
  end
end
# get_date_from_date_parts converts a "date-parts" hash back to an ISO date
# string.
context "get_date_from_date_parts" do
  it "date" do
    parts = { "date-parts" => [[2016, 12, 20]] }
    expect(subject.get_date_from_date_parts(parts)).to eq("2016-12-20")
  end

  it "year-month" do
    parts = { "date-parts" => [[2016, 12]] }
    expect(subject.get_date_from_date_parts(parts)).to eq("2016-12")
  end

  it "year" do
    parts = { "date-parts" => [[2016]] }
    expect(subject.get_date_from_date_parts(parts)).to eq("2016")
  end
end
# get_date picks the date whose "dateType" matches the requested type.
context "get_date" do
  it "publication date" do
    candidates = [{ "date" => "2016-12-20", "dateType" => "Issued" }]
    expect(subject.get_date(candidates, "Issued")).to eq("2016-12-20")
  end
end
# get_series_information parses a "title, volume(issue), first-last" string
# into a hash; only the components present in the input appear in the result.
context "get_series_information" do
  # Previously also named "only title", which duplicated the next example's
  # description and hid the fact that this one covers nil input.
  it "nil" do
    str = nil
    response = subject.get_series_information(str)
    expect(response).to eq({})
  end

  it "only title" do
    str = "DataCite Blog"
    response = subject.get_series_information(str)
    expect(response).to eq({ "title" => "DataCite Blog" })
  end

  it "title and pages" do
    str = "DataCite Blog, 1-3"
    response = subject.get_series_information(str)
    expect(response).to eq({ "firstPage" => "1", "lastPage" => "3", "title" => "DataCite Blog" })
  end

  it "title, volume and pages" do
    str = "DataCite Blog, 7, 1-3"
    response = subject.get_series_information(str)
    expect(response).to eq({ "firstPage" => "1", "lastPage" => "3", "title" => "DataCite Blog", "volume" => "7" })
  end

  it "title, volume, issue and pages" do
    str = "DataCite Blog, 7(11), 1-3"
    response = subject.get_series_information(str)
    expect(response).to eq(
      { "firstPage" => "1", "issue" => "11", "lastPage" => "3", "title" => "DataCite Blog", "volume" => "7" }
    )
  end
end
# GitHub URL helpers: github_from_url splits a repository or file URL into
# its parts, and github_as_codemeta_url / github_as_cff_url build the
# raw.githubusercontent.com URL of the metadata file.
context "github" do
  it "github_from_url" do
    url = "https://github.com/datacite/bolognese"
    response = subject.github_from_url(url)
    expect(response).to eq({ owner: "datacite", repo: "bolognese" })
  end

  it "github_from_url file" do
    url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
    response = subject.github_from_url(url)
    expect(response).to eq(
      { owner: "datacite", repo: "metadata-reports", release: "master", path: "software/codemeta.json" }
    )
  end

  it "github_from_url cff file" do
    url = "https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff"
    response = subject.github_from_url(url)
    expect(response).to eq(
      { owner: "citation-file-format", path: "CITATION.cff", release: "main", repo: "ruby-cff" }
    )
  end

  it "github_as_codemeta_url" do
    url = "https://github.com/datacite/bolognese"
    response = subject.github_as_codemeta_url(url)
    expect(response).to eq("https://raw.githubusercontent.com/datacite/bolognese/master/codemeta.json")
  end

  it "github_as_cff_url" do
    url = "https://github.com/citation-file-format/ruby-cff"
    response = subject.github_as_cff_url(url)
    expect(response).to eq("https://raw.githubusercontent.com/citation-file-format/ruby-cff/main/CITATION.cff")
  end

  # Previously named "github_from_url file", which duplicated an earlier
  # example's description although this one calls github_as_codemeta_url.
  it "github_as_codemeta_url file" do
    url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
    response = subject.github_as_codemeta_url(url)
    expect(response).to eq("https://raw.githubusercontent.com/datacite/metadata-reports/master/software/codemeta.json")
  end
end
# SPDX lookups: name_to_spdx resolves a licence name or id, hsh_to_spdx
# resolves a rights hash by identifier or URI.
context "spdx" do
  # Expected rights record for CC BY 4.0, shared by the first three examples.
  let(:cc_by_4_license) do
    {
      "rights" => "Creative Commons Attribution 4.0 International",
      "rightsUri" => "https://creativecommons.org/licenses/by/4.0/legalcode",
      "rightsIdentifier" => "cc-by-4.0",
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/"
    }
  end

  it "name_to_spdx exists" do
    expect(subject.name_to_spdx("Creative Commons Attribution 4.0 International")).to eq(cc_by_4_license)
  end

  it "name_to_spdx id" do
    expect(subject.name_to_spdx("CC-BY-4.0")).to eq(cc_by_4_license)
  end

  it "hsh_to_spdx id" do
    expect(subject.hsh_to_spdx({ "rightsIdentifier" => "cc-by-4.0" })).to eq(cc_by_4_license)
  end

  it "hsh_to_spdx url" do
    rights = { "rightsURI" => "http://creativecommons.org/licenses/by-nc/4.0/legalcode" }
    expected = {
      "rights" => "Creative Commons Attribution Non Commercial 4.0 International",
      "rightsUri" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
      "rightsIdentifier" => "cc-by-nc-4.0",
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/"
    }
    expect(subject.hsh_to_spdx(rights)).to eq(expected)
  end

  # An unknown URI is only carried over under the "rightsUri" key.
  it "hsh_to_spdx not found" do
    rights = { "rightsURI" => "info:eu-repo/semantics/openAccess" }
    expect(subject.hsh_to_spdx(rights)).to eq({ "rightsUri" => "info:eu-repo/semantics/openAccess" })
  end
end
# Fields of Science (FOS) mapping: a recognised subject is expected back
# together with an extra "FOS: ..." entry; an unrecognised one comes back alone.
# name_to_fos lower-cases the original subject, hsh_to_fos keeps its case.
context "fos" do
  it "name_to_fos match" do
    expected = [
      { "subject" => "biological sciences" },
      { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
        "subject" => "FOS: Biological sciences",
        "subjectScheme" => "Fields of Science and Technology (FOS)" }
    ]
    expect(subject.name_to_fos("Biological sciences")).to eq(expected)
  end

  it "name_to_fos for match" do
    expected = [
      { "subject" => "statistics" },
      { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
        "subject" => "FOS: Mathematics",
        "subjectScheme" => "Fields of Science and Technology (FOS)" }
    ]
    expect(subject.name_to_fos("Statistics")).to eq(expected)
  end

  it "name_to_fos no match" do
    expect(subject.name_to_fos("Random tag")).to eq([{ "subject" => "random tag" }])
  end

  it "hsh_to_fos match" do
    expected = [
      { "subject" => "Biological sciences" },
      { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
        "subject" => "FOS: Biological sciences",
        "subjectScheme" => "Fields of Science and Technology (FOS)" }
    ]
    expect(subject.hsh_to_fos({ "__content__" => "Biological sciences" })).to eq(expected)
  end

  it "hsh_to_fos for match" do
    expected = [
      { "subject" => "Statistics" },
      { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
        "subject" => "FOS: Mathematics",
        "subjectScheme" => "Fields of Science and Technology (FOS)" }
    ]
    expect(subject.hsh_to_fos({ "__content__" => "Statistics" })).to eq(expected)
  end

  # A hash that already carries the FOS scheme is expected back unchanged,
  # wrapped in an array.
  it "hsh_to_fos for with schemeUri in hash" do
    fos_subject = {
      "subject" => "FOS: Computer and information sciences",
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
    expected = [
      { "subject" => "FOS: Computer and information sciences",
        "subjectScheme" => "Fields of Science and Technology (FOS)",
        "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }
    ]
    expect(subject.hsh_to_fos(fos_subject)).to eq(expected)
  end

  it "hsh_to_fos no match" do
    expect(subject.hsh_to_fos({ "__content__" => "Random tag" })).to eq([{ "subject" => "Random tag" }])
  end
end
# Random DOI helpers: encode_doi generates a DOI under the given prefix and
# decode_doi turns a DOI's suffix back into an integer.
context "random doi" do
  it "encode doi" do
    prefix = "10.53731"
    response = subject.encode_doi(prefix)
    # Escape the prefix so its "." matches a literal dot rather than any character.
    expect(response).to match(%r{#{Regexp.escape(prefix)}/[-._;()/:A-Za-z0-9]+})
    expect(response.length).to eq(40)
  end

  it "decode doi" do
    doi = "https://doi.org/10.53731/revzwnv-rpd913d-8drwz"
    response = subject.decode_doi(doi)
    # No space before the parenthesis: `eq (…)` is parsed as a grouped
    # expression and triggers a Ruby warning.
    expect(response).to eq(30286005717401267192153432991)
  end

  it "decode another doi" do
    doi = "https://doi.org/10.53731/rckvde5-tzg61kj-7zvc1"
    response = subject.decode_doi(doi)
    expect(response).to eq(30198793950250854133601922433)
  end
end
end