# frozen_string_literal: true
require 'spec_helper'
describe Bolognese::Metadata, vcr: true do
let(:input) { "https://doi.org/10.1101/097196" }
subject { Bolognese::Metadata.new(input: input, from: "crossref") }
context "validate url" do
  # Each pair is the identifier type validate_url is expected to report
  # (also used as the example description) and the string passed to it.
  [
    ["DOI", "https://doi.org/10.5438/0000-00ss"],
    ["URL", "https://blog.datacite.org/eating-your-own-dog-food"],
    ["ISSN", "ISSN 2050-084X"]
  ].each do |expected_type, candidate|
    it expected_type do
      expect(subject.validate_url(candidate)).to eq(expected_type)
    end
  end

  # A bare slug is expected to produce nil rather than a type label.
  it "string" do
    expect(subject.validate_url("eating-your-own-dog-food")).to be_nil
  end
end
context "validate_orcid" do
  # Example description => input. Every variant below is expected to be
  # reduced to the same bare ORCID iD.
  {
    "validate_orcid" => "http://orcid.org/0000-0002-2590-225X",
    "validate_orcid https" => "https://orcid.org/0000-0002-2590-225X",
    "validate_orcid id" => "0000-0002-2590-225X",
    "validate_orcid www" => "http://www.orcid.org/0000-0002-2590-225X"
  }.each do |description, candidate|
    it description do
      expect(subject.validate_orcid(candidate)).to eq("0000-0002-2590-225X")
    end
  end

  # Space-separated groups are expected to come back hyphen-separated.
  it "validate_orcid with spaces" do
    expect(subject.validate_orcid("0000 0002 1394 3097")).to eq("0000-0002-1394-3097")
  end

  # The last group is one character short, so no iD is expected.
  it "validate_orcid wrong id" do
    expect(subject.validate_orcid("0000-0002-1394-309")).to be_nil
  end
end
context "validate_orcid_scheme" do
  # Example description => scheme URI. Each spelling (http/https, with or
  # without "www.", with or without a trailing slash) is expected to yield
  # the bare host "orcid.org".
  {
    "validate_orcid_scheme" => "http://orcid.org",
    "validate_orcid_scheme trailing slash" => "http://orcid.org/",
    "validate_orcid_scheme https" => "https://orcid.org",
    "validate_orcid_scheme www" => "http://www.orcid.org"
  }.each do |description, scheme_uri|
    it description do
      expect(subject.validate_orcid_scheme(scheme_uri)).to eq("orcid.org")
    end
  end
end
context "parse attributes" do
  # A plain string is expected to be returned as-is.
  it "string" do
    expect(subject.parse_attributes("10.5061/DRYAD.8515")).to eq("10.5061/DRYAD.8515")
  end

  # A hash is expected to be reduced to the value under "__content__".
  it "hash" do
    node = { "__content__" => "10.5061/DRYAD.8515" }
    expect(subject.parse_attributes(node)).to eq("10.5061/DRYAD.8515")
  end

  # A one-element array of such hashes is expected to collapse to a single value.
  it "array" do
    nodes = [{ "__content__" => "10.5061/DRYAD.8515" }]
    expect(subject.parse_attributes(nodes)).to eq("10.5061/DRYAD.8515")
  end

  # An array of plain strings is expected to come back element for element.
  it "array of strings" do
    keywords = ["datacite", "doi", "metadata", "featured"]
    expect(subject.parse_attributes(keywords)).to eq(["datacite", "doi", "metadata", "featured"])
  end

  it "nil" do
    expect(subject.parse_attributes(nil)).to be_nil
  end

  # With first: true only the leading element's content is expected.
  it "first" do
    nodes = [
      { "__content__" => "10.5061/DRYAD.8515/1" },
      { "__content__" => "10.5061/DRYAD.8515/2" }
    ]
    expect(subject.parse_attributes(nodes, first: true)).to eq("10.5061/DRYAD.8515/1")
  end
end
context "normalize id" do
  # A bare DOI and a dx.doi.org URL are both expected to normalize to the
  # same lower-cased https://doi.org URL.
  {
    "doi" => "10.5061/DRYAD.8515",
    "doi as url" => "http://dx.doi.org/10.5061/DRYAD.8515"
  }.each do |description, raw_doi|
    it description do
      expect(subject.normalize_id(raw_doi)).to eq("https://doi.org/10.5061/dryad.8515")
    end
  end

  # The trailing slash is expected to be dropped.
  it "url" do
    normalized = subject.normalize_id("https://blog.datacite.org/eating-your-own-dog-food/")
    expect(normalized).to eq("https://blog.datacite.org/eating-your-own-dog-food")
  end

  # A non-ASCII host is expected to come back in its xn-- (punycode) form.
  it "url with utf-8" do
    normalized = subject.normalize_id("http://www.詹姆斯.com/eating-your-own-dog-food/")
    expect(normalized).to eq("http://www.xn--8ws00zhy3a.com/eating-your-own-dog-food")
  end

  # Inputs for which no normalized identifier is expected.
  {
    "ftp" => "ftp://blog.datacite.org/eating-your-own-dog-food/",
    "invalid url" => "http://",
    "string" => "eating-your-own-dog-food"
  }.each do |description, rejected|
    it description do
      expect(subject.normalize_id(rejected)).to be_nil
    end
  end

  # A URL already on the test handle server is expected to be kept unchanged.
  it "sandbox via url" do
    sandbox_url = "https://handle.test.datacite.org/10.20375/0000-0001-ddb8-7"
    expect(subject.normalize_id(sandbox_url)).to eq("https://handle.test.datacite.org/10.20375/0000-0001-ddb8-7")
  end

  # With sandbox: true a bare DOI is expected to resolve against the test handle server.
  it "sandbox via options" do
    normalized = subject.normalize_id("10.20375/0000-0001-ddb8-7", sandbox: true)
    expect(normalized).to eq("https://handle.test.datacite.org/10.20375/0000-0001-ddb8-7")
  end
end
context "normalize ids" do
  # Two DOI references are expected to come back as an array of
  # related-identifier hashes holding the bare, lower-cased DOIs.
  it "doi" do
    references = [
      { "@type" => "CreativeWork", "@id" => "https://doi.org/10.5438/0012" },
      { "@type" => "CreativeWork", "@id" => "https://doi.org/10.5438/55E5-T5C0" }
    ]
    expected = [
      { "related_identifier" => "10.5438/0012", "related_identifier_type" => "DOI" },
      { "related_identifier" => "10.5438/55e5-t5c0", "related_identifier_type" => "DOI" }
    ]

    expect(subject.normalize_ids(ids: references)).to eq(expected)
  end

  # A single URL reference is expected to come back as one hash, not as a
  # one-element array.
  it "url" do
    references = [
      { "@type" => "CreativeWork", "@id" => "https://blog.datacite.org/eating-your-own-dog-food/" }
    ]
    expected = {
      "related_identifier" => "https://blog.datacite.org/eating-your-own-dog-food",
      "related_identifier_type" => "URL"
    }

    expect(subject.normalize_ids(ids: references)).to eq(expected)
  end
end
context "normalize url" do
  # The trailing slash is expected to be removed.
  it "with trailing slash" do
    normalized = subject.normalize_url("http://creativecommons.org/publicdomain/zero/1.0/")
    expect(normalized).to eq("http://creativecommons.org/publicdomain/zero/1.0")
  end

  # An info: URI is expected to yield nil.
  it "uri" do
    expect(subject.normalize_url("info:eu-repo/semantics/openAccess")).to be_nil
  end
end
context "to_schema_org" do
  # "type" and "id" are expected to be renamed to "@type" and "@id";
  # the remaining keys are expected to pass through unchanged.
  it "with id" do
    person = {
      "type" => "Person",
      "id" => "http://orcid.org/0000-0003-1419-2405",
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner"
    }
    expected = {
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner",
      "@type" => "Person",
      "@id" => "http://orcid.org/0000-0003-1419-2405"
    }

    expect(subject.to_schema_org(person)).to eq(expected)
  end
end
context "from_schema_org" do
  # "@type" and "@id" are expected to be renamed to "type" and "id";
  # the remaining keys are expected to pass through unchanged.
  it "with @id" do
    person = {
      "@type" => "Person",
      "@id" => "http://orcid.org/0000-0003-1419-2405",
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner"
    }
    expected = {
      "givenName" => "Martin",
      "familyName" => "Fenner",
      "name" => "Martin Fenner",
      "type" => "Person",
      "id" => "http://orcid.org/0000-0003-1419-2405"
    }

    expect(subject.from_schema_org(person)).to eq(expected)
  end
end
context "to_schema_org_identifier" do
  # The DOI and each alternate identifier are expected to become one
  # PropertyValue entry apiece, with the DOI first and the alternates
  # in their original order.
  it "with alternate_identifier" do
    doi_url = "https://doi.org/10.23725/8na3-9s47"
    alternates = [
      { "alternate_identifier_type" => "md5", "alternate_identifier" => "3b33f6b9338fccab0901b7d317577ea3" },
      { "alternate_identifier_type" => "minid", "alternate_identifier" => "ark:/99999/fk41CrU4eszeLUDe" },
      { "alternate_identifier_type" => "dataguid", "alternate_identifier" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
    ]
    expected = [
      { "@type" => "PropertyValue", "propertyID" => "doi", "value" => "https://doi.org/10.23725/8na3-9s47" },
      { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "3b33f6b9338fccab0901b7d317577ea3" },
      { "@type" => "PropertyValue", "propertyID" => "minid", "value" => "ark:/99999/fk41CrU4eszeLUDe" },
      { "@type" => "PropertyValue", "propertyID" => "dataguid", "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
    ]

    actual = subject.to_schema_org_identifier(doi_url, alternate_identifiers: alternates, type: "Dataset")

    expect(actual).to eq(expected)
  end
end
context "sanitize" do
  # The fixtures carry real markup so the examples exercise tag handling.
  # Previously both inputs were plain text, so the assertions passed whether
  # or not sanitize removed anything.
  #
  # NOTE(review): the expected strings assume sanitize keeps the inner text
  # of a removed element and leaves the href of an allowed <a> untouched --
  # confirm against the sanitize implementation before merging.

  # Without options, the anchor element is expected to be stripped while
  # its link text stays in place.
  it 'should remove a tags' do
    text = 'In 1998 Tim Berners-Lee coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>'
    content = subject.sanitize(text)
    expect(content).to eq("In 1998 Tim Berners-Lee coined the term cool URIs")
  end

  # With tags: ["a"], only the anchor is expected to survive; <strong> is
  # expected to be reduced to its text.
  it 'should only keep specific tags' do
    text = 'In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>'
    content = subject.sanitize(text, tags: ["a"])
    expect(content).to eq('In 1998 Tim Berners-Lee coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>')
  end
end
context "get_date_parts" do
  # Example description => [ISO 8601 string, integer components expected
  # inside the "date-parts" wrapper].
  {
    "date" => ["2016-12-20", [2016, 12, 20]],
    "year-month" => ["2016-12", [2016, 12]],
    "year" => ["2016", [2016]]
  }.each do |description, (iso_date, components)|
    it description do
      expect(subject.get_date_parts(iso_date)).to eq({ "date-parts" => [components] })
    end
  end
end
context "get_date_from_parts" do
  # Example description => [positional integer arguments, expected string].
  # Only the components that are passed are expected to appear in the output.
  {
    "date" => [[2016, 12, 20], "2016-12-20"],
    "year-month" => [[2016, 12], "2016-12"],
    "year" => [[2016], "2016"]
  }.each do |description, (components, iso_date)|
    it description do
      expect(subject.get_date_from_parts(*components)).to eq(iso_date)
    end
  end
end
context "get_date_from_date_parts" do
  # Example description => [components placed inside a "date-parts" hash,
  # expected string].
  {
    "date" => [[2016, 12, 20], "2016-12-20"],
    "year-month" => [[2016, 12], "2016-12"],
    "year" => [[2016], "2016"]
  }.each do |description, (components, iso_date)|
    it description do
      wrapped = { "date-parts" => [components] }
      expect(subject.get_date_from_date_parts(wrapped)).to eq(iso_date)
    end
  end
end
context "get_date" do
  # The "date" value of the entry whose "date_type" matches the requested
  # type is expected to be returned.
  it "publication date" do
    dated_entries = [{ "date" => "2016-12-20", "date_type" => "Issued" }]
    expect(subject.get_date(dated_entries, "Issued")).to eq("2016-12-20")
  end
end
context "github" do
  # A repository URL is expected to split into owner and repo.
  it "github_from_url" do
    url = "https://github.com/datacite/bolognese"
    response = subject.github_from_url(url)
    expect(response).to eq(owner: "datacite", repo: "bolognese")
  end

  # A blob URL is expected to also yield the release (branch) and file path.
  it "github_from_url file" do
    url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
    response = subject.github_from_url(url)
    expect(response).to eq(owner: "datacite", repo: "metadata-reports", release: "master", path: "software/codemeta.json")
  end

  # A repository URL is expected to map to the raw codemeta.json on master.
  it "github_as_codemeta_url" do
    url = "https://github.com/datacite/bolognese"
    response = subject.github_as_codemeta_url(url)
    expect(response).to eq("https://raw.githubusercontent.com/datacite/bolognese/master/codemeta.json")
  end

  # Renamed from "github_from_url file": this example calls
  # github_as_codemeta_url, and the old title duplicated the one above.
  # NOTE(review): if VCR cassettes are named after example descriptions,
  # the rename may require a new cassette for this example -- confirm.
  it "github_as_codemeta_url file" do
    url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
    response = subject.github_as_codemeta_url(url)
    expect(response).to eq("https://raw.githubusercontent.com/datacite/metadata-reports/master/software/codemeta.json")
  end
end
end