# frozen_string_literal: true

require "spec_helper"

# Exercises the helper methods callable on a Commonmeta::Metadata instance:
# identifier validation and normalization, attribute parsing, schema.org
# key conversion, date handling, series information, GitHub URL handling,
# SPDX and FOS lookups, and DOI / container-id encoding.
#
# NOTE(review): the group is tagged `vcr: true`; presumably spec_helper wires
# this metadata to VCR and cassette names are derived from the example
# descriptions. Confirm before renaming any `context`/`it` description.
describe Commonmeta::Metadata, vcr: true do
  subject { described_class.new(input: input, from: "crossref") }

  let(:input) { "https://doi.org/10.1101/097196" }

  context "validate url" do
    it "DOI" do
      str = "https://doi.org/10.5438/0000-00ss"
      response = subject.validate_url(str)
      expect(response).to eq("DOI")
    end

    it "URL" do
      str = "https://blog.datacite.org/eating-your-own-dog-food"
      response = subject.validate_url(str)
      expect(response).to eq("URL")
    end

    it "ISSN" do
      str = "ISSN 2050-084X"
      response = subject.validate_url(str)
      expect(response).to eq("ISSN")
    end

    it "string" do
      str = "eating-your-own-dog-food"
      response = subject.validate_url(str)
      expect(response).to be_nil
    end
  end

  context "validate_orcid" do
    it "validate_orcid" do
      orcid = "http://orcid.org/0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid https" do
      orcid = "https://orcid.org/0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid id" do
      orcid = "0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid www" do
      orcid = "http://www.orcid.org/0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid with spaces" do
      orcid = "0000 0002 1394 3097"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-1394-3097")
    end

    it "validate_orcid sandbox" do
      orcid = "http://sandbox.orcid.org/0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid sandbox https" do
      orcid = "https://sandbox.orcid.org/0000-0002-2590-225X"
      response = subject.validate_orcid(orcid)
      expect(response).to eq("0000-0002-2590-225X")
    end

    it "validate_orcid wrong id" do
      orcid = "0000-0002-1394-309"
      response = subject.validate_orcid(orcid)
      expect(response).to be_nil
    end
  end

  context "validate_orcid_scheme" do
    it "validate_orcid_scheme" do
      orcid = "http://orcid.org"
      response = subject.validate_orcid_scheme(orcid)
      expect(response).to eq("orcid.org")
    end

    it "validate_orcid_scheme trailing slash" do
      orcid = "http://orcid.org/"
      response = subject.validate_orcid_scheme(orcid)
      expect(response).to eq("orcid.org")
    end

    it "validate_orcid_scheme https" do
      orcid = "https://orcid.org"
      response = subject.validate_orcid_scheme(orcid)
      expect(response).to eq("orcid.org")
    end

    it "validate_orcid_scheme www" do
      orcid = "http://www.orcid.org"
      response = subject.validate_orcid_scheme(orcid)
      expect(response).to eq("orcid.org")
    end
  end

  context "parse attributes" do
    it "string" do
      element = "10.5061/DRYAD.8515"
      response = subject.parse_attributes(element)
      expect(response).to eq("10.5061/DRYAD.8515")
    end

    it "hash" do
      element = { "__content__" => "10.5061/DRYAD.8515" }
      response = subject.parse_attributes(element)
      expect(response).to eq("10.5061/DRYAD.8515")
    end

    it "hash with array value" do
      element = { "__content__" => ["10.5061/DRYAD.8515", "10.5061/DRYAD.8516"] }
      response = subject.parse_attributes(element)
      expect(response).to eq(["10.5061/DRYAD.8515", "10.5061/DRYAD.8516"])
    end

    it "array" do
      element = [{ "__content__" => "10.5061/DRYAD.8515" }]
      response = subject.parse_attributes(element)
      expect(response).to eq("10.5061/DRYAD.8515")
    end

    it "array of strings" do
      element = %w[datacite doi metadata featured]
      response = subject.parse_attributes(element)
      expect(response).to eq(%w[datacite doi metadata featured])
    end

    it "nil" do
      element = nil
      response = subject.parse_attributes(element)
      expect(response).to be_nil
    end

    it "first" do
      element = [
        { "__content__" => "10.5061/DRYAD.8515/1" },
        { "__content__" => "10.5061/DRYAD.8515/2" }
      ]
      response = subject.parse_attributes(element, first: true)
      expect(response).to eq("10.5061/DRYAD.8515/1")
    end
  end

  context "normalize id" do
    it "doi" do
      doi = "10.5061/DRYAD.8515"
      response = subject.normalize_id(doi)
      expect(response).to eq("https://doi.org/10.5061/dryad.8515")
    end

    it "doi as url" do
      doi = "http://dx.doi.org/10.5061/DRYAD.8515"
      response = subject.normalize_id(doi)
      expect(response).to eq("https://doi.org/10.5061/dryad.8515")
    end

    it "url" do
      url = "https://blog.datacite.org/eating-your-own-dog-food/"
      response = subject.normalize_id(url)
      expect(response).to eq("https://blog.datacite.org/eating-your-own-dog-food")
    end

    it "url with utf-8" do
      url = "http://www.詹姆斯.com/eating-your-own-dog-food/"
      response = subject.normalize_id(url)
      expect(response).to eq("http://www.xn--8ws00zhy3a.com/eating-your-own-dog-food")
    end

    it "ftp" do
      url = "ftp://blog.datacite.org/eating-your-own-dog-food/"
      response = subject.normalize_id(url)
      expect(response).to be_nil
    end

    it "invalid url" do
      url = "http://"
      response = subject.normalize_id(url)
      expect(response).to be_nil
    end

    it "string" do
      url = "eating-your-own-dog-food"
      response = subject.normalize_id(url)
      expect(response).to be_nil
    end

    it "filename" do
      url = "crossref.xml"
      response = subject.normalize_id(url)
      expect(response).to be_nil
    end

    it "sandbox via url" do
      url = "https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7"
      response = subject.normalize_id(url)
      expect(response).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
    end

    it "sandbox via options" do
      url = "10.20375/0000-0001-ddb8-7"
      response = subject.normalize_id(url, sandbox: true)
      expect(response).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
    end
  end

  context "normalize url" do
    it "with trailing slash" do
      url = "http://creativecommons.org/publicdomain/zero/1.0/"
      response = subject.normalize_url(url)
      expect(response).to eq("http://creativecommons.org/publicdomain/zero/1.0")
    end

    it "with trailing slash and to https" do
      url = "http://creativecommons.org/publicdomain/zero/1.0/"
      response = subject.normalize_url(url, https: true)
      expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0")
    end

    it "uri" do
      url = "info:eu-repo/semantics/openAccess"
      response = subject.normalize_url(url)
      expect(response).to eq("info:eu-repo/semantics/openAccess")
    end
  end

  context "normalize cc url" do
    it "with trailing slash" do
      url = "http://creativecommons.org/publicdomain/zero/1.0/"
      response = subject.normalize_cc_url(url)
      expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
    end

    it "with trailing slash and to https" do
      url = "http://creativecommons.org/publicdomain/zero/1.0/"
      response = subject.normalize_cc_url(url)
      expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
    end

    it "not found" do
      url = "http://creativecommons.org/publicdomain/zero/2.0/"
      response = subject.normalize_cc_url(url)
      expect(response).to eq("https://creativecommons.org/publicdomain/zero/2.0")
    end
  end

  # The local is named `issn` rather than `input` so it does not shadow the
  # `let(:input)` that `subject` is built from.
  context "normalize issn" do
    it "from array" do
      issn = [
        { "media_type" => "print", "__content__" => "13040855" },
        { "media_type" => "electronic", "__content__" => "21468427" }
      ]
      response = subject.normalize_issn(issn)
      expect(response).to eq("2146-8427")
    end

    it "from empty array" do
      issn = []
      response = subject.normalize_issn(issn)
      expect(response).to be_nil
    end

    it "from hash" do
      issn = { "media_type" => "electronic", "__content__" => "21468427" }
      response = subject.normalize_issn(issn)
      expect(response).to eq("2146-8427")
    end

    it "from string" do
      issn = "2146-8427"
      response = subject.normalize_issn(issn)
      expect(response).to eq("2146-8427")
    end
  end

  context "to_schema_org" do
    it "with id" do
      author = {
        "type" => "Person",
        "id" => "http://orcid.org/0000-0003-1419-2405",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner"
      }
      response = subject.to_schema_org(author)
      expect(response).to eq(
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner",
        "@type" => "Person",
        "@id" => "http://orcid.org/0000-0003-1419-2405"
      )
    end
  end

  context "from_schema_org" do
    it "with @id" do
      author = {
        "@type" => "Person",
        "@id" => "http://orcid.org/0000-0003-1419-2405",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner"
      }
      response = subject.from_schema_org(author)
      expect(response).to eq(
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "name" => "Martin Fenner",
        "type" => "Person",
        "id" => "http://orcid.org/0000-0003-1419-2405"
      )
    end
  end

  context "to_schema_org_identifiers" do
    it "with identifiers" do
      identifiers = [
        { "alternateIdentifier" => "https://doi.org/10.23725/8na3-9s47",
          "alternateIdentifierType" => "DOI" },
        { "alternateIdentifierType" => "md5",
          "alternateIdentifier" => "3b33f6b9338fccab0901b7d317577ea3" },
        { "alternateIdentifierType" => "minid",
          "alternateIdentifier" => "ark:/99999/fk41CrU4eszeLUDe" },
        { "alternateIdentifierType" => "dataguid",
          "alternateIdentifier" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" },
      ]
      response = subject.to_schema_org_identifiers(identifiers, type: "Dataset")
      expect(response).to eq(
        [
          { "@type" => "PropertyValue", "propertyID" => "DOI",
            "value" => "https://doi.org/10.23725/8na3-9s47" },
          { "@type" => "PropertyValue", "propertyID" => "md5",
            "value" => "3b33f6b9338fccab0901b7d317577ea3" },
          { "@type" => "PropertyValue", "propertyID" => "minid",
            "value" => "ark:/99999/fk41CrU4eszeLUDe" },
          { "@type" => "PropertyValue", "propertyID" => "dataguid",
            "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
        ]
      )
    end
  end

  # NOTE(review): the fixture text in both examples contains no markup, so as
  # written neither example shows a tag being removed or kept. The tags may
  # have been lost from the fixture; confirm against the intended input.
  context "sanitize" do
    it "removes a tags" do
      text = 'In 1998 Tim Berners-Lee coined the term cool URIs'
      content = subject.sanitize(text)
      expect(content).to eq("In 1998 Tim Berners-Lee coined the term cool URIs")
    end

    # NOTE(review): description reads awkwardly ("onlies keep"); left as is
    # because it may be used to name a recorded cassette. Confirm, then rename.
    it "onlies keep specific tags" do
      text = 'In 1998 Tim Berners-Lee coined the term cool URIs'
      content = subject.sanitize(text, tags: ["a"])
      expect(content).to eq('In 1998 Tim Berners-Lee coined the term cool URIs')
    end
  end

  context "get_datetime_from_time" do
    it "present" do
      time = "20200226071709"
      response = subject.get_datetime_from_time(time)
      expect(response).to eq("2020-02-26T07:17:09Z")
    end

    it "past" do
      time = "18770312071709"
      response = subject.get_datetime_from_time(time)
      expect(response).to eq("1877-03-12T07:17:09Z")
    end

    it "future" do
      time = "20970114071709"
      response = subject.get_datetime_from_time(time)
      expect(response).to eq("2097-01-14T07:17:09Z")
    end

    it "invalid" do
      time = "20201587168864794"
      response = subject.get_datetime_from_time(time)
      expect(response).to be_nil
    end

    it "nil" do
      time = nil
      response = subject.get_datetime_from_time(time)
      expect(response).to be_nil
    end
  end

  context "get_date_parts" do
    it "date" do
      date = "2016-12-20"
      response = subject.get_date_parts(date)
      expect(response).to eq("date-parts" => [[2016, 12, 20]])
    end

    it "year-month" do
      date = "2016-12"
      response = subject.get_date_parts(date)
      expect(response).to eq("date-parts" => [[2016, 12]])
    end

    it "year" do
      date = "2016"
      response = subject.get_date_parts(date)
      expect(response).to eq("date-parts" => [[2016]])
    end
  end

  context "get_date_from_parts" do
    it "date" do
      response = subject.get_date_from_parts(2016, 12, 20)
      expect(response).to eq("2016-12-20")
    end

    it "year-month" do
      response = subject.get_date_from_parts(2016, 12)
      expect(response).to eq("2016-12")
    end

    it "year" do
      response = subject.get_date_from_parts(2016)
      expect(response).to eq("2016")
    end
  end

  context "get_date_from_date_parts" do
    it "date" do
      date_as_parts = { "date-parts" => [[2016, 12, 20]] }
      response = subject.get_date_from_date_parts(date_as_parts)
      expect(response).to eq("2016-12-20")
    end

    it "year-month" do
      date_as_parts = { "date-parts" => [[2016, 12]] }
      response = subject.get_date_from_date_parts(date_as_parts)
      expect(response).to eq("2016-12")
    end

    it "year" do
      date_as_parts = { "date-parts" => [[2016]] }
      response = subject.get_date_from_date_parts(date_as_parts)
      expect(response).to eq("2016")
    end
  end

  context "get_date" do
    it "publication date" do
      dates = [{ "date" => "2016-12-20", "dateType" => "Issued" }]
      response = subject.get_date(dates, "Issued")
      expect(response).to eq("2016-12-20")
    end
  end

  context "get_link" do
    # Shared fixture exposed through `let` instead of a context-level local
    # variable captured by the example blocks.
    let(:links) do
      [
        { "rel" => "self", "type" => "application/atom+xml",
          "href" => "https://syldavia-gazette.org/atom/" },
        { "rel" => "alternate", "type" => "text/html",
          "href" => "https://syldavia-gazette.org" },
        { "rel" => "license", "type" => "text/html",
          "href" => "https://creativecommons.org/licenses/by/4.0/legalcode" }
      ]
    end

    it "url" do
      response = subject.get_link(links, "self")
      expect(response).to eq("https://syldavia-gazette.org/atom/")
    end

    it "license" do
      response = subject.get_link(links, "license")
      expect(response).to eq("https://creativecommons.org/licenses/by/4.0/legalcode")
    end
  end

  context "get_series_information" do
    # NOTE(review): this example passes nil but shares the description
    # "only title" with the next one. Left unchanged because the description
    # may be used to name a recorded cassette; confirm, then rename to "nil".
    it "only title" do
      str = nil
      response = subject.get_series_information(str)
      expect(response).to eq({})
    end

    it "only title" do
      str = "DataCite Blog"
      response = subject.get_series_information(str)
      expect(response).to eq("title" => "DataCite Blog")
    end

    it "title and pages" do
      str = "DataCite Blog, 1-3"
      response = subject.get_series_information(str)
      expect(response).to eq("firstPage" => "1", "lastPage" => "3", "title" => "DataCite Blog")
    end

    it "title, volume and pages" do
      str = "DataCite Blog, 7, 1-3"
      response = subject.get_series_information(str)
      expect(response).to eq(
        "firstPage" => "1",
        "lastPage" => "3",
        "title" => "DataCite Blog",
        "volume" => "7"
      )
    end

    it "title, volume, issue and pages" do
      str = "DataCite Blog, 7(11), 1-3"
      response = subject.get_series_information(str)
      expect(response).to eq(
        "firstPage" => "1",
        "issue" => "11",
        "lastPage" => "3",
        "title" => "DataCite Blog",
        "volume" => "7"
      )
    end
  end

  context "github" do
    it "github_from_url" do
      url = "https://github.com/datacite/bolognese"
      response = subject.github_from_url(url)
      expect(response).to eq(owner: "datacite", repo: "bolognese")
    end

    it "github_from_url file" do
      url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
      response = subject.github_from_url(url)
      expect(response).to eq(
        owner: "datacite",
        repo: "metadata-reports",
        release: "master",
        path: "software/codemeta.json"
      )
    end

    it "github_from_url cff file" do
      url = "https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff"
      response = subject.github_from_url(url)
      expect(response).to eq(
        owner: "citation-file-format",
        path: "CITATION.cff",
        release: "main",
        repo: "ruby-cff"
      )
    end

    it "github_as_codemeta_url" do
      url = "https://github.com/datacite/bolognese"
      response = subject.github_as_codemeta_url(url)
      expect(response).to eq("https://raw.githubusercontent.com/datacite/bolognese/master/codemeta.json")
    end

    it "github_as_cff_url" do
      url = "https://github.com/citation-file-format/ruby-cff"
      response = subject.github_as_cff_url(url)
      expect(response).to eq("https://raw.githubusercontent.com/citation-file-format/ruby-cff/main/CITATION.cff")
    end

    # NOTE(review): this example calls github_as_codemeta_url but repeats the
    # description "github_from_url file" used above. Left unchanged because
    # the description may be used to name a recorded cassette; confirm, then
    # rename to "github_as_codemeta_url file".
    it "github_from_url file" do
      url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
      response = subject.github_as_codemeta_url(url)
      expect(response).to eq("https://raw.githubusercontent.com/datacite/metadata-reports/master/software/codemeta.json")
    end
  end

  context "spdx" do
    it "name_to_spdx exists" do
      name = "Creative Commons Attribution 4.0 International"
      response = subject.name_to_spdx(name)
      expect(response).to eq(
        "id" => "CC-BY-4.0",
        "url" => "https://creativecommons.org/licenses/by/4.0/legalcode"
      )
    end

    it "name_to_spdx id" do
      name = "CC-BY-4.0"
      response = subject.name_to_spdx(name)
      expect(response).to eq(
        "id" => "CC-BY-4.0",
        "url" => "https://creativecommons.org/licenses/by/4.0/legalcode"
      )
    end

    it "hsh_to_spdx id" do
      hsh = { "rightsIdentifier" => "CC-BY-4.0" }
      response = subject.hsh_to_spdx(hsh)
      expect(response).to eq(
        "id" => "CC-BY-4.0",
        "url" => "https://creativecommons.org/licenses/by/4.0/legalcode"
      )
    end

    it "hsh_to_spdx url" do
      hsh = { "rightsURI" => "http://creativecommons.org/licenses/by-nc/4.0/legalcode" }
      response = subject.hsh_to_spdx(hsh)
      expect(response).to eq(
        "id" => "CC-BY-NC-4.0",
        "url" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
      )
    end

    it "hsh_to_spdx not found" do
      hsh = { "rightsURI" => "info:eu-repo/semantics/openAccess" }
      response = subject.hsh_to_spdx(hsh)
      expect(response).to eq({ "url" => "info:eu-repo/semantics/openAccess" })
    end
  end

  context "fos" do
    it "name_to_fos match" do
      name = "Biological sciences"
      response = subject.name_to_fos(name)
      expect(response).to eq(
        [
          { "subject" => "biological sciences" },
          { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
            "subject" => "FOS: Biological sciences",
            "subjectScheme" => "Fields of Science and Technology (FOS)" }
        ]
      )
    end

    it "name_to_fos for match" do
      name = "Statistics"
      response = subject.name_to_fos(name)
      expect(response).to eq(
        [
          { "subject" => "statistics" },
          { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
            "subject" => "FOS: Mathematics",
            "subjectScheme" => "Fields of Science and Technology (FOS)" }
        ]
      )
    end

    it "name_to_fos no match" do
      name = "Random tag"
      response = subject.name_to_fos(name)
      expect(response).to eq([{ "subject" => "random tag" }])
    end

    it "hsh_to_fos match" do
      hsh = { "__content__" => "Biological sciences" }
      response = subject.hsh_to_fos(hsh)
      expect(response).to eq(
        [
          { "subject" => "Biological sciences" },
          { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
            "subject" => "FOS: Biological sciences",
            "subjectScheme" => "Fields of Science and Technology (FOS)" }
        ]
      )
    end

    it "hsh_to_fos for match" do
      hsh = { "__content__" => "Statistics" }
      response = subject.hsh_to_fos(hsh)
      expect(response).to eq(
        [
          { "subject" => "Statistics" },
          { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
            "subject" => "FOS: Mathematics",
            "subjectScheme" => "Fields of Science and Technology (FOS)" }
        ]
      )
    end

    it "hsh_to_fos for with schemeUri in hash" do
      hsh = {
        "subject" => "FOS: Computer and information sciences",
        "subjectScheme" => "Fields of Science and Technology (FOS)",
        "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
      }
      response = subject.hsh_to_fos(hsh)
      expect(response).to eq(
        [
          {
            "subject" => "FOS: Computer and information sciences",
            "subjectScheme" => "Fields of Science and Technology (FOS)",
            "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
          }
        ]
      )
    end

    it "hsh_to_fos no match" do
      hsh = { "__content__" => "Random tag" }
      response = subject.hsh_to_fos(hsh)
      expect(response).to eq([{ "subject" => "Random tag" }])
    end
  end

  context "random doi" do
    it "encode doi" do
      prefix = "10.53731"
      response = subject.encode_doi(prefix)
      expect(response).to match(%r{#{prefix}/[a-z0-9]+})
      expect(response.length).to eq(36)
    end

    it "decode doi" do
      doi = "https://doi.org/10.53731/revzwnv-rpd913d-8drwz"
      response = subject.decode_doi(doi)
      expect(response).to eq(30_286_005_717_401_267_192_153_432_991)
    end

    it "decode another doi" do
      doi = "https://doi.org/10.53731/rckvde5-tzg61kj-7zvc1"
      response = subject.decode_doi(doi)
      expect(response).to eq(30_198_793_950_250_854_133_601_922_433)
    end
  end

  context "random id" do
    it "encode id" do
      response = subject.encode_container_id
      expect(response).to match(%r{[a-z0-9]+})
      expect(response.length).to eq(7)
    end

    it "decode id" do
      id = "4425y27"
      response = subject.decode_container_id(id)
      expect(response).to eq(4_431_476_807)
    end

    it "decode another id" do
      id = "gr1by89"
      response = subject.decode_container_id(id)
      expect(response).to eq(17_986_615_561)
    end
  end

  context "json_feed_url" do
    it "front-matter blog" do
      id = "f0m0e38"
      response = subject.json_feed_url(id)
      expect(response).to eq("https://rogue-scholar.org/api/blogs/f0m0e38")
    end
  end
end