# frozen_string_literal: true require "spec_helper" describe Commonmeta::Metadata, vcr: true do subject { described_class.new } context "get json_feed_item metadata" do it "blogger post" do input = "https://api.rogue-scholar.org/posts/f3629c86-06e0-42c0-844a-266b03a91ef1" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/37y2z-gre70") expect(subject.url).to eq("https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "f3629c86-06e0-42c0-844a-266b03a91ef1", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://orcid.org/0000-0002-7101-9767", "familyName" => "Page", "givenName" => "Roderic", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Ten years and a million links" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-05-31", "updated" => "2023-05-31") expect(subject.descriptions.first["description"]).to start_with("As trailed on a Twitter thread last week I’ve been working on a manuscript describing the efforts to map taxonomic names to their original descriptions in the taxonomic literature.") expect(subject.publisher).to eq("name" => "iPhylo") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://iphylo.blogspot.com", "identifierType" => "URL", "title" => "iPhylo", "type" => "Periodical") end it "ghost post with doi" do input = "https://api.rogue-scholar.org/posts/5bb66e92-5cb9-4659-8aca-20e486b695c9" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.53731/4nwxn-frt36") expect(subject.url).to eq("https://blog.front-matter.io/posts/does-it-compose") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.53731/4nwxn-frt36.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5bb66e92-5cb9-4659-8aca-20e486b695c9", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Does it compose?" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-05-16", "updated" => "2023-06-18") expect(subject.descriptions.first["description"]).to start_with("One question I have increasingly asked myself in the past few years. Meaning Can I run this open source software using Docker containers and a Docker Compose file?") expect(subject.publisher).to eq("name" => "Front Matter") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical") end it "ghost post with related_identifiers" do input = "https://api.rogue-scholar.org/posts/8a4de443-3347-4b82-b57d-e3c82b6485fc" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.53731/r79v4e1-97aq74v-ag578") expect(subject.url).to eq("https://blog.front-matter.io/posts/differences-between-orcid-and-datacite-metadata") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.53731/r79v4e1-97aq74v-ag578.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "8a4de443-3347-4b82-b57d-e3c82b6485fc", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Differences between ORCID and DataCite Metadata" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2015-09-18", "updated" => "2023-09-07") expect(subject.descriptions.first["description"]).to start_with("One of the first tasks for DataCite in the European Commission-funded THOR project") expect(subject.publisher).to eq("name" => "Front Matter") expect(subject.related_identifiers).to eq([{ "id" => "https://doi.org/10.5438/bc11-cqw1", "type" => "IsIdenticalTo" }]) expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical") end it "ghost post with related_identifiers and link to peer-reviewed article" do input = "https://api.rogue-scholar.org/posts/2bdebfc5-e02e-42c2-90c5-e873e2d0435d" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.53731/r294649-6f79289-8cw18") expect(subject.url).to eq("https://blog.front-matter.io/posts/nine-simple-ways-to-make-it-easier-to-re-use-your-data") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.53731/r294649-6f79289-8cw18.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "2bdebfc5-e02e-42c2-90c5-e873e2d0435d", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Nine simple ways to make it easier to (re)use your data" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2013-06-25", "updated" => "2023-09-07") expect(subject.descriptions.first["description"]).to start_with("This paper in markdown format was written by Ethan White et al.") expect(subject.publisher).to eq("name" => "Front Matter") expect(subject.related_identifiers).to eq([{ "id" => "https://doi.org/10.4033/iee.2013.6b.6.f", "type" => "IsPreprintOf" }]) expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical") end it "ghost post with related_identifiers and funding" do input = "https://api.rogue-scholar.org/posts/e58dc9c8-b870-4db2-8896-238b3246c551" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.53731/r79s4nh-97aq74v-ag4t1") expect(subject.url).to eq("https://blog.front-matter.io/posts/tracking-the-growth-of-the-pid-graph") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.53731/r79s4nh-97aq74v-ag4t1.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "e58dc9c8-b870-4db2-8896-238b3246c551", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Tracking the Growth of the PID Graph" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2019-07-01", "updated" => "2023-09-07") expect(subject.descriptions.first["description"]).to start_with("The connections between scholarly resources generated by persistent identifiers (PIDs)") expect(subject.publisher).to eq("name" => "Front Matter") expect(subject.funding_references).to eq([{ "awardNumber" => "777523", "funderIdentifier" => "http://doi.org/10.13039/501100000780", "funderName" => "European Commission" }]) expect(subject.related_identifiers).to eq([{ "id" => "https://doi.org/10.5438/bv9z-dc66", "type" => "IsIdenticalTo" }]) expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical") end it "ghost post without doi" do input = "https://api.rogue-scholar.org/posts/c3095752-2af0-40a4-a229-3ceb7424bce2" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/kj95y-gp867") expect(subject.url).to eq("https://www.ideasurg.pub/residency-visual-abstract") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "c3095752-2af0-40a4-a229-3ceb7424bce2", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "id" => "https://orcid.org/0000-0003-0449-4469", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "The Residency Visual Abstract" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-04-08", "updated" => "2023-04-08") expect(subject.descriptions.first["description"]).to start_with("My prototype for a Residency Visual Abstract") expect(subject.publisher).to eq("name" => "I.D.E.A.S.") expect(subject.subjects).to eq([{ "subject" => "Clinical medicine" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Clinical medicine", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical") end it "ghost post with author name suffix" do input = "https://api.rogue-scholar.org/posts/153d65e1-f0e4-49a6-a969-33d2129800ba" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/ytvy2-59450") expect(subject.url).to eq("https://www.ideasurg.pub/academic-powerhouse") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "153d65e1-f0e4-49a6-a969-33d2129800ba", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "type" => "Person", "contributorRoles" => ["Author"], "id" => "https://orcid.org/0000-0003-0449-4469") expect(subject.titles).to eq([{ "title" => "How to Build an Academic Powerhouse: Let's Study Who's Doing it" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-06-03", "updated" => "2023-06-03") expect(subject.descriptions.first["description"]).to start_with("Students and residents aspiring for a career in academic surgery") expect(subject.publisher).to eq("name" => "I.D.E.A.S.") expect(subject.subjects).to eq([{ "subject" => "Clinical medicine" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Clinical medicine", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical") expect(subject.references).to be_nil end it "syldavia gazette post with references" do input = "https://api.rogue-scholar.org/posts/0022b9ef-525a-4a79-81ad-13411697f58a" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.53731/ffbx660-083tnag") expect(subject.url).to eq("https://syldavia-gazette.org/guinea-worms-chatgpt-neanderthals") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.53731/ffbx660-083tnag.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "0022b9ef-525a-4a79-81ad-13411697f58a", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Fenner", "givenName" => "Martin", "id" => "https://orcid.org/0000-0003-1419-2405", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Guinea Worms, ChatGPT, Neanderthals, Plagiarism, Tidyverse" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-02-01", "updated" => "2023-04-13") expect(subject.descriptions.first["description"]).to start_with("Guinea worm disease reaches all-time low: only 13* human cases") expect(subject.publisher).to eq("name" => "Syldavia Gazette") expect(subject.subjects).to eq([{ "subject" => "Humanities" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Humanities", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://syldavia-gazette.org", "identifierType" => "URL", "title" => "Syldavia Gazette", "type" => "Periodical") expect(subject.references.length).to eq(5) expect(subject.references[1]).to eq("doi" => "https://doi.org/10.1126/science.adg7879", "key" => "ref2", "publicationYear" => "2023", "title" => "ChatGPT is fun, but not an author") end it "wordpress post" do input = "https://api.rogue-scholar.org/posts/1c578558-1324-4493-b8af-84c49eabc52f" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/kz04m-s8z58") expect(subject.url).to eq("https://wisspub.net/2023/05/23/eu-mitgliedstaaten-betonen-die-rolle-von-wissenschaftsgeleiteten-open-access-modellen-jenseits-von-apcs") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/kz04m-s8z58.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1c578558-1324-4493-b8af-84c49eabc52f", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Pampel", "givenName" => "Heinz", "id" => "https://orcid.org/0000-0003-3334-2771", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "EU-Mitgliedstaaten betonen die Rolle von wissenschaftsgeleiteten Open-Access-Modellen jenseits von APCs" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-05-23", "updated" => "2023-05-23") expect(subject.descriptions.first["description"]).to start_with("Die EU-Wissenschaftsministerien haben sich auf ihrer heutigen Sitzung in Brüssel unter dem Titel “Council conclusions on high-quality, transparent, open, trustworthy and equitable scholarly publishing”") expect(subject.publisher).to eq("name" => "wisspub.net") expect(subject.subjects).to eq([{ "subject" => "Social sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Social sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("de") expect(subject.container).to eq("identifier" => "https://wisspub.net", "identifierType" => "URL", "title" => "wisspub.net", "type" => "Periodical") end it "archived wordpress post" do input = "https://api.rogue-scholar.org/posts/570c8129-e867-49e6-8517-bd783627e76e" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/faeph-x4x84") expect(subject.url).to eq("https://wayback.archive-it.org/22143/20231103191454/https://project-thor.eu/2016/08/10/orcid-integration-in-pangaea") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/faeph-x4x84.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "570c8129-e867-49e6-8517-bd783627e76e", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("contributorRoles" => ["Author"], "type" => "Person", "familyName" => "Stocker", "givenName" => "Markus", "id" => "https://orcid.org/0000-0001-5492-3212") expect(subject.titles).to eq([{ "title" => "ORCID Integration Series: PANGAEA" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2016-08-10", "updated" => "2016-08-10") expect(subject.descriptions.first["description"]).to start_with("This is the first in a series of posts describing how THOR partners") expect(subject.publisher).to eq("name" => "Project THOR") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.funding_references).to eq([{ "awardNumber" => "654039", "funderIdentifier" => "https://doi.org/10.13039/501100007601", "funderIdentifierType" => "Crossref Funder ID", "funderName" => "European Union’s Horizon 2020 research and innovation programme" }]) expect(subject.container).to eq("identifier" => "https://project-thor.eu", "identifierType" => "URL", "title" => "Project THOR", "type" => "Periodical") expect(subject.archive_locations).to eq(["Internet Archive"]) end it "wordpress post with references" do input = "https://api.rogue-scholar.org/posts/4e4bf150-751f-4245-b4ca-fe69e3c3bb24" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/hke8v-d1e66") expect(subject.url).to eq("https://svpow.com/2023/06/09/new-paper-curtice-et-al-2023-on-the-first-haplocanthosaurus-from-dry-mesa") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/hke8v-d1e66.pdf") expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Wedel", "givenName" => "Matt", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "New paper: Curtice et al. (2023) on the first Haplocanthosaurus from Dry Mesa" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-06-09", "updated" => "2023-06-09") expect(subject.descriptions.first["description"]).to start_with("Brian Curtice and Colin Boisvert are presenting our talk on this project") expect(subject.publisher).to eq("name" => "Sauropod Vertebra Picture of the Week") expect(subject.subjects).to eq([{ "subject" => "Earth and related environmental sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Earth and related environmental sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://svpow.com", "identifierType" => "URL", "title" => "Sauropod Vertebra Picture of the Week", "type" => "Periodical") expect(subject.references.length).to eq(3) expect(subject.references.first).to eq("key" => "ref1", "url" => "https://sauroposeidon.files.wordpress.com/2010/04/foster-and-wedel-2014-haplocanthosaurus-from-snowmass-colorado.pdf") end it "wordpress post with tracking code on url" do input = "https://api.rogue-scholar.org/posts/5d95d90d-ff59-4c8b-b7f8-44e6b45fd593" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/tpa8t-j6292") expect(subject.url).to eq("https://www.samuelmoore.org/2023/04/20/how-to-cultivate-good-closures-scaling-small-and-the-limits-of-openness") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/tpa8t-j6292.pdf") expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Moore", "givenName" => "Samuel", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "How to cultivate good closures: ‘scaling small’ and the limits of openness" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-04-20", "updated" => "2023-06-19") expect(subject.descriptions.first["description"]).to start_with("Text of a talk given to the COPIM end-of-project conference:") expect(subject.publisher).to eq("name" => "Samuel Moore") expect(subject.subjects).to eq([{ "subject" => "Social sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Social sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://www.samuelmoore.org", "identifierType" => "URL", "title" => "Samuel Moore", "type" => "Periodical") expect(subject.references).to be_nil end it "ghost post with institutional author" do input = "https://api.rogue-scholar.org/posts/2b3cdd27-5123-4167-9482-3c074392e2d2" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/tfahc-rp566") expect(subject.url).to eq("https://blog.oa.works/nature-features-oa-reports-work-putting-oa-policy-into-practice") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/tfahc-rp566.pdf") expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("name" => "OA.Works", "type" => "Organization", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Nature features OA.Report's work putting OA policy into practice!" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-01-24", "updated" => "2023-10-01") expect(subject.descriptions.first["description"]).to start_with("After a couple of years of working to support institutions implementing their OA policies") expect(subject.publisher).to eq("name" => "OA.Works Blog") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://blog.oa.works", "identifierType" => "URL", "title" => "OA.Works Blog", "type" => "Periodical") end it "upstream post with references" do input = "https://api.rogue-scholar.org/posts/954f8138-0ecd-4090-87c5-cef1297f1470" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.54900/zwm7q-vet94") expect(subject.url).to eq("https://upstream.force11.org/the-research-software-alliance-resa") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "954f8138-0ecd-4090-87c5-cef1297f1470", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(2) expect(subject.contributors.first).to eq("familyName" => "Katz", "givenName" => "Daniel S.", "id" => "https://orcid.org/0000-0001-5934-7525", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "The Research Software Alliance (ReSA)" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-04-18", "updated" => "2023-04-18") expect(subject.descriptions.first["description"]).to start_with("Research software is a key part of most research today.") expect(subject.publisher).to eq("name" => "Upstream") expect(subject.subjects).to eq([{ "subject" => "Humanities" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Humanities", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://upstream.force11.org", "identifierType" => "URL", "title" => "Upstream", "type" => "Periodical") expect(subject.references.length).to eq(11) expect(subject.references.first).to eq("key" => "ref1", "url" => "https://software.ac.uk/blog/2014-12-04-its-impossible-conduct-research-without-software-say-7-out-10-uk-researchers") end it "jekyll post" do input = "https://api.rogue-scholar.org/posts/efdacb04-bcec-49d7-b689-ab3eab0634bf" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/zwdq7-waa43") expect(subject.url).to eq("https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/zwdq7-waa43.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "efdacb04-bcec-49d7-b689-ab3eab0634bf", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(0) # expect(subject.contributors.first).to eq("familyName" => "Karcher", "givenName" => "Sebastian", # "id" => "https://orcid.org/0000-0001-8249-7388", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Seeking Public Comment on CSL 1.0.2 Release" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2020-07-11", "updated" => "2020-07-11") expect(subject.descriptions.first["description"]).to start_with("Over the past few months, Citation Style Language developers have worked to address a backlog of feature requests. This work will be reflected in two upcoming releases.") expect(subject.publisher).to eq("name" => "Citation Style Language") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://citationstyles.org", "identifierType" => "URL", "title" => "Citation Style Language", "type" => "Periodical") end it "ghost post with organizational author" do input = "https://api.rogue-scholar.org/posts/5561f8e4-2ff1-4186-a8d5-8dacb3afe414" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/2shz7-ehx26") expect(subject.url).to eq("https://libscie.org/ku-leuven-supports-researchequals") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/2shz7-ehx26.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5561f8e4-2ff1-4186-a8d5-8dacb3afe414", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("id" => "https://ror.org/0342dzm54", "name" => "Liberate Science", "type" => "Organization", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "KU Leuven supports ResearchEquals" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-05-09", "updated" => "2023-05-09") expect(subject.descriptions.first["description"]).to start_with("KU Leuven is now an inaugural supporting member of ResearchEquals") expect(subject.publisher).to eq("name" => "Liberate Science") expect(subject.subjects).to eq([{ "subject" => "Social sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Social sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://libscie.org", "identifierType" => "URL", "title" => "Liberate Science", "type" => "Periodical") expect(subject.references).to be_nil end it "jekyll post with anonymous author" do input = "https://api.rogue-scholar.org/posts/a163e340-5b3c-4736-9ab0-8c54fdff6a3c" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/33es7-fqz31") expect(subject.url).to eq("https://lab.sub.uni-goettingen.de/welcome.html") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/33es7-fqz31.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "a163e340-5b3c-4736-9ab0-8c54fdff6a3c", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Göbel", "givenName" => "Mathias", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Welcome to the Lab" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2017-01-01", "updated" => "2017-01-01") expect(subject.descriptions.first["description"]).to start_with("Welcome everyone!") expect(subject.publisher).to eq("name" => "lab.sub - Articles") expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Computer and information sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://lab.sub.uni-goettingen.de", "identifierType" => "URL", "title" => "lab.sub - Articles", "type" => "Periodical") expect(subject.references).to be_nil end it "blog post with non-url id" do input = "https://api.rogue-scholar.org/posts/1898d2d7-4d87-4487-96c4-3073cf99e9a5" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/63055-a8604") expect(subject.url).to eq("http://sfmatheson.blogspot.com/2023/01/quintessence-of-dust-2023-restart-why.html") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/63055-a8604.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1898d2d7-4d87-4487-96c4-3073cf99e9a5", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Matheson", "givenName" => "Stephen", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Quintessence of Dust 2023 restart: the why" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-01-09", "updated" => "2023-04-02") expect(subject.descriptions.first["description"]).to start_with("It's early January 2023, a little before sunset in Tucson.") expect(subject.publisher).to eq("name" => "Quintessence of Dust") expect(subject.subjects).to eq([{ "subject" => "Social sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Social sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "http://sfmatheson.blogspot.com", "identifierType" => "URL", "title" => "Quintessence of Dust", "type" => "Periodical") expect(subject.references).to be_nil end it "wordpress post with many references" do input = "https://api.rogue-scholar.org/posts/f3dc29da-0481-4f3b-8110-4c07260fca67" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/9gfjs-pey70") expect(subject.url).to eq("https://flavoursofopen.science/grundlagen-fur-die-entwicklung-einer-open-scholarship-strategie") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/9gfjs-pey70.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "f3dc29da-0481-4f3b-8110-4c07260fca67", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Steiner", "givenName" => "Tobias", "id" => "https://orcid.org/0000-0002-3158-3136", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "Grundlagen für die Entwicklung einer Open Scholarship-Strategie" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2019-01-30", "updated" => "2023-07-08") expect(subject.descriptions.first["description"]).to start_with("Versionshistorie Version 1.0 — 16. Oktober 2017") expect(subject.publisher).to eq("name" => "Flavours of Open") expect(subject.subjects).to eq([{ "subject" => "Humanities" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Humanities", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://flavoursofopen.science", "identifierType" => "URL", "title" => "Flavours of Open", "type" => "Periodical") expect(subject.references.length).to eq(56) expect(subject.references.first).to eq("key" => "ref1", "url" => "http://oerstrategy.org/home/read-the-doc") end it "substack post with broken reference" do input = "https://api.rogue-scholar.org/posts/2b105b29-acbc-4eae-9ff1-368803f36a4d" subject = described_class.new(input: input) # expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.59350/bbcsr-r4b59") expect(subject.url).to eq("https://markrubin.substack.com/p/the-preregistration-prescriptiveness") expect(subject.files.length).to eq(3) expect(subject.files[1]).to eq("mimeType" => "application/pdf", "url" => "https://api.rogue-scholar.org/posts/10.59350/bbcsr-r4b59.pdf") expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "2b105b29-acbc-4eae-9ff1-368803f36a4d", "alternateIdentifierType" => "UUID" }]) expect(subject.type).to eq("Article") expect(subject.contributors.length).to eq(1) expect(subject.contributors.first).to eq("familyName" => "Rubin", "givenName" => "Mark", "type" => "Person", "contributorRoles" => ["Author"]) expect(subject.titles).to eq([{ "title" => "The Preregistration Prescriptiveness Trade-Off and Unknown Unknowns in Science" }]) expect(subject.license).to eq("id" => "CC-BY-4.0", "url" => "https://creativecommons.org/licenses/by/4.0/legalcode") expect(subject.date).to eq("published" => "2023-06-06", "updated" => "2023-06-06") expect(subject.descriptions.first["description"]).to start_with("Abstract I discuss Van Drimmelen’s (2023) Metascience2023") expect(subject.publisher).to eq("name" => "Critical Metascience") expect(subject.subjects).to eq([{ "subject" => "Social sciences" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Social sciences", "subjectScheme" => "Fields of Science and Technology (FOS)" }]) expect(subject.language).to eq("en") expect(subject.container).to eq("identifier" => "https://markrubin.substack.com", "identifierType" => "URL", "title" => "Critical Metascience", "type" => "Periodical") expect(subject.references.length).to eq(16) expect(subject.references.first).to eq("doi" => "https://doi.org/10.3386/w27250", "key" => "ref1", "publicationYear" => "2020", "title" => "Research Registries: Facts, Myths, and Possible Improvements") end end context "get json_feed", vcr: true do it "unregistered posts" do response = subject.get_json_feed_unregistered expect(response).to eq("17d0e31c-bc41-42b8-b873-d3dacee61f5c") end it "updated posts" do response = subject.get_json_feed_updated expect(response).to eq("bb8932b9-2820-42f8-b22f-f191aaf00e32") end it "by blog_id" do response = subject.get_json_feed_by_blog("iphylo") expect(response).to eq("3749f8c4-1ba7-4e51-9dd6-9d9551ad451a") end end context "get doi_prefix for blog", vcr: true do it "by blog_id" do response = subject.get_doi_prefix_by_blog_slug("iphylo") expect(response).to eq("10.59350") end it "by blog post id" do response = subject.get_doi_prefix_by_json_feed_item_id("1898d2d7-4d87-4487-96c4-3073cf99e9a5") expect(response).to eq("10.59350") end it "by blog post id specific prefix" do response = subject.get_doi_prefix_by_json_feed_item_id("2b22bbba-bcba-4072-94cc-3f88442fff88") expect(response).to eq("10.54900") end end context "get blog_id for json_feed item id", vcr: true do it "by blog post id" do response = subject.get_json_feed_blog_slug("1898d2d7-4d87-4487-96c4-3073cf99e9a5") expect(response).to eq("sfmatheson") end it "not found" do response = subject.get_doi_prefix_by_json_feed_item_id("2b22bbba-bcba-4072-94cc-3f88442fff55") expect(response).to eq(nil) end end end