# frozen_string_literal: true require 'spec_helper' describe Briard::Metadata, vcr: true do let(:fixture_path) { 'spec/fixtures/' } context 'get schema_org raw' do it 'BlogPosting' do input = "#{fixture_path}schema_org.json" subject = described_class.new(input: input) expect(subject.raw).to eq(File.read(input).strip) end end context 'get schema_org metadata' do it 'BlogPosting' do input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.53731/r79vxn1-97aq74v-ag58n') expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article') expect(subject.creators).to eq([{ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }]) expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }]) expect(subject.descriptions.first['description']).to start_with('Eating your own dog food') expect(subject.subjects).to eq([{ 'subject' => 'feature' }]) expect(subject.dates).to eq([{ 'date' => '2016-12-20T00:00:00Z', 'dateType' => 'Issued' }, { 'date' => '2022-08-15T09:06:22Z', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2016') expect(subject.related_identifiers.length).to eq(0) expect(subject.publisher).to eq('Front Matter') end it 'BlogPosting with new DOI' do input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food' subject = described_class.new(input: input, doi: '10.5438/0000-00ss') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.5438/0000-00ss') expect(subject.doi).to eq('10.5438/0000-00ss') expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article') end it 'BlogPosting with type as array' do input = "#{fixture_path}schema_org_type_as_array.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg') expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting') expect(subject.creators).to eq([{ 'affiliation' => [{ 'name' => 'DataCite' }], 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }]) expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }]) expect(subject.descriptions.first['description']).to start_with('Eating your own dog food') expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' }, { 'subject' => 'metadata' }, { 'subject' => 'featured' }]) expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' }, { 'date' => '2016-12-20', 'dateType' => 'Created' }, { 'date' => '2016-12-20', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2016') expect(subject.related_identifiers.length).to eq(3) expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0', 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text') expect(subject.publisher).to eq('DataCite') end context 'get schema_org metadata front matter' do it 'BlogPosting' do input = 'https://blog.front-matter.io/posts/step-forward-for-software-citation' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.53731/r9531p1-97aq74v-ag78v') expect(subject.url).to eq('https://blog.front-matter.io/posts/step-forward-for-software-citation') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article') expect(subject.creators).to eq([{ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }]) expect(subject.titles).to eq([{ 'title' => 'A step forward for software citation: GitHub's enhanced software citation support' }]) expect(subject.descriptions.first['description']).to start_with('On August 19, GitHub announced software citation') expect(subject.subjects).to eq([{ 'subject' => 'news' }]) expect(subject.dates).to eq([{ 'date' => '2021-08-24T16:57:24Z', 'dateType' => 'Issued' }, { 'date' => '2022-08-15T19:05:14Z', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2021') expect(subject.related_identifiers.length).to eq(0) expect(subject.container).to eq('identifier' => '2749-9952', 'identifierType' => 'ISSN', 'title' => 'Front Matter', 'type' => 'Blog') expect(subject.publisher).to eq('Front Matter') end end it 'zenodo' do input = 'https://www.zenodo.org/record/1196821' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.language).to eq('eng') expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1196821') expect(subject.doi).to eq('10.5281/zenodo.1196821') expect(subject.url).to eq('https://zenodo.org/record/1196821') expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.titles).to eq([{ 'title' => 'PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US' }]) expect(subject.creators.size).to eq(6) expect(subject.creators.first).to eq('name' => 'Staib, Matthias', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0001-9688-838X', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal', 'givenName' => 'Matthias', 'familyName' => 'Staib', 'affiliation' => [{ 'name' => 'University of Zurich, Zurich, Switzerland' }]) expect(subject.publisher).to eq('Zenodo') expect(subject.publication_year).to eq('2018') expect(subject.subjects).to eq([{ 'subject' => 'pupil size response' }, { 'subject' => 'skin conductance response' }, { 'subject' => 'electrocardiogram' }, { 'subject' => 'electromyogram' }, { 'subject' => 'electrodermal activity' }, { 'subject' => 'galvanic skin response' }, { 'subject' => 'psr' }, { 'subject' => 'scr' }, { 'subject' => 'ecg' }, { 'subject' => 'emg' }, { 'subject' => 'eda' }, { 'subject' => 'gsr' }]) end it 'pangaea' do input = 'https://doi.pangaea.de/10.1594/PANGAEA.836178' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.1594/pangaea.836178') expect(subject.doi).to eq('10.1594/pangaea.836178') expect(subject.url).to eq('https://doi.pangaea.de/10.1594/PANGAEA.836178') expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.titles).to eq([{ 'title' => 'Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland' }]) expect(subject.creators.size).to eq(8) expect(subject.creators.first).to eq('nameType' => 'Personal', 'name' => 'Johansson, Emma', 'givenName' => 'Emma', 'familyName' => 'Johansson') expect(subject.publisher).to eq('PANGAEA') expect(subject.publication_year).to eq('2014') end # TODO: check redirections # it "ornl" do # input = "https://doi.org/10.3334/ornldaac/1339" # subject = Briard::Metadata.new(input: input, from: "schema_org") # expect(subject.valid?).to be true # expect(subject.id).to eq("https://doi.org/10.3334/ornldaac/1339") # expect(subject.doi).to eq("10.3334/ornldaac/1339") # expect(subject.url).to eq("https://doi.org/10.3334/ornldaac/1339") # expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"article-journal", "ris"=>"GEN", "schemaOrg"=>"DataSet") # expect(subject.titles).to eq([{"title"=>"Soil Moisture Profiles and Temperature Data from SoilSCAPE Sites, USA"}]) # expect(subject.creators.size).to eq(12) # expect(subject.creators.first).to eq("familyName"=>"MOGHADDAM", "givenName"=>"M.", "name"=>"MOGHADDAM, M.", "nameType"=>"Personal", "nameIdentifiers"=>[], "affiliation" => []) # end it 'harvard dataverse' do input = 'https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.7910/dvn/nj7xso') expect(subject.doi).to eq('10.7910/dvn/nj7xso') expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.titles).to eq([{ 'title' => 'Summary data ankylosing spondylitis GWAS' }]) expect(subject.container).to eq('identifier' => 'https://dataverse.harvard.edu', 'identifierType' => 'URL', 'title' => 'Harvard Dataverse', 'type' => 'DataRepository') expect(subject.creators).to eq([{ 'name' => 'International Genetics of Ankylosing Spondylitis Consortium (IGAS)' }]) expect(subject.subjects).to eq([{ 'subject' => 'medicine, health and life sciences' }, { 'subject' => 'genome-wide association studies' }, { 'subject' => 'ankylosing spondylitis' }]) end it 'upstream blog' do input = 'https://upstream.force11.org/elife-reviewed-preprints-interview-with-fiona-hutton' subject = described_class.new(input: input, from: 'schema_org') expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.54900/8d7emer-rm2pg72') expect(subject.doi).to eq('10.54900/8d7emer-rm2pg72') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article') expect(subject.titles).to eq([{ 'title' => 'eLife Reviewed Preprints: Interview with Fiona Hutton' }]) expect(subject.container).to eq('identifier' => 'https://upstream.force11.org/', 'identifierType' => 'URL', 'title' => 'Upstream', 'type' => 'Blog') expect(subject.creators.size).to eq(2) expect(subject.creators.first).to eq("familyName"=>"Hutton", "givenName" => "Fiona", "name" => "Hutton, Fiona", "nameType" => "Personal") expect(subject.subjects).to eq([{ 'subject' => 'interviews' }]) expect(subject.publisher).to eq('Upstream') expect(subject.dates).to eq([{ 'date' => '2022-11-15T10:29:38Z', 'dateType' => 'Issued' }, { 'date' => '2023-01-11T22:58:48Z', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2022') expect(subject.rights_list).to eq([{"rights"=>"Creative Commons Attribution 4.0 International", "rightsUri"=>"https://creativecommons.org/licenses/by/4.0/legalcode", "rightsIdentifier"=>"cc-by-4.0", "rightsIdentifierScheme"=>"SPDX", "schemeUri"=>"https://spdx.org/licenses/"}]) end # TODO: check 403 status in DOI resolver # it "harvard dataverse via identifiers.org" do # input = "https://identifiers.org/doi/10.7910/DVN/NJ7XSO" # subject = Briard::Metadata.new(input: input, from: "schema_org") # expect(subject.valid?).to be true # expect(subject.id).to eq("https://doi.org/10.7910/dvn/nj7xso") # expect(subject.doi).to eq("10.7910/dvn/nj7xso") # expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset") # expect(subject.titles).to eq([{"title"=>"Summary data ankylosing spondylitis GWAS"}]) # expect(subject.container).to eq("identifier"=>"https://dataverse.harvard.edu", "identifierType"=>"URL", "title"=>"Harvard Dataverse", "type"=>"DataRepository") # expect(subject.creators).to eq([{"name" => "International Genetics Of Ankylosing Spondylitis Consortium (IGAS)", "nameIdentifiers"=>[], "affiliation" => []}]) # end end context 'get schema_org metadata as string' do it 'BlogPosting' do input = "#{fixture_path}schema_org.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.language).to eq('en') expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg') expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food') expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog', 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting') expect(subject.creators).to eq([{ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', + 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }]) expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }]) expect(subject.descriptions.first['description']).to start_with('Eating your own dog food') expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' }, { 'subject' => 'metadata' }, { 'subject' => 'featured' }]) expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' }, { 'date' => '2016-12-20', 'dateType' => 'Created' }, { 'date' => '2016-12-20', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2016') expect(subject.related_identifiers.length).to eq(3) expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0', 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text') expect(subject.publisher).to eq('DataCite') end it 'GTEx dataset' do input = "#{fixture_path}schema_org_gtex.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.25491/d50j-3083') expect(subject.identifiers).to eq([{ 'identifier' => '687610993', 'identifierType' => 'md5' }]) expect(subject.url).to eq('https://ors.datacite.org/doi:/10.25491/d50j-3083') expect(subject.content_url).to eq(['https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz']) expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceType' => 'Gene expression matrices', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.creators).to eq([{ 'name' => 'The GTEx Consortium', 'nameType' => 'Organizational'}]) expect(subject.titles).to eq([{ 'title' => 'Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery' }]) expect(subject.version_info).to eq('v7') expect(subject.subjects).to eq([{ 'subject' => 'gtex' }, { 'subject' => 'annotation' }, { 'subject' => 'phenotype' }, { 'subject' => 'gene regulation' }, { 'subject' => 'transcriptomics' }]) expect(subject.dates).to eq([{ 'date' => '2017', 'dateType' => 'Issued' }]) expect(subject.publication_year).to eq('2017') expect(subject.container).to eq('title' => 'GTEx', 'type' => 'DataRepository') expect(subject.publisher).to eq('GTEx') expect(subject.funding_references.length).to eq(7) expect(subject.funding_references.first).to eq( 'funderIdentifier' => 'https://doi.org/10.13039/100000052', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'Common Fund of the Office of the Director of the NIH' ) end it 'TOPMed dataset' do input = "#{fixture_path}schema_org_topmed.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.identifiers).to eq([{ 'identifier' => '3b33f6b9338fccab0901b7d317577ea3', 'identifierType' => 'md5' }, { 'identifier' => 'ark:/99999/fk41CrU4eszeLUDe', 'identifierType' => 'minid' }, { 'identifier' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7', 'identifierType' => 'dataguid' }]) expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/8na3-9s47') expect(subject.content_url).to eq([ 's3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram', 'gs://topmed-irc-share/public/NWD165827.recab.cram' ]) expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.creators).to eq([{ 'name' => 'TOPMed IRC', 'nameType' => 'Organizational' }]) expect(subject.titles).to eq([{ 'title' => 'NWD165827.recab.cram' }]) expect(subject.subjects).to eq([{ 'subject' => 'topmed' }, { 'subject' => 'whole genome sequencing' }]) expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }]) expect(subject.publication_year).to eq('2017') expect(subject.publisher).to eq('TOPMed') expect(subject.related_identifiers).to eq([{ 'relatedIdentifier' => '10.23725/2g4s-qv04', 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Dataset' }]) expect(subject.funding_references).to eq([{ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)' }]) end it 'tdl_iodp dataset' do input = "#{fixture_path}schema_org_tdl_iodp_invalid_authors.json" subject = described_class.new(input: input) expect(subject.valid?).to be false end it 'geolocation' do input = "#{fixture_path}schema_org_geolocation.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.identifiers).to eq([{ 'identifier' => 'https://doi.org/10.6071/z7wc73', 'identifierType' => 'DOI' }]) expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceType' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.creators.length).to eq(6) expect(subject.creators.first).to eq('familyName' => 'Bales', 'givenName' => 'Roger', 'name' => 'Bales, Roger', 'nameType' => 'Personal') expect(subject.titles).to eq([{ 'title' => 'Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature' }]) expect(subject.subjects).to eq([{ 'subject' => 'earth sciences' }, { 'subject' => 'soil moisture' }, { 'subject' => 'soil temperature' }, { 'subject' => 'snow depth' }, { 'subject' => 'air temperature' }, { 'subject' => 'water balance' }, { 'subject' => 'nevada' }, { 'subject' => 'sierra (mountain range)' }]) expect(subject.dates).to eq([{ 'date' => '2013', 'dateType' => 'Issued' }, { 'date' => '2014-10-17', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2013') expect(subject.publisher).to eq('UC Merced') expect(subject.funding_references).to eq([{ 'funderName' => 'National Science Foundation, Division of Earth Sciences, Critical Zone Observatories' }]) expect(subject.geo_locations).to eq([{ 'geoLocationPlace' => 'Providence Creek (Lower, Upper and P301)', 'geoLocationPoint' => { 'pointLatitude' => '37.047756', 'pointLongitude' => '-119.221094' } }]) end it 'geolocation geoshape' do input = "#{fixture_path}schema_org_geoshape.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.language).to eq('en') expect(subject.id).to eq('https://doi.org/10.1594/pangaea.842237') expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.creators.length).to eq(2) expect(subject.creators.first).to eq('name' => 'Tara Oceans Consortium, Coordinators', 'nameType' => 'Organizational') expect(subject.titles).to eq([{ 'title' => 'Registry of all stations from the Tara Oceans Expedition (2009-2013)' }]) expect(subject.dates).to eq([{ 'date' => '2015-02-03', 'dateType' => 'Issued' }]) expect(subject.publication_year).to eq('2015') expect(subject.publisher).to eq('PANGAEA') expect(subject.geo_locations).to eq([{ 'geoLocationBox' => { 'eastBoundLongitude' => '174.9006', 'northBoundLatitude' => '79.6753', 'southBoundLatitude' => '-64.3088', 'westBoundLongitude' => '-168.5182' } }]) end it 'schema_org list' do data = File.read("#{fixture_path}schema_org_list.json").strip input = JSON.parse(data).first.to_json subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.23725/7jg3-v803') expect(subject.identifiers).to eq([{ 'identifier' => 'ark:/99999/fk4E1n6n1YHKxPk', 'identifierType' => 'minid' }, { 'identifier' => 'dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d', 'identifierType' => 'dataguid' }, { 'identifier' => 'f9e72bdf25bf4b4f0e581d9218fec2eb', 'identifierType' => 'md5' }]) expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/7jg3-v803') expect(subject.content_url).to eq([ 's3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram', 'gs://topmed-irc-share/public/NWD100953.recab.cram', 'dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z' ]) expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') expect(subject.creators).to eq([{ 'name' => 'TOPMed', 'nameType' => 'Organizational' }]) expect(subject.titles).to eq([{ 'title' => 'NWD100953.recab.cram' }]) expect(subject.subjects).to eq([{ 'subject' => 'topmed' }, { 'subject' => 'whole genome sequencing' }]) expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }]) expect(subject.publication_year).to eq('2017') expect(subject.publisher).to eq('TOPMed') expect(subject.funding_references).to eq([{ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)' }]) end it 'aida dataset' do input = "#{fixture_path}aida.json" subject = described_class.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq('https://doi.org/10.23698/aida/drov') expect(subject.url).to eq('https://doi.aida.medtech4health.se/10.23698/aida/drov') expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset') # expect(subject.creators).to eq([{"familyName"=>"Lindman", "givenName"=>"Karin", "name"=>"Lindman, Karin", "nameIdentifiers"=>[{"nameIdentifier"=> "https://orcid.org/0000-0003-1298-517X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}]) expect(subject.titles).to eq([{ 'title' => 'Ovary data from the Visual Sweden project DROID' }]) expect(subject.version_info).to eq('1.0') expect(subject.subjects).to eq([{ 'subject' => 'pathology' }, { 'subject' => 'whole slide imaging' }, { 'subject' => 'annotated' }]) expect(subject.dates).to eq([{ 'date' => '2019-01-09', 'dateType' => 'Issued' }, { 'date' => '2019-01-09', 'dateType' => 'Created' }, { 'date' => '2019-01-09', 'dateType' => 'Updated' }]) expect(subject.publication_year).to eq('2019') expect(subject.id).to eq('https://doi.org/10.23698/aida/drov') expect(subject.publisher).to eq('AIDA') expect(subject.rights_list).to eq([{ 'rights' => 'Restricted access', 'rightsUri' => 'https://datasets.aida.medtech4health.se/10.23698/aida/drov#license' }]) expect(subject.id).to eq('https://doi.org/10.23698/aida/drov') end it 'from attributes' do subject = described_class.new(input: nil, from: 'schema_org', doi: '10.5281/zenodo.1239', creators: [{ 'type' => 'Person', 'name' => 'Jahn, Najko', 'givenName' => 'Najko', 'familyName' => 'Jahn' }], titles: [{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }], descriptions: [{ 'description' => 'The dataset contains a sample of metadata describing papers' }], publisher: 'Zenodo', publication_year: '2013', dates: [{ 'date' => '2013-04-03', 'dateType' => 'Issued' }], funding_references: [{ 'awardNumber' => '246686', 'awardTitle' => 'Open Access Infrastructure for Research in Europe', 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/', 'funderIdentifier' => 'https://doi.org/10.13039/501100000780', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'European Commission' }], types: { 'resourceTypeGeneral' => 'Dataset', 'schemaOrg' => 'Dataset' }) expect(subject.valid?).to be true expect(subject.doi).to eq('10.5281/zenodo.1239') expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1239') expect(subject.types['schemaOrg']).to eq('Dataset') expect(subject.types['resourceTypeGeneral']).to eq('Dataset') expect(subject.creators).to eq([{ 'familyName' => 'Jahn', 'givenName' => 'Najko', 'name' => 'Jahn, Najko', 'type' => 'Person' }]) expect(subject.titles).to eq([{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }]) expect(subject.descriptions.first['description']).to start_with('The dataset contains a sample of metadata describing papers') expect(subject.dates).to eq([{ 'date' => '2013-04-03', 'dateType' => 'Issued' }]) expect(subject.publication_year).to eq('2013') expect(subject.publisher).to eq('Zenodo') expect(subject.funding_references).to eq([{ 'awardNumber' => '246686', 'awardTitle' => 'Open Access Infrastructure for Research in Europe', 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/', 'funderIdentifier' => 'https://doi.org/10.13039/501100000780', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'European Commission' }]) end end end