# frozen_string_literal: true

require 'spec_helper'

describe Bolognese::Metadata, vcr: true do
  context "write metadata as schema_org" do
    # Crossref input: verifies the DOI, the containing periodical, the
    # citation list and the funders in the schema.org output.
    it "journal article" do
      metadata = Bolognese::Metadata.new(input: "10.7554/eLife.01567", from: "crossref")
      schema = JSON.parse(metadata.schema_org)

      periodical = { "@type" => "Periodical", "name" => "eLife", "issn" => "2050-084X" }
      first_citation = {
        "@id" => "https://doi.org/10.1038/nature02100",
        "@type" => "CreativeWork",
        "name" => "APL regulates vascular tissue identity in Arabidopsis"
      }
      funders = [
        { "name" => "SystemsX", "@type" => "Organization" },
        { "name" => "EMBO",
          "@type" => "Organization",
          "@id" => "https://doi.org/10.13039/501100003043" },
        { "name" => "Swiss National Science Foundation",
          "@type" => "Organization",
          "@id" => "https://doi.org/10.13039/501100001711" },
        { "name" => "University of Lausanne",
          "@type" => "Organization",
          "@id" => "https://doi.org/10.13039/501100006390" }
      ]

      expect(schema["@id"]).to eq("https://doi.org/10.7554/elife.01567")
      expect(schema["isPartOf"]).to eq(periodical)
      expect(schema["citation"].size).to eq(26)
      expect(schema["citation"].first).to eq(first_citation)
      expect(schema["funding"]).to eq(funders)
    end

    # Codemeta input given as a GitHub repository URL: verifies identifier,
    # type, name and the single author of the schema.org output.
    it "maremma schema.org JSON" do
      metadata = Bolognese::Metadata.new(input: "https://github.com/datacite/maremma", from: "codemeta")
      schema = JSON.parse(metadata.schema_org)

      author = {
        "name" => "Martin Fenner",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "@type" => "Person",
        "@id" => "http://orcid.org/0000-0003-0077-4738"
      }

      expect(schema["@id"]).to eq("https://doi.org/10.5438/qeg0-3gm3")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("Maremma: a Ruby library for simplified network calls")
      expect(schema["author"]).to eq(author)
    end

    # DataCite input with an upper-case DOI: the emitted "@id" is expected
    # in lower case, alongside the title.
    it "Schema.org JSON" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.5281/ZENODO.48440", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5281/zenodo.48440")
      expect(schema["name"]).to eq("Analysis Tools For Crossover Experiment Of Ui Using Choice Architecture")
    end

    # DataCite input: verifies the "@reverse" section, which is expected to
    # carry both a "citation" and an "isBasedOn" entry for the same DOI.
    it "Schema.org JSON isReferencedBy" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.5061/DRYAD.8515", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      reverse = {
        "citation" => { "@id" => "https://doi.org/10.1371/journal.ppat.1000446" },
        "isBasedOn" => { "@id" => "https://doi.org/10.1371/journal.ppat.1000446" }
      }

      expect(schema["@id"]).to eq("https://doi.org/10.5061/dryad.8515")
      expect(schema["@reverse"]).to eq(reverse)
    end

    # DataCite input: verifies that "@reverse" holds a single "isBasedOn"
    # entry.
    it "Schema.org JSON IsSupplementTo" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.5517/CC8H01S", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      reverse = { "isBasedOn" => { "@id" => "https://doi.org/10.1107/s1600536804021154" } }

      expect(schema["@id"]).to eq("https://doi.org/10.5517/cc8h01s")
      expect(schema["@reverse"]).to eq(reverse)
    end

    # Codemeta fixture file: verifies identifier, type, name, version and a
    # mixed author list (two persons followed by an organization).
    it "rdataone" do
      metadata = Bolognese::Metadata.new(input: fixture_path + 'codemeta.json', from: "codemeta")
      schema = JSON.parse(metadata.schema_org)

      authors = [
        { "name" => "Matt Jones",
          "givenName" => "Matt",
          "familyName" => "Jones",
          "@type" => "Person",
          "@id" => "http://orcid.org/0000-0003-0077-4738" },
        { "name" => "Peter Slaughter",
          "givenName" => "Peter",
          "familyName" => "Slaughter",
          "@type" => "Person",
          "@id" => "http://orcid.org/0000-0002-2192-403X" },
        { "name" => "University Of California, Santa Barbara", "@type" => "Organization" }
      ]

      expect(schema["@id"]).to eq("https://doi.org/10.5063/f1m61h5x")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("R Interface to the DataONE REST API")
      expect(schema["author"]).to eq(authors)
      expect(schema["version"]).to eq("2.0.0")
    end

    # DataCite input: verifies the "hasPart" list and a single "funding"
    # award with its funder organization.
    it "Funding" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.5438/6423", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      first_part = { "@type" => "CreativeWork", "@id" => "https://doi.org/10.5281/zenodo.30799" }
      award = {
        "@type" => "Award",
        "funder" => {
          "@type" => "Organization",
          "@id" => "https://doi.org/10.13039/501100000780",
          "name" => "European Commission"
        },
        "identifier" => "654039",
        "name" => "THOR – Technical and Human Infrastructure for Open Research",
        "url" => "http://cordis.europa.eu/project/rcn/194927_en.html"
      }

      expect(schema["@id"]).to eq("https://doi.org/10.5438/6423")
      expect(schema["hasPart"].size).to eq(25)
      expect(schema["hasPart"].first).to eq(first_part)
      expect(schema["funding"]).to eq(award)
    end

    # DataCite input: verifies a "funding" award whose "url" is an
    # info:eu-repo grant agreement identifier.
    it "Funding OpenAIRE" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.5281/ZENODO.1239", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      award = {
        "@type" => "Award",
        "funder" => {
          "@type" => "Organization",
          "@id" => "https://doi.org/10.13039/501100000780",
          "name" => "European Commission"
        },
        "identifier" => "246686",
        "name" => "Open Access Infrastructure for Research in Europe",
        "url" => "info:eu-repo/grantAgreement/EC/FP7/246686/"
      }

      expect(schema["@id"]).to eq("https://doi.org/10.5281/zenodo.1239")
      expect(schema["funding"]).to eq(award)
    end

    # DataCite input: verifies identifier, title and the "keywords" string.
    it "subject scheme" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.4232/1.2745", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.4232/1.2745")
      expect(schema["name"]).to eq("Flash Eurobarometer 54 (Madrid Summit)")
      expect(schema["keywords"]).to eq("KAT12 International Institutions, Relations, Conditions")
    end

    # DataCite input with many subjects: "keywords" is expected to be one
    # comma-separated string containing all of them.
    it "subject scheme multiple keywords" do
      metadata = Bolognese::Metadata.new(input: "https://doi.org/10.1594/pangaea.721193", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      title = "Seawater carbonate chemistry and processes during experiments with Crassostrea gigas, 2007, supplement to: Kurihara, Haruko; Kato, Shoji; Ishimatsu, Atsushi (2007): Effects of increased seawater pCO2 on early development of the oyster Crassostrea gigas. Aquatic Biology, 1(1), 91-98"
      keywords = "GetInfo, Animalia, Bottles or small containers/Aquaria ( 20 L), Calcification/Dissolution, Coast and continental shelf, Crassostrea gigas, Development, Growth/Morphology, Laboratory experiment, Mollusca, Pelagos, Single species, Temperate, Zooplankton, Experimental treatment, Carbonate system computation flag, Temperature, water, Salinity, pH, Alkalinity, total, Carbon, inorganic, dissolved, Carbon dioxide, Bicarbonate ion, Carbonate ion, Partial pressure of carbon dioxide (water) at sea surface temperature (wet air), Fugacity of carbon dioxide (water) at sea surface temperature (wet air), Aragonite saturation state, Calcite saturation state, Proportion, Crassostrea gigas, larvae length, Crassostrea gigas, larvae height, Crassostrea gigas, non mineralized, Crassostrea gigas, partially mineralized, Crassostrea gigas, fully mineralized, Calculated using seacarb after Nisumaa et al. (2010), Refractometer (Atago 100-S), pH meter (Mettler Toledo), pH meter (PHM290, Radiometer), Measured, European Project on Ocean Acidification (EPOCA), European network of excellence for Ocean Ecosystems Analysis (EUR-OCEANS), Ocean Acidification International Coordination Centre (OA-ICC)"

      expect(schema["@id"]).to eq("https://doi.org/10.1594/pangaea.721193")
      expect(schema["name"]).to eq(title)
      expect(schema["keywords"]).to eq(keywords)
    end

    # DataCite XML fixture whose creator is an organization. The b_url and
    # content_url options handed to Metadata.new are expected to come back
    # unchanged as "url" and "contentUrl" in the schema.org output.
    it "author is organization" do
      input = fixture_path + 'gtex.xml'
      b_url = "https://ors.datacite.org/doi:/10.25491/9hx8-ke93"
      content_url = "https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz"
      # Named `metadata` rather than `subject` so RSpec's own `subject` helper is not shadowed.
      metadata = Bolognese::Metadata.new(input: input, b_url: b_url, content_url: content_url, from: "datacite")
      json = JSON.parse(metadata.schema_org)

      expect(json["@id"]).to eq("https://doi.org/10.25491/9hx8-ke93")
      expect(json["author"]).to eq("@type"=>"Organization", "name"=>"The GTEx Consortium")
      expect(json["url"]).to eq(b_url)
      expect(json["encodingFormat"]).to eq("application/tar")
      expect(json["contentSize"]).to eq("15.7M")
      expect(json["contentUrl"]).to eq(content_url)
      expect(json["includedInDataCatalog"]).to eq("@id"=>"https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000663", "@type"=>"DataCatalog", "name"=>"GTEx")
      expect(json["@reverse"]).to eq("isBasedOn"=>{"@id"=>"https://doi.org/10.1038/nmeth.4407"})
    end

    # DataCite input: verifies the "isPartOf" periodical along with
    # identifier, type, title and the author list.
    it "series information" do
      metadata = Bolognese::Metadata.new(input: "10.4229/23RDEUPVSEC2008-5CO.8.3", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      periodical = {
        "@type" => "Periodical",
        "name" => "23rd European Photovoltaic Solar Energy Conference and Exhibition, 1-5 September 2008, Valencia, Spain; 3353-3356"
      }
      first_author = { "@type" => "Person", "name" => "P. Llamas", "givenName" => "P.", "familyName" => "Llamas" }

      expect(schema["isPartOf"]).to eq(periodical)
      expect(schema["@id"]).to eq("https://doi.org/10.4229/23rdeupvsec2008-5co.8.3")
      expect(schema["@type"]).to eq("ScholarlyArticle")
      expect(schema["name"]).to eq("Rural Electrification With Hybrid Power Systems Based on Renewables - Technical System Configurations From the Point of View of the European Industry")
      expect(schema["author"].size).to eq(3)
      expect(schema["author"].first).to eq(first_author)
    end

    # DataCite input: verifies the data catalog, the identifier list
    # (doi and md5) and the content URL list.
    it "data catalog" do
      metadata = Bolognese::Metadata.new(input: "10.25491/8KMC-G314", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      identifiers = [
        { "@type" => "PropertyValue", "propertyID" => "doi", "value" => "https://doi.org/10.25491/8kmc-g314" },
        { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "c7c89fe7366d50cd75448aa603c9de58" }
      ]
      content_urls = [
        "https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_covariates.tar.gz"
      ]

      expect(schema["@id"]).to eq("https://doi.org/10.25491/8kmc-g314")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Covariates used in eQTL analysis. Includes genotyping principal components and PEER factors")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "The GTEx Consortium" })
      expect(schema["includedInDataCatalog"]).to eq({ "@type" => "DataCatalog", "name" => "GTEx" })
      expect(schema["identifier"]).to eq(identifiers)
      expect(schema["contentUrl"]).to eq(content_urls)
    end

    # DataCite input: verifies that doi, minid, dataguid and md5 all appear
    # in "identifier", that no data catalog is present, and that both
    # content URLs are included.
    it "alternate identifiers" do
      metadata = Bolognese::Metadata.new(input: "10.23725/8na3-9s47", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      identifiers = [
        { "@type" => "PropertyValue",
          "propertyID" => "doi",
          "value" => "https://doi.org/10.23725/8na3-9s47" },
        { "@type" => "PropertyValue",
          "propertyID" => "minid",
          "value" => "ark:/99999/fk41CrU4eszeLUDe" },
        { "@type" => "PropertyValue",
          "propertyID" => "dataguid",
          "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" },
        { "@type" => "PropertyValue",
          "propertyID" => "md5",
          "value" => "3b33f6b9338fccab0901b7d317577ea3" }
      ]
      s3_url = "s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram"
      gs_url = "gs://topmed-irc-share/public/NWD165827.recab.cram"

      expect(schema["@id"]).to eq("https://doi.org/10.23725/8na3-9s47")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("NWD165827.recab.cram")
      expect(schema["author"]).to eq({ "name" => "TOPMed" })
      expect(schema["includedInDataCatalog"]).to be_nil
      expect(schema["identifier"]).to eq(identifiers)
      expect(schema["contentUrl"]).to include(s3_url, gs_url)
    end

    # schema.org fixture read in and written back out as schema.org:
    # verifies the descriptive fields, identifiers, the funder list and the
    # provider of the output.
    it "from schema_org gtex" do
      metadata = Bolognese::Metadata.new(input: fixture_path + 'schema_org_gtex.json', from: "schema_org")
      schema = JSON.parse(metadata.schema_org)

      identifiers = [
        { "@type" => "PropertyValue", "propertyID" => "doi", "value" => "https://doi.org/10.25491/d50j-3083" },
        { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "687610993" }
      ]
      funders = [
        { "@id" => "https://doi.org/10.13039/100000052",
          "name" => "Common Fund of the Office of the Director of the NIH",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000054",
          "name" => "National Cancer Institute (NCI)",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000051",
          "name" => "National Human Genome Research Institute (NHGRI)",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000050",
          "name" => "National Heart, Lung, and Blood Institute (NHLBI)",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000026",
          "name" => "National Institute on Drug Abuse (NIDA)",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000025",
          "name" => "National Institute of Mental Health (NIMH)",
          "@type" => "Organization" },
        { "@id" => "https://doi.org/10.13039/100000065",
          "name" => "National Institute of Neurological Disorders and Stroke (NINDS)",
          "@type" => "Organization" }
      ]

      expect(schema["@id"]).to eq("https://doi.org/10.25491/d50j-3083")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["identifier"]).to eq(identifiers)
      expect(schema["url"]).to eq("https://ors.datacite.org/doi:/10.25491/d50j-3083")
      expect(schema["additionalType"]).to eq("Gene expression matrices")
      expect(schema["name"]).to eq("Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery")
      expect(schema["version"]).to eq("v7")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "The GTEx Consortium" })
      expect(schema["keywords"]).to eq("gtex, annotation, phenotype, gene regulation, transcriptomics")
      expect(schema["datePublished"]).to eq("2017")
      expect(schema["contentUrl"]).to eq("https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz")
      expect(schema["schemaVersion"]).to eq("http://datacite.org/schema/kernel-4")
      expect(schema["includedInDataCatalog"]).to eq({ "@type" => "DataCatalog", "name" => "GTEx" })
      expect(schema["publisher"]).to eq({ "@type" => "Organization", "name" => "GTEx" })
      expect(schema["funding"]).to eq(funders)
      expect(schema["provider"]).to eq({ "@type" => "Organization", "name" => "DataCite" })
    end

    # schema.org fixture read in and written back out as schema.org:
    # verifies identifiers, descriptive fields, both content URLs, the
    # citation, the single funder and the provider of the output.
    it "from schema_org topmed" do
      metadata = Bolognese::Metadata.new(input: fixture_path + 'schema_org_topmed.json', from: "schema_org")
      schema = JSON.parse(metadata.schema_org)

      identifiers = [
        { "@type" => "PropertyValue",
          "propertyID" => "doi",
          "value" => "https://doi.org/10.23725/8na3-9s47" },
        { "@type" => "PropertyValue",
          "propertyID" => "md5",
          "value" => "3b33f6b9338fccab0901b7d317577ea3" },
        { "@type" => "PropertyValue",
          "propertyID" => "minid",
          "value" => "ark:/99999/fk41CrU4eszeLUDe" },
        { "@type" => "PropertyValue",
          "propertyID" => "dataguid",
          "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
      ]
      content_urls = [
        "s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram",
        "gs://topmed-irc-share/public/NWD165827.recab.cram"
      ]
      citation = { "@id" => "https://doi.org/10.23725/2g4s-qv04", "@type" => "Dataset" }
      funder = {
        "@id" => "https://doi.org/10.13039/100000050",
        "@type" => "Organization",
        "name" => "National Heart, Lung, and Blood Institute (NHLBI)"
      }

      expect(schema["@id"]).to eq("https://doi.org/10.23725/8na3-9s47")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["identifier"]).to eq(identifiers)
      expect(schema["url"]).to eq("https://ors.datacite.org/doi:/10.23725/8na3-9s47")
      expect(schema["additionalType"]).to eq("CRAM file")
      expect(schema["name"]).to eq("NWD165827.recab.cram")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "TOPMed IRC" })
      expect(schema["keywords"]).to eq("topmed, whole genome sequencing")
      expect(schema["datePublished"]).to eq("2017-11-30")
      expect(schema["contentUrl"]).to eq(content_urls)
      expect(schema["schemaVersion"]).to eq("http://datacite.org/schema/kernel-4")
      expect(schema["publisher"]).to eq({ "@type" => "Organization", "name" => "TOPMed" })
      expect(schema["citation"]).to eq(citation)
      expect(schema["funding"]).to eq(funder)
      expect(schema["provider"]).to eq({ "@type" => "Organization", "name" => "DataCite" })
    end
  end
end