# frozen_string_literal: true

require 'spec_helper'

describe Bolognese::Metadata, vcr: true do
  context "write metadata as schema_org" do
    # Crossref input: verifies container/periodical details, the citation list,
    # funders and license of the generated schema.org JSON.
    it "journal article" do
      metadata = described_class.new(input: "10.7554/eLife.01567", from: "crossref")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.7554/elife.01567")
      expect(schema["@type"]).to eq("ScholarlyArticle")
      expect(schema["isPartOf"]).to eq({ "@type" => "Periodical", "issn" => "2050-084X" })
      expect(schema["periodical"]).to eq({
        "@type" => "Journal",
        "firstPage" => "e01567",
        "identifier" => "2050-084X",
        "identifierType" => "ISSN",
        "name" => "eLife",
        "volume" => "3"
      })

      citations = schema["citation"]
      expect(citations.size).to eq(26)
      expect(citations.first).to eq({ "@id" => "https://doi.org/10.1038/nature02100", "@type" => "CreativeWork" })

      expected_funders = [
        { "name" => "SystemsX", "@type" => "Organization" },
        { "name" => "EMBO", "@type" => "Organization", "@id" => "https://doi.org/10.13039/501100003043" },
        { "name" => "Swiss National Science Foundation", "@type" => "Organization", "@id" => "https://doi.org/10.13039/501100001711" },
        { "name" => "University of Lausanne", "@type" => "Organization", "@id" => "https://doi.org/10.13039/501100006390" }
      ]
      expect(schema["funder"]).to eq(expected_funders)
      expect(schema["license"]).to eq("https://creativecommons.org/licenses/by/3.0/legalcode")
    end

    # Codemeta input addressed by GitHub URL: verifies id, type, name and the
    # single author entry of the schema.org JSON.
    it "maremma schema.org JSON" do
      metadata = described_class.new(input: "https://github.com/datacite/maremma", from: "codemeta")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5438/qeg0-3gm3")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("Maremma: a Ruby library for simplified network calls")
      expect(schema["author"]).to eq({
        "name" => "Martin Fenner",
        "givenName" => "Martin",
        "familyName" => "Fenner",
        "@type" => "Person",
        "@id" => "https://orcid.org/0000-0003-0077-4738",
        "affiliation" => { "@type" => "Organization", "name" => "DataCite" }
      })
    end

    # DataCite input given as an upper-case DOI URL: verifies the lower-cased
    # id, the type, the name and the two license entries.
    it "Schema.org JSON" do
      metadata = described_class.new(input: "https://doi.org/10.5281/ZENODO.48440", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5281/zenodo.48440")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("Analysis Tools For Crossover Experiment Of Ui Using Choice Architecture")
      expect(schema["license"]).to eq([
        "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
        "info:eu-repo/semantics/openAccess"
      ])
    end

    # DataCite dataset: verifies the single citation entry, license and the
    # comma-joined keywords string.
    it "Schema.org JSON isReferencedBy" do
      metadata = described_class.new(input: "https://doi.org/10.5061/DRYAD.8515", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5061/dryad.8515")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["citation"]).to eq({
        "@id" => "https://doi.org/10.1371/journal.ppat.1000446",
        "@type" => "CreativeWork"
      })
      expect(schema["license"]).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
      expect(schema["keywords"]).to eq("Plasmodium, malaria, taxonomy, mitochondrial genome, Parasites")
    end

    # No explicit `from:` is passed; verifies the "@reverse" / "isBasedOn"
    # entry of the schema.org JSON.
    it "Schema.org JSON IsSupplementTo" do
      metadata = described_class.new(input: "https://doi.org/10.5517/CC8H01S")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5517/cc8h01s")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["@reverse"]).to eq({
        "isBasedOn" => {
          "@id" => "https://doi.org/10.1107/s1600536804021154",
          "@type" => "ScholarlyArticle"
        }
      })
    end

    # Minimal check: only the id and type of the schema.org JSON are verified.
    it "Schema.org JSON Cyark" do
      metadata = described_class.new(input: "https://doi.org/10.26301/jgf3-jm06")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.26301/jgf3-jm06")
      expect(schema["@type"]).to eq("Dataset")
    end

    # Codemeta fixture file: verifies the author list (two persons plus one
    # organization), version and keywords.
    it "rdataone" do
      metadata = described_class.new(input: fixture_path + "codemeta.json", from: "codemeta")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5063/f1m61h5x")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("R Interface to the DataONE REST API")

      nceas = { "@type" => "Organization", "name" => "NCEAS" }
      expected_authors = [
        {
          "name" => "Matt Jones",
          "givenName" => "Matt",
          "familyName" => "Jones",
          "@type" => "Person",
          "@id" => "https://orcid.org/0000-0003-0077-4738",
          "affiliation" => nceas
        },
        {
          "name" => "Peter Slaughter",
          "givenName" => "Peter",
          "familyName" => "Slaughter",
          "@type" => "Person",
          "@id" => "https://orcid.org/0000-0002-2192-403X",
          "affiliation" => nceas
        },
        { "name" => "University Of California, Santa Barbara", "@type" => "Organization" }
      ]
      expect(schema["author"]).to eq(expected_authors)
      expect(schema["version"]).to eq("2.0.0")
      expect(schema["keywords"]).to eq("data sharing, data repository, DataONE")
    end

    # Collection record: verifies the hasPart list, the single funder entry
    # and the license.
    it "Funding" do
      metadata = described_class.new(input: "https://doi.org/10.5438/6423")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5438/6423")
      expect(schema["@type"]).to eq("Collection")

      parts = schema["hasPart"]
      expect(parts.size).to eq(25)
      expect(parts.first).to eq({ "@type" => "CreativeWork", "@id" => "https://doi.org/10.5281/zenodo.30799" })

      expect(schema["funder"]).to eq({
        "@id" => "https://doi.org/10.13039/501100000780",
        "@type" => "Organization",
        "name" => "European Commission"
      })
      expect(schema["license"]).to eq("https://creativecommons.org/licenses/by/4.0/legalcode")
    end

    # Dataset record: verifies the single funder entry and the two license
    # values.
    it "Funding OpenAIRE" do
      metadata = described_class.new(input: "https://doi.org/10.5281/ZENODO.1239")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5281/zenodo.1239")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["funder"]).to eq({
        "@id" => "https://doi.org/10.13039/501100000780",
        "@type" => "Organization",
        "name" => "European Commission"
      })
      expect(schema["license"]).to eq([
        "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
        "info:eu-repo/semantics/openAccess"
      ])
    end

    # DataCite dataset: verifies name and the full comma-joined keywords
    # string.
    it "subject scheme" do
      metadata = described_class.new(input: "https://doi.org/10.4232/1.2745", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expected_keywords = "KAT12 International Institutions, Relations, Conditions, Internationale Politik und Institutionen, Regierung, politische Systeme, Parteien und Verbände, Wirtschaftssysteme und wirtschaftliche Entwicklung, International politics and organisation, Government, political systems and organisation, Economic systems and development"

      expect(schema["@id"]).to eq("https://doi.org/10.4232/1.2745")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Flash Eurobarometer 54 (Madrid Summit)")
      expect(schema["keywords"]).to eq(expected_keywords)
    end

    # DataCite dataset: verifies name, license, and that the keywords value
    # includes the expected fragment.
    it "subject scheme multiple keywords" do
      metadata = described_class.new(input: "https://doi.org/10.1594/pangaea.721193", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expected_name = "Seawater carbonate chemistry and processes during experiments with Crassostrea gigas, 2007, supplement to: Kurihara, Haruko; Kato, Shoji; Ishimatsu, Atsushi (2007): Effects of increased seawater pCO2 on early development of the oyster Crassostrea gigas. Aquatic Biology, 1(1), 91-98"

      expect(schema["@id"]).to eq("https://doi.org/10.1594/pangaea.721193")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq(expected_name)
      expect(schema["keywords"]).to include("Animalia, Bottles or small containers/Aquaria (&lt;20 L)")
      expect(schema["license"]).to eq("https://creativecommons.org/licenses/by/3.0/legalcode")
    end

    # DataCite XML fixture with explicit url/content_url options: verifies
    # that both are echoed in the output, along with the organizational author,
    # format/size, data catalog and "@reverse" entry.
    it "author is organization" do
      url = "https://ors.datacite.org/doi:/10.25491/9hx8-ke93"
      content_url = "https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz"
      metadata = described_class.new(
        input: fixture_path + "gtex.xml",
        url: url,
        content_url: content_url,
        from: "datacite"
      )
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.25491/9hx8-ke93")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "The GTEx Consortium" })
      expect(schema["url"]).to eq(url)
      expect(schema["encodingFormat"]).to eq("application/tar")
      expect(schema["contentSize"]).to eq("15.7M")
      expect(schema["contentUrl"]).to eq(content_url)
      expect(schema["includedInDataCatalog"]).to eq({
        "@id" => "https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000663",
        "@type" => "DataCatalog",
        "name" => "GTEx"
      })
      expect(schema["@reverse"]).to eq({
        "isBasedOn" => { "@id" => "https://doi.org/10.1038/nmeth.4407", "@type" => "ScholarlyArticle" }
      })
    end

    # DataCite record: verifies the author list and the "periodical" entry
    # of type Series.
    it "series information" do
      metadata = described_class.new(input: "10.4229/23RDEUPVSEC2008-5CO.8.3", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.4229/23rdeupvsec2008-5co.8.3")
      expect(schema["@type"]).to eq("ScholarlyArticle")
      expect(schema["name"]).to eq("Rural Electrification With Hybrid Power Systems Based on Renewables - Technical System Configurations From the Point of View of the European Industry")

      authors = schema["author"]
      expect(authors.count).to eq(3)
      expect(authors.first).to eq({
        "@type" => "Person",
        "name" => "P. Llamas",
        "givenName" => "P.",
        "familyName" => "Llamas"
      })

      expect(schema["periodical"]).to eq({
        "@type" => "Series",
        "firstPage" => "Spain; 3353",
        "lastPage" => "3356",
        "name" => "23rd European Photovoltaic Solar Energy Conference and Exhibition",
        "volume" => "1-5 September 2008"
      })
    end

    # DataCite dataset: verifies the data catalog entry, the single md5
    # identifier and the content URL.
    it "data catalog" do
      metadata = described_class.new(input: "10.25491/8KMC-G314", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.25491/8kmc-g314")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Covariates used in eQTL analysis. Includes genotyping principal components and PEER factors")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "The GTEx Consortium" })
      expect(schema["includedInDataCatalog"]).to eq({ "@type" => "DataCatalog", "name" => "GTEx" })
      expect(schema["identifier"]).to eq({
        "@type" => "PropertyValue",
        "propertyID" => "md5",
        "value" => "c7c89fe7366d50cd75448aa603c9de58"
      })
      expect(schema["contentUrl"]).to eq("https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_covariates.tar.gz")
    end

    # DataCite dataset: verifies the three alternate identifiers (in order),
    # the absence of a data catalog and the content URLs.
    it "alternate identifiers" do
      metadata = described_class.new(input: "10.23725/8na3-9s47", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.23725/8na3-9s47")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("NWD165827.recab.cram")
      expect(schema["author"]).to eq({ "name" => "TOPMed" })
      expect(schema["includedInDataCatalog"]).to be_nil

      expected_identifiers = [
        { "@type" => "PropertyValue", "propertyID" => "minid", "value" => "ark:/99999/fk41CrU4eszeLUDe" },
        { "@type" => "PropertyValue", "propertyID" => "dataguid", "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" },
        { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "3b33f6b9338fccab0901b7d317577ea3" }
      ]
      expect(schema["identifier"]).to eq(expected_identifiers)

      expect(schema["contentUrl"]).to include(
        "s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram",
        "gs://topmed-irc-share/public/NWD165827.recab.cram"
      )
    end

    # DataCite XML fixture: verifies that the first author's affiliation
    # carries an "@id", plus the identifier entry and license.
    it "affiliation identifier" do
      metadata = described_class.new(input: fixture_path + "datacite-example-affiliation.xml")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5072/example-full")
      expect(schema["@type"]).to eq("SoftwareSourceCode")
      expect(schema["name"]).to eq("Full DataCite XML Example")

      authors = schema["author"]
      expect(authors.size).to eq(3)
      expect(authors.first).to eq({
        "@id" => "https://orcid.org/0000-0001-5000-0007",
        "@type" => "Person",
        "affiliation" => {
          "@id" => "https://ror.org/04wxnsj81",
          "@type" => "Organization",
          "name" => "DataCite"
        },
        "familyName" => "Miller",
        "givenName" => "Elizabeth",
        "name" => "Elizabeth Miller"
      })

      expect(schema["identifier"]).to eq({
        "@type" => "PropertyValue",
        "propertyID" => "URL",
        "value" => "https://schema.datacite.org/meta/kernel-4.2/example/datacite-example-full-v4.2.xml"
      })
      expect(schema["license"]).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
    end

    # DataCite XML fixture with an explicit doi option: verifies the third
    # author and the two spatialCoverage places (GeoCoordinates and GeoShape).
    it "geo_location_point" do
      metadata = described_class.new(
        input: fixture_path + "datacite-example-geolocation-2.xml",
        doi: "10.6071/Z7WC73"
      )
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.6071/z7wc73")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature")

      authors = schema["author"]
      expect(authors.size).to eq(6)
      expect(authors[2]).to eq({
        "@id" => "https://orcid.org/0000-0002-8862-1404",
        "@type" => "Person",
        "familyName" => "Stacy",
        "givenName" => "Erin",
        "name" => "Erin Stacy",
        "affiliation" => { "@type" => "Organization", "name" => "UC Merced" }
      })
      expect(schema["includedInDataCatalog"]).to be_nil

      address = "Providence Creek (Lower, Upper and P301)"
      expected_coverage = [
        {
          "@type" => "Place",
          "geo" => {
            "@type" => "GeoCoordinates",
            "address" => address,
            "latitude" => "37.047756",
            "longitude" => "-119.221094"
          }
        },
        {
          "@type" => "Place",
          "geo" => {
            "@type" => "GeoShape",
            "address" => address,
            "box" => "37.046 -119.211 37.075 -119.182"
          }
        }
      ]
      expect(schema["spatialCoverage"]).to eq(expected_coverage)
      expect(schema["license"]).to eq("https://creativecommons.org/licenses/by/4.0/legalcode")
    end

    # DataCite dataset: verifies the two author entries and the single
    # spatialCoverage place with a GeoShape box.
    it "geo_location_box" do
      metadata = described_class.new(input: "10.1594/PANGAEA.842237", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.1594/pangaea.842237")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Registry of all stations from the Tara Oceans Expedition (2009-2013)")

      expected_authors = [
        {
          "familyName" => "Tara Oceans Consortium",
          "givenName" => "Coordinators",
          "name" => "Coordinators Tara Oceans Consortium"
        },
        {
          "familyName" => "Tara Oceans Expedition",
          "givenName" => "Participants",
          "name" => "Participants Tara Oceans Expedition"
        }
      ]
      expect(schema["author"]).to eq(expected_authors)
      expect(schema["includedInDataCatalog"]).to be_nil
      expect(schema["spatialCoverage"]).to eq({
        "@type" => "Place",
        "geo" => { "@type" => "GeoShape", "box" => "-64.3088 -168.5182 79.6753 174.9006" }
      })
      expect(schema["license"]).to eq("https://creativecommons.org/licenses/by/3.0/legalcode")
    end

    # DataCite XML fixture: verifies the size of the polygon under
    # spatialCoverage.geo and the first element of its first entry.
    it "geo_location_polygon" do
      metadata = described_class.new(input: fixture_path + "datacite-example-polygon-v4.1.xml")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.5072/example-polygon")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["name"]).to eq("Meteo measurements at the Sand Motor")
      expect(schema["author"]).to eq({
        "@type" => "Person",
        "familyName" => "Den Heijer",
        "givenName" => "C",
        "name" => "C Den Heijer"
      })
      expect(schema["includedInDataCatalog"]).to be_nil

      polygon = schema["spatialCoverage"].dig("geo", "polygon")
      expect(polygon.length).to eq(34)
      expect(polygon[0].first).to eq(["4.1738852605822", "52.03913926329928"])
    end

    # schema.org JSON fixture read and written back as schema.org: verifies
    # identifier, descriptive fields, data catalog, publisher, the seven
    # funders (in order) and provider.
    it "from schema_org gtex" do
      metadata = described_class.new(input: fixture_path + "schema_org_gtex.json", from: "schema_org")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.25491/d50j-3083")
      expect(schema["@type"]).to eq("Dataset")
      expect(schema["identifier"]).to eq({ "@type" => "PropertyValue", "propertyID" => "md5", "value" => "687610993" })
      expect(schema["url"]).to eq("https://ors.datacite.org/doi:/10.25491/d50j-3083")
      expect(schema["additionalType"]).to eq("Gene expression matrices")
      expect(schema["name"]).to eq("Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery")
      expect(schema["version"]).to eq("v7")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "The GTEx Consortium" })
      expect(schema["keywords"]).to eq("gtex, annotation, phenotype, gene regulation, transcriptomics")
      expect(schema["datePublished"]).to eq("2017")
      expect(schema["contentUrl"]).to eq("https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz")
      expect(schema["schemaVersion"]).to eq("http://datacite.org/schema/kernel-4")
      expect(schema["includedInDataCatalog"]).to eq({ "@type" => "DataCatalog", "name" => "GTEx" })
      expect(schema["publisher"]).to eq({ "@type" => "Organization", "name" => "GTEx" })

      # Each pair is [funder id suffix under 10.13039, funder name].
      expected_funders = [
        ["100000052", "Common Fund of the Office of the Director of the NIH"],
        ["100000054", "National Cancer Institute (NCI)"],
        ["100000051", "National Human Genome Research Institute (NHGRI)"],
        ["100000050", "National Heart, Lung, and Blood Institute (NHLBI)"],
        ["100000026", "National Institute on Drug Abuse (NIDA)"],
        ["100000025", "National Institute of Mental Health (NIMH)"],
        ["100000065", "National Institute of Neurological Disorders and Stroke (NINDS)"]
      ].map do |funder_id, funder_name|
        {
          "@id" => "https://doi.org/10.13039/#{funder_id}",
          "name" => funder_name,
          "@type" => "Organization"
        }
      end
      expect(schema["funder"]).to eq(expected_funders)
      expect(schema["provider"]).to eq({ "@type" => "Organization", "name" => "DataCite" })
    end

    # schema.org JSON fixture read and written back as schema.org: verifies
    # the three identifiers (in order), descriptive fields, content URLs,
    # citation, funder and provider.
    it "from schema_org topmed" do
      metadata = described_class.new(input: fixture_path + "schema_org_topmed.json", from: "schema_org")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.23725/8na3-9s47")
      expect(schema["@type"]).to eq("Dataset")

      expected_identifiers = [
        { "@type" => "PropertyValue", "propertyID" => "md5", "value" => "3b33f6b9338fccab0901b7d317577ea3" },
        { "@type" => "PropertyValue", "propertyID" => "minid", "value" => "ark:/99999/fk41CrU4eszeLUDe" },
        { "@type" => "PropertyValue", "propertyID" => "dataguid", "value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }
      ]
      expect(schema["identifier"]).to eq(expected_identifiers)

      expect(schema["url"]).to eq("https://ors.datacite.org/doi:/10.23725/8na3-9s47")
      expect(schema["additionalType"]).to eq("CRAM file")
      expect(schema["name"]).to eq("NWD165827.recab.cram")
      expect(schema["author"]).to eq({ "@type" => "Organization", "name" => "TOPMed IRC" })
      expect(schema["keywords"]).to eq("topmed, whole genome sequencing")
      expect(schema["datePublished"]).to eq("2017-11-30")
      expect(schema["contentUrl"]).to eq([
        "s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram",
        "gs://topmed-irc-share/public/NWD165827.recab.cram"
      ])
      expect(schema["schemaVersion"]).to eq("http://datacite.org/schema/kernel-4")
      expect(schema["publisher"]).to eq({ "@type" => "Organization", "name" => "TOPMed" })
      expect(schema["citation"]).to eq({ "@id" => "https://doi.org/10.23725/2g4s-qv04", "@type" => "Dataset" })
      expect(schema["funder"]).to eq({
        "@id" => "https://doi.org/10.13039/100000050",
        "@type" => "Organization",
        "name" => "National Heart, Lung, and Blood Institute (NHLBI)"
      })
      expect(schema["provider"]).to eq({ "@type" => "Organization", "name" => "DataCite" })
    end

    # DataCite record: verifies id, type and the datePublished value.
    it "interactive resource without dates" do
      metadata = described_class.new(input: "https://doi.org/10.34747/g6yb-3412", from: "datacite")
      schema = JSON.parse(metadata.schema_org)

      expect(schema["@id"]).to eq("https://doi.org/10.34747/g6yb-3412")
      expect(schema["@type"]).to eq("CreativeWork")
      expect(schema["datePublished"]).to eq("2019")
    end

    # DataCite 4.5 XML fixture: verifies that the publisher entry carries an
    # "@id" alongside its type and name.
    it "from Schema 4.5 with publisher attributes" do
      metadata = described_class.new(input: fixture_path + "datacite-example-full-v4.5.xml")
      schema = JSON.parse(metadata.schema_org)

      expected_publisher = {
        "@type" => "Organization",
        "@id" => "https://ror.org/04z8jg394",
        "name" => "Example Publisher"
      }
      expect(schema["publisher"]).to eq(expected_publisher)
    end
  end
end