# frozen_string_literal: true

module Briard
  module Utils
    # Maps Creative Commons license URLs (https scheme, no trailing slash) to
    # the URL used as their normalized form -- for most entries the matching
    # `/legalcode` page. normalize_cc_url looks URLs up here and falls back to
    # the input URL when there is no entry.
    NORMALIZED_LICENSES = {
      'https://creativecommons.org/licenses/by/1.0' => 'https://creativecommons.org/licenses/by/1.0/legalcode',
      'https://creativecommons.org/licenses/by/2.0' => 'https://creativecommons.org/licenses/by/2.0/legalcode',
      'https://creativecommons.org/licenses/by/2.5' => 'https://creativecommons.org/licenses/by/2.5/legalcode',
      'https://creativecommons.org/licenses/by/3.0' => 'https://creativecommons.org/licenses/by/3.0/legalcode',
      'https://creativecommons.org/licenses/by/3.0/us' => 'https://creativecommons.org/licenses/by/3.0/legalcode',
      'https://creativecommons.org/licenses/by/4.0' => 'https://creativecommons.org/licenses/by/4.0/legalcode',
      'https://creativecommons.org/licenses/by-nc/1.0' => 'https://creativecommons.org/licenses/by-nc/1.0/legalcode',
      'https://creativecommons.org/licenses/by-nc/2.0' => 'https://creativecommons.org/licenses/by-nc/2.0/legalcode',
      'https://creativecommons.org/licenses/by-nc/2.5' => 'https://creativecommons.org/licenses/by-nc/2.5/legalcode',
      'https://creativecommons.org/licenses/by-nc/3.0' => 'https://creativecommons.org/licenses/by-nc/3.0/legalcode',
      'https://creativecommons.org/licenses/by-nc/4.0' => 'https://creativecommons.org/licenses/by-nc/4.0/legalcode',
      'https://creativecommons.org/licenses/by-nd-nc/1.0' => 'https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode',
      'https://creativecommons.org/licenses/by-nd-nc/2.0' => 'https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode',
      'https://creativecommons.org/licenses/by-nd-nc/2.5' => 'https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode',
      'https://creativecommons.org/licenses/by-nd-nc/3.0' => 'https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode',
      'https://creativecommons.org/licenses/by-nd-nc/4.0' => 'https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-sa/1.0' => 'https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-sa/2.0' => 'https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-sa/2.5' => 'https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode',
      'https://creativecommons.org/licenses/by-nc-sa/3.0' => 'https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-sa/4.0' => 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode',
      'https://creativecommons.org/licenses/by-nd/1.0' => 'https://creativecommons.org/licenses/by-nd/1.0/legalcode',
      'https://creativecommons.org/licenses/by-nd/2.0' => 'https://creativecommons.org/licenses/by-nd/2.0/legalcode',
      'https://creativecommons.org/licenses/by-nd/2.5' => 'https://creativecommons.org/licenses/by-nd/2.5/legalcode',
      'https://creativecommons.org/licenses/by-nd/3.0' => 'https://creativecommons.org/licenses/by-nd/3.0/legalcode',
      'https://creativecommons.org/licenses/by-nd/4.0' => 'https://creativecommons.org/licenses/by-nd/4.0/legalcode',
      'https://creativecommons.org/licenses/by-sa/1.0' => 'https://creativecommons.org/licenses/by-sa/1.0/legalcode',
      'https://creativecommons.org/licenses/by-sa/2.0' => 'https://creativecommons.org/licenses/by-sa/2.0/legalcode',
      'https://creativecommons.org/licenses/by-sa/2.5' => 'https://creativecommons.org/licenses/by-sa/2.5/legalcode',
      'https://creativecommons.org/licenses/by-sa/3.0' => 'https://creativecommons.org/licenses/by-sa/3.0/legalcode',
      'https://creativecommons.org/licenses/by-sa/4.0' => 'https://creativecommons.org/licenses/by-sa/4.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-nd/1.0' => 'https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-nd/2.0' => 'https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-nd/2.5' => 'https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode',
      'https://creativecommons.org/licenses/by-nc-nd/3.0' => 'https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode',
      'https://creativecommons.org/licenses/by-nc-nd/4.0' => 'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode',
      'https://creativecommons.org/licenses/publicdomain' => 'https://creativecommons.org/licenses/publicdomain/',
      'https://creativecommons.org/publicdomain/zero/1.0' => 'https://creativecommons.org/publicdomain/zero/1.0/legalcode'
    }.freeze

    # Maps DataCite resourceTypeGeneral values to schema.org types.
    # A nil value means there is no direct schema.org counterpart;
    # to_schema_org_relation falls back to 'CreativeWork' in that case.
    DC_TO_SO_TRANSLATIONS = {
      'Audiovisual' => 'MediaObject',
      'Book' => 'Book',
      'BookChapter' => 'Chapter',
      'Collection' => 'Collection',
      'ComputationalNotebook' => 'SoftwareSourceCode',
      'ConferencePaper' => 'Article',
      'ConferenceProceeding' => 'Periodical',
      'DataPaper' => 'Article',
      'Dataset' => 'Dataset',
      'Dissertation' => 'Thesis',
      'Event' => 'Event',
      'Image' => 'ImageObject',
      'InteractiveResource' => nil,
      'Journal' => 'Periodical',
      'JournalArticle' => 'ScholarlyArticle',
      'Model' => nil,
      'OutputManagementPlan' => nil,
      'PeerReview' => 'Review',
      'PhysicalObject' => nil,
      'Preprint' => nil,
      'Report' => 'Report',
      'Service' => 'Service',
      'Software' => 'SoftwareSourceCode',
      'Sound' => 'AudioObject',
      'Standard' => nil,
      'Text' => 'ScholarlyArticle',
      'Workflow' => nil,
      'Other' => 'CreativeWork',
      # not part of DataCite schema, but used internally
      'Periodical' => 'Periodical',
      'DataCatalog' => 'DataCatalog'
    }.freeze

    # Maps DataCite resourceTypeGeneral values to citeproc (CSL) item types
    # (the "CP" reading is assumed from the values and from the 'citeproc'
    # format name used elsewhere in this module). nil means no mapping.
    DC_TO_CP_TRANSLATIONS = {
      'Audiovisual' => 'motion_picture',
      'Book' => 'book',
      'BookChapter' => 'chapter',
      'Collection' => nil,
      'ComputationalNotebook' => nil,
      # CSL spells this type with a hyphen, as CR_TO_CP_TRANSLATIONS does
      'ConferencePaper' => 'paper-conference',
      'ConferenceProceeding' => nil,
      'DataPaper' => 'report',
      'Dataset' => 'dataset',
      'Dissertation' => nil,
      'Event' => nil,
      'Image' => 'graphic',
      'InteractiveResource' => nil,
      'Journal' => nil,
      'JournalArticle' => 'article-journal',
      'Model' => nil,
      'OutputManagementPlan' => nil,
      'PeerReview' => 'review',
      'PhysicalObject' => nil,
      'Preprint' => nil,
      'Report' => 'report',
      'Service' => nil,
      'Sound' => 'song',
      'Standard' => nil,
      'Text' => 'report',
      'Workflow' => nil,
      'Other' => nil
    }.freeze

    # Maps Crossref work types to citeproc (CSL) item types (prefix meaning
    # assumed from the keys/values). nil means no mapping.
    CR_TO_CP_TRANSLATIONS = {
      'Proceedings' => nil,
      'ReferenceBook' => nil,
      'JournalIssue' => 'article-journal',
      'ProceedingsArticle' => 'paper-conference',
      'Other' => nil,
      'Dissertation' => 'thesis',
      'Dataset' => 'dataset',
      'EditedBook' => 'book',
      'PostedContent' => 'article-journal',
      'JournalArticle' => 'article-journal',
      'Journal' => nil,
      'Report' => 'report',
      'BookSeries' => nil,
      'ReportSeries' => nil,
      'BookTrack' => nil,
      'Standard' => nil,
      'BookSection' => 'chapter',
      'BookPart' => nil,
      'Book' => 'book',
      'BookChapter' => 'chapter',
      'StandardSeries' => nil,
      'Monograph' => 'book',
      'Component' => nil,
      'ReferenceEntry' => 'entry-dictionary',
      'JournalVolume' => nil,
      'BookSet' => nil
    }.freeze

    # Maps Crossref work types to schema.org types (prefix meaning assumed
    # from the keys/values). nil means no mapping.
    CR_TO_SO_TRANSLATIONS = {
      'Proceedings' => nil,
      'ReferenceBook' => 'Book',
      'JournalIssue' => 'PublicationIssue',
      'ProceedingsArticle' => nil,
      'Other' => 'CreativeWork',
      'Dissertation' => 'Thesis',
      'Dataset' => 'Dataset',
      'EditedBook' => 'Book',
      'JournalArticle' => 'ScholarlyArticle',
      'Journal' => nil,
      'Report' => 'Report',
      'BookSeries' => nil,
      'ReportSeries' => nil,
      'BookTrack' => nil,
      'Standard' => nil,
      'BookSection' => nil,
      'BookPart' => nil,
      'Book' => 'Book',
      'BookChapter' => 'Chapter',
      'StandardSeries' => nil,
      'Monograph' => 'Book',
      'Component' => 'CreativeWork',
      'ReferenceEntry' => nil,
      'JournalVolume' => 'PublicationVolume',
      'BookSet' => nil,
      'PostedContent' => 'ScholarlyArticle',
      'PeerReview' => 'Review'
    }.freeze

    # Maps Crossref work types to BibTeX entry types (prefix meaning assumed
    # from the keys/values). nil means no mapping.
    CR_TO_BIB_TRANSLATIONS = {
      'Proceedings' => 'proceedings',
      'ReferenceBook' => 'book',
      'JournalIssue' => nil,
      'ProceedingsArticle' => nil,
      'Other' => nil,
      'Dissertation' => 'phdthesis',
      'Dataset' => nil,
      'EditedBook' => 'book',
      'JournalArticle' => 'article',
      'Journal' => nil,
      'Report' => 'techreport',
      'BookSeries' => nil,
      'ReportSeries' => nil,
      'BookTrack' => nil,
      'Standard' => nil,
      'BookSection' => 'inbook',
      'BookPart' => nil,
      'Book' => 'book',
      'BookChapter' => 'inbook',
      'StandardSeries' => nil,
      'Monograph' => 'book',
      'Component' => nil,
      'ReferenceEntry' => nil,
      'JournalVolume' => nil,
      'BookSet' => nil,
      'PostedContent' => 'article'
    }.freeze

    # Maps BibTeX entry types to Crossref work types (prefix meaning assumed
    # from the keys/values). Entry types not listed here have no mapping.
    BIB_TO_CR_TRANSLATIONS = {
      'proceedings' => 'Proceedings',
      'phdthesis' => 'Dissertation',
      'article' => 'JournalArticle',
      'book' => 'Book',
      'inbook' => 'BookChapter'
    }.freeze

    # Maps Crossref work types to JATS publication types (prefix meaning
    # assumed from the keys/values). nil means no mapping.
    CR_TO_JATS_TRANSLATIONS = {
      'Proceedings' => 'working-paper',
      'ReferenceBook' => 'book',
      'JournalIssue' => 'journal',
      'ProceedingsArticle' => 'working-paper',
      'Other' => nil,
      'Dissertation' => nil,
      'Dataset' => 'data',
      'EditedBook' => 'book',
      'JournalArticle' => 'journal',
      'Journal' => 'journal',
      'Report' => 'report',
      'BookSeries' => 'book',
      'ReportSeries' => 'report',
      'BookTrack' => 'book',
      'Standard' => 'standard',
      'BookSection' => 'chapter',
      'BookPart' => 'chapter',
      'Book' => 'book',
      'BookChapter' => 'chapter',
      'StandardSeries' => 'standard',
      'Monograph' => 'book',
      'Component' => nil,
      'ReferenceEntry' => nil,
      'JournalVolume' => 'journal',
      'BookSet' => 'book'
    }.freeze

    # Maps Crossref work types to DataCite resourceTypeGeneral values (prefix
    # meaning assumed from the keys/values). nil means no mapping.
    CR_TO_DC_TRANSLATIONS = {
      'Proceedings' => nil,
      'ReferenceBook' => nil,
      'JournalIssue' => 'Text',
      'ProceedingsArticle' => 'ConferencePaper',
      'Other' => 'Other',
      'Dissertation' => 'Dissertation',
      'Dataset' => 'Dataset',
      'EditedBook' => 'Book',
      'JournalArticle' => 'JournalArticle',
      'Journal' => 'Journal',
      'Report' => 'Report',
      'BookSeries' => nil,
      'ReportSeries' => nil,
      'BookTrack' => nil,
      'Standard' => 'Standard',
      'BookSection' => 'BookChapter',
      'BookPart' => nil,
      'Book' => 'Book',
      'BookChapter' => 'BookChapter',
      'SaComponent' => 'Text',
      'StandardSeries' => 'Standard',
      'Monograph' => 'Book',
      'Component' => nil,
      'ReferenceEntry' => nil,
      'JournalVolume' => nil,
      'BookSet' => nil,
      'PostedContent' => 'Preprint',
      'PeerReview' => 'PeerReview'
    }.freeze

    # Maps schema.org types to DataCite resourceTypeGeneral values.
    # normalize_ids uses it to fill 'resourceTypeGeneral' from an item's '@type'.
    SO_TO_DC_TRANSLATIONS = {
      'Article' => 'Preprint',
      'AudioObject' => 'Sound',
      'Blog' => 'Text',
      'BlogPosting' => 'Preprint',
      'Book' => 'Book',
      'Chapter' => 'BookChapter',
      'Collection' => 'Collection',
      'CreativeWork' => 'Text',
      'DataCatalog' => 'Dataset',
      'Dataset' => 'Dataset',
      'Event' => 'Event',
      'ImageObject' => 'Image',
      'Movie' => 'Audiovisual',
      'PublicationIssue' => 'Text',
      'Report' => 'Report',
      'ScholarlyArticle' => 'Text',
      'Thesis' => 'Text',
      'Service' => 'Service',
      'Review' => 'PeerReview',
      'SoftwareSourceCode' => 'Software',
      'VideoObject' => 'Audiovisual',
      'WebPage' => 'Text',
      'WebSite' => 'Text'
    }.freeze

    # Maps schema.org types to JATS publication types (prefix meaning assumed
    # from the keys/values). nil means no mapping.
    SO_TO_JATS_TRANSLATIONS = {
      'Article' => 'journal',
      'AudioObject' => nil,
      'Blog' => nil,
      'BlogPosting' => nil,
      'Book' => 'book',
      'Collection' => nil,
      'CreativeWork' => nil,
      'DataCatalog' => 'data',
      'Dataset' => 'data',
      'Event' => nil,
      'ImageObject' => nil,
      'Movie' => nil,
      'PublicationIssue' => 'journal',
      'ScholarlyArticle' => 'journal',
      'Service' => nil,
      'SoftwareSourceCode' => 'software',
      'VideoObject' => nil,
      'WebPage' => nil,
      'WebSite' => 'website'
    }.freeze

    # Maps schema.org types to citeproc (CSL) item types (prefix meaning
    # assumed from the keys/values). nil means no mapping.
    SO_TO_CP_TRANSLATIONS = {
      'Article' => 'article-newspaper',
      'AudioObject' => 'song',
      'Blog' => 'report',
      'BlogPosting' => 'post-weblog',
      'Collection' => nil,
      'CreativeWork' => nil,
      'DataCatalog' => 'dataset',
      'Dataset' => 'dataset',
      'Event' => nil,
      'ImageObject' => 'graphic',
      'Movie' => 'motion_picture',
      'PublicationIssue' => nil,
      'Report' => 'report',
      'ScholarlyArticle' => 'article-journal',
      'Service' => nil,
      'Thesis' => 'thesis',
      'VideoObject' => 'broadcast',
      'WebPage' => 'webpage',
      'WebSite' => 'webpage'
    }.freeze

    # Maps schema.org types to RIS reference type codes (the value written
    # after "TY  - "; prefix meaning assumed from the keys/values).
    # nil means no mapping.
    SO_TO_RIS_TRANSLATIONS = {
      'Article' => 'GEN',
      'AudioObject' => nil,
      'Blog' => nil,
      'BlogPosting' => 'BLOG',
      'Collection' => nil,
      'CreativeWork' => 'GEN',
      'DataCatalog' => 'CTLG',
      'Dataset' => 'DATA',
      'Event' => nil,
      'ImageObject' => 'FIGURE',
      'Movie' => 'MPCT',
      'Report' => 'RPRT',
      'PublicationIssue' => nil,
      'ScholarlyArticle' => 'JOUR',
      'Service' => nil,
      'SoftwareSourceCode' => 'COMP',
      'VideoObject' => 'VIDEO',
      'WebPage' => 'ELEC',
      'WebSite' => nil
    }.freeze

    # Maps Crossref work types to RIS reference type codes (prefix meaning
    # assumed from the keys/values). nil means no mapping.
    CR_TO_RIS_TRANSLATIONS = {
      'Proceedings' => 'CONF',
      'PostedContent' => 'JOUR',
      'ReferenceBook' => 'BOOK',
      'JournalIssue' => 'JOUR',
      'ProceedingsArticle' => 'CPAPER',
      'Other' => 'GEN',
      'Dissertation' => 'THES',
      'Dataset' => 'DATA',
      'EditedBook' => 'BOOK',
      'JournalArticle' => 'JOUR',
      'Journal' => nil,
      'Report' => 'RPRT',
      'BookSeries' => nil,
      'ReportSeries' => nil,
      'BookTrack' => nil,
      'Standard' => 'STAND',
      'BookSection' => 'CHAP',
      'BookPart' => 'CHAP',
      'Book' => 'BOOK',
      'BookChapter' => 'CHAP',
      'StandardSeries' => nil,
      'Monograph' => 'BOOK',
      'Component' => nil,
      'ReferenceEntry' => 'DICT',
      'JournalVolume' => nil,
      'BookSet' => nil
    }.freeze

    # Maps DataCite resourceTypeGeneral values to RIS reference type codes
    # (prefix meaning assumed from the keys/values). nil means no mapping.
    DC_TO_RIS_TRANSLATIONS = {
      'Audiovisual' => 'MPCT',
      'Book' => 'BOOK',
      'BookChapter' => 'CHAP',
      'Collection' => nil,
      'ComputationalNotebook' => 'COMP',
      'ConferencePaper' => 'CPAPER',
      'ConferenceProceeding' => 'CONF',
      'DataPaper' => nil,
      'Dataset' => 'DATA',
      'Dissertation' => 'THES',
      'Event' => nil,
      'Image' => 'FIGURE',
      'InteractiveResource' => nil,
      'Journal' => nil,
      'JournalArticle' => 'JOUR',
      'Model' => nil,
      'OutputManagementPlan' => nil,
      'PeerReview' => nil,
      'PhysicalObject' => nil,
      'Preprint' => 'RPRT',
      # RPRT is the RIS code for reports, as in the other *_TO_RIS tables
      'Report' => 'RPRT',
      'Service' => nil,
      'Software' => 'COMP',
      'Sound' => 'SOUND',
      'Standard' => nil,
      'Text' => 'RPRT',
      'Workflow' => nil,
      'Other' => nil
    }.freeze

    # Maps RIS reference type codes to DataCite resourceTypeGeneral values
    # (prefix meaning assumed from the keys/values). Codes not listed here
    # have no mapping.
    RIS_TO_DC_TRANSLATIONS = {
      'BLOG' => 'Text',
      'GEN' => 'Text',
      'CTLG' => 'Collection',
      'DATA' => 'Dataset',
      'FIGURE' => 'Image',
      'THES' => 'Dissertation',
      'MPCT' => 'Audiovisual',
      'JOUR' => 'JournalArticle',
      'COMP' => 'Software',
      'VIDEO' => 'Audiovisual',
      'ELEC' => 'Text'
    }.freeze

    # Maps BibTeX entry types to DataCite resourceTypeGeneral values (prefix
    # meaning assumed from the keys/values). nil means no mapping.
    BIB_TO_DC_TRANSLATIONS = {
      'article' => 'JournalArticle',
      'book' => 'Book',
      'inbook' => 'BookChapter',
      'inproceedings' => nil,
      'manual' => nil,
      'misc' => 'Other',
      'phdthesis' => 'Dissertation',
      'proceedings' => 'ConferenceProceeding',
      'techreport' => 'Report',
      'unpublished' => nil
    }.freeze

    # Maps citeproc (CSL) item types to DataCite resourceTypeGeneral values
    # (prefix meaning assumed from the keys/values). Types not listed here
    # have no mapping.
    CP_TO_DC_TRANSLATIONS = {
      'song' => 'Audiovisual',
      'post-weblog' => 'Text',
      'dataset' => 'Dataset',
      'graphic' => 'Image',
      'motion_picture' => 'Audiovisual',
      'article-journal' => 'JournalArticle',
      'broadcast' => 'Audiovisual',
      'webpage' => 'Text'
    }.freeze

    # Maps schema.org types to BibTeX entry types (prefix meaning assumed
    # from the keys/values). Most types collapse to the generic 'misc'.
    SO_TO_BIB_TRANSLATIONS = {
      'Article' => 'article',
      'AudioObject' => 'misc',
      'Thesis' => 'phdthesis',
      'Blog' => 'misc',
      'BlogPosting' => 'article',
      'Collection' => 'misc',
      'CreativeWork' => 'misc',
      'DataCatalog' => 'misc',
      'Dataset' => 'misc',
      'Event' => 'misc',
      'ImageObject' => 'misc',
      'Movie' => 'misc',
      'PublicationIssue' => 'misc',
      'ScholarlyArticle' => 'article',
      'Service' => 'misc',
      'SoftwareSourceCode' => 'misc',
      'VideoObject' => 'misc',
      'WebPage' => 'misc',
      'WebSite' => 'misc'
    }.freeze

    # Placeholder codes that stand in for a missing value, each with a short
    # human-readable explanation of why the value is missing.
    UNKNOWN_INFORMATION = {
      ':unac' => 'temporarily inaccessible',
      ':unal' => 'unallowed, suppressed intentionally',
      ':unap' => 'not applicable, makes no sense',
      ':unas' => 'value unassigned (e.g., Untitled)',
      ':unav' => 'value unavailable, possibly unknown',
      ':unkn' => 'known to be unknown (e.g., Anonymous, Inconnue)',
      ':none' => 'never had a value, never will',
      ':null' => 'explicitly and meaningfully empty',
      ':tba' => 'to be assigned or announced later',
      ':etal' => 'too numerous to list (et alia)'
    }.freeze

    # Works out which input format a piece of metadata is in.
    #
    # Precedence: an identifier beats a file extension, which beats raw
    # content. When an extension is given, the file name is tried first and
    # extension-based detection is the fallback.
    #
    # @param id [String, nil] identifier of the record
    # @param string [String, nil] raw metadata content
    # @param ext [String, nil] file extension
    # @param filename [String, nil] file name
    # @return [String, nil] format name; 'datacite' when no argument is present
    def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
      return find_from_format_by_id(id) if id.present?

      if ext.present?
        by_name = find_from_format_by_filename(filename)
        return by_name || find_from_format_by_ext(string, ext: ext)
      end

      string.present? ? find_from_format_by_string(string) : 'datacite'
    end

    # Derives the input format from an identifier.
    #
    # The identifier is passed through normalize_id first. A DOI is resolved
    # via get_doi_ra (presumably the DOI's registration agency -- defined
    # outside this view); only the agencies listed below yield a format name.
    #
    # @param id [String] DOI, ORCID iD or URL
    # @return [String, nil] format name; nil for a DOI whose get_doi_ra result
    #   is not in the list; 'schema_org' for anything unrecognised
    def find_from_format_by_id(id)
      id = normalize_id(id)

      if %r{\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(id)
        ra = get_doi_ra(id)
        return %w[DataCite Crossref mEDRA KISTI JaLC OP].include?(ra) ? ra.downcase : nil
      end

      return 'orcid' if %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}.match?(id)
      return 'npm' if %r{\A(http|https):/(/)?github\.com/(.+)/package.json\z}.match?(id)
      return 'codemeta' if %r{\A(http|https):/(/)?github\.com/(.+)/codemeta.json\z}.match?(id)
      return 'cff' if %r{\A(http|https):/(/)?github\.com/(.+)/CITATION.cff\z}.match?(id)
      # any other GitHub URL is also treated as 'cff'
      return 'cff' if %r{\A(http|https):/(/)?github\.com/(.+)\z}.match?(id)

      'schema_org'
    end

    # Recognises formats that are identified by a fixed file name.
    #
    # @param filename [String, nil] file name to check
    # @return [String, nil] 'npm', 'cff', or nil for any other name
    def find_from_format_by_filename(filename)
      case filename
      when 'package.json' then 'npm'
      when 'CITATION.cff' then 'cff'
      end
    end

    # Detects the input format from a file extension, inspecting the content
    # where the extension alone is ambiguous (.xml and .json).
    #
    # Branch order matters: the first matching check wins. Each .xml/.json
    # check parses `string` again.
    #
    # @param string [String] raw file content
    # @param options [Hash] only :ext (compared against values such as '.bib') is read
    # @return [String, nil] format name, or nil when no branch matches
    def find_from_format_by_ext(string, options = {})
      if options[:ext] == '.bib'
        'bibtex'
      elsif options[:ext] == '.ris'
        'ris'
      # XML: Crossref query result first, then any DataCite kernel namespace
      elsif options[:ext] == '.xml' && Maremma.from_xml(string).to_h.dig('crossref_result',
                                                                         'query_result', 'body', 'query', 'doi_record', 'crossref')
        'crossref'
      elsif options[:ext] == '.xml' && Nokogiri::XML(string, nil, 'UTF-8',
                                                     &:noblanks).collect_namespaces.find do |_k, v|
              v.start_with?('http://datacite.org/schema/kernel')
            end
        'datacite'
      elsif options[:ext] == '.cff'
        'cff'
      # JSON: told apart by marker keys in the top-level object
      elsif options[:ext] == '.json' && URI(Maremma.from_json(string).to_h.fetch('@context',
                                                                                 '')).host == 'schema.org'
        'schema_org'
      elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('source') == 'Crossref'
        'crossref_json'
      elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('@context') == ('https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld')
        'codemeta'
      # NOTE(review): this reads 'schemaVersion' while find_from_format_by_string
      # reads 'schema-version' -- confirm which key is intended.
      elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('schemaVersion').to_s.start_with?('http://datacite.org/schema/kernel')
        'datacite_json'
      elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('types') && Maremma.from_json(string).to_h.dig('publication_year').present?
        'crosscite'
      elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('issued',
                                                                           'date-parts').present?
        'citeproc'
      end
    end

    # Detects the input format by inspecting the content alone.
    #
    # The checks run in order (XML, JSON, RIS, CFF/YAML, BibTeX) and the first
    # match wins.
    #
    # @param string [String] raw metadata content
    # @return [String, nil] format name; 'bibtex' when the content is not
    #   valid YAML; nil when nothing matches or BibTeX parsing fails
    def find_from_format_by_string(string)
      if Maremma.from_xml(string).to_h.dig('crossref_result', 'query_result', 'body', 'query',
                                           'doi_record', 'crossref').present?
        'crossref'
      elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find do |_k, v|
              v.start_with?('http://datacite.org/schema/kernel')
            end
        'datacite'
      elsif URI(Maremma.from_json(string).to_h.fetch('@context', '')).host == 'schema.org'
        'schema_org'
      elsif Maremma.from_json(string).to_h.dig('@context') == ('https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld')
        'codemeta'
      elsif Maremma.from_json(string).to_h.dig('schema-version').to_s.start_with?('http://datacite.org/schema/kernel')
        'datacite_json'
      elsif Maremma.from_json(string).to_h.dig('types').present? && Maremma.from_json(string).to_h.dig('publication_year').present?
        'crosscite'
      elsif Maremma.from_json(string).to_h.dig('issued', 'date-parts').present?
        'citeproc'
      elsif string.start_with?('TY  - ')
        'ris'
      # NOTE(review): YAML.load runs on caller-supplied content; with Psych < 4
      # it can instantiate arbitrary objects -- consider YAML.safe_load.
      # A document that parses to a scalar (e.g. plain text) is not CFF; guard
      # on Hash so it falls through to the BibTeX check instead of raising
      # NoMethodError on `to_h`.
      elsif (yaml = YAML.load(string)).is_a?(Hash) && yaml.fetch('cff-version', nil).present?
        'cff'
      elsif BibTeX.parse(string).first
        'bibtex'
      end
    rescue Psych::SyntaxError
      # content that is not valid YAML is assumed to be BibTeX
      'bibtex'
    rescue BibTeX::ParseError
      nil
    end

    # Extracts the ORCID iD from an orcid.org URL.
    #
    # The previous pattern began with `\A:`, which required the input to start
    # with a literal colon, so ordinary http(s) URLs never matched.
    #
    # @param url [String, nil] e.g. "https://orcid.org/0000-0002-1825-0097"
    # @return [String, nil] everything after "orcid.org/", or nil when the
    #   input is not an http(s)://orcid.org/ URL
    def orcid_from_url(url)
      Array(%r{\A(?:http|https)://orcid\.org/(.+)}.match(url)).last
    end

    # Turns an ORCID iD into its https://orcid.org/ URL.
    #
    # @param orcid [String, nil] ORCID iD
    # @return [String, nil] the URL, or nil when no iD is present
    def orcid_as_url(orcid)
      return unless orcid.present?

      "https://orcid.org/#{orcid}"
    end

    # Checks that the input is an ORCID iD, either bare or as an orcid.org URL
    # (www. and sandbox. hosts are accepted), and returns it hyphenated.
    #
    # @param orcid [String, nil] candidate iD or URL; groups may be separated
    #   by hyphens or whitespace
    # @return [String, nil] the iD with whitespace separators replaced by
    #   hyphens, or nil when the input does not match
    def validate_orcid(orcid)
      match = %r{\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(orcid)
      # the pattern has a single capture group: the iD itself
      identifier = match && match[1]
      return unless identifier.present?

      identifier.gsub(/[[:space:]]/, '-')
    end

    # Checks that a scheme URI points at orcid.org (optionally with www.).
    #
    # @param orcid_scheme [String, nil] e.g. "https://orcid.org"
    # @return [String, nil] "orcid.org" when the input matches, otherwise nil
    def validate_orcid_scheme(orcid_scheme)
      match = %r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)
      # the host is the third and last capture group
      match && match[3]
    end

    # Classifies an identifier string as DOI, URL or ISSN.
    #
    # Patterns are tried in order, so a doi.org URL is reported as 'DOI' rather
    # than 'URL'.
    #
    # @param str [String, nil] identifier to classify
    # @return [String, nil] 'DOI', 'URL', 'ISSN', or nil when nothing matches
    def validate_url(str)
      checks = [
        [%r{\A(?:(http|https)://(dx\.)?doi.org/)?(doi:)?(10\.\d{4,5}/.+)\z}, 'DOI'],
        [%r{\A(http|https)://}, 'URL'],
        [/\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/, 'ISSN']
      ]

      _pattern, label = checks.find { |pattern, _label| pattern.match?(str) }
      label
    end

    # Pulls the text content out of a parsed element, which may be a plain
    # string, a hash with a content key, or an array of either.
    #
    # @param element [String, Hash, Array, nil] value to read from
    # @param options [Hash] :content -- key to read from hashes (default
    #   '__content__'); :first -- for arrays, return only the first value
    # @return [Object, nil] the HTML-unescaped string, the value stored under
    #   the content key, the de-duplicated (and unwrapped) array values, or
    #   nil for unsupported input. A String combined with an explicit
    #   :content option also yields nil.
    def parse_attributes(element, options = {})
      key = options[:content] || '__content__'

      case element
      when String
        CGI.unescapeHTML(element) if options[:content].nil?
      when Hash
        element.fetch(CGI.unescapeHTML(key), nil)
      when Array
        values = element.map do |item|
          item.is_a?(Hash) ? item.fetch(CGI.unescapeHTML(key), nil) : item
        end.uniq

        options[:first] ? values.first : values.unwrap
      end
    end

    # Normalizes an identifier: a DOI when normalize_doi recognises one,
    # otherwise a cleaned-up http(s) URL.
    #
    # @param id [String, nil] identifier to normalize
    # @param options [Hash] passed through to normalize_doi
    # @return [String, nil] normalized identifier; nil for blank input,
    #   non-http(s) URLs, URLs without a host, and unparsable URIs
    def normalize_id(id, options = {})
      return nil unless id.present?

      # a valid DOI takes precedence over plain URL handling
      normalized_doi = normalize_doi(id, options)
      return normalized_doi if normalized_doi.present?

      parsed = Addressable::URI.parse(id)
      http_url = parsed && parsed.host && %w[http https].include?(parsed.scheme)

      http_url ? PostRank::URI.clean(id) : nil
    rescue Addressable::URI::InvalidURIError
      nil
    end

    # Normalizes an http, https or ftp URL.
    #
    # @param id [String, nil] URL to normalize
    # @param options [Hash] :https -- when truthy, force the https scheme
    # @return [String, nil] normalized URL; the input unchanged when it starts
    #   with "info"; nil for blank input, other schemes, URLs without a host,
    #   and unparsable URIs
    def normalize_url(id, options = {})
      return nil unless id.present?
      # info URIs are passed through untouched
      return id if id.to_s.start_with?('info')

      parsed = Addressable::URI.parse(id)
      supported = parsed && parsed.host && %w[http https ftp].include?(parsed.scheme)
      return nil unless supported

      parsed.tap do |uri|
        uri.scheme = 'https' if options[:https]
        uri.path = PostRank::URI.clean(uri.path)
      end.to_s
    rescue Addressable::URI::InvalidURIError
      nil
    end

    # Normalizes a Creative Commons license URL: forces https via
    # normalize_url, then swaps in the NORMALIZED_LICENSES entry if one exists.
    #
    # @param id [String, nil] license URL
    # @return [String, nil] the mapped URL, or the https-normalized input when
    #   there is no mapping
    def normalize_cc_url(id)
      https_url = normalize_url(id, https: true)
      NORMALIZED_LICENSES.fetch(https_url) { https_url }
    end

    # Validates an ORCID iD and returns it as an https://orcid.org/ URL.
    #
    # @param orcid [String, nil] iD or ORCID URL
    # @return [String, nil] the ORCID URL, or nil when validate_orcid rejects
    #   the input
    def normalize_orcid(orcid)
      validated = validate_orcid(orcid)
      return nil unless validated.present?

      # turn ORCID ID into URL
      ['https://orcid.org/', Addressable::URI.encode(validated)].join
    end

    # Converts schema.org-style references (hashes with '@id' / '@type') into
    # related-identifier hashes.
    #
    # Entries without an '@id' are skipped. An '@id' that doi_from_url
    # recognises becomes a 'DOI' identifier, anything else a 'URL'.
    #
    # @param ids [Hash, Array<Hash>, nil] references to convert
    # @param relation_type [String, nil] value stored as 'relationType'
    # @return [Object] the converted hashes, passed through Array#unwrap
    def normalize_ids(ids: nil, relation_type: nil)
      identified = Array.wrap(ids).select { |item| item['@id'].present? }

      related = identified.map do |item|
        normalized = normalize_id(item['@id'])
        doi = doi_from_url(normalized)

        {
          'relatedIdentifier' => doi || normalized,
          'relationType' => relation_type,
          'relatedIdentifierType' => doi.present? ? 'DOI' : 'URL',
          'resourceTypeGeneral' => Metadata::SO_TO_DC_TRANSLATIONS[item['@type']]
        }.compact
      end

      related.unwrap
    end

    # pick electronic issn if there are multiple
    # format issn as xxxx-xxxx
    # Extracts an ISSN from a string, hash or array of hashes and returns it
    # in xxxx-xxxx form. For arrays, the entry whose 'media_type' is
    # 'electronic' is preferred, otherwise the first entry is used.
    #
    # @param input [String, Hash, Array<Hash>, nil] ISSN source
    # @param options [Hash] :content -- hash key holding the ISSN (default
    #   '__content__')
    # @return [String, nil] the ISSN, hyphenated if it was given as 8
    #   characters; nil when the value is not 8 or 9 characters long
    def normalize_issn(input, options = {})
      key = options[:content] || '__content__'

      raw =
        if input.blank?
          nil
        else
          case input
          when String
            # a plain string is only accepted when no content key was requested
            options[:content].nil? ? input : nil
          when Hash
            input.fetch(key, nil)
          when Array
            chosen = input.find { |entry| entry['media_type'] == 'electronic' } || input.first
            chosen.fetch(key, nil)
          end
        end

      length = raw.to_s.length
      return raw if length == 9
      return unless length == 8

      raw[0..3] + '-' + raw[4..7]
    end

    # find Creative Commons or OSI license in licenses array, normalize url and name
    # Normalizes the URL of the first Creative Commons or OSI license found
    # in `licenses`.
    #
    # The previous body used a local `uri` that was never assigned, so any
    # input containing a standard license raised NameError. The first standard
    # license is now the one that gets normalized.
    #
    # @param licenses [Hash, Array<Hash>, nil] license hashes with a 'url' key
    # @return [String, Object, nil] the normalized license URL; `licenses`
    #   unchanged when it holds no creativecommons.org / opensource.org URL;
    #   nil when a 'url' cannot be parsed
    def normalize_licenses(licenses)
      standard_licenses = Array.wrap(licenses)
                               .map { |license| URI.parse(license['url']) }
                               .select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
      return licenses unless standard_licenses.present?

      uri = standard_licenses.first

      # use HTTPS
      uri.scheme = 'https'

      # use host name without subdomain
      uri.host = Array(/(creativecommons.org|opensource.org)/.match(uri.host)).last

      # normalize URLs
      if uri.host == 'creativecommons.org'
        # drop a trailing /legalcode and make sure the path ends with a slash
        segments = uri.path.split('/')
        uri.path = segments[0..-2].join('/') if segments.last == 'legalcode'
        # reassign instead of appending in place: the path string may be frozen
        uri.path = "#{uri.path}/" unless uri.path.end_with?('/')
      else
        uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
        uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
        uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
        # rewrite a trailing version such as "-2" or "2.0" as "-<major>.<minor>"
        uri.path = uri.path.sub(/([^0-9-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
          m = Regexp.last_match
          text = m[1]

          if m[3].present?
            version = [m[3], m[5].presence || '0'].join('.')
            [text, version].join('-')
          else
            text
          end
        end
      end

      uri.to_s
    rescue URI::InvalidURIError
      nil
    end

    # Rewrites the keys of one or more hashes with String#dasherize.
    #
    # @param element [Hash, Array<Hash>, nil] hash(es) to convert
    # @param options [Hash] :first -- when truthy, return the unwrapped result
    # @return [Object, nil] converted hashes; without :first an empty result
    #   becomes nil
    def to_datacite_json(element, options = {})
      converted = Array.wrap(element).map do |item|
        item.map { |key, value| [key.dasherize, value] }.to_h
      end

      return converted.unwrap if options[:first]

      converted.presence
    end

    # Rewrites the keys of one or more hashes with String#underscore.
    #
    # @param element [Hash, Array<Hash>, nil] hash(es) to convert
    # @return [Array<Hash>] one converted hash per input hash (empty for nil)
    def from_datacite_json(element)
      Array.wrap(element).map do |item|
        item.map { |key, value| [key.underscore, value] }.to_h
      end
    end

    # Renames 'type', 'id' and 'title' keys to their schema.org names by
    # delegating to map_hash_keys.
    #
    # @param element [Object] value handed to map_hash_keys
    # @return [Object] whatever map_hash_keys returns
    def to_schema_org(element)
      map_hash_keys(
        element: element,
        mapping: { 'type' => '@type', 'id' => '@id', 'title' => 'name' }
      )
    end

    # Converts creator hashes into schema.org Person/Organization hashes.
    #
    # Note that each input hash is modified in place ('affiliation', '@type',
    # '@id' and 'name' are overwritten) before the cleaned copy is returned.
    #
    # @param element [Hash, Array<Hash>, nil] creator(s)
    # @return [Object] converted creators, passed through Array#unwrap
    def to_schema_org_creators(element)
      creators = Array.wrap(element).map do |creator|
        organizations = Array.wrap(creator['affiliation']).map do |affiliation|
          org_name, org_id =
            if affiliation.is_a?(String)
              [affiliation, nil]
            else
              [affiliation['name'], affiliation['affiliationIdentifier']]
            end

          { '@type' => 'Organization', '@id' => org_id, 'name' => org_name }.compact
        end
        creator['affiliation'] = organizations.unwrap

        # "Personal" -> "Person", "Organizational" -> "Organization"
        name_type = creator['nameType']
        creator['@type'] = name_type.present? ? name_type[0..-3] : nil

        first_identifier = Array.wrap(creator['nameIdentifiers']).first
        creator['@id'] = first_identifier.to_h.fetch('nameIdentifier', nil)

        creator['name'] =
          if creator['familyName'].present?
            creator.values_at('givenName', 'familyName').join(' ')
          else
            creator['name']
          end

        creator.except('nameIdentifiers', 'nameType').compact
      end

      creators.unwrap
    end

    # Converts contributor hashes into schema.org Person/Organization hashes.
    #
    # Note that each input hash is modified in place ('affiliation', '@type',
    # '@id' and 'name' are overwritten) before the cleaned copy is returned.
    #
    # @param element [Hash, Array<Hash>, nil] contributor(s)
    # @return [Object] converted contributors, passed through Array#unwrap
    def to_schema_org_contributors(element)
      contributors = Array.wrap(element).map do |contributor|
        organizations = Array.wrap(contributor['affiliation']).map do |affiliation|
          org_name, org_id =
            if affiliation.is_a?(String)
              [affiliation, nil]
            else
              [affiliation['name'], affiliation['affiliationIdentifier']]
            end

          { '@type' => 'Organization', '@id' => org_id, 'name' => org_name }.compact
        end
        contributor['affiliation'] = organizations.unwrap

        # "Personal" -> "Person", "Organizational" -> "Organization"
        name_type = contributor['nameType']
        contributor['@type'] = name_type.present? ? name_type[0..-3] : nil

        first_identifier = Array.wrap(contributor['nameIdentifiers']).first
        contributor['@id'] = first_identifier.to_h.fetch('nameIdentifier', nil)

        contributor['name'] =
          if contributor['familyName'].present?
            contributor.values_at('givenName', 'familyName').join(' ')
          else
            contributor['name']
          end

        contributor.except('nameIdentifiers', 'nameType').compact
      end

      contributors.unwrap
    end

    # Builds the schema.org container (Periodical or DataCatalog) for a work.
    #
    # The guard accepts a nil `element` when a :container_title is given, but
    # the body used to index into `element` directly and raised NoMethodError
    # in exactly that case. A nil element is now treated as an empty hash.
    #
    # @param element [Hash, nil] container with 'identifier' and 'title' keys
    # @param options [Hash] :type -- 'Dataset' selects 'DataCatalog';
    #   :container_title -- fallback name when the element has no 'title'
    # @return [Hash, nil] the container hash without nil values; nil when
    #   `element` is neither a Hash nor nil-with-a-container-title
    def to_schema_org_container(element, options = {})
      return nil unless element.is_a?(Hash) || (element.nil? && options[:container_title].present?)

      container = element || {}

      {
        '@id' => container['identifier'],
        '@type' => options[:type] == 'Dataset' ? 'DataCatalog' : 'Periodical',
        'name' => container['title'] || options[:container_title]
      }.compact
    end

    # Converts identifier hashes into schema.org PropertyValue hashes.
    #
    # @param element [Hash, Array<Hash>, nil] hashes with 'identifierType' and
    #   'identifier' keys
    # @param _options [Hash] unused
    # @return [Object] PropertyValue hashes, passed through Array#unwrap
    def to_schema_org_identifiers(element, _options = {})
      property_values = Array.wrap(element).map do |identifier|
        {
          '@type' => 'PropertyValue',
          'propertyID' => identifier['identifierType'],
          'value' => identifier['identifier']
        }
      end

      property_values.unwrap
    end

    # Selects the related identifiers with a given relation type and converts
    # them to schema.org references.
    #
    # 'References' also matches 'Cites' and 'Documents'. An ISSN with relation
    # 'IsPartOf' becomes a Periodical; everything else becomes an '@id'
    # reference typed via DC_TO_SO_TRANSLATIONS (default 'CreativeWork').
    #
    # @param related_identifiers [Hash, Array<Hash>, nil] related identifiers
    # @param relation_type [String, nil] relation type to select
    # @return [Object, nil] converted references, passed through Array#unwrap;
    #   nil when either argument is missing
    def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
      return nil unless related_identifiers.present? && relation_type.present?

      accepted = relation_type == 'References' ? %w[References Cites Documents] : [relation_type]
      matching = Array.wrap(related_identifiers).select do |candidate|
        accepted.include?(candidate['relationType'])
      end

      references = matching.map do |related|
        issn_container = related['relatedIdentifierType'] == 'ISSN' && related['relationType'] == 'IsPartOf'

        if issn_container
          { '@type' => 'Periodical', 'issn' => related['relatedIdentifier'] }.compact
        else
          schema_type = DC_TO_SO_TRANSLATIONS[related['resourceTypeGeneral']] || 'CreativeWork'
          { '@id' => normalize_id(related['relatedIdentifier']), '@type' => schema_type }.compact
        end
      end

      references.unwrap
    end

    # Converts funding references into schema.org Organization hashes.
    #
    # @param funding_references [Hash, Array<Hash>, nil] hashes with
    #   'funderIdentifier' and 'funderName' keys
    # @return [Object, nil] funder hashes without nil values, passed through
    #   Array#unwrap; nil when no funding references are present
    def to_schema_org_funder(funding_references)
      return nil unless funding_references.present?

      funders = Array.wrap(funding_references).map do |reference|
        funder = {
          '@id' => reference['funderIdentifier'],
          '@type' => 'Organization',
          'name' => reference['funderName']
        }
        funder.compact
      end

      funders.unwrap
    end

    # Converts geo locations into schema.org Place hashes.
    #
    # Each location can contribute several places: one per point, box and
    # polygon it carries. A location that only has a place name yields a
    # single Place with just an address.
    #
    # @param geo_location [Hash, Array<Hash>, nil] hashes with the keys
    #   'geoLocationPlace', 'geoLocationPoint', 'geoLocationBox' and
    #   'geoLocationPolygon'
    # @return [Object, nil] Place hashes, passed through Array#unwrap; nil
    #   when no geo location is present
    def to_schema_org_spatial_coverage(geo_location)
      return nil unless geo_location.present?

      places = Array.wrap(geo_location).flat_map do |location|
        point = location.fetch('geoLocationPoint', nil)
        box = location.fetch('geoLocationBox', nil)
        polygon = location.fetch('geoLocationPolygon', nil)
        address = location['geoLocationPlace']
        found = []

        if point
          found << {
            '@type' => 'Place',
            'geo' => {
              '@type' => 'GeoCoordinates',
              'address' => address,
              'latitude' => location.dig('geoLocationPoint', 'pointLatitude'),
              'longitude' => location.dig('geoLocationPoint', 'pointLongitude')
            }
          }.compact
        end

        if box
          # schema.org box order: south, west, north, east
          corners = %w[southBoundLatitude westBoundLongitude northBoundLatitude eastBoundLongitude].map do |edge|
            location.dig('geoLocationBox', edge)
          end

          found << {
            '@type' => 'Place',
            'geo' => {
              '@type' => 'GeoShape',
              'address' => address,
              'box' => corners.compact.join(' ').presence
            }.compact
          }.compact
        end

        if polygon
          rings = Array.wrap(location.dig('geoLocationPolygon')).map do |ring|
            Array.wrap(ring).map do |vertex|
              # longitude first, then latitude
              [vertex.dig('polygonPoint', 'pointLongitude'),
               vertex.dig('polygonPoint', 'pointLatitude')].compact
            end.compact
          end

          found << {
            '@type' => 'Place',
            'geo' => {
              '@type' => 'GeoShape',
              'address' => address,
              'polygon' => rings.compact.presence
            }
          }
        end

        # place name only: no point, box or polygon to attach it to
        if location.fetch('geoLocationPlace', nil) && !(point || box || polygon)
          found << {
            '@type' => 'Place',
            'geo' => {
              '@type' => 'GeoCoordinates',
              'address' => address
            }
          }.compact
        end

        found
      end

      places.unwrap
    end

    # Rename the JSON-LD keys '@type' and '@id' to 'type' and 'id' by
    # delegating to map_hash_keys.
    def from_schema_org(element)
      map_hash_keys(element: element, mapping: { '@type' => 'type', '@id' => 'id' })
    end

    # Convert schema.org creator entries into DataCite-style creator hashes.
    #
    # element - a creator hash or an array of them. Each hash is modified in
    #           place while it is converted.
    #
    # For every creator this:
    # * turns a plain-string affiliation into { 'name' => ... },
    # * derives the affiliation identifier scheme (ROR / ISNI) from the
    #   affiliation '@id' prefix,
    # * falls back to 'identifier', and then to an orcid.org URL found in
    #   'sameAs', for the creator '@id',
    # * adds 'nameIdentifier', 'creatorName' and a rebuilt 'affiliation'.
    #
    # Returns an array of converted hashes without '@id', '@type' and 'name'.
    def from_schema_org_creators(element)
      Array.wrap(element).map do |c|
        # a plain-string affiliation carries a name but no identifier
        c['affiliation'] = { 'name' => c['affiliation'] } if c['affiliation'].is_a?(String)

        affiliation_id = c.dig('affiliation', '@id').to_s
        if affiliation_id.starts_with?('https://ror.org')
          affiliation_identifier_scheme = 'ROR'
          scheme_uri = 'https://ror.org/'
        elsif affiliation_id.starts_with?('https://isni.org')
          affiliation_identifier_scheme = 'ISNI'
          scheme_uri = 'https://isni.org/isni/'
        else
          affiliation_identifier_scheme = nil
          scheme_uri = nil
        end

        # alternatively find the nameIdentifier in the identifier attribute
        c['@id'] = c['identifier'] if c['identifier'].present? && c['@id'].blank?

        # alternatively find the nameIdentifier in the sameAs attribute:
        # use the entry that actually points at orcid.org, which is not
        # necessarily the first sameAs entry
        orcid_same_as = Array(c['sameAs']).find { |item| URI(item).host == 'orcid.org' }
        c['@id'] = orcid_same_as if orcid_same_as

        if normalize_orcid(c['@id'])
          c['nameIdentifier'] =
            [{ '__content__' => c['@id'], 'nameIdentifierScheme' => 'ORCID',
               'schemeUri' => 'https://orcid.org' }]
        end

        # with several types, keep the first that is Person or Organization
        c['@type'] = c['@type'].find { |t| %w[Person Organization].include?(t) } if c['@type'].is_a?(Array)

        # 'Person' => 'Personal', 'Organization' => 'Organizational'
        c['creatorName'] =
          { 'nameType' => c['@type'].present? ? c['@type'].titleize + 'al' : nil,
            '__content__' => c['name'] }.compact
        c['affiliation'] =
          { '__content__' => c.dig('affiliation', 'name'),
            'affiliationIdentifier' => c.dig('affiliation', '@id'),
            'affiliationIdentifierScheme' => affiliation_identifier_scheme,
            'schemeUri' => scheme_uri }.compact.presence
        c.except('@id', '@type', 'name').compact
      end
    end

    # Convert schema.org contributor entries into DataCite-style contributor
    # hashes.
    #
    # element - a contributor hash or an array of them. Each hash is modified
    #           in place while it is converted.
    #
    # For every contributor this turns a plain-string affiliation into
    # { 'name' => ... }, derives the affiliation identifier scheme
    # (ROR / ISNI) from the affiliation '@id' prefix, and adds
    # 'nameIdentifier', 'contributorName' and a rebuilt 'affiliation'.
    #
    # Returns an array of converted hashes without '@id', '@type' and 'name'.
    def from_schema_org_contributors(element)
      Array.wrap(element).map do |c|
        # a plain-string affiliation carries a name but no identifier
        c['affiliation'] = { 'name' => c['affiliation'] } if c['affiliation'].is_a?(String)

        affiliation_id = c.dig('affiliation', '@id').to_s
        if affiliation_id.starts_with?('https://ror.org')
          affiliation_identifier_scheme = 'ROR'
          scheme_uri = 'https://ror.org/'
        elsif affiliation_id.starts_with?('https://isni.org')
          affiliation_identifier_scheme = 'ISNI'
          scheme_uri = 'https://isni.org/isni/'
        else
          affiliation_identifier_scheme = nil
          scheme_uri = nil
        end

        if normalize_orcid(c['@id'])
          c['nameIdentifier'] =
            [{ '__content__' => c['@id'], 'nameIdentifierScheme' => 'ORCID',
               'schemeUri' => 'https://orcid.org' }]
        end

        # '@type' may be a list; keep the first Person/Organization entry so
        # that titleize below is always called on a string
        c['@type'] = c['@type'].find { |t| %w[Person Organization].include?(t) } if c['@type'].is_a?(Array)

        # 'Person' => 'Personal', 'Organization' => 'Organizational'
        c['contributorName'] =
          { 'nameType' => c['@type'].present? ? c['@type'].titleize + 'al' : nil,
            '__content__' => c['name'] }.compact
        c['affiliation'] =
          { '__content__' => c.dig('affiliation', 'name'),
            'affiliationIdentifier' => c.dig('affiliation', '@id'),
            'affiliationIdentifierScheme' => affiliation_identifier_scheme,
            'schemeUri' => scheme_uri }.compact.presence
        c.except('@id', '@type', 'name').compact
      end
    end

    # Rename the top-level keys of one or more hashes.
    #
    # element - a hash or array of hashes.
    # mapping - hash of old key => new key; keys not listed are kept as is.
    #
    # Values that are themselves hashes are handed to to_schema_org.
    # Returns the converted list passed through `unwrap`.
    def map_hash_keys(element: nil, mapping: nil)
      renamed = Array.wrap(element).map do |hash|
        hash.each_with_object({}) do |(key, value), result|
          result[mapping.fetch(key, key)] = value.is_a?(Hash) ? to_schema_org(value) : value
        end
      end
      renamed.unwrap
    end

    # Express a related identifier hash as a schema.org PropertyValue, using
    # its 'relatedIdentifierType' and 'relatedIdentifier' entries.
    def to_identifier(identifier)
      type, value = identifier.values_at('relatedIdentifierType', 'relatedIdentifier')
      { '@type' => 'PropertyValue', 'propertyID' => type, 'value' => value }
    end

    # Convert citeproc name hashes into schema.org-style name hashes.
    #
    # An entry with 'literal' or 'name' becomes an Organization (the literal,
    # when present, becomes the name); anything else becomes a Person whose
    # name is "given family". 'given'/'family' are copied to
    # 'givenName'/'familyName'. Input hashes are modified in place.
    #
    # Returns the converted list passed through `unwrap`.
    def from_citeproc(element)
      converted = Array.wrap(element).map do |author|
        has_literal = author['literal'].present?
        if has_literal || author['name'].present?
          author['@type'] = 'Organization'
          author['name'] = author['literal'] if has_literal
        else
          author['@type'] = 'Person'
          author['name'] = [author['given'], author['family']].compact.join(' ')
        end
        author['givenName'] = author['given']
        author['familyName'] = author['family']
        author.except('given', 'family', 'literal').compact
      end
      converted.unwrap
    end

    # Convert name hashes into citeproc name hashes.
    #
    # 'familyName'/'givenName' are copied to 'family'/'given'; when there is
    # no family name the full 'name' is used as 'literal'. Input hashes are
    # modified in place.
    #
    # Returns the converted array, or nil when it is empty.
    def to_citeproc(element)
      dropped_keys = %w[nameType type @type id @id name familyName givenName
                        affiliation nameIdentifiers contributorType]

      authors = Array.wrap(element).map do |author|
        author['family'] = author['familyName']
        author['given'] = author['givenName']
        author['literal'] = author['name'] if author['familyName'].blank?
        author.except(*dropped_keys).compact
      end
      authors.presence
    end

    # Format names for RIS: "familyName, givenName" when a family name is
    # present, otherwise the plain 'name'.
    #
    # Returns the formatted list passed through `unwrap`.
    def to_ris(element)
      names = Array.wrap(element).map do |author|
        has_family = author['familyName'].present?
        has_family ? [author['familyName'], author['givenName']].join(', ') : author['name']
      end
      names.unwrap
    end

    # Strip markup from text, keeping only a small set of inline tags.
    #
    # text    - a String, a Hash (its content key is sanitized), or an Array
    #           of Strings/Hashes. Any other value yields nil.
    # options - :tags    tags handed to the scrubber (defaults to a small
    #                    set of inline formatting tags)
    #           :content key read from Hash input (default '__content__')
    #           :first   for Array input, return only the first result
    #           The whole hash is also passed to Briard::WhitelistScrubber.
    #
    # Strings are scrubbed with Loofah and squished. Arrays are sanitized
    # element by element, de-duplicated, and then either reduced to the first
    # element (:first) or passed through `unwrap`.
    def sanitize(text, options = {})
      # work on a copy so that filling in defaults does not leak into the
      # caller's hash
      options = options.dup
      options[:tags] ||= Set.new(%w[strong em b i code pre sub sup br])
      content = options[:content] || '__content__'
      custom_scrubber = Briard::WhitelistScrubber.new(options)

      # nested values must be scrubbed with the same tags/content key as the
      # outer call; :first only applies to the outermost array
      nested_options = options.reject { |key, _| key == :first }

      if text.is_a?(String)
        # remove excessive internal whitespace with squish
        Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
      elsif text.is_a?(Hash)
        sanitize(text.fetch(content, nil), nested_options)
      elsif text.is_a?(Array)
        sanitized = text.map do |e|
          sanitize(e.is_a?(Hash) ? e.fetch(content, nil) : e, nested_options)
        end.uniq
        options[:first] ? sanitized.first : sanitized.unwrap
      end
    end

    # Split a https://github.com/ URL into its path components.
    #
    # The path segments are read positionally: owner, repo, a third segment
    # that is ignored, release, and everything after that joined as path.
    #
    # Returns a hash with the :owner, :repo, :release and :path keys that are
    # not nil, or {} when the URL does not start with https://github.com/.
    def github_from_url(url)
      return {} unless %r{\Ahttps://github\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\z}.match?(url)

      owner, repo, _kind, release, *remainder = URI.parse(url).path[1..-1].split('/')
      # split never yields nil segments, so a nil release means fewer than
      # four segments; with four or more the path is always a string
      path = release.nil? ? nil : remainder.join('/')

      { owner: owner, repo: repo, release: release, path: path }.compact
    end

    # Repository name from a GitHub URL, or nil when there is none.
    def github_repo_from_url(url)
      github_from_url(url)[:repo]
    end

    # Release segment from a GitHub URL, or nil when there is none.
    def github_release_from_url(url)
      github_from_url(url)[:release]
    end

    # Owner segment from a GitHub URL, or nil when there is none.
    def github_owner_from_url(url)
      github_from_url(url)[:owner]
    end

    # URL of the owner page for a GitHub URL, or nil when no owner is found.
    def github_as_owner_url(url)
      owner = github_from_url(url)[:owner]
      return unless owner.present?

      "https://github.com/#{owner}"
    end

    # URL of the repository for a GitHub URL, or nil when no repo is found.
    def github_as_repo_url(url)
      owner, repo = github_from_url(url).values_at(:owner, :repo)
      "https://github.com/#{owner}/#{repo}" if repo.present?
    end

    # URL of the release tree for a GitHub URL, or nil when no release is
    # found.
    def github_as_release_url(url)
      owner, repo, release = github_from_url(url).values_at(:owner, :repo, :release)
      "https://github.com/#{owner}/#{repo}/tree/#{release}" if release.present?
    end

    # raw.githubusercontent.com URL of the codemeta.json for a GitHub URL.
    #
    # A URL that already points at a codemeta.json keeps its release and
    # path; any other URL with an owner falls back to codemeta.json on the
    # master branch. Returns nil when no owner is found.
    def github_as_codemeta_url(url)
      owner, repo, release, path = github_from_url(url).values_at(:owner, :repo, :release, :path)

      return "https://raw.githubusercontent.com/#{owner}/#{repo}/#{release}/#{path}" if path.to_s.end_with?('codemeta.json')

      "https://raw.githubusercontent.com/#{owner}/#{repo}/master/codemeta.json" if owner.present?
    end

    # raw.githubusercontent.com URL of the CITATION.cff for a GitHub URL.
    #
    # A URL that already points at a CITATION.cff keeps its release and path;
    # any other URL with an owner falls back to CITATION.cff on the main
    # branch. Returns nil when no owner is found.
    def github_as_cff_url(url)
      owner, repo, release, path = github_from_url(url).values_at(:owner, :repo, :release, :path)

      return "https://raw.githubusercontent.com/#{owner}/#{repo}/#{release}/#{path}" if path.to_s.end_with?('CITATION.cff')

      "https://raw.githubusercontent.com/#{owner}/#{repo}/main/CITATION.cff" if owner.present?
    end

    # Convert an ISO 8601 date string into a citeproc 'date-parts' hash.
    #
    # Year, month and day are read from fixed character positions; parts
    # that convert to 0 are left out. nil input gives an empty date-parts
    # list; a TypeError while slicing gives nil.
    def get_date_parts(iso8601_time)
      return { 'date-parts' => [[]] } if iso8601_time.nil?

      # character ranges of year, month and day in YYYY-MM-DD
      numbers = [0..3, 5..6, 8..9].map { |range| iso8601_time[range].to_i }
      { 'date-parts' => [numbers.reject(&:zero?)] }
    rescue TypeError
      nil
    end

    # Convert a citeproc 'date-parts' hash into a date string via
    # get_date_from_parts, using the first date-parts entry.
    #
    # Returns nil when that entry is [nil], or when a NoMethodError is
    # raised while reading the input (for example nil input or no entry).
    def get_date_from_date_parts(date_as_parts)
      parts = date_as_parts.fetch('date-parts', []).first
      return nil if parts == [nil]

      get_date_from_parts(parts[0], parts[1], parts[2])
    rescue NoMethodError # e.g. date_as_parts or its first entry is nil
      nil
    end

    # Join year, month and day into a zero-padded, hyphen-separated date
    # string. The year is padded to 4 digits, month and day to 2; parts that
    # pad to '00' (missing or zero) are left out.
    def get_date_from_parts(year, month = nil, day = nil)
      padded = [[year, 4], [month, 2], [day, 2]].map do |value, width|
        value.to_s.rjust(width, '0')
      end
      padded.reject { |part| part == '00' }.join('-')
    end

    # Build a citeproc 'date-parts' hash from year, month and day; parts that
    # convert to 0 are left out.
    def get_date_parts_from_parts(year, month = nil, day = nil)
      numbers = [year, month, day].map(&:to_i).reject(&:zero?)
      { 'date-parts' => [numbers] }
    end

    # First ten characters (the YYYY-MM-DD part) of an ISO 8601 timestamp,
    # or nil for nil input.
    def get_iso8601_date(iso8601_time)
      iso8601_time[0..9] unless iso8601_time.nil?
    end

    # Year and month of an ISO 8601 date string as integers, read from fixed
    # character positions; parts that convert to 0 are left out.
    # Returns [] for nil input.
    def get_year_month(iso8601_time)
      return [] if iso8601_time.nil?

      [0..3, 5..6].map { |range| iso8601_time[range].to_i }.reject(&:zero?)
    end

    # Year, month and day of an ISO 8601 date string as integers, read from
    # fixed character positions; parts that convert to 0 are left out.
    # Returns [] for nil input.
    def get_year_month_day(iso8601_time)
      return [] if iso8601_time.nil?

      [0..3, 5..6, 8..9].map { |range| iso8601_time[range].to_i }.reject(&:zero?)
    end

    # Parse an ISO 8601 timestamp with ISO8601::DateTime and return it as a
    # UTC Time. That class is used because parsing of incomplete timestamps
    # such as 2015-04 is broken in the standard library.
    # Returns nil when any StandardError is raised while parsing.
    def get_datetime_from_iso8601(iso8601_time)
      parsed = ISO8601::DateTime.new(iso8601_time)
      parsed.to_time.utc
    rescue StandardError
      nil
    end

    # Strip milliseconds if there is a time, as they interfere with edtf
    # parsing; plain dates are returned unchanged.
    #
    # A value containing a space is cut at the first whitespace; otherwise a
    # value containing '.' is cut at the first '.' and gets a 'Z' appended.
    def strip_milliseconds(iso8601_time)
      text = iso8601_time.to_s

      if text.include?(' ')
        iso8601_time.split(' ').first
      elsif text.include?('.')
        iso8601_time.split('.').first + 'Z'
      else
        iso8601_time
      end
    end

    # Convert a compact timestamp without hyphens and colons
    # (YYYYMMDDHHMMSS, as used by Crossref) into YYYY-MM-DDTHH:MM:SSZ.
    # Returns nil when the value cannot be parsed.
    def get_datetime_from_time(time)
      parsed = DateTime.strptime(time.to_s, '%Y%m%d%H%M%S')
      parsed.strftime('%Y-%m-%dT%H:%M:%SZ')
    rescue ArgumentError
      nil
    end

    # 'date' value of the first entry in dates whose 'dateType' equals
    # date_type, or nil when there is no such entry.
    def get_date(dates, date_type)
      match = Array.wrap(dates).find { |entry| entry['dateType'] == date_type }
      match.nil? ? nil : match.fetch('date', nil)
    end

    # Contributors whose 'contributorType' equals contributor_type.
    #
    # contributor      - a contributor hash, an array of them, or nil.
    # contributor_type - the type to keep, e.g. 'Editor'.
    #
    # Returns an array (empty when nothing matches or the input is nil).
    def get_contributor(contributor, contributor_type)
      # wrap like the sibling get_date / get_identifier lookups so that nil
      # and a single hash are handled instead of raising
      Array.wrap(contributor).select { |c| c['contributorType'] == contributor_type }
    end

    # 'identifier' value of the first entry in identifiers whose
    # 'identifierType' equals identifier_type, or nil when there is none.
    def get_identifier(identifiers, identifier_type)
      match = Array.wrap(identifiers).find { |entry| entry['identifierType'] == identifier_type }
      match.nil? ? nil : match.fetch('identifier', nil)
    end

    # Return the canonical spelling of an identifier type.
    #
    # The lookup is case-insensitive ('doi' and 'Doi' both give 'DOI').
    # Unknown types are returned unchanged; blank input gives nil.
    def get_identifier_type(identifier_type)
      return nil unless identifier_type.present?

      # canonical spellings, indexed by their lowercase form
      canonical = %w[ARK arXiv bibcode DOI EAN13 EISSN Handle IGSN ISBN ISSN ISTC
                     LISSN LSID PMID PURL UPC URL URN md5 minid dataguid]
      by_lowercase = canonical.to_h { |name| [name.downcase, name] }

      by_lowercase.fetch(identifier_type.downcase, identifier_type)
    end

    # Parse a comma-separated series string such as
    # "Journal, 5(3), 10-20" into its components.
    #
    # The first segment is the title. With more than two segments, the second
    # one is read as "volume(issue)". With more than one segment, the last
    # one is read as "firstPage-lastPage".
    #
    # Returns a hash with the 'title', 'volume', 'issue', 'firstPage' and
    # 'lastPage' keys that could be filled, or {} for blank input.
    def get_series_information(str)
      return {} unless str.present?

      segments = str.split(',').map(&:strip)

      volume = nil
      issue = nil
      if segments.length > 2
        # split around the last "(...)" group: text before it is the volume,
        # the group itself (without parentheses) the issue
        before, parenthesized, after = segments[1].rpartition(/\(([^)]+)\)/)
        volume = before.presence || after.presence
        issue = parenthesized[1...-1].presence
      end

      pages = segments.length > 1 ? segments.last : nil
      first_page, last_page = pages.split('-').map(&:strip) if pages.present?

      {
        'title' => segments.first,
        'volume' => volume,
        'issue' => issue,
        'firstPage' => first_page,
        'lastPage' => last_page
      }.compact
    end

    # Lint a JSON string with JsonLint and return the list of errors.
    #
    # Returns ['No JSON provided'] for blank input; otherwise the array that
    # the linter's check_data method was given to fill.
    def jsonlint(json)
      return ['No JSON provided'] unless json.present?

      errors = []
      # check_data is invoked through send, as in the original call
      JsonLint::Linter.new.send(:check_data, json, errors)
      errors
    end

    # Look up a license in the bundled SPDX license list and return rights
    # metadata for it.
    #
    # name - a license name, an SPDX license id, or a license URL.
    #
    # A license matches when its 'name' or 'licenseId' equals name, or when
    # its first 'seeAlso' URL equals the normalized form of name.
    #
    # Returns a hash with 'rights', 'rightsUri', 'rightsIdentifier',
    # 'rightsIdentifierScheme' and 'schemeUri' for a match, otherwise
    # { 'rights' => name }.
    def name_to_spdx(name)
      spdx = JSON.load(File.read(File.expand_path('../../resources/spdx/licenses.json',
                                                  __dir__))).fetch('licenses')
      # the normalized URL does not depend on the license being inspected, so
      # compute it once instead of once per license entry
      normalized_url = normalize_cc_url(name)
      license = spdx.find do |l|
        l['name'] == name || l['licenseId'] == name || l['seeAlso'].first == normalized_url
      end

      if license
        {
          'rights' => license['name'],
          'rightsUri' => license['seeAlso'].first,
          'rightsIdentifier' => license['licenseId'].downcase,
          'rightsIdentifierScheme' => 'SPDX',
          'schemeUri' => 'https://spdx.org/licenses/'
        }.compact
      else
        { 'rights' => name }
      end
    end

    # Normalize a rights hash against the bundled SPDX license list.
    #
    # hsh - hash that may carry 'rights' / '__content__', 'rightsURI' or
    #       'rightsUri', 'rightsIdentifier', 'rightsIdentifierScheme',
    #       'schemeUri' and 'lang'.
    #
    # A license matches when its id equals 'rightsIdentifier' (ignoring
    # case), its first 'seeAlso' URL equals the normalized rights URI, its
    # name equals 'rights', or its first 'seeAlso' URL equals the normalized
    # 'rights' value.
    #
    # Returns the SPDX-based rights hash for a match, otherwise the input
    # values under normalized key names; nil values are dropped.
    def hsh_to_spdx(hsh)
      spdx = JSON.load(File.read(File.expand_path('../../resources/spdx/licenses.json',
                                                  __dir__))).fetch('licenses')

      # accept both spellings of the URI key, as the fallback branch does
      rights_uri = hsh['rightsURI'] || hsh['rightsUri']

      # these do not depend on the license being inspected, so normalize
      # them once instead of once per license entry
      normalized_uri = normalize_cc_url(rights_uri)
      normalized_rights = normalize_cc_url(hsh['rights'])

      license = spdx.find do |l|
        l['licenseId'].casecmp?(hsh['rightsIdentifier']) ||
          l['seeAlso'].first == normalized_uri ||
          l['name'] == hsh['rights'] ||
          l['seeAlso'].first == normalized_rights
      end

      if license
        {
          'rights' => license['name'],
          'rightsUri' => license['seeAlso'].first,
          'rightsIdentifier' => license['licenseId'].downcase,
          'rightsIdentifierScheme' => 'SPDX',
          'schemeUri' => 'https://spdx.org/licenses/',
          'lang' => hsh['lang']
        }.compact
      else
        {
          'rights' => hsh['__content__'] || hsh['rights'],
          'rightsUri' => rights_uri,
          'rightsIdentifier' => hsh['rightsIdentifier'].present? ? hsh['rightsIdentifier'].downcase : nil,
          'rightsIdentifierScheme' => hsh['rightsIdentifierScheme'],
          'schemeUri' => hsh['schemeUri'],
          'lang' => hsh['lang']
        }.compact
      end
    end

    # Map a subject name to subject hashes, adding a Fields of Science and
    # Technology (FOS) entry when the name can be classified.
    #
    # The name is first looked up in the bundled OECD FOS mappings (with or
    # without a 'FOS: ' prefix). If that fails it is looked up in the bundled
    # Fields of Research mappings (Australian and New Zealand Standard
    # Research Classification), which carry the corresponding FOS label.
    #
    # Returns an array with the sanitized, downcased subject, followed by a
    # FOS entry when a mapping was found.
    def name_to_fos(name)
      fos_fields = JSON.load(File.read(File.expand_path('../../resources/oecd/fos-mappings.json',
                                                        __dir__))).fetch('fosFields')
      match = fos_fields.find { |l| l['fosLabel'] == name || 'FOS: ' + l['fosLabel'] == name }

      unless match
        # the FOR mappings are only read when the FOS lookup found nothing
        for_mappings = JSON.load(File.read(File.expand_path('../../resources/oecd/for-mappings.json',
                                                            __dir__)))
        for_fields = for_mappings.fetch('forFields')
        for_disciplines = for_mappings.fetch('forDisciplines')

        match = for_fields.find { |l| l['forLabel'] == name } ||
                for_disciplines.find { |l| l['forLabel'] == name }
      end

      subjects = [{ 'subject' => sanitize(name).downcase }]
      return subjects unless match

      subjects << {
        'subject' => 'FOS: ' + match['fosLabel'],
        'subjectScheme' => 'Fields of Science and Technology (FOS)',
        'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
      }
    end

    # Normalize a subject hash and add a Fields of Science and Technology
    # (FOS) entry when the subject can be classified.
    #
    # hsh - hash that may carry '__content__' or 'subject', 'subjectScheme',
    #       'schemeURI' or 'schemeUri', 'valueURI' or 'valueUri',
    #       'classificationCode' and 'lang'.
    #
    # The subject is first looked up in the bundled OECD FOS mappings. If
    # that fails it is looked up in the bundled Fields of Research (FOR)
    # mappings: by id when 'subjectScheme' is 'FOR', otherwise by label.
    #
    # Returns an array with the normalized subject hash, followed by a FOS
    # entry when a mapping was found. nil values are dropped.
    def hsh_to_fos(hsh)
      # first find subject in Fields of Science (OECD)
      fos = JSON.load(File.read(File.expand_path('../../resources/oecd/fos-mappings.json',
                                                 __dir__))).fetch('fosFields')
      subject = fos.find do |l|
        l['fosLabel'] == hsh['__content__'] || 'FOS: ' + l['fosLabel'] == hsh['__content__'] || l['fosLabel'] == hsh['subject']
      end

      if subject
        return [{
          'subject' => sanitize(hsh['__content__'] || hsh['subject']),
          'subjectScheme' => hsh['subjectScheme'],
          'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
          'valueUri' => hsh['valueURI'] || hsh['valueUri'],
          'classificationCode' => hsh['classificationCode'],
          'lang' => hsh['lang']
        }.compact,
                {
                  'subject' => 'FOS: ' + subject['fosLabel'],
                  'subjectScheme' => 'Fields of Science and Technology (FOS)',
                  'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
                }.compact]
      end

      # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
      # and map to Fields of Science. Add an extra entry for the latter
      fores = JSON.load(File.read(File.expand_path('../../resources/oecd/for-mappings.json',
                                                   __dir__)))
      for_fields = fores.fetch('forFields')
      for_disciplines = fores.fetch('forDisciplines')

      if hsh['subjectScheme'] == 'FOR'
        # try to extract forId: the first whitespace-separated token of the
        # subject text
        for_id = hsh['__content__'].to_s.split(' ').first || hsh['subject'].to_s.split(' ').first

        # a FOR subject without any text has no id to look up; skip the
        # lookup instead of calling rjust on nil
        if for_id
          # left-pad to six characters; disciplines are matched on the
          # first four characters of the padded id
          for_id = for_id.rjust(6, '0')

          subject = for_fields.find { |l| l['forId'] == for_id } ||
                    for_disciplines.find { |l| l['forId'] == for_id[0..3] }
        end
      else
        subject = for_fields.find do |l|
          l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject']
        end ||
                  for_disciplines.find do |l|
                    l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject']
                  end
      end

      if subject
        [{
          'subject' => sanitize(hsh['__content__'] || hsh['subject']),
          'subjectScheme' => hsh['subjectScheme'],
          'classificationCode' => hsh['classificationCode'],
          'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
          'valueUri' => hsh['valueURI'] || hsh['valueUri'],
          'lang' => hsh['lang']
        }.compact,
         {
           'subject' => 'FOS: ' + subject['fosLabel'],
           'subjectScheme' => 'Fields of Science and Technology (FOS)',
           'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
         }]
      else
        [{
          'subject' => sanitize(hsh['__content__'] || hsh['subject']),
          'subjectScheme' => hsh['subjectScheme'],
          'classificationCode' => hsh['classificationCode'],
          'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
          'valueUri' => hsh['valueURI'] || hsh['valueUri'],
          'lang' => hsh['lang']
        }.compact]
      end
    end

    # Generate a random DOI URL for the given prefix.
    #
    # A random integer between 2**63 and 2**64 - 1 is encoded with
    # Base32::URL and written as two hyphen-separated groups of up to seven
    # characters. The second group starts at index 6, so it repeats the last
    # character of the first group.
    def encode_doi(prefix)
      random_int = SecureRandom.random_number(2**63..(2**64) - 1)
      encoded = Base32::URL.encode(random_int)
      str = [encoded[0, 7], encoded[6, 7]].join('-')
      "https://doi.org/#{prefix}/#{str}"
    end

    # Decode the suffix of a DOI URL with Base32::URL.
    #
    # The DOI is split on '/' into at most five pieces and the last piece is
    # taken as the suffix, which for https://doi.org/<prefix>/<suffix> is
    # everything after the prefix.
    def decode_doi(doi)
      *_leading, suffix = doi.split('/', 5)
      Base32::URL.decode(suffix)
    end
  end
end