# frozen_string_literal: true

module Briard
  module Readers
    module DataciteReader
      def get_datacite(id: nil, **options)
        return { 'string' => nil, 'state' => 'not_found' } unless id.present?

        api_url = datacite_api_url(id, options)
        response = Maremma.get(api_url)
        attributes = response.body.dig('data', 'attributes')
        return { 'string' => nil, 'state' => 'not_found' } unless attributes.present?

        string = attributes.fetch('xml', nil)
        string = Base64.decode64(string) if string.present?

        if string.present?
          doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)

          # remove leading and trailing whitespace in text nodes
          doc.xpath('//text()').each do |node|
            if /\S/.match?(node.content)
              node.content = node.content.strip
            else
              node.remove
            end
          end
          string = doc.to_xml(indent: 2)
        end

        client = Array.wrap(response.body.fetch('included', nil)).find do |m|
          m['type'] == 'clients'
        end
        client_id = client.to_h.fetch('id', nil)
        provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
          m['provider'].present?
        end.to_h.dig('provider', 'data', 'id')

        content_url = attributes.fetch('contentUrl', nil) || Array.wrap(response.body.fetch('included', nil)).select do |m|
          m['type'] == 'media'
        end.map do |m|
          m.dig('attributes', 'url')
        end.compact

        { 'string' => string,
          'url' => attributes.fetch('url', nil),
          'state' => attributes.fetch('state', nil),
          'date_registered' => attributes.fetch('registered', nil),
          'date_updated' => attributes.fetch('updated', nil),
          'provider_id' => provider_id,
          'client_id' => client_id,
          'content_url' => content_url }
      end
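
      # A minimal usage sketch for read_datacite (assuming this module is mixed
      # into a metadata class such as Briard::Metadata and `xml` holds a
      # DataCite XML string; the DOI below is illustrative):
      #
      #   meta = read_datacite(string: xml, doi: '10.1234/example')
      #   meta['state'] #=> 'findable'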
      def read_datacite(string: nil, **options)
        read_options = ActiveSupport::HashWithIndifferentAccess.new(
          options.except(:doi, :id, :url, :sandbox, :validate, :ra)
        )

        doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
        if read_options.present?
          schema_version = 'http://datacite.org/schema/kernel-4'
        else
          ns = doc.collect_namespaces.find { |_k, v| v.start_with?('http://datacite.org/schema/kernel') }
          schema_version = Array.wrap(ns).last || 'http://datacite.org/schema/kernel-4'
        end
        doc.remove_namespaces!
        string = doc.to_xml(indent: 2)

        meta = Maremma.from_xml(string).to_h.fetch('resource', {})

        # validate only when option is set, as this step is expensive and
        # not needed if XML comes from DataCite MDS
        if options[:validate]
          errors = datacite_errors(xml: string, schema_version: schema_version)
          return { 'errors' => errors } if errors.present?
        end

        id = if options[:doi]
               normalize_doi(options[:doi], sandbox: options[:sandbox])
             else
               normalize_doi(meta.dig('identifier', '__content__') || options[:id],
                             sandbox: options[:sandbox])
             end

        identifiers = Array.wrap(meta.dig('alternateIdentifiers', 'alternateIdentifier')).map do |r|
          if r['__content__'].present?
            { 'identifierType' => get_identifier_type(r['alternateIdentifierType']),
              'identifier' => r['__content__'] }
          end
        end.compact

        resource_type_general = meta.dig('resourceType', 'resourceTypeGeneral')
        resource_type = meta.dig('resourceType', '__content__')
        schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] ||
                     Briard::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] ||
                     'CreativeWork'
        types = {
          'resourceTypeGeneral' => resource_type_general,
          'resourceType' => resource_type,
          'schemaOrg' => schema_org,
          'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type.to_s.underscore.camelcase] ||
                        Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || 'article',
          'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] ||
                      Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || 'misc',
          'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] ||
                   Briard::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'GEN'
        }.compact

        titles = get_titles(meta)

        descriptions = Array.wrap(meta.dig('descriptions', 'description')).map do |r|
          if r.blank?
            nil
          elsif r.is_a?(String)
            { 'description' => sanitize(r), 'descriptionType' => 'Abstract' }
          elsif r.is_a?(Hash)
            { 'description' => sanitize(r['__content__']),
              'descriptionType' => r['descriptionType'],
              'lang' => r['lang'] }.compact
          end
        end.compact

        rights_list = Array.wrap(meta.dig('rightsList', 'rights')).map do |r|
          if r.blank?
            nil
          elsif r.is_a?(String)
            name_to_spdx(r)
          elsif r.is_a?(Hash)
            hsh_to_spdx(r)
          end
        end.compact

        subjects = Array.wrap(meta.dig('subjects', 'subject')).reduce([]) do |sum, subject|
          case subject
          when String
            sum += name_to_fos(subject)
          when Hash
            sum += hsh_to_fos(subject)
          end
          sum
        end.uniq { |s| s['subject'] }

        dates = Array.wrap(meta.dig('dates', 'date')).map do |r|
          next unless r.is_a?(Hash) && (date = sanitize(r['__content__']).presence)
          next unless Date.edtf(date).present? || Briard::Utils::UNKNOWN_INFORMATION.key?(date)

          { 'date' => date,
            'dateType' => parse_attributes(r, content: 'dateType'),
            'dateInformation' => parse_attributes(r, content: 'dateInformation') }.compact
        end.compact
        if meta.fetch('publicationYear', nil).present? && get_date(dates, 'Issued').blank?
          dates << { 'date' => meta.fetch('publicationYear', nil), 'dateType' => 'Issued' }
        end

        sizes = Array.wrap(meta.dig('sizes', 'size')).map do |k|
          if k.blank?
            nil
          elsif k.is_a?(String)
            sanitize(k).presence
          elsif k.is_a?(Hash)
            sanitize(k['__content__']).presence
          end
        end.compact

        formats = Array.wrap(meta.dig('formats', 'format')).map do |k|
          if k.blank?
            nil
          elsif k.is_a?(String)
            sanitize(k).presence
          elsif k.is_a?(Hash)
            sanitize(k['__content__']).presence
          end
        end.compact.map { |s| s.to_s.squish.presence }.compact
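
        # Funder identifiers that are not already URLs get expanded with their
        # schemeURI when one is present, e.g. (illustrative values) schemeURI
        # 'https://doi.org/' plus funderIdentifier '10.13039/501100000780'
        # normalizes to 'https://doi.org/10.13039/501100000780'.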
        funding_references = Array.wrap(meta.dig('fundingReferences', 'fundingReference')).compact.map do |fr|
          scheme_uri = parse_attributes(fr['funderIdentifier'], content: 'schemeURI')
          funder_identifier = parse_attributes(fr['funderIdentifier'])
          funder_identifier_type = parse_attributes(fr['funderIdentifier'], content: 'funderIdentifierType')
          if funder_identifier_type != 'Other'
            funder_identifier = if !funder_identifier.to_s.start_with?('https://', 'http://') && scheme_uri.present?
                                  normalize_id(scheme_uri + funder_identifier)
                                else
                                  normalize_id(funder_identifier)
                                end
          end
          { 'funderName' => fr['funderName'],
            'funderIdentifier' => funder_identifier,
            'funderIdentifierType' => funder_identifier_type,
            'awardNumber' => parse_attributes(fr['awardNumber']),
            'awardUri' => parse_attributes(fr['awardNumber'], content: 'awardURI'),
            'awardTitle' => fr['awardTitle'] }.compact
        end

        related_identifiers = Array.wrap(meta.dig('relatedIdentifiers', 'relatedIdentifier')).map do |ri|
          rid = if ri['relatedIdentifierType'] == 'DOI'
                  validate_doi(ri['__content__'].to_s.downcase)
                else
                  ri['__content__']
                end
          { 'relatedIdentifier' => rid,
            'relatedIdentifierType' => ri['relatedIdentifierType'],
            'relationType' => ri['relationType'],
            'resourceTypeGeneral' => ri['resourceTypeGeneral'],
            'relatedMetadataScheme' => ri['relatedMetadataScheme'],
            'schemeUri' => ri['schemeURI'],
            'schemeType' => ri['schemeType'] }.compact
        end

        related_items = Array.wrap(meta.dig('relatedItems', 'relatedItem')).map do |ri|
          rii = ri['relatedItemIdentifier']
          relatedItemIdentifier = nil
          if rii
            rid = if rii['relatedItemIdentifierType'] == 'DOI'
                    validate_doi(rii['__content__'].to_s.downcase)
                  else
                    rii['__content__']
                  end
            relatedItemIdentifier = { 'relatedItemIdentifier' => rid,
                                      'relatedItemIdentifierType' => rii['relatedItemIdentifierType'],
                                      'relatedMetadataScheme' => rii['relatedMetadataScheme'],
                                      'schemeURI' => rii['schemeURI'],
                                      'schemeType' => rii['schemeType'] }.compact
          end

          # number is either a plain string or a hash carrying a numberType attribute
          number = ri['number']
          if number.is_a?(String)
            numberType = nil
          else
            number = ri.dig('number', '__content__')
            numberType = ri.dig('number', 'numberType')
          end

          { 'relationType' => ri['relationType'],
            'relatedItemType' => ri['relatedItemType'],
            'relatedItemIdentifier' => relatedItemIdentifier,
            'creators' => get_authors(Array.wrap(ri.dig('creators', 'creator'))),
            'titles' => get_titles(ri),
            'publicationYear' => ri['publicationYear'],
            'volume' => ri['volume'],
            'issue' => ri['issue'],
            'number' => number,
            'numberType' => numberType,
            'firstPage' => ri['firstPage'],
            'lastPage' => ri['lastPage'],
            'publisher' => ri['publisher'],
            'edition' => ri['edition'],
            'contributors' => get_authors(Array.wrap(ri.dig('contributors', 'contributor'))) }.compact
        end
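
        # geoLocation entries may carry a point, a box, and one or more polygons;
        # entries whose shapes arrive as plain strings (as in pre-4.0 schema
        # versions) cannot be mapped to the structured form and are skipped.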
        geo_locations = Array.wrap(meta.dig('geoLocations', 'geoLocation')).map do |gl|
          if !gl.is_a?(Hash) || gl['geoLocationPoint'].is_a?(String) ||
             gl['geoLocationBox'].is_a?(String) || gl['geoLocationPolygon'].is_a?(String)
            nil
          else
            # Handle the scenario where multiple geoLocationPolygons are allowed
            # within a single geoLocation: return an array if it is already an
            # array (i.e. multiple geoLocationPolygons), otherwise return the
            # single object. This keeps both scenarios backwards compatible.
            geoLocationPolygon = if gl['geoLocationPolygon'].is_a?(Array)
                                   gl['geoLocationPolygon'].map do |glp|
                                     Array.wrap(glp['polygonPoint']).map do |glpp|
                                       { 'polygonPoint' => glpp }
                                     end.compact.presence
                                   end.compact.presence
                                 else
                                   Array.wrap(gl.dig('geoLocationPolygon', 'polygonPoint')).map do |glp|
                                     { 'polygonPoint' => glp }
                                   end.compact.presence
                                 end

            { 'geoLocationPoint' => {
                'pointLatitude' => gl.dig('geoLocationPoint', 'pointLatitude'),
                'pointLongitude' => gl.dig('geoLocationPoint', 'pointLongitude')
              }.compact.presence,
              'geoLocationBox' => {
                'westBoundLongitude' => gl.dig('geoLocationBox', 'westBoundLongitude'),
                'eastBoundLongitude' => gl.dig('geoLocationBox', 'eastBoundLongitude'),
                'southBoundLatitude' => gl.dig('geoLocationBox', 'southBoundLatitude'),
                'northBoundLatitude' => gl.dig('geoLocationBox', 'northBoundLatitude')
              }.compact.presence,
              'geoLocationPolygon' => geoLocationPolygon,
              'geoLocationPlace' => parse_attributes(gl['geoLocationPlace'], first: true).to_s.strip.presence }.compact
          end
        end.compact

        state = id.present? || read_options.present? ? 'findable' : 'not_found'

        { 'id' => id,
          'types' => types,
          'doi' => doi_from_url(id),
          'identifiers' => identifiers,
          'url' => options.fetch(:url, nil).to_s.strip.presence,
          'titles' => titles,
          'creators' => get_authors(Array.wrap(meta.dig('creators', 'creator'))),
          'contributors' => get_authors(Array.wrap(meta.dig('contributors', 'contributor'))),
          'container' => set_container(meta),
          'publisher' => parse_attributes(meta.fetch('publisher', nil), first: true).to_s.strip.presence,
          'agency' => 'datacite',
          'funding_references' => funding_references,
          'dates' => dates,
          'publication_year' => parse_attributes(meta.fetch('publicationYear', nil), first: true).to_s.strip.presence,
          'descriptions' => descriptions,
          'rights_list' => Array.wrap(rights_list),
          'version_info' => meta.fetch('version', nil).to_s.presence,
          'subjects' => subjects,
          'language' => parse_attributes(meta.fetch('language', nil), first: true).to_s.strip.presence,
          'geo_locations' => geo_locations,
          'related_identifiers' => related_identifiers,
          'related_items' => related_items,
          'formats' => formats,
          'sizes' => sizes,
          'schema_version' => schema_version,
          'state' => state }.merge(read_options)
      end

      def set_container(meta)
        series_information = Array.wrap(meta.dig('descriptions', 'description')).find do |r|
          r['descriptionType'] == 'SeriesInformation'
        end.to_h.fetch('__content__', nil)
        si = get_series_information(series_information)

        is_part_of = Array.wrap(meta.dig('relatedIdentifiers', 'relatedIdentifier')).find do |ri|
          ri['relationType'] == 'IsPartOf'
        end.to_h

        if si['title'].present? || is_part_of.present?
          { 'type' => if meta.dig('resourceType', 'resourceTypeGeneral') == 'Dataset'
                        'DataRepository'
                      else
                        'Series'
                      end,
            'identifier' => is_part_of['__content__'],
            'identifierType' => is_part_of['relatedIdentifierType'],
            'title' => si['title'],
            'volume' => si['volume'],
            'issue' => si['issue'],
            'firstPage' => si['firstPage'],
            'lastPage' => si['lastPage'] }.compact
        else
          {}
        end
      end

      def get_titles(meta)
        Array.wrap(meta.dig('titles', 'title')).map do |r|
          if r.blank?
            nil
          elsif r.is_a?(String)
            { 'title' => sanitize(r) }
          else
            { 'title' => sanitize(r['__content__']),
              'titleType' => r['titleType'],
              'lang' => r['lang'] }.compact
          end
        end.compact
      end
    end
  end
end
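
# A round-trip sketch (assumes this reader is mixed into Briard::Metadata, as
# elsewhere in the gem, and that the Dryad DOI below is registered with DataCite):
#
#   subject = Briard::Metadata.new(input: '10.5061/dryad.8515', from: 'datacite')
#   subject.types #=> { 'resourceTypeGeneral' => 'Dataset', 'schemaOrg' => 'Dataset', ... }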