require 'typhoeus'

module Unipept::Commands
  class Uniprot
    # NOTE(review): these declare instance-level readers, but nothing in this
    # class assigns @root_command or @valid_formats on an instance; the command
    # is kept in a class-level instance variable and the formats in a local
    # variable below. Kept as-is for backward compatibility.
    attr_reader :root_command
    attr_reader :valid_formats

    # Output formats accepted by the --format option. Captured by the blocks
    # below through closure.
    valid_formats = Set.new %w[fasta txt xml rdf gff sequence]

    # Definition of the `uniprot` command: validates the requested format and
    # prints one fetched entry per accession number, taken from the command line
    # arguments or, when there are none, from standard input.
    @root_command = Cri::Command.define do
      name 'uniprot'
      summary 'Command line interface to UniProt web services.'
      usage 'uniprot [options]'
      description <<-EOS
      The uniprot command fetches UniProt entries from the UniProt web services. The command expects a list of UniProt Accession Numbers that are passed

      - as separate command line arguments

      - to standard input

      The command will give priority to the first way UniProt Accession Numbers are passed, in the order as listed above. The standard input should have one UniProt Accession Number per line.

      The uniprot command yields just the protein sequences as a default, but can return several formats.
      EOS
      required :f, :format, "specify output format (available: #{valid_formats.to_a.join(', ')}) (default: sequence)"
      flag :h, :help, 'show help for this command' do |_value, cmd|
        puts cmd.help
        exit 0
      end
      run do |opts, args, _cmd|
        format = opts.fetch(:format, 'sequence')
        unless valid_formats.include? format
          warn "#{format} is not a valid output format. Available formats are: #{valid_formats.to_a.join(', ')}"
          exit 1
        end

        # Command line arguments take priority; otherwise read one accession
        # number per line from standard input.
        accessions = args.empty? ? $stdin.each_line : args
        accessions.each do |raw_accession|
          accession = raw_accession.strip
          # Skip blank input (e.g. an empty line on stdin) rather than issuing
          # a request for an empty accession number.
          next if accession.empty?

          puts Uniprot.get_uniprot_entry(accession, format)
        end
      end
    end

    # Entry point of the uniprot command-line tool: hands the given arguments
    # to the class-level root command and lets it process them.
    #
    # @param [Array<String>] args The command-line arguments to process
    #
    # @return [void]
    def self.run(args)
      command = @root_command
      command.run(args)
    end

    # Fetches a UniProt entry from the UniProt website with the given accession
    # number in the requested format.
    #
    # @param [String] accession The accession number of the record to fetch
    #
    # @param [String] format The format of the record. If the format is
    #   'sequence', the FASTA record is downloaded and its sequence is returned
    #   as a single line (header dropped, line breaks removed)
    #
    # @return [String, nil] The requested UniProt entry in the requested format,
    #   or nil when the download was not successful
    def self.get_uniprot_entry(accession, format)
      if format == 'sequence'
        fasta = get_uniprot_entry(accession, 'fasta')
        # A failed download yields nil; pass that on like the other formats do
        # instead of raising NoMethodError on nil.
        return unless fasta

        # Drop the FASTA header line and join the sequence lines. drop(1) also
        # copes with an empty body, for which it yields an empty string.
        fasta.lines.drop(1).map(&:chomp).join
      else
        # other format has been specified, just download and output
        # NOTE(review): UniProt appears to have moved its REST API to another
        # host since this URL was written - confirm this legacy URL still
        # answers without a redirect.
        resp = Typhoeus.get("https://www.uniprot.org/uniprot/#{accession}.#{format}")
        resp.response_body if resp.success?
      end
    end
  end
end