#!/usr/bin/env ruby
# Command-line client for a Unipept API host. Each subcommand reads its input
# (one entry per line) from a file or from STDIN, posts it to
# `<host>/api/v1/<endpoint>.json` and writes the formatted response to STDOUT
# or to the file given with --output.

require 'cri'
require 'faraday'
require 'yaml'
require 'json'
require 'fileutils'

require_relative '../lib/unipept'

# Generic runner used directly by `pept2taxa` and `pept2lca`, and as the base
# class for the other API subcommands. Subclasses customise the endpoint
# (`mapping`), the request parameters (`url_options`), the batching strategy
# (`peptide_iterator`) and the optional XML download (`download_xml`).
class ApiRunner < Cri::CommandRunner
  # Builds the endpoint URL from the configured host and the command name.
  # Prints a warning on STDERR and exits with status 1 when no host is set.
  #
  # NOTE(review): the arguments are forwarded positionally to
  # Cri::CommandRunner#initialize via the bare `super`; Cri appears to pass
  # (options, arguments, command), so the first two names here may be
  # swapped. Only `cmd` is used locally. Confirm against the Cri version used.
  def initialize(args, opts, cmd)
    super

    host = Unipept::Configuration.new['host']
    if host.nil? || host.empty?
      STDERR.puts "WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`"
      exit 1
    end

    @url = "#{normalize_host(host)}/api/v1/#{mapping[cmd.name]}.json"
  end

  # Maps a command name to the name of the API endpoint it calls.
  def mapping
    { 'pept2taxa' => 'single', 'pept2lca' => 'lca' }
  end

  # Returns an enumerator over the input lines: the file given with --input,
  # or STDIN when no file was given. Lines are streamed rather than loaded
  # up front, and keep their line terminator.
  def input_iterator
    if options[:input]
      File.foreach(options[:input])
    else
      STDIN.each_line
    end
  end

  # Number of input lines sent to the API per request.
  def batch_size
    1000
  end

  # Request parameters for one batch of input lines.
  # Names are requested when no --select filter is given, or when at least
  # one filter contains the text "name".
  def url_options(sub_part)
    filter = options[:select] || []
    names = filter.empty? || filter.any? { |f| f.include?('name') }

    {
      :sequences => sub_part,
      :equate_il => options[:equate],
      :full_lineage => options[:lineage],
      :names => names
    }
  end

  # Entry point called by Cri: posts the input batch by batch, writes the
  # header once followed by every formatted batch, and finally performs the
  # optional XML download.
  def run
    formatter = Unipept::Formatter.new_for_format(options[:format])
    filter_list = (options[:select] || []).map { |f| glob_to_regex(f) }

    printed_header = false
    result = []

    peptide_iterator(input_iterator) do |sub_division|
      sub_result = apply_filter(fetch_batch(sub_division), filter_list)
      result << sub_result

      unless printed_header
        write_to_output formatter.header(sub_result)
        printed_header = true
      end

      write_to_output formatter.format(sub_result)
    end

    begin
      download_xml(result)
    rescue
      # Best effort: the main output has already been written at this point.
      STDERR.puts "Something went wrong while downloading xml information! \nplease check the output"
    end
  end

  # Writes `string` to the --output file, or to STDOUT when none was given.
  # The file is opened in append mode on every call so successive batches
  # accumulate; a file that already exists is therefore not truncated.
  def write_to_output(string)
    if options[:output]
      File.open(options[:output], 'a') { |f| f.write string }
    else
      puts string
    end
  end

  # When --xml is given, fetches the NCBI taxonomy records for the taxon ids
  # found in the results and stores them in "<xml option>.xml".
  #
  # result - array of batches, each batch being an array of record hashes.
  def download_xml(result)
    return unless options[:xml]

    # Collect the ids of every batch, not only the first one; skip records
    # without an id (e.g. filtered out by --select) and duplicates.
    taxon_ids = result.flatten(1).map { |h| h['taxon_id'] }.compact.uniq
    return if taxon_ids.empty?

    # Fetch before opening the file so a failed request leaves no empty file.
    body = Faraday.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{taxon_ids.join(",")}&retmode=xml").body
    File.open(options[:xml] + ".xml", "wb") { |f| f.write body }
  end

  # Yields the input to `block` in slices of `batch_size` lines.
  def peptide_iterator(peptides, &block)
    peptides.each_slice(batch_size, &block)
  end

  private

  # Converts a --select glob into a regular expression.
  def glob_to_regex(glob_string)
    # only implement * -> .* for now; the result is not anchored, so a glob
    # matches anywhere inside a key
    Regexp.new(glob_string.gsub("*", ".*"))
  end

  # Ensures the host has a scheme and no trailing slash, so that both
  # "localhost:3000", "http://localhost:3000/" and "https://example.org"
  # produce a well-formed endpoint URL.
  def normalize_host(host)
    with_scheme = host =~ %r{\Ahttps?://} ? host : "http://#{host}"
    with_scheme.sub(%r{/+\z}, '')
  end

  # Posts one batch to the API and returns the parsed response as an array
  # of records (a single JSON object is wrapped in an array).
  # Prints a message on STDERR and exits with status 1 when the request or
  # the JSON parsing fails.
  def fetch_batch(sub_division)
    parsed = JSON.parse(Faraday.post(@url, url_options(sub_division)).body)
    parsed.is_a?(Array) ? parsed : [parsed]
  rescue
    STDERR.puts "API endpoint gave an error, exiting."
    exit 1
  end

  # Keeps only the keys matching one of the --select filters. Uses the
  # non-destructive Hash#select: Hash#select! returns nil when nothing was
  # removed, which would replace fully matching records by nil.
  def apply_filter(records, filter_list)
    return records if filter_list.empty?

    records.map do |record|
      record.select { |key, _value| filter_list.any? { |f| f =~ key } }
    end
  end
end

# Runner for `taxa2lca`: sends all taxon ids in a single request.
class Taxa2lca < ApiRunner
  def mapping
    { "taxa2lca" => "taxa2lca" }
  end

  def url_options(sub_part)
    { :taxon_ids => sub_part, :full_lineage => options[:lineage] }
  end

  # Yields the complete input at once instead of slicing it into batches.
  def peptide_iterator(peptides, &block)
    block.call(peptides.to_a)
  end

  # Never called by this class because `peptide_iterator` does not batch.
  def batch_size
    raise "NOT NEEDED FOR TAXA2LCA"
  end
end

# Runner for `pept2prot`; the optional XML download stores one UniProt
# record per file.
class Pept2prot < ApiRunner
  # NOTE(review): the endpoint is spelled 'pept2pro' (no trailing "t");
  # left unchanged, confirm it matches the server route.
  def mapping
    { "pept2prot" => "pept2pro" }
  end

  def url_options(sub_part)
    { :sequences => sub_part, :equate_il => options[:equate] }
  end

  # When --xml is given, downloads the UniProt XML record of every protein
  # in the results into the directory named by the option (created when
  # missing), as "<uniprot_id>.xml".
  #
  # result - array of batches, each batch being an array of record hashes.
  def download_xml(result)
    return unless options[:xml]

    FileUtils.mkdir_p(options[:xml])

    # Cover every batch, skip records without an id and avoid downloading
    # the same record twice.
    uniprot_ids = result.flatten(1).map { |prot| prot['uniprot_id'] }.compact.uniq
    uniprot_ids.each do |uniprot_id|
      body = Faraday.get("http://www.uniprot.org/uniprot/#{uniprot_id}.xml").body
      File.open(options[:xml] + "/#{uniprot_id}.xml", "wb") { |f| f.write body }
    end
  end
end

# Root command: holds the options shared by all subcommands.
root_cmd = Cri::Command.new_basic_root.modify do
  name 'unipept'
  flag :v, :verbose, "verbose mode"
  option :i, :input, "input file", :argument => :required
  option :o, :output, "output file", :argument => :required
  option :f, :format, "output format (available: #{Unipept::Formatter.available.join "," }) (default: #{Unipept::Formatter.default})", :argument => :required
end

# `config attr` prints a configuration value, `config attr value` stores it.
# Called without arguments it does nothing.
root_cmd.define_command('config') do
  usage 'config attr [value]'

  run do |opts, args, cmd|
    config = Unipept::Configuration.new
    if args.size > 1
      config[args.first] = args[1]
      config.save
    elsif args.size == 1
      puts config[args.first]
    end
  end
end

root_cmd.define_command('pept2taxa') do
  usage 'pept2taxa [options]'
  aliases :s
  summary 'Single Peptide Search'
  description 'Search Unipept for the given peptide and return taxons'

  flag :e, :equate, "equate I and L"
  option :s, :select, "select the attributes", :argument => :required, :multiple => true
  option :l, :lineage, "Show full lineage"
  option :x, :xml, "Download taxonomy from NCBI as xml (specify output filename)", :argument => :required

  runner ApiRunner
end

root_cmd.define_command('pept2lca') do
  usage 'pept2lca [options]'
  aliases :l
  summary 'Give lowest common ancestor for given peptide'
  description 'Search Unipept for the given peptide and return the lowest common ancestor'

  flag :e, :equate, "equate I and L"
  option :s, :select, "select the attributes", :argument => :required, :multiple => true
  option :l, :lineage, "Show full lineage"

  runner ApiRunner
end

root_cmd.define_command('taxa2lca') do
  usage 'taxa2lca [options]'
  aliases :t
  summary 'Give lowest common ancestor for taxon ids'
  description 'Search Unipept for the given taxon ids and return the lowest common ancestor'

  option :s, :select, "select the attributes", :argument => :required, :multiple => true
  option :l, :lineage, "Show full lineage"

  runner Taxa2lca
end

root_cmd.define_command('pept2prot') do
  usage 'pept2prot [options]'
  aliases :p
  summary 'Give protein information for given peptides'
  # The description used to be a copy of the LCA text.
  description 'Search Unipept for the given peptides and return information about the matching proteins'

  flag :e, :equate, "equate I and L"
  option :s, :select, "select the attributes", :argument => :required, :multiple => true
  option :x, :xml, "download uniprot record in specified directory", :argument => :required

  runner Pept2prot
end

root_cmd.run(ARGV)