Sha256: 29c86006992d11b09eceeccfab83a97f9ba3e4d4991522e46ef0565f1b51073f
Contents?: true
Size: 1.47 KB
Versions: 1
Compression:
Stored size: 1.47 KB
Contents
require 'set' module Lederhosen class CLI desc 'get_reps', 'get representative reads for a uc file' method_option :input, :type => :string, :required => true method_option :database, :type => :string, :required => true method_option :output, :type => :string, :required => true def get_reps inputs = Dir[options[:input]] database = File.expand_path(options[:database]) output = File.expand_path(options[:output]) taxa = Set.new ohai "getting representative database sequences from #{database} using #{inputs.size} cluster file(s) and saving to #{output}" # parse uc file, get list of taxa we need to get # full sequences for from the database pbar = ProgressBar.new 'reading uc(s)', inputs.size inputs.each do |input| File.open(input) do |handle| pbar.inc handle.each do |line| header = parse_usearch_line(line.strip) taxa << header[:original] rescue nil end end end pbar.finish ohai "found #{taxa.size} representative sequences" # print representative sequences from database output = File.open(output, 'w') kept = 0 File.open(database) do |handle| Dna.new(handle).each do |record| if taxa.include? record.name output.puts record kept += 1 end end end output.close ohai "saved #{kept} representatives" end end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
lederhosen-1.2.5 | lib/lederhosen/tasks/get_reps.rb |