Sha256: 3f6d4baf0fb9e8fe7f172cdaae8a682379217c2c3b50ec003a2e542dc92803c0
Contents?: true
Size: 1.23 KB
Versions: 1
Compression:
Stored size: 1.23 KB
Contents
require 'set'

module Lederhosen
  class CLI

    desc 'get_reps',
         'get representative reads for a uc file'

    method_option :input,    :type => :string, :required => true
    method_option :database, :type => :string, :required => true
    method_option :output,   :type => :string, :required => true

    # Copy the database records referenced by a uc (cluster) file into a
    # new file.
    #
    # Reads the following entries from +options+:
    #   :input    - path of the uc file, read line by line
    #   :database - path of the sequence database, read through Dna
    #   :output   - path of the file to write (opened with mode 'w', so an
    #               existing file is truncated)
    #
    # Every input line is handed to parse_usearch_line and the value stored
    # under :original in the result is collected into a set of names. Each
    # database record whose +name+ is in that set is written to the output
    # file in database order.
    #
    # NOTE(review): parse_usearch_line, ohai and Dna are defined outside this
    # file; the description above is limited to how they are used here.
    def get_reps
      input    = options[:input]
      database = options[:database]
      output   = options[:output]

      taxa = Set.new

      ohai "getting representative database sequences from #{database} using #{input} clusters and saving to #{output}"

      # parse uc file, get list of taxa we need to get
      # full sequences for from the database
      #
      # File.foreach closes the input file once iteration finishes, so no
      # handle is left open.
      File.foreach(input) do |line|
        header = parse_usearch_line(line.strip)

        # Best-effort: a line whose parse result cannot be indexed (e.g. a
        # nil header) is skipped instead of aborting the run. Errors raised
        # by parse_usearch_line itself are deliberately not rescued here.
        begin
          taxa << header[:original]
        rescue StandardError
          next
        end
      end

      ohai "found #{taxa.size} representative sequences"

      # print representative sequences from database
      #
      # Block forms of File.open guarantee both files are closed even when
      # reading the database raises part-way through.
      kept = 0

      File.open(output, 'w') do |out|
        File.open(database) do |handle|
          Dna.new(handle).each do |record|
            next unless taxa.include?(record.name)

            out.puts record
            kept += 1
          end
        end
      end

      ohai "saved #{kept} representatives"
    end

  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
lederhosen-1.2.0 | lib/lederhosen/tasks/get_reps.rb |