spec/cli_spec.rb in lederhosen-1.8.2 vs spec/cli_spec.rb in lederhosen-2.0.0

- old
+ new

@@ -23,37 +23,89 @@ $?.success?.should be_true File.exists?(File.join($test_dir, 'clusters.uc')).should be_false end it 'can cluster reads using usearch' do - `./bin/lederhosen cluster --input spec/data/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.95 --output #{$test_dir}/clusters.uc` + `./bin/lederhosen cluster --input spec/data/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.99 --output #{$test_dir}/clusters.uc` $?.success?.should be_true File.exists?(File.join($test_dir, 'clusters.uc')).should be_true end - it 'should build abundance matrices for each level' do - levels = "domain phylum class order FAMILY genus Species" - `./bin/lederhosen otu_table --files=spec/data/test.uc --prefix=#{$test_dir}/otu_table --levels=#{levels}` + it 'can separate unclassified reads from usearch output' do + `./bin/lederhosen separate_unclassified --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.fasta` $?.success?.should be_true + unclassified_results = File.readlines("spec/data/test.uc") + .select { |x| x =~ /^N/ } + .size + unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta") + .select { |x| x =~ /^>/ } + .size + + unclassified_results.should == unclassified_reads end + + it 'can separate unclassified reads from usearch output using strict pairing' do + `./bin/lederhosen separate_unclassified --strict=genus --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.strict_genus.fasta` + $?.success?.should be_true + File.readlines("#{$test_dir}/unclassified.strict_genus.fasta") + .select { |x| x =~ /^>/ } + .size.should be_even + end - it 'should filter OTU abundance matrices' do - `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.species.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1` + it 'can create taxonomy count tables' do + `./bin/lederhosen count_taxonomies --input=spec/data/test.uc --output=#{$test_dir}/taxonomy_count.txt` $?.success?.should be_true + File.exists?(File.join($test_dir, 'taxonomy_count.txt')).should be_true end - it 'should combine OTU abundance matrices' do - `./bin/lederhosen join_otu_tables --input=#{$test_dir}/otu_table*.csv --output=#{$test_dir}/merged.csv` + it 'generates taxonomy tables w/ comma-free taxonomic descriptions' do + File.readlines(File.join($test_dir, 'taxonomy_count.txt')) + .map(&:strip) + .map { |x| x.count(',') } + .uniq + .should == [1] + end + + %w{domain phylum class order family genus species}.each do |level| + it "generates taxonomy tables only counting pairs that agree at level: #{level}" do + `./bin/lederhosen count_taxonomies --input=spec/data/test.uc --output=#{$test_dir}/taxonomy_count.strict.#{level}.txt --strict=#{level}` + $?.success?.should be_true + + lines = File.readlines(File.join($test_dir, "taxonomy_count.strict.#{level}.txt")) + + # make sure total number of reads is even + # requires that there should be an odd number if classification is not strict + lines.select { |x| !(x =~ /^#/) } + .map(&:strip) + .map { |x| x.split(',') } + .map(&:last) + .map(&:to_i) + .inject(:+).should be_even + end + end + + %w{domain phylum class order family genus species}.each do |level| + it "should create OTU abundance matrices from taxonomy count tables at level: #{level}" do + `./bin/lederhosen otu_table --files=#{$test_dir}/taxonomy_count.strict.*.txt --level=#{level} --output=#{$test_dir}/otus_genus.strict.csv` + $?.success?.should be_true + end + end + + it 'should filter OTU abundance matrices' do + # TODO + # filtering should move filtered reads to 'unclassified_reads' so that we maintain + # our knowledge of depth of coverage throughout + # this makes normalization better later. + `./bin/lederhosen otu_filter --input=#{$test_dir}/otus_genus.strict.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1` $?.success?.should be_true end it 'should split a fasta file into smaller fasta files (optionally gzipped)' do `./bin/lederhosen split_fasta --input=spec/data/trimmed/ILT_L_9_B_001.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100` $?.success?.should be_true end it 'should print representative sequences from uc files' do `./bin/lederhosen get_reps --input=#{$test_dir}/clusters.uc --database=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/representatives.fasta` + $?.success?.should be_true end - - it 'should create a fasta file containing representative reads for each cluster' end