pipeline.sh in lederhosen-0.1.1 vs pipeline.sh in lederhosen-0.1.2
- old
+ new
@@ -1,37 +1,71 @@
#!/bin/bash
-set +e
-
# An example OTU clustering pipeline
# Austin G. Davis-Richardson
# <harekrishna at gmail dot com>
+# http://github.com/audy/lederhosen
-raw_reads='raw_reads/*.txt'
-identities='0.975'
+set -euo pipefail # abort on a failed command, an unset variable, or a failed pipeline stage
+
+raw_reads='spec/data/*.txt' # glob of input reads; handed to 'lederhosen trim --reads-dir'
out_dir='pipeline'
+taxcollector='taxcollector.fa' # handed to 'lederhosen name --database'
+min_reads=50 # handed to 'lederhosen uc_filter --reads'
+min_samples=10 # handed to 'lederhosen uc_filter --samples'
# trim reads
-bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
+bin/lederhosen trim \
+ --reads-dir="$raw_reads" \
+ --out-dir="$out_dir/trimmed" # args quoted: the reads glob reaches lederhosen literally, never expanded by the shell
# join reads
-bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
+bin/lederhosen join \
+ --trimmed="$out_dir/trimmed/*.fasta" \
+ --output="$out_dir/joined.fasta" # args quoted: the *.fasta pattern is passed through literally, not expanded by the shell
# filter reads
-bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
+bin/lederhosen k_filter \
+ --input="$out_dir/joined.fasta" \
+ --output="$out_dir/filtered.fasta" \
+ -k=10 \
+ --cutoff=50 # reads the joined reads, writes filtered.fasta for the sort step
# sort
-bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
+bin/lederhosen sort \
+ --input="$out_dir/filtered.fasta" \
+ --output="$out_dir/sorted.fasta" # sorted.fasta feeds both 'cluster' and 'rep_reads' in the loop below
-# cluster
-for i in $identities
+for i in 0.80 0.90 0.95 # identity thresholds; every step in the loop body runs once per value
do
- bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
-done
+ # cluster
+ bin/lederhosen cluster \
+ --input="$out_dir/sorted.fasta" \
+ --output="$out_dir/clusters_${i}.uc" \
+ --identity="$i"
-# generate otu tables
-for i in $identities
-do
- bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
+ # filter uc file
+ bin/lederhosen uc_filter \
+ --input="$out_dir/clusters_${i}.uc" \
+ --output="$out_dir/clusters_${i}.uc.filtered" \
+ --reads="$min_reads" \
+ --samples="$min_samples" # last argument: no trailing backslash, so the command ends on this line
+
+ # generate otu table
+ bin/lederhosen otu_table \
+ --clusters="$out_dir/clusters_${i}.uc.filtered" \
+ --output="$out_dir/otus_${i}"
+
+ # get representative reads
+ bin/lederhosen rep_reads \
+ --clusters="$out_dir/clusters_${i}.uc.filtered" \
+ --joined="$out_dir/sorted.fasta" \
+ --output="$out_dir/representatives_${i}.fasta"
+
+ # blast representative reads
+ bin/lederhosen name \
+ --reps="$out_dir/representatives_${i}.fasta" \
+ --output="$out_dir/taxonomies_${i}.txt" \
+ --database="$taxcollector"
done
echo "complete!"
\ No newline at end of file