#!/usr/bin/ruby

# This is my second attempt at writing a simple interface for messing with
# fasta files.  Achieving simplicity (and power) is challenging.  It usually
# only happens on the second (or sometimes later) try.  Of course, in
# retrospect the simple solution seems sooo obvious.  But it's deceptive.
# It takes work to achieve simplicity for complex tasks.  That's my thought
# for the day.

# fasta_shaker as in a salt shaker.  Shake up your fasta proteins and let them
# season your dinner (hopefully a protein dinner).  Mmmm.  Don't they taste
# good all mixed up?  If you want, you can think of it as a pepper shaker.

# I don't usually comment on my scripts (in my script, anyway), but this one
# came out so nice and clean that I feel like I have room to spare.

# Overview: reads a fasta file, optionally takes a fraction of its proteins,
# optionally prefixes their headers, applies the requested sequence method
# (reverse or shuffle), optionally concatenates the result onto a copy of the
# original, and writes the outcome to a new fasta file.

require 'fasta'
require 'cmdparse'
# OptionParser lives in the standard library's optparse; require it explicitly
# instead of depending on some other library having loaded it already.
require 'optparse'

# The sequence methods advertised in the usage text.  The first positional
# argument must be one of these; it is turned into the bang-method name
# handed to Fasta#aaseq!.
SHAKE_METHODS = %w[reverse shuffle].freeze

opt = {}
opts = OptionParser.new do |op|
  prog = File.basename(__FILE__)
  op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
  op.separator "  <method> = reverse | shuffle"
  op.on("-c", "--cat", "catenates the output to copy of original") { |v| opt[:cat] = v }
  op.on("-o", "--out <file>", "name of output file (default is descriptive)") { |v| opt[:out] = v }
  op.on("-p", "--prefix <string>", "give a header prefix to modified prots") { |v| opt[:prefix] = v }
  op.on("-f", "--fraction <float>", "creates some fraction of proteins") { |v| opt[:fraction] = v }
  op.separator "    [if fraction > 1 then the tag 'f<n>_' is prefixed to proteins"
  op.separator "    (after any given prefix) so that proteins are unique]"
  op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") { |v| opt[:tryptic_peptides] = v }
  op.separator "EXAMPLES: "
  op.separator "  #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
  op.separator "  #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
  op.separator "  #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
  op.separator "  #{prog} reverse file.fasta --tryptic_peptides -o tryptic_peptides_reversed.fasta"
end

# Report bad switches with the usage text instead of a raw backtrace.
begin
  opts.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n\n#{opts}"
end

if ARGV.size < 2
  puts opts
  exit
end

# Positional arguments: the method to apply, then the fasta file to read.
action, file = ARGV

# Only the documented methods are accepted; anything else would otherwise be
# converted to a symbol and dispatched blindly.
unless SHAKE_METHODS.include?(action)
  abort "unknown method '#{action}' (expected #{SHAKE_METHODS.join(' | ')})\n\n#{opts}"
end

# Validate the fraction up front so a typo fails loudly rather than silently
# becoming 0.0 (which is what String#to_f would give).
fraction = nil
if opt[:fraction]
  begin
    fraction = Float(opt[:fraction])
  rescue ArgumentError
    abort "invalid --fraction '#{opt[:fraction]}' (expected a number)"
  end
end

if opt[:cat] && !opt[:prefix]
  puts "WARNING: concatenated proteins don't have unique headers"
  puts "[you probably wanted to use the '--prefix' option!]"
end

# OUT filename:
# When no name is given, build a descriptive one from the input path and the
# options in effect, e.g. "file_cat_shuffle_prefix_SH_.fasta".
unless opt[:out]
  # Strip the extension(s) of the final path component only.  Dots inside
  # directory names ("./dir/x.fasta", "../x.fasta") and the leading dot of a
  # hidden file are left alone.
  filebase = file.sub(%r{(?<=[^/])\.[^/]*\z}, '')
  parts = [filebase]
  parts << 'cat' if opt[:cat]
  parts << action
  parts << 'prefix' << opt[:prefix] if opt[:prefix]
  parts << 'fraction' << opt[:fraction] if opt[:fraction]
  parts << 'tryptic_peptides' if opt[:tryptic_peptides]
  opt[:out] = parts.join("_") << ".fasta"
end

## READ the file
fasta = Fasta.new.read_file(file)

## CAT (save an original copy)
# NOTE(review): this relies on Fasta#dup yielding a copy that the in-place
# modifications below do not touch -- confirm against the fasta library.
fasta_orig = fasta.dup if opt[:cat]

## FRACTION the proteins
if fraction
  # For fractions above 1 a per-copy tag ("f<n>_") is supplied so that the
  # resulting proteins stay unique (see the usage text).
  tag = fraction > 1.0 ? proc { |cnt| "f#{cnt}_" } : nil
  fasta = fasta.fraction_of_prots(fraction, tag)
end

## PREFIX the proteins
fasta.header_prefix!(opt[:prefix]) if opt[:prefix]

## MODIFY the proteins
fasta.aaseq!(:"#{action}!", opt[:tryptic_peptides])

## CAT (finish it up)
if opt[:cat]
  fasta_orig << fasta
  fasta = fasta_orig
end

## WRITE out the file
fasta.write_file(opt[:out])