#!/usr/bin/env ruby

require 'optparse'

#############################################
### FUNCTIONS
#############################################

# Loads an FLN summary file into a Hash of counters and adds the derived
# length-bin counters used by the plots.
#
# Each useful line of the file is "<count> <name>", separated by whitespace.
# Lines with fewer than two fields (e.g. blank lines) are skipped.
# Counters absent from the file are treated as 0 when computing the derived
# values, which matches how fill_table treats missing counters.
#
# path    - path to a summary_stats.txt file.
# Returns a Hash mapping counter name (String) to Integer.
def create_fln_hash(path)
  fln_hash = {}
  # File.foreach closes the file once the block finishes.
  File.foreach(path) do |line|
    fields = line.chomp.split
    next if fields.length < 2
    fln_hash[fields[1]] = fields[0].to_i
  end

  count = lambda { |key| fln_hash.fetch(key, 0) }
  fln_hash['<=200seqs'] = count.call('good_seqs') - count.call('sequences_>200')
  fln_hash['>200seqs'] = count.call('sequences_>200') - count.call('sequences_>500')
  fln_hash['<=200unk'] = count.call('unknown') - count.call('unknown_>200')
  fln_hash['>200unk'] = count.call('unknown_>200') - count.call('unknown_>500')
  fln_hash['<=200cod'] = count.call('coding') - count.call('coding_>200')
  fln_hash['>200cod'] = count.call('coding_>200') - count.call('coding_>500')
  fln_hash['no_match_db'] = count.call('coding') + count.call('unknown')
  fln_hash
end

# Builds the data table and the gnuplot script for one histogram, writes both
# to disk (table in the file named +output+, script in cmd.dem) and runs
# gnuplot on the script.
#
# fln_hash         - a Hash of counters, or an Array of such Hashes (one per sample).
# output           - name of the table file; the image is written to "<output>.png".
# graph_type       - gnuplot histogram style; the code distinguishes styles
#                    containing 'clustered' from all others (e.g. 'rowstacked').
# header_titles    - column titles, repeated once per sample in the table header.
# categories_names - space-separated row labels.
# keywords         - counter names to read from each Hash, in table order.
# stacked_cols     - number of keywords per category (row-stacked) or per
#                    sample (clustered).
# titles           - histogram titles, one per sample; only read when fln_hash
#                    is an Array. Missing titles become empty strings.
#
# Returns whatever Kernel#system returns for the gnuplot call.
# Raises ArgumentError when fln_hash is nil.
def graph_table(fln_hash, output, graph_type, header_titles, categories_names, keywords, stacked_cols, titles = nil)
  raise ArgumentError, 'fln_hash must be a Hash or an Array of Hashes' if fln_hash.nil?

  multiple = fln_hash.is_a?(Array)
  samples = multiple ? fln_hash : [fln_hash]
  names = multiple ? Array(titles) : []

  table = [header(samples.length, header_titles)]
  table.concat(categories(categories_names))

  # Each sample shifts the data columns by one (clustered) or by
  # stacked_cols (every other style).
  column_step = graph_type.include?('clustered') ? 1 : stacked_cols
  plots = []
  samples.each_with_index do |sample, i|
    table = fill_table(sample, table, keywords, stacked_cols, graph_type)
    plots << histogram(stacked_cols, output, names[i].to_s, i * column_step, graph_type, i.zero?)
  end

  if table.length == 2
    # Dummy row for a row-stacked graph that has a single category.
    table << (['&'] + Array.new(table[1].length - 1, 0))
  end

  cmd = basic_plot_command(graph_type)
  cmd << "set output '#{output}.png'\n"
  # Histograms are joined with a gnuplot line continuation (backslash + newline).
  cmd << 'plot ' << plots.join("\\\n")
  cmd.chop! # drop the trailing comma left by the last histogram clause

  write_table(table, output)
  write_cmd(cmd)
  system('gnuplot cmd.dem')
end

# Returns the gnuplot plot clauses for one sample.
#
# columns    - number of stacked columns of the sample.
# file       - table file name; only written into the clause of the first sample.
# name       - histogram title.
# add        - column offset of this sample inside the table.
# graph_type - styles containing 'clustered' plot a single column per sample.
# first      - true for the first sample of the plot command.
#
# Every clause ends with a comma; the caller removes the last one.
def histogram(columns, file, name, add, graph_type, first)
  source = first ? "'#{file}' using 2" : "'' using #{2 + add}"
  clauses = ["newhistogram \"#{name}\", #{source}:xtic(1) t col,"]
  unless graph_type.include?('clustered')
    (columns - 1).times do |col|
      clauses << " '' u #{3 + col + add} t col,"
    end
  end
  clauses.join
end

# Writes the gnuplot script to cmd.dem in the current directory, replacing
# any previous content.
def write_cmd(cmd)
  File.open('cmd.dem', 'w') { |file| file.puts cmd }
end

# Returns the table header row: the label column followed by header_titles
# repeated +iterations+ times.
def header(iterations, header_titles)
  ['Clasification'] + header_titles * iterations
end

# Turns a space-separated list of category names into one single-cell row
# per category.
def categories(cat)
  cat.split(' ').map { |name| [name] }
end

# Appends the values of +keywords+ read from +fln_hash+ to the rows of +table+.
#
# Row 0 of the table is the header. When the table has a single category
# (two rows) every value goes to row 1. Otherwise, for styles containing
# 'clustered' the keywords cycle through the rows in groups of stacked_cols,
# and for every other style each run of stacked_cols keywords fills one row.
# Counters missing from fln_hash are written as 0.
#
# Mutates and returns +table+.
def fill_table(fln_hash, table, keywords, stacked_cols, graph_type)
  clustered = graph_type.include?('clustered')
  single_category = table.length == 2
  keywords.each_with_index do |key, i|
    row = if single_category
            1
          elsif clustered
            i % stacked_cols + 1
          else
            i / stacked_cols + 1
          end
    table[row] << (fln_hash[key] || 0)
  end
  table
end

# Writes the table to file_name, one row per line with cells separated by a
# single space.
def write_table(table, file_name)
  File.open(file_name, 'w') do |file_table|
    table.each { |line| file_table.puts line.join(' ') }
  end
end

# Returns the gnuplot preamble shared by all plots. The key is hidden for
# styles containing 'clustered' and placed under the plot otherwise.
def basic_plot_command(graph_type)
  key_setting = graph_type.include?('clustered') ? 'unset key' : 'set key under nobox'
  settings = [
    key_setting,
    'set style data histogram',
    "set style histogram #{graph_type} title offset 2,0.25",
    'set style fill solid noborder',
    'set boxwidth 0.95',
    'unset xtics',
    'set xtics nomirror rotate by -45 scale 0',
    'set xlabel " " offset 0,-2',
    'set ylabel "Num sequences"',
    'set ytics',
    'set grid y',
    'set auto y',
    'set terminal png nocrop enhanced font arial 15 size 1000,600'
  ]
  settings.map { |setting| "#{setting}\n" }.join
end

# Reads a tab-separated samples file with one "<title>\t<path>" pair per line.
#
# Lines lacking either field are skipped so that titles and paths always
# stay aligned by index.
#
# Returns [titles, paths], two Arrays of Strings of equal length.
def parse_file(file)
  titles = []
  paths = []
  File.foreach(file) do |line|
    title, path = line.chomp.split("\t")
    next if title.nil? || path.nil?
    titles << title
    paths << path
  end
  [titles, paths]
end

##########################################################################################
## OPTIONS
##########################################################################################

options = {}

optparse = OptionParser.new do |opts|
  options[:file] = 'samples'
  opts.on('-f', '--file FILE', 'Tab-separated file with the title and path of each FLN execution') do |file|
    options[:file] = file
  end

  options[:path] = File.join('fln_results', 'summary_stats.txt')
  opts.on('-p', '--path PATH', 'Path to a single FLN execution') do |path|
    options[:path] = File.join(path, 'fln_results', 'summary_stats.txt')
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: plot_fln.rb [-p PATH || -f FILE] \n\n"

  # This displays the help screen
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end # End opts

# parse options and remove from ARGV
optparse.parse!
##########################################################################################
## MAIN
##########################################################################################

# A single FLN execution is loaded when its summary file exists.
fln_hash = create_fln_hash(options[:path]) if File.exist?(options[:path])

# When the samples file exists it takes precedence: one Hash of counters is
# loaded per listed path and the plots show every sample side by side.
if File.exist?(options[:file])
  titles, paths = parse_file(options[:file])
  fln_hash = paths.map do |path|
    create_fln_hash(File.join(path, 'fln_results', 'summary_stats.txt'))
  end
end

# Without any input there is nothing to plot; stop with a clear message.
if fln_hash.nil?
  abort("No FLN results found: neither '#{options[:path]}' nor '#{options[:file]}' exists")
end

graph_table(
  fln_hash,
  'status_report_table',
  'rowstacked',
  %w{Sure Putative},
  'Complete N-terminal C-terminal Internal NcRNA Coding Unknown',
  %w{complete_sure complete_putative n_terminal_sure n_terminal_putative c_terminal_sure c_terminal_putative internal internal_putative ncrna ncrna_putative coding_sure coding_putative unknown unknown_putative},
  2,
  titles
)

graph_table(
  fln_hash,
  'assembly_table',
  'rowstacked',
  %w{<=200nt >200nt >500nt},
  'Unigenes Coding Unknown',
  %w{<=200seqs >200seqs sequences_>500 <=200cod >200cod coding_>500 <=200unk >200unk unknown_>500},
  3,
  titles
)

graph_table(
  fln_hash,
  'database_usage',
  'clustered',
  %w{seqs},
  'UserDB SwissProt TrEMBL ncRNA None Diff-orthologues Complete Diff-complete',
  %w{userdb swissprot trembl ncrna no_match_db different_orthologues complete different_completes},
  8,
  titles
)

graph_table(
  fln_hash,
  'artifacts',
  'clustered',
  %w{seqs},
  'Misassembled Chimeras Other',
  %w{misassembled chimeras other_artifacts},
  3,
  titles
)