bin/gemma-wrapper in bio-gemma-wrapper-0.97 vs bin/gemma-wrapper in bio-gemma-wrapper-0.97.1

- old
+ new

@@ -4,11 +4,12 @@ # Author:: Pjotr Prins # License:: GPL3 # # Copyright (C) 2017,2018 Pjotr Prins <pjotr.prins@thebird.nl> -USAGE = "GEMMA wrapper example: +USAGE = " +GEMMA wrapper example: Simple caching of K computation with gemma-wrapper -- \\ -g test/data/input/BXD_geno.txt.gz \\ @@ -61,21 +62,32 @@ end require 'fileutils' require 'optparse' require 'tmpdir' +require 'tempfile' split_at = ARGV.index('--') if split_at gemma_args = ARGV[split_at+1..-1] end options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir() } opts = OptionParser.new do |o| - o.banner = "Usage: #{File.basename($0)} [options] -- [gemma-options]" + o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]" + o.on('--permutate n', Integer, 'Permutate by shuffling phenotypes') do |lst| + options[:permutate] = lst + options[:force] = true + end + + o.on('--phenotypes filen',String, 'Phenotypes to be shuffled in permutations') do |phenotypes| + options[:phenotypes] = phenotypes + raise "Phenotype input file #{phenotypes} does not exist" if !File.exist?(phenotypes) + end + o.on('--loco [x,y,1,2,3...]', Array, 'Run full LOCO') do |lst| options[:loco] = lst end o.on('--input filen',String, 'JSON input variables (used for LOCO)') do |filen| @@ -89,11 +101,11 @@ o.on('--json', 'Create output file in JSON format') do |b| options[:json] = b end - o.on("--force", "Force computation") do |q| + o.on("--force", "Force computation (override cache)") do |q| options[:force] = true end o.on("--q", "--quiet", "Run quietly") do |q| options[:quiet] = true @@ -128,41 +140,46 @@ json_out = lambda do print record.to_json if options[:json] end +# ---- Some error handlers error = lambda do |*msg| if options[:json] record[:error] = *msg.join(" ") record[:errno] = 1 json_out.call end raise *msg end + debug = lambda do |*msg| if options[:debug] record[:debug].push *msg.join("") if options[:json] OUTPUT.print "DEBUG: ",*msg,"\n" end end + warning = lambda do |*msg| record[:warnings].push *msg.join("") OUTPUT.print "WARNING: ",*msg,"\n" end info = lambda do |*msg| record[:debug].push *msg.join("") if options[:json] and options[:debug] OUTPUT.print *msg,"\n" if !options[:quiet] end +# ---- Start banner + GEMMA_K_VERSION=version GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017,2018\n" info.call GEMMA_K_BANNER # Check gemma version GEMMA_COMMAND=options[:gemma_command] gemma_version_header = `#{GEMMA_COMMAND}`.split("\n").grep(/GEMMA|Version/)[0].strip -info.call "Using GEMMA ",gemma_version_header,"\n" +info.call "Using ",gemma_version_header,"\n" gemma_version = gemma_version_header.split(/[,\s]+/)[1] v_version, v_major, v_minor = gemma_version.split(".") info.call "Found #{gemma_version}, comparing against expected v0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}" warning.call "GEMMA version is out of date. Update GEMMA to 0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}!" if v_major.to_i < GEMMA_V_MAJOR or (v_major.to_i == GEMMA_V_MAJOR and (v_minor != nil and v_minor.to_i < GEMMA_V_MINOR)) @@ -183,10 +200,11 @@ # ---- Compute HASH on inputs hashme = [] geno_idx = gemma_args.index '-g' raise "Expected GEMMA -g switch" if geno_idx == nil hashme = gemma_args +hashme += ['-p', options[:phenotypes]] if options[:phenotypes] require 'digest/sha1' debug.call "Hashing on ",hashme,"\n" hashes = [] hashme.each do | item | @@ -228,10 +246,11 @@ io.close err = $?.to_i end err else + debug.call("Invoking ",cmd) if options[:debug] system(cmd) $?.exitstatus end if errno != 0 debug.call "Gemma exit ",errno @@ -242,11 +261,11 @@ end # returns datafn, logfn, cache_hit cache = lambda do | chr, ext | inject = (chr==nil ? "" : ".#{chr}" )+ext - hashi = HASH+inject + hashi = (chr==nil ? HASH : HASH+inject) prefix = options[:cache_dir]+'/'+hashi logfn = prefix+".log.txt" datafn = prefix+ext record[:files] ||= [] record[:files].push [chr,logfn,datafn] @@ -260,10 +279,11 @@ end end return hashi,false end +# ---- Compute K kinship = lambda do | chr = nil | record[:type] = "K" ext = case (GEMMA_ARGS[GEMMA_ARGS.index('-gk')+1]).to_i when 0,1 then '.cXX.txt' when 2 then '.sXX.txt' @@ -277,20 +297,20 @@ invoke_gemma.call [ '-o', HASH ], cache_hit end end end -gwas = lambda do | chr, kfn | +# ---- Run GWA +gwas = lambda do | chr, kfn, pfn | record[:type] = "GWA" - error.call "Do not use the GEMMA -k switch!" if GEMMA_ARGS.include? '-k' + error.call "Do not use the GEMMA -k switch with gemma-wrapper!" if GEMMA_ARGS.include? '-k' # K is automatic hashi, cache_hit = cache.call chr,".assoc.txt" if not cache_hit - if chr != nil - invoke_gemma.call [ '-loco', chr, '-k', kfn, '-o', hashi ] - else - error.call "Not supported" - end + args = [ '-k', kfn, '-o', hashi ] + args << [ '-loco', chr ] if chr != nil + args << [ '-p', pfn ] if pfn + invoke_gemma.call args end end LOCO = options[:loco] if GEMMA_ARGS.include? '-gk' @@ -306,12 +326,56 @@ end else # GWAS json_in = JSON.parse(File.read(options[:input])) raise "JSON problem, file #{options[:input]} is not -gk derived" if json_in["type"] != "K" + + pfn = options[:phenotypes] # can be nil k_files = json_in["files"].map { |rec| [rec[0],rec[2]] } - k_files.each do | chr, kfn | - gwas.call(chr,kfn) + k_files.each do | chr, kfn | # call a GWA for each chromosome + gwas.call(chr,kfn,pfn) + end + # Permute + if options[:permutate] + ps = [] + raise "You should supply --phenotype with gemma-wrapper --permutate" if not pfn + File.foreach(pfn).with_index do |line, line_num| + ps << line + end + score_list = [] + debug.call(options[:permutate],"x permutations") + (1..options[:permutate]).each do |i| + $stderr.print "Iteration ",i,"\n" + # Create a shuffled phenotype file + file = File.open("phenotypes-#{i}","w") + tmp_pfn = file.path + p tmp_pfn + ps.shuffle.each do | l | + file.print(l) + end + file.close + k_files.each do | chr, kfn | # call a GWA for each chromosome + gwas.call(chr,kfn,tmp_pfn) + end + # p [:HEY,record[:files].last] + assocfn = record[:files].last[2] + debug.call("Reading ",assocfn) + score_min = 1000.0 + File.foreach(assocfn).with_index do |assoc, assoc_line_num| + if assoc_line_num > 0 + value = assoc.strip.split(/\t/).last.to_f + score_min = value if value < score_min + end + end + score_list << score_min + end + ls = score_list.sort + p ls + significant = ls[(ls.size - ls.size*0.95).floor] + suggestive = ls[(ls.size - ls.size*0.67).floor] + p ["95 percentile (significant) ",significant,(-Math.log10(significant)).round(1)] + p ["67 percentile (suggestive) ",suggestive,(-Math.log10(suggestive)).round(1)] + exit 0 end end json_out.call exit 0