bin/gemma-wrapper in bio-gemma-wrapper-0.97 vs bin/gemma-wrapper in bio-gemma-wrapper-0.97.1
- old
+ new
@@ -4,11 +4,12 @@
# Author:: Pjotr Prins
# License:: GPL3
#
# Copyright (C) 2017,2018 Pjotr Prins <pjotr.prins@thebird.nl>
-USAGE = "GEMMA wrapper example:
+USAGE = "
+GEMMA wrapper example:
Simple caching of K computation with
gemma-wrapper -- \\
-g test/data/input/BXD_geno.txt.gz \\
@@ -61,21 +62,32 @@
end
require 'fileutils'
require 'optparse'
require 'tmpdir'
+require 'tempfile'
split_at = ARGV.index('--')
if split_at
gemma_args = ARGV[split_at+1..-1]
end
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir() }
opts = OptionParser.new do |o|
- o.banner = "Usage: #{File.basename($0)} [options] -- [gemma-options]"
+ o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
+ o.on('--permutate n', Integer, 'Permutate by shuffling phenotypes') do |lst|
+ options[:permutate] = lst
+ options[:force] = true
+ end
+
+ o.on('--phenotypes filen',String, 'Phenotypes to be shuffled in permutations') do |phenotypes|
+ options[:phenotypes] = phenotypes
+ raise "Phenotype input file #{phenotypes} does not exist" if !File.exist?(phenotypes)
+ end
+
o.on('--loco [x,y,1,2,3...]', Array, 'Run full LOCO') do |lst|
options[:loco] = lst
end
o.on('--input filen',String, 'JSON input variables (used for LOCO)') do |filen|
@@ -89,11 +101,11 @@
o.on('--json', 'Create output file in JSON format') do |b|
options[:json] = b
end
- o.on("--force", "Force computation") do |q|
+ o.on("--force", "Force computation (override cache)") do |q|
options[:force] = true
end
o.on("--q", "--quiet", "Run quietly") do |q|
options[:quiet] = true
@@ -128,41 +140,46 @@
json_out = lambda do
print record.to_json if options[:json]
end
+# ---- Some error handlers
error = lambda do |*msg|
if options[:json]
record[:error] = *msg.join(" ")
record[:errno] = 1
json_out.call
end
raise *msg
end
+
debug = lambda do |*msg|
if options[:debug]
record[:debug].push *msg.join("") if options[:json]
OUTPUT.print "DEBUG: ",*msg,"\n"
end
end
+
warning = lambda do |*msg|
record[:warnings].push *msg.join("")
OUTPUT.print "WARNING: ",*msg,"\n"
end
info = lambda do |*msg|
record[:debug].push *msg.join("") if options[:json] and options[:debug]
OUTPUT.print *msg,"\n" if !options[:quiet]
end
+# ---- Start banner
+
GEMMA_K_VERSION=version
GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017,2018\n"
info.call GEMMA_K_BANNER
# Check gemma version
GEMMA_COMMAND=options[:gemma_command]
gemma_version_header = `#{GEMMA_COMMAND}`.split("\n").grep(/GEMMA|Version/)[0].strip
-info.call "Using GEMMA ",gemma_version_header,"\n"
+info.call "Using ",gemma_version_header,"\n"
gemma_version = gemma_version_header.split(/[,\s]+/)[1]
v_version, v_major, v_minor = gemma_version.split(".")
info.call "Found #{gemma_version}, comparing against expected v0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}"
warning.call "GEMMA version is out of date. Update GEMMA to 0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}!" if v_major.to_i < GEMMA_V_MAJOR or (v_major.to_i == GEMMA_V_MAJOR and (v_minor != nil and v_minor.to_i < GEMMA_V_MINOR))
@@ -183,10 +200,11 @@
# ---- Compute HASH on inputs
hashme = []
geno_idx = gemma_args.index '-g'
raise "Expected GEMMA -g switch" if geno_idx == nil
hashme = gemma_args
+hashme += ['-p', options[:phenotypes]] if options[:phenotypes]
require 'digest/sha1'
debug.call "Hashing on ",hashme,"\n"
hashes = []
hashme.each do | item |
@@ -228,10 +246,11 @@
io.close
err = $?.to_i
end
err
else
+ debug.call("Invoking ",cmd) if options[:debug]
system(cmd)
$?.exitstatus
end
if errno != 0
debug.call "Gemma exit ",errno
@@ -242,11 +261,11 @@
end
# returns datafn, logfn, cache_hit
cache = lambda do | chr, ext |
inject = (chr==nil ? "" : ".#{chr}" )+ext
- hashi = HASH+inject
+ hashi = (chr==nil ? HASH : HASH+inject)
prefix = options[:cache_dir]+'/'+hashi
logfn = prefix+".log.txt"
datafn = prefix+ext
record[:files] ||= []
record[:files].push [chr,logfn,datafn]
@@ -260,10 +279,11 @@
end
end
return hashi,false
end
+# ---- Compute K
kinship = lambda do | chr = nil |
record[:type] = "K"
ext = case (GEMMA_ARGS[GEMMA_ARGS.index('-gk')+1]).to_i
when 0,1 then '.cXX.txt'
when 2 then '.sXX.txt'
@@ -277,20 +297,20 @@
invoke_gemma.call [ '-o', HASH ], cache_hit
end
end
end
-gwas = lambda do | chr, kfn |
+# ---- Run GWA
+gwas = lambda do | chr, kfn, pfn |
record[:type] = "GWA"
- error.call "Do not use the GEMMA -k switch!" if GEMMA_ARGS.include? '-k'
+ error.call "Do not use the GEMMA -k switch with gemma-wrapper!" if GEMMA_ARGS.include? '-k' # K is automatic
hashi, cache_hit = cache.call chr,".assoc.txt"
if not cache_hit
- if chr != nil
- invoke_gemma.call [ '-loco', chr, '-k', kfn, '-o', hashi ]
- else
- error.call "Not supported"
- end
+ args = [ '-k', kfn, '-o', hashi ]
+ args << [ '-loco', chr ] if chr != nil
+ args << [ '-p', pfn ] if pfn
+ invoke_gemma.call args
end
end
LOCO = options[:loco]
if GEMMA_ARGS.include? '-gk'
@@ -306,12 +326,56 @@
end
else
# GWAS
json_in = JSON.parse(File.read(options[:input]))
raise "JSON problem, file #{options[:input]} is not -gk derived" if json_in["type"] != "K"
+
+ pfn = options[:phenotypes] # can be nil
k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
- k_files.each do | chr, kfn |
- gwas.call(chr,kfn)
+ k_files.each do | chr, kfn | # call a GWA for each chromosome
+ gwas.call(chr,kfn,pfn)
+ end
+ # Permute
+ if options[:permutate]
+ ps = []
+ raise "You should supply --phenotype with gemma-wrapper --permutate" if not pfn
+ File.foreach(pfn).with_index do |line, line_num|
+ ps << line
+ end
+ score_list = []
+ debug.call(options[:permutate],"x permutations")
+ (1..options[:permutate]).each do |i|
+ $stderr.print "Iteration ",i,"\n"
+ # Create a shuffled phenotype file
+ file = File.open("phenotypes-#{i}","w")
+ tmp_pfn = file.path
+ p tmp_pfn
+ ps.shuffle.each do | l |
+ file.print(l)
+ end
+ file.close
+ k_files.each do | chr, kfn | # call a GWA for each chromosome
+ gwas.call(chr,kfn,tmp_pfn)
+ end
+ # p [:HEY,record[:files].last]
+ assocfn = record[:files].last[2]
+ debug.call("Reading ",assocfn)
+ score_min = 1000.0
+ File.foreach(assocfn).with_index do |assoc, assoc_line_num|
+ if assoc_line_num > 0
+ value = assoc.strip.split(/\t/).last.to_f
+ score_min = value if value < score_min
+ end
+ end
+ score_list << score_min
+ end
+ ls = score_list.sort
+ p ls
+ significant = ls[(ls.size - ls.size*0.95).floor]
+ suggestive = ls[(ls.size - ls.size*0.67).floor]
+ p ["95 percentile (significant) ",significant,(-Math.log10(significant)).round(1)]
+ p ["67 percentile (suggestive) ",suggestive,(-Math.log10(suggestive)).round(1)]
+ exit 0
end
end
json_out.call
exit 0