#! /usr/bin/env ruby # # gemma-wrapper # Author:: Pjotr Prins # License:: GPL3 # # Copyright (C) 2017,2018 Pjotr Prins USAGE = " GEMMA wrapper example: Simple caching of K computation with gemma-wrapper -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -gk LOCO K computation with caching and JSON output gemma-wrapper --json \\ --loco 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -a test/data/input/BXD_snps.txt \\ -gk -debug > K.json LMM's using the K's captured in K.json using the --input switch gemma-wrapper --json --loco --input K.json -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -c test/data/input/BXD_covariates2.txt \\ -a test/data/input/BXD_snps.txt \\ -lmm 2 -maf 0.1 \\ -debug > GWA.json Gemma gets used from the path. You can override by setting env GEMMA_COMMAND=path/bin/gemma gemma-wrapper ... " # These are used for testing compatibility GEMMA_V_MAJOR = 98 GEMMA_V_MINOR = 0 basepath = File.dirname(File.dirname(__FILE__)) $: << File.join(basepath,'lib') VERSION_FILENAME=File.join(basepath,'VERSION') version = File.new(VERSION_FILENAME).read.chomp # ---- GEMMA path gemma_command = ENV['GEMMA_COMMAND'] # Look for gemma if not gemma_command ENV['PATH'].split(':').each do | path | try_bin = path + '/' + 'gemma' if File.executable?(try_bin) gemma_command = try_bin break end end end require 'fileutils' require 'optparse' require 'tmpdir' require 'tempfile' split_at = ARGV.index('--') if split_at gemma_args = ARGV[split_at+1..-1] end options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir() } opts = OptionParser.new do |o| o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]" o.on('--permutate n', Integer, 'Permutate # times by shuffling phenotypes') do |lst| options[:permutate] = lst options[:force] = true end o.on('--permute-phenotypes filen',String, 'Phenotypes to be shuffled in permutations') do |phenotypes| options[:permute_phenotypes] = phenotypes raise "Phenotype input file #{phenotypes} does not exist" if !File.exist?(phenotypes) end o.on('--loco [x,y,1,2,3...]', Array, 'Run full LOCO') do |lst| options[:loco] = lst end o.on('--input filen',String, 'JSON input variables (used for LOCO)') do |filen| options[:input] = filen raise "JSON input file #{filen} does not exist" if !File.exist?(filen) end o.on('--cache-dir path',String, 'Use a cache directory') do |path| options[:cache_dir] = path end o.on('--json', 'Create output file in JSON format') do |b| options[:json] = b end o.on("--force", "Force computation (override cache)") do |q| options[:force] = true end o.on("--q", "--quiet", "Run quietly") do |q| options[:quiet] = true end o.on("-v", "--verbose", "Run verbosely") do |v| options[:verbose] = true end o.on("--debug", "Show debug messages and keep intermediate output") do |v| options[:debug] = true end o.on('--','Anything after gets passed to GEMMA') do o.terminate() end o.separator "" o.on_tail('-h', '--help', 'display this help and exit') do options[:show_help] = true end end opts.parse!(ARGV) # ---- Output handlers OUTPUT = (options[:json] ? $stderr : $stdout ) record = { warnings: [], errno: 0, debug: [] } require 'json' json_out = lambda do print record.to_json if options[:json] end # ---- Some error handlers error = lambda do |*msg| if options[:json] record[:error] = *msg.join(" ") record[:errno] = 1 json_out.call end raise *msg end debug = lambda do |*msg| if options[:debug] record[:debug].push *msg.join("") if options[:json] OUTPUT.print "DEBUG: ",*msg,"\n" end end warning = lambda do |*msg| record[:warnings].push *msg.join("") OUTPUT.print "WARNING: ",*msg,"\n" end info = lambda do |*msg| record[:debug].push *msg.join("") if options[:json] and options[:debug] OUTPUT.print *msg,"\n" if !options[:quiet] end # ---- Start banner GEMMA_K_VERSION=version GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017,2018\n" info.call GEMMA_K_BANNER # Check gemma version GEMMA_COMMAND=options[:gemma_command] gemma_version_header = `#{GEMMA_COMMAND}`.split("\n").grep(/GEMMA|Version/)[0].strip info.call "Using ",gemma_version_header,"\n" gemma_version = gemma_version_header.split(/[,\s]+/)[1] v_version, v_major, v_minor = gemma_version.split(".") info.call "Found #{gemma_version}, comparing against expected v0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}" warning.call "GEMMA version is out of date. Update GEMMA to 0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}!" if v_major.to_i < GEMMA_V_MAJOR or (v_major.to_i == GEMMA_V_MAJOR and (v_minor != nil and v_minor.to_i < GEMMA_V_MINOR)) options[:gemma_version_header] = gemma_version_header options[:gemma_version] = gemma_version if options[:show_help] or gemma_args == nil print opts print USAGE exit 1 end if RUBY_VERSION =~ /^1/ warning "runs on Ruby 2.x only\n" end DO_COMPUTE_KINSHIP = gemma_args.include?("-gk") DO_COMPUTE_GWA = !DO_COMPUTE_KINSHIP # ---- Compute HASH on inputs hashme = [] geno_idx = gemma_args.index '-g' raise "Expected GEMMA -g genotype file switch" if geno_idx == nil pheno_idx = gemma_args.index '-p' hashme = if DO_COMPUTE_KINSHIP and pheno_idx != nil p [pheno_idx,gemma_args[pheno_idx+2..-1]] gemma_args[0..pheno_idx-1] + gemma_args[pheno_idx+2..-1] else gemma_args end if DO_COMPUTE_GWA raise "Did not expect GEMMA -p phenotype file switch" if pheno_idx hashme += ['-p', options[:permute_phenotypes]] if options[:permute_phenotypes] end require 'digest/sha1' debug.call "Hashing on ",hashme,"\n" hashes = [] hashme.each do | item | if File.exist?(item) hashes << Digest::SHA1.hexdigest(File.read(item)) debug.call [item,hashes.last] else hashes << item end end HASH = Digest::SHA1.hexdigest hashes.join(' ') options[:hash] = HASH # Create cache dir FileUtils::mkdir_p options[:cache_dir] error.call "Do not use the GEMMA -o switch!" if gemma_args.include? '-o' error.call "Do not use the GEMMA -outdir switch!" if gemma_args.include? '-outdir' gemma_args << '-outdir' gemma_args << options[:cache_dir] GEMMA_ARGS = gemma_args debug.call "Options: ",options,"\n" if !options[:quiet] invoke_gemma = lambda do |extra_args, cache_hit = false| cmd="#{GEMMA_COMMAND} #{GEMMA_ARGS.join(' ')} #{extra_args.join(' ')}" record[:gemma_command] = cmd return if cache_hit # debug.call cmd errno = if options[:json] # capture output err = 0 IO.popen(cmd) do |io| while s = io.gets $stderr.print s end io.close err = $?.to_i end err else debug.call("Invoking ",cmd) if options[:debug] system(cmd) $?.exitstatus end if errno != 0 debug.call "Gemma exit ",errno record[:errno] = errno json_out.call raise "exit on GEMMA error #{errno}" end end # returns datafn, logfn, cache_hit cache = lambda do | chr, ext | inject = (chr==nil ? "" : ".#{chr}" )+ext hashi = (chr==nil ? HASH : HASH+inject) prefix = options[:cache_dir]+'/'+hashi logfn = prefix+".log.txt" datafn = prefix+ext record[:files] ||= [] record[:files].push [chr,logfn,datafn] if !options[:force] if File.exist? logfn and File.exist? datafn if File.read(logfn).include? "total computation time" record[:cache_hit] = true info.call "#{logfn} CACHE HIT!\n" return hashi, true end end end return hashi,false end # ---- Compute K kinship = lambda do | chr = nil | record[:type] = "K" ext = case (GEMMA_ARGS[GEMMA_ARGS.index('-gk')+1]).to_i when 0,1 then '.cXX.txt' when 2 then '.sXX.txt' else error.call "Unknown kinship type" end hashi, cache_hit = cache.call chr,ext if not cache_hit if chr != nil invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit else invoke_gemma.call [ '-o', HASH ], cache_hit end end end # ---- Run GWA gwas = lambda do | chr, kfn, pfn | record[:type] = "GWA" error.call "Do not use the GEMMA -k switch with gemma-wrapper - it is automatic!" if GEMMA_ARGS.include? '-k' # K is automatic hashi, cache_hit = cache.call chr,".assoc.txt" if not cache_hit args = [ '-k', kfn, '-o', hashi ] args << [ '-loco', chr ] if chr != nil args << [ '-p', pfn ] if pfn invoke_gemma.call args end end LOCO = options[:loco] # if GEMMA_ARGS.include? '-gk' if DO_COMPUTE_KINSHIP # compute K info.call LOCO if LOCO != nil LOCO.each do |chr| info.call "LOCO for ",chr kinship.call(chr) end else kinship.call # no LOCO end else # DO_COMPUTE_GWA json_in = JSON.parse(File.read(options[:input])) raise "JSON problem, file #{options[:input]} is not -gk derived" if json_in["type"] != "K" pfn = options[:phenotypes] # can be nil k_files = json_in["files"].map { |rec| [rec[0],rec[2]] } k_files.each do | chr, kfn | # call a GWA for each chromosome gwas.call(chr,kfn,pfn) end # Permute if options[:permutate] ps = [] raise "You should supply --phenotype with gemma-wrapper --permutate" if not pfn File.foreach(pfn).with_index do |line, line_num| ps << line end score_list = [] debug.call(options[:permutate],"x permutations") (1..options[:permutate]).each do |i| $stderr.print "Iteration ",i,"\n" # Create a shuffled phenotype file file = File.open("phenotypes-#{i}","w") tmp_pfn = file.path p tmp_pfn ps.shuffle.each do | l | file.print(l) end file.close k_files.each do | chr, kfn | # call a GWA for each chromosome gwas.call(chr,kfn,tmp_pfn) end # p [:HEY,record[:files].last] assocfn = record[:files].last[2] debug.call("Reading ",assocfn) score_min = 1000.0 File.foreach(assocfn).with_index do |assoc, assoc_line_num| if assoc_line_num > 0 value = assoc.strip.split(/\t/).last.to_f score_min = value if value < score_min end end score_list << score_min end ls = score_list.sort p ls significant = ls[(ls.size - ls.size*0.95).floor] suggestive = ls[(ls.size - ls.size*0.67).floor] p ["95 percentile (significant) ",significant,(-Math.log10(significant)).round(1)] p ["67 percentile (suggestive) ",suggestive,(-Math.log10(suggestive)).round(1)] exit 0 end end json_out.call exit 0