#! /usr/bin/env ruby # # gemma-wrapper # Author:: Pjotr Prins # License:: GPL3 # # Copyright (C) 2017 Pjotr Prins USAGE = "GEMMA wrapper example: Simple caching of K computation with gemma-wrapper -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -gk LOCO K computation with caching and JSON output gemma-wrapper --json \\ --loco 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -a test/data/input/BXD_snps.txt \\ -gk -debug > K.json LMM's using the K's captured in K.json using the --input switch gemma-wrapper --json --loco --input K.json -- \\ -g test/data/input/BXD_geno.txt.gz \\ -p test/data/input/BXD_pheno.txt \\ -c test/data/input/BXD_covariates2.txt \\ -a test/data/input/BXD_snps.txt \\ -lmm 2 -maf 0.1 \\ -debug > GWA.json " GEMMA_V_MAJOR = 97 GEMMA_V_MINOR = 2 basepath = File.dirname(File.dirname(__FILE__)) $: << File.join(basepath,'lib') VERSION_FILENAME=File.join(basepath,'VERSION') version = File.new(VERSION_FILENAME).read.chomp # ---- GEMMA path gemma_command = ENV['GEMMA_COMMAND'] # Look for gemma if not gemma_command ENV['PATH'].split(':').each do | path | try_bin = path + '/' + 'gemma' if File.executable?(try_bin) gemma_command = try_bin break end end end require 'fileutils' require 'optparse' require 'tmpdir' split_at = ARGV.index('--') if split_at gemma_args = ARGV[split_at+1..-1] end options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir() } opts = OptionParser.new do |o| o.banner = "Usage: #{File.basename($0)} [options] -- [gemma-options]" o.on('--loco [x,y,1,2,3...]', Array, 'Run full LOCO') do |lst| options[:loco] = lst end o.on('--input filen',String, 'JSON input variables (used for LOCO)') do |filen| options[:input] = filen raise "JSON input file #{filen} does not exist" if !File.exist?(filen) end o.on('--cache-dir path',String, 'Use a cache directory') do |path| options[:cache_dir] = path end o.on('--json', 'Create output file in JSON format') do |b| options[:json] = b end o.on("--force", "Force computation") do |q| options[:force] = true end o.on("--q", "--quiet", "Run quietly") do |q| options[:quiet] = true end o.on("-v", "--verbose", "Run verbosely") do |v| options[:verbose] = true end o.on("--debug", "Show debug messages and keep intermediate output") do |v| options[:debug] = true end o.on('--','Anything after gets passed to GEMMA') do o.terminate() end o.separator "" o.on_tail('-h', '--help', 'display this help and exit') do options[:show_help] = true end end opts.parse!(ARGV) # ---- Output handlers OUTPUT = (options[:json] ? $stderr : $stdout ) record = { warnings: [], errno: 0, debug: [] } require 'json' json_out = lambda do print record.to_json if options[:json] end error = lambda do |*msg| if options[:json] record[:error] = *msg.join(" ") record[:errno] = 1 json_out.call end raise *msg end debug = lambda do |*msg| if options[:debug] record[:debug].push *msg.join("") if options[:json] OUTPUT.print "DEBUG: ",*msg,"\n" end end warning = lambda do |*msg| record[:warnings].push *msg.join("") OUTPUT.print "WARNING: ",*msg,"\n" end info = lambda do |*msg| record[:debug].push *msg.join("") if options[:json] and options[:debug] OUTPUT.print *msg,"\n" if !options[:quiet] end GEMMA_K_VERSION=version GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017\n" info.call GEMMA_K_BANNER # Check gemma version GEMMA_COMMAND=options[:gemma_command] gemma_version_header = `#{GEMMA_COMMAND}`.split("\n").grep(/Version/)[0].strip info.call "Using GEMMA ",gemma_version_header,"\n" gemma_version = gemma_version_header.split(/[,\s]+/)[1] v_version, v_major, v_minor = gemma_version.split(".") error.call "GEMMA version is out of date. Update GEMMA to 0.#{GEMMA_V_MAJOR}.#{GEMMA_V_MINOR}!" if v_major.to_i < GEMMA_V_MAJOR or (v_major.to_i == GEMMA_V_MAJOR and (v_minor == nil or v_minor.to_i < GEMMA_V_MINOR)) options[:gemma_version_header] = gemma_version_header options[:gemma_version] = gemma_version if options[:show_help] or gemma_args == nil print opts print USAGE exit 1 end if RUBY_VERSION =~ /^1/ warning "runs on Ruby 2.x only\n" end # ---- Compute HASH on inputs hashme = [] geno_idx = gemma_args.index '-g' raise "Expected GEMMA -g switch" if geno_idx == nil hashme = gemma_args require 'digest/sha1' debug.call "Hashing on ",hashme,"\n" hashes = [] hashme.each do | item | if File.exist?(item) hashes << Digest::SHA1.hexdigest(File.read(item)) debug.call [item,hashes.last] else hashes << item end end HASH = Digest::SHA1.hexdigest hashes.join(' ') options[:hash] = HASH # Create cache dir FileUtils::mkdir_p options[:cache_dir] error.call "Do not use the GEMMA -o switch!" if gemma_args.include? '-o' error.call "Do not use the GEMMA -outdir switch!" if gemma_args.include? '-outdir' gemma_args << '-outdir' gemma_args << options[:cache_dir] GEMMA_ARGS = gemma_args debug.call "Options: ",options,"\n" if !options[:quiet] invoke_gemma = lambda do |extra_args, cache_hit = false| cmd="#{GEMMA_COMMAND} #{GEMMA_ARGS.join(' ')} #{extra_args.join(' ')}" record[:gemma_command] = cmd return if cache_hit # debug.call cmd errno = if options[:json] # capture output err = 0 IO.popen(cmd) do |io| while s = io.gets $stderr.print s end io.close err = $?.to_i end err else system(cmd) $?.exitstatus end if errno != 0 debug.call "Gemma exit ",errno record[:errno] = errno json_out.call raise "exit on GEMMA error #{errno}" end end # returns datafn, logfn, cache_hit cache = lambda do | chr, ext | inject = (chr==nil ? "" : ".#{chr}" )+ext hashi = HASH+inject prefix = options[:cache_dir]+'/'+hashi logfn = prefix+".log.txt" datafn = prefix+ext record[:files] ||= [] record[:files].push [chr,logfn,datafn] if !options[:force] if File.exist? logfn and File.exist? datafn if File.read(logfn).include? "total computation time" record[:cache_hit] = true info.call "#{logfn} CACHE HIT!\n" return hashi, true end end end return hashi,false end kinship = lambda do | chr = nil | record[:type] = "K" ext = case (GEMMA_ARGS[GEMMA_ARGS.index('-gk')+1]).to_i when 0,1 then '.cXX.txt' when 2 then '.sXX.txt' else error.call "Unknown kinship type" end hashi, cache_hit = cache.call chr,ext if not cache_hit if chr != nil invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit else invoke_gemma.call [ '-o', HASH ], cache_hit end end end gwas = lambda do | chr, kfn | record[:type] = "GWA" error.call "Do not use the GEMMA -k switch!" if GEMMA_ARGS.include? '-k' hashi, cache_hit = cache.call chr,".assoc.txt" if not cache_hit if chr != nil invoke_gemma.call [ '-loco', chr, '-k', kfn, '-o', hashi ] else error.call "Not supported" end end end LOCO = options[:loco] if GEMMA_ARGS.include? '-gk' # compute K info.call LOCO if LOCO != nil LOCO.each do |chr| info.call "LOCO for ",chr kinship.call(chr) end else kinship.call # no LOCO end else # GWAS json_in = JSON.parse(File.read(options[:input])) raise "JSON problem, file #{options[:input]} is not -gk derived" if json_in["type"] != "K" k_files = json_in["files"].map { |rec| [rec[0],rec[2]] } k_files.each do | chr, kfn | gwas.call(chr,kfn) end end json_out.call exit 0