#!/usr/bin/env ruby
#
# BioRuby vcf plugin
# Author:: Pjotr Prins
#
# Copyright (C) 2014 Pjotr Prins
#
# Command-line VCF processor. Reads VCF text from STDIN and, for every record
# line that passes the optional --filter expression, prints one of:
#   * the result of the --eval expression,
#   * a Turtle RDF rendering of the record (--rdf), or
#   * the record's fields joined by tabs (default).
#
# NOTE: the --filter and --eval expressions are evaluated in this script's
# top-level scope, so the locals `line`, `fields`, `rec` and `header` below are
# visible to them. Keep those names and keep this script flat.

USAGE = "Vcf parser"

gempath = File.dirname(File.dirname(__FILE__))
$LOAD_PATH << File.join(gempath, 'lib')

VERSION_FILENAME = File.join(gempath, 'VERSION')
# File.read opens, reads and closes the file; File.new(...).read left the
# handle open until garbage collection.
version = File.read(VERSION_FILENAME).chomp

require 'bio-vcf'
require 'optparse'

# Uncomment when using the bio-logger
# require 'bio-logger'
# log = Bio::Log::LoggerPlus.new 'vcf'
# log.outputters = Bio::Log::Outputter.stderr
# Bio::Log::CLI.logger('stderr')
# Bio::Log::CLI.trace('info')

options = { show_help: false }

opts = OptionParser.new do |o|
  o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} < test/data/input/somaticsniper.vcf"

  # SECURITY: the --filter, --eval and --eval-once arguments are Ruby source
  # that is passed to eval further down. Only run with trusted arguments.
  o.on('--filter cmd', String, 'Evaluate filter on each record') do |cmd|
    options[:filter] = cmd
  end

  o.on('-e cmd', '--eval cmd', String, 'Evaluate command on each record') do |cmd|
    options[:eval] = cmd
  end

  o.on('--eval-once cmd', String, 'Evaluate command once (usually for header info)') do |cmd|
    options[:eval_once] = true
    options[:eval] = cmd
  end

  o.on("--rdf", "Generate Turtle RDF") do
    # Loaded lazily so the RDF code is only required when it is requested.
    require 'bio-vcf/vcfrdf'
    options[:rdf] = true
  end

  o.on_tail("--id name", String, "Identifier") do |s|
    options[:id] = s
  end

  o.on_tail("--tags list", String, "Add tags") do |s|
    # SECURITY: the tag list is evaluated as Ruby source taken verbatim from
    # the command line. Only run with trusted arguments.
    options[:tags] = eval(s)
  end

  # Uncomment the following when using the bio-logger
  # o.separator ""
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
  #   Bio::Log::CLI.logger(name)
  # end
  #
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
  #   Bio::Log::CLI.trace(s)
  # end
  #
  o.on("-q", "--quiet", "Run quietly") do
    # Bio::Log::CLI.trace('error')
    options[:quiet] = true
  end
  #
  # o.on("-v", "--verbose", "Run verbosely") do |v|
  #   Bio::Log::CLI.trace('info')
  # end
  #
  # o.on("--debug", "Show debug messages") do |v|
  #   Bio::Log::CLI.trace('debug')
  # end

  o.separator ""
  o.on_tail('-h', '--help', 'display this help and exit') do
    options[:show_help] = true
  end
end

include BioVcf

opts.parse!(ARGV)

$stderr.print "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n" unless options[:quiet]

if options[:show_help]
  print opts
  print USAGE
  exit 1
end

$stderr.print "Options: ", options, "\n" unless options[:quiet]

header = VcfHeader.new
header_out = false # set once the RDF preamble has been emitted

STDIN.each_line do |line|
  if line =~ /^##fileformat=/
    # ---- We have a new file header
    header = VcfHeader.new
    header.add(line)
    # Consume the remaining '#' lines. The first non-header line read here
    # becomes the record handled by the rest of this iteration.
    line = nil
    STDIN.each_line do |headerline|
      if headerline !~ /^#/
        line = headerline
        break # end of header
      end
      header.add(headerline)
    end
    # Input ended inside the header: there is no record to parse. Without this
    # guard the '##fileformat' line itself was parsed and printed as a record.
    next if line.nil?
  end

  # ---- Parse VCF record line
  # fields = VcfLine.parse(line,header.columns)
  fields = VcfLine.parse(line)
  rec = VcfRecord.new(fields, header)

  # No filter means every record passes; otherwise the filter expression decides.
  if !options[:filter] || eval(options[:filter])
    if options[:eval]
      print eval(options[:eval])
      # NOTE(review): leaves with status 1 (and before the trailing newline)
      # even though the evaluation succeeded; kept unchanged for compatibility.
      exit(1) if options[:eval_once]
    elsif options[:rdf]
      # Output Turtle RDF; the preamble is written once, before the first record
      unless header_out
        VcfRdf::header
        header_out = true
      end
      VcfRdf::record(options[:id], rec, options[:tags])
    else
      # Default behaviour prints VCF line
      print fields.join("\t")
    end
    print "\n"
  end
end