#!/usr/bin/env ruby

require 'optparse'
require 'methadone'
require 'fasta_read'

# Command-line entry point that extracts a DNA sequence from per-chromosome
# FASTA files laid out as <assembly>/<snp|unmasked>/<chromosome>[.subst].fa
# relative to the current working directory.
class App
  include Methadone::Main
  include Methadone::CLILogging

  # Arguments arrive as the strings given on the command line, in the order
  # declared by the `arg` calls below.
  main do |assembly, chromosome, cstart, cend|
    # Resolve the directory branch and file-name suffix once; both depend
    # only on whether --snp was given.
    branch, suffix = snp_or_unmasked(options)
    path = "#{assembly}/#{branch}/#{chromosome}#{suffix}.fa"
    exit_now_with!(assembly_or_chromosome(assembly, chromosome)) unless File.exist?(path)

    # The chromosome argument already carries its "chr" prefix (it is used
    # verbatim in the file name), so it is logged as-is.
    debug("About to read #{assembly} #{branch} #{chromosome}:#{cstart}-#{cend}")

    # File.read rather than IO.read: IO.read interprets a leading "|" in its
    # argument as a command to spawn, and this path starts with user input.
    fasta = File.read(path)

    # NOTE(review): cstart/cend are passed through as received from the
    # command line; presumably FastaRead::Sequence handles conversion - confirm.
    sequence = FastaRead::Sequence.new(fasta, chromosome, cstart, cend).process

    if options[:output]
      File.open(options[:output], "w") { |file| file.puts sequence }
    else
      puts sequence
    end
    info "#{sequence.length} base pairs"
  end

  # Selects which branch of the directory tree to read from.
  #
  # @param options [Hash] parsed command-line options; only :snp is consulted
  # @return [Array<String>] two elements: the sub-directory name and the
  #   suffix inserted between the chromosome name and ".fa"
  def self.snp_or_unmasked(options)
    options[:snp] ? ["snp", ".subst"] : ["unmasked", ""]
  end

  # Decides which argument to blame when the FASTA file is missing: if the
  # assembly directory exists the chromosome is reported, otherwise the
  # assembly itself.
  #
  # @param assembly [String] assembly directory name
  # @param chromosome [String] chromosome identifier
  # @return [Array<String>] the offending value followed by its argument name
  def self.assembly_or_chromosome(assembly, chromosome)
    Dir.exist?(assembly.to_s) ? [chromosome, "chromosome"] : [assembly, "assembly"]
  end

  # Terminates the program with exit status 1 and a message naming the
  # argument that could not be found on disk.
  #
  # @param value_and_argument [Array<String>] the pair returned by
  #   assembly_or_chromosome
  def self.exit_now_with!(value_and_argument)
    value, argument = value_and_argument
    exit_now!(1, "the '#{value}' #{argument} doesn't exist in directory structure")
  end

  # Command-line interface declaration.
  description <<-EOF
Extract DNA Fasta sequence from assembly files. The program depends on being
run at the top of a directory tree containing .fa files. The .fa files should
be of the form where each file maps to a single chromosome. The directory tree
will have separate branches for SNPs and unmasked files. For example, provided
the following tree the command expects to be run inside the 'fasta' directory:

└── fasta
    ├── hg19
    │   ├── snp
    │   │   ├── chr1.subst.fa
    │   │   ├── chr2.subst.fa
    │   │   ├── chr3.subst.fa
    │   │   └── ....
    │   └── unmasked
    │       ├── chr1.fa
    │       ├── chr2.fa
    │       ├── chr3.fa
    │       └── ....
    └── mm10
        ├── snp
        │   ├── chr1.subst.fa
        │   ├── chr2.subst.fa
        │   ├── chr3.subst.fa
        │   └── ....
        └── unmasked
            ├── chr1.fa
            ├── chr2.fa
            ├── chr3.fa
            └── ....
EOF

  # Options.
  on "-o OUTPUTFILE", "--output", "outputs sequence to a file"
  on "--snp", "return the sequence from the SNP-masked assembly"

  # Required positional arguments, in the order the main block receives them.
  arg :assembly, "assembly name (hg19, mm10, etc.)"
  arg :chromosome, "id of chromosome (chr1-chr22 or chrx/chry)"
  arg :cstart, "start coordinate (inclusive) within the chromosome"
  arg :cend, "end coordinate within the chromosome"

  version FastaRead::VERSION

  use_log_level_option

  go!
end