Class: Bio::BWA

Inherits:
Object
  • Object
show all
Extended by:
FFI::Library
Defined in:
lib/bio/bwa.rb,
lib/bio/bwa/library.rb

Overview

Author:

Defined Under Namespace

Classes: Library

Class Method Summary (collapse)

Class Method Details

+ (Object) build_args_for_BWA(args)

Note:

this method should not be called directly

Internal method to build argument list for BWA C functions



263
264
265
266
267
268
269
270
271
272
# File 'lib/bio/bwa.rb', line 263

# Internal helper: marshals a Ruby argument list into a C-style argv array.
#
# Every element of +args+ is converted with +to_s+ and copied into its own
# FFI::MemoryPointer; the addresses of those buffers are then written into a
# second pointer that holds an array of pointers.
#
# @note this method should not be called directly
# @param args [Array] command name followed by its options and operands
# @return [FFI::MemoryPointer] pointer to the array of string pointers
def self.build_args_for_BWA(args)
  cmd_args = args.map { |arg| FFI::MemoryPointer.from_string(arg.to_s) }
  # One extra slot is allocated and never written, so the array ends with a
  # zeroed entry in the way C's argv[argc] conventionally does. The third
  # argument asks FFI to zero-fill the new memory.
  exec_args = FFI::MemoryPointer.new(:pointer, cmd_args.length + 1, true)
  cmd_args.each_with_index do |arg, i|
    exec_args[i].put_pointer(0, arg) # store the address of the i-th string
  end
  # exec_args only contains raw addresses. Keep the string buffers reachable
  # through the returned object so they cannot be garbage-collected (and their
  # memory released) while the caller is still using exec_args.
  exec_args.instance_variable_set(:@bwa_arg_strings, cmd_args)
  exec_args
end

+ (Object) build_parameters(function_name, valid_params, params, last_params)

Note:

this method should not be called directly

Internal method to produce a correct parameter list for BWA functions



276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/bio/bwa.rb', line 276

# Internal helper: turns an options hash into the ordered argument list
# expected by a BWA command.
#
# Keys that are not listed in +last_params+ are emitted as "-key" followed by
# their value; the value is omitted when it is exactly +true+ (a bare flag).
# The values of +last_params+ are appended afterwards, in the order given.
#
# @note this method should not be called directly
# @param function_name [String] becomes the first element of the result
# @param valid_params [Array<String>, String] accepted parameter names; a
#   whitespace-separated String is split into individual names
# @param params [Hash] parameter name => value
# @param last_params [Array] keys whose values are positional and go last
# @return [Array] the argument list
# @raise [ArgumentError] if +params+ contains a key not in +valid_params+
def self.build_parameters(function_name,valid_params,params,last_params)
  # With a String, include? would be a substring test and accept bogus keys
  # such as :le for "file_in file_out"; compare against whole names instead.
  valid_names = valid_params.is_a?(String) ? valid_params.split : valid_params
  args = [function_name]
  params.each_key do |k|
    raise ArgumentError, "Unknown parameter '#{k}'" unless valid_names.include?(k.to_s)
    next if last_params.include?(k) # positional values are appended after the options
    args << "-#{k}"
    args << params[k] unless params[k] == true # flags carry no value, only the name
  end
  # Append the positional values in the required order. A positional that was
  # not supplied is skipped rather than pushed as nil, which would otherwise
  # reach the C side as an empty-string argument.
  last_params.each do |p|
    value = params[p]
    args << value unless value.nil?
  end
  args
end

+ (Object) bwt2sa(params = {})

Generate SA file from BWT and Occ files

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the BWT file (REQUIRED)

  • (String) :file_out

    the name of the SA file (REQUIRED)



60
61
62
63
64
65
66
# File 'lib/bio/bwa.rb', line 60

# Generate the SA file from BWT and Occ files through BWA's "bwt2sa" command.
#
# @param params [Hash] :file_in and :file_out are required and are passed as
#   the trailing positional arguments, in that order; "i" is accepted as an
#   option
# @return [Object] whatever call_BWA_function returns
# @raise [ArgumentError] if a required parameter is missing
def self.bwt2sa(params={})
  # %w yields an Array of names. The previous %q produced one String, which
  # turned the validity check into a substring match.
  valid_params = %w(file_in file_out i)
  last_params = [:file_in,:file_out]
  check_mandatory(last_params, params)
  args = build_parameters("bwt2sa",valid_params,params,last_params)
  call_BWA_function(args)
end

+ (Object) bwtupdate(params = {})

Note:

this method overwrites the existing BWT file

Convert a BWT file to the new BWT format

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the BWT file (REQUIRED)



36
37
38
39
40
41
42
# File 'lib/bio/bwa.rb', line 36

# Convert a BWT file to the new BWT format through BWA's "bwtupdate" command.
#
# :file_in is the only accepted parameter; it is required and is passed as
# the single positional argument.
def self.bwtupdate(params = {})
  positional = [:file_in]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("bwtupdate", %w(file_in), params, positional))
end

+ (Object) call_BWA_function(args)

Note:

this method should not be called directly

Internal method to call the BWA C functions



256
257
258
259
# File 'lib/bio/bwa.rb', line 256

# Internal helper: dispatches an argument list to the matching BWA C function.
#
# The target method name is "bwa_" followed by the first element of +args+.
# It is invoked with the number of arguments and the converted argument
# array, mirroring C's (int argc, char *argv[]).
def self.call_BWA_function(args)
  argv = build_args_for_BWA(args)
  c_function = "bwa_#{args[0]}".to_sym
  argc = args.size
  __send__(c_function, argc, argv)
end

+ (Object) change_arg_name(hash, key, new_key)

Note:

this method should not be called directly

Internal method used to change parameters name from Ruby to BWA functions



297
298
299
300
301
# File 'lib/bio/bwa.rb', line 297

# Internal helper: renames a key of +hash+ in place.
#
# The value found under +key+ is stored under +new_key+, then +key+ is
# removed. The same hash object is returned.
def self.change_arg_name(hash, key, new_key)
  value = hash[key]
  hash.store(new_key, value)
  hash.delete(key)
  hash
end

+ (Object) check_mandatory(mandatory_params, params)

Note:

this method should not be called directly

Internal method to check if mandatory params have been set



291
292
293
# File 'lib/bio/bwa.rb', line 291

# Internal helper: verifies that every name in +mandatory_params+ is present
# in +params+.
#
# Raises ArgumentError naming the first absent parameter; otherwise returns
# +mandatory_params+.
def self.check_mandatory(mandatory_params, params)
  mandatory_params.each do |name|
    next if params.include?(name)
    raise ArgumentError, "You must provide parameter '#{name}'"
  end
end

+ (Object) fa2pac(params = {})

Convert a Fasta to Packed format

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the Fasta or FastQ file (REQUIRED)

  • (String) :prefix

    the prefix name for the PAC file



11
12
13
14
15
16
17
18
# File 'lib/bio/bwa.rb', line 11

# Convert a Fasta to Packed format through BWA's "fa2pac" command.
#
# @param params [Hash] :file_in is required; :prefix is optional. Both are
#   passed as positional arguments, :file_in first.
# @return [Object] whatever call_BWA_function returns
# @raise [ArgumentError] if :file_in is missing
def self.fa2pac(params={})
  # %w yields an Array of names. The previous %q produced one String, which
  # turned the validity check into a substring match.
  valid_params = %w(file_in prefix)
  mandatory_params = [:file_in]
  check_mandatory(mandatory_params, params)
  last_params = [:file_in]
  if params[:prefix].nil?
    # No prefix supplied: do not list it as positional, otherwise a nil (later
    # an empty string) would be appended to the argument list. Work on a copy
    # so an explicit :prefix => nil is dropped without touching the caller's hash.
    params = params.dup
    params.delete(:prefix)
  else
    last_params << :prefix
  end
  args = build_parameters("fa2pac",valid_params,params,last_params)
  call_BWA_function(args)
end

+ (Object) long_read_alignment(params = {})

Note:

Boolean arguments must be set to ‘true’

Run the alignment for long query sequences

Parameters:

  • (Hash) params (defaults to: {})

    Options

Options Hash (params):

  • (String) :file_in

    the FastQ file (REQUIRED)

  • (String) :prefix

    the prefix of the database index files (REQUIRED)

  • (String) :file_out

    the output of the alignment in SAM format (REQUIRED)

  • (Integer) :a

    score for a match [1]

  • (Integer) :b

    mismatch penalty [3]

  • (Integer) :q

    gap open penalty [5]

  • (Integer) :r

    gap extension penalty [2]

  • (Integer) :t

    number of threads [1]

  • (Integer) :w

    band width [50]

  • (Float) :m

    mask level [0.50]

  • (Integer) :T

    score threshold divided by a [30]

  • (Integer) :s

    maximum seeding interval size [3]

  • (Integer) :z

    Z-best [1]

  • (Integer) :N

    number of seeds to trigger reverse alignment [5]

  • (Float) :c

    coefficient of length-threshold adjustment [5.5]

  • (Boolean) :H

    in SAM output, use hard clipping rather than soft



203
204
205
206
207
208
209
210
211
# File 'lib/bio/bwa.rb', line 203

# Run the alignment for long query sequences through BWA's "bwtsw2" command.
#
# :prefix, :file_in and :file_out are required. :file_out is handed over as
# the "f" option, while :prefix and :file_in are the trailing positional
# arguments.
def self.long_read_alignment(params = {})
  check_mandatory([:prefix, :file_in, :file_out], params)
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  allowed = %w(q r a b t T w d z m y s c N H f prefix file_in)
  argv = build_parameters("bwtsw2", allowed, params, [:prefix, :file_in])
  call_BWA_function(argv)
end

+ (Object) make_index(params = {})

Note:

Boolean values must be set to ‘true’

Generate the BWT index for a Fasta database

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the Fasta file (REQUIRED)

  • (String) :p

    the prefix for the database files that will be generated [default is Fasta name]

  • (String) :a

    the algorithm to be used for indexing: ‘is’ (short database)[default] or ‘bwtsw’ (long database)

  • (Boolean) :c

    colorspace database index



75
76
77
78
79
80
81
82
83
# File 'lib/bio/bwa.rb', line 75

# Generate the BWT index for a Fasta database through BWA's "index" command.
#
# :file_in is required and is the single positional argument. A :prefix
# entry, when set, is handed over as the "p" option.
def self.make_index(params = {})
  required = [:file_in]
  check_mandatory(required, params)
  params = change_arg_name(params, :prefix, :p) if params[:prefix]
  argv = build_parameters("index", %w(file_in p a c), params, [:file_in])
  call_BWA_function(argv)
end

+ (Object) pac2bwt(params = {})

Convert a Packed file format to Burrows-Wheeler Transform format

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the PAC file (REQUIRED)

  • (String) :file_out

    the name of the BWT file (REQUIRED)



24
25
26
27
28
29
30
# File 'lib/bio/bwa.rb', line 24

# Convert a Packed file format to Burrows-Wheeler Transform format through
# BWA's "pac2bwt" command.
#
# @param params [Hash] :file_in and :file_out are both required and are
#   passed as the positional arguments, in that order
# @return [Object] whatever call_BWA_function returns
# @raise [ArgumentError] if a required parameter is missing
def self.pac2bwt(params={})
  # %w yields an Array of names. The previous %q produced one String, which
  # turned the validity check into a substring match.
  valid_params = %w(file_in file_out)
  last_params = [:file_in,:file_out]
  check_mandatory(last_params, params)
  args = build_parameters("pac2bwt",valid_params,params,last_params)
  call_BWA_function(args)
end

+ (Object) pac_rev(params = {})

Generate reverse Packed format

Parameters:

  • (Hash) .

    params Options.

  • (Hash) params (defaults to: {})

    a customizable set of options

Options Hash (params):

  • (String) :file_in

    the PAC file (REQUIRED)

  • (String) :file_out

    the name of the REV PAC (REQUIRED)



48
49
50
51
52
53
54
# File 'lib/bio/bwa.rb', line 48

# Generate the reverse Packed format through BWA's "pac_rev" command.
#
# :file_in and :file_out are both required and are passed as the positional
# arguments, in that order.
def self.pac_rev(params = {})
  positional = [:file_in, :file_out]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("pac_rev", %w(file_in file_out), params, positional))
end

+ (Object) sai_to_sam_paired(params = {})

Note:

Boolean values must be set to ‘true’

Convert the SAI alignment output into SAM format (paired ends)

Parameters:

  • (Hash) params (defaults to: {})

    Options

Options Hash (params):

  • (String) :prefix

    the prefix of the database index files (REQUIRED)

  • (Array) :sai

    the two alignment files in SAI format (REQUIRED)

  • (Array) :fastq

    the two fastq files (REQUIRED)

  • (Integer) :a

    maximum insert size [500]

  • (Integer) :o

    maximum occurrences for one end [100000]

  • (Integer) :n

    maximum hits to output for paired reads [3]

  • (Integer) :N

    maximum hits to output for discordant pairs [10]

  • (Float) :c

    prior of chimeric rate (lower bound) [1.0e-05]

  • (String) :r

    read group header line such as ’@RG\tID:foo\tSM:bar’

  • (Boolean) :P

    preload index into memory (for base-space reads only)

  • (Boolean) :s

    disable Smith-Waterman for the unmapped mate

  • (Boolean) :A

    disable insert size estimate (force :s)



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/bio/bwa.rb', line 162

# Convert the SAI alignment output into SAM format (paired ends) through
# BWA's "sai2sam_pe" command.
#
# :prefix, :sai and :fastq are required. :sai and :fastq must each be an
# Array of two entries; they are expanded into :first_sai/:second_sai and
# :first_fastq/:second_fastq, which follow :prefix as positional arguments.
# A :file_out entry, when set, is handed over as the "f" option.
def self.sai_to_sam_paired(params = {})
  allowed = %w(a o s P n N c f A r prefix first_sai second_sai first_fastq second_fastq)
  positional = [:prefix, :first_sai, :second_sai, :first_fastq, :second_fastq]
  check_mandatory([:prefix, :sai, :fastq], params)
  params = change_arg_name(params, :file_out, :f) if params[:file_out]

  # Split the pair of SAI files into two separately named entries.
  sai_pair = params[:sai]
  if sai_pair
    unless sai_pair.is_a?(Array) && sai_pair.size == 2
      raise ArgumentError, "you must provide an array with two SAI files!"
    end
    params[:first_sai] = sai_pair.first
    params[:second_sai] = sai_pair.last
    params.delete(:sai)
  end

  # Same treatment for the pair of FastQ files.
  fastq_pair = params[:fastq]
  if fastq_pair
    unless fastq_pair.is_a?(Array) && fastq_pair.size == 2
      raise ArgumentError, "you must provide an array with two FastQ files!"
    end
    params[:first_fastq] = fastq_pair.first
    params[:second_fastq] = fastq_pair.last
    params.delete(:fastq)
  end

  call_BWA_function(build_parameters("sai2sam_pe", allowed, params, positional))
end

+ (Object) sai_to_sam_single(params = {})

Convert the SAI alignment output into SAM format (single end)

Parameters:

  • (Hash) params (defaults to: {})

    Options

Options Hash (params):

  • (String) :fastq

    the FastQ file (REQUIRED)

  • (String) :prefix

    the prefix of the database index files (REQUIRED)

  • (String) :sai

    the alignment file in SAI format (REQUIRED)

  • (String) :file_out

    the file name of the SAM output

  • (Integer) :n

    max_occ

  • (String) :r

    RG_line



136
137
138
139
140
141
142
143
144
# File 'lib/bio/bwa.rb', line 136

# Convert the SAI alignment output into SAM format (single end) through
# BWA's "sai2sam_se" command.
#
# :prefix, :sai and :fastq are required and are the positional arguments, in
# that order. A :file_out entry, when set, is handed over as the "f" option.
def self.sai_to_sam_single(params = {})
  positional = [:prefix, :sai, :fastq]
  check_mandatory(positional, params)
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  allowed = %w(n r fastq sai prefix f)
  call_BWA_function(build_parameters("sai2sam_se", allowed, params, positional))
end

+ (Object) short_read_alignment(params = {})

Note:

Boolean values must be set to ‘true’

Run the alignment for short query sequences

Parameters:

  • (Hash) params (defaults to: {})

    Options

Options Hash (params):

  • (String) :file_in

    the FastQ file (REQUIRED)

  • (String) :prefix

    the prefix of the database index files (REQUIRED)

  • (String) :file_out

    the output of the alignment in SAI format (REQUIRED)

  • (Integer) :n

    max #diff (int) or missing prob under 0.02 err rate (float) [0.04]

  • (Integer) :o

    maximum number or fraction of gap opens [1]

  • (Integer) :e

    maximum number of gap extensions, -1 for disabling long gaps [-1]

  • (Integer) :m

    maximum entries in the queue [2000000]

  • (Integer) :t

    number of threads [1]

  • (Integer) :M

    mismatch penalty [3]

  • (Integer) :O

    gap open penalty [11]

  • (Integer) :R

    stop searching when there are >INT equally best hits [30]

  • (Integer) :q

    quality threshold for read trimming down to 35bp [0]

  • (Integer) :B

    length of barcode

  • (Boolean) :c

    input sequences are in the color space

  • (Boolean) :L

    log-scaled gap penalty for long deletions

  • (Boolean) :N

    non-iterative mode: search for all n-difference hits (slow)

  • (Boolean) :I

    the input is in the Illumina 1.3+ FASTQ-like format

  • (Boolean) :b

    the input read file is in the BAM format

  • (Boolean) :single

    use single-end reads only (effective with -b)

  • (Boolean) :first

    use the 1st read in a pair (effective with -b)

  • (Boolean) :second

    use the 2nd read in a pair (effective with -b)

  • (Integer) :i

    do not put an indel within INT bp towards the ends [5]

  • (Integer) :d

    maximum occurrences for extending a long deletion [10]

  • (Integer) :l

    seed length [32]

  • (Integer) :k

    maximum differences in the seed [2]

  • (Integer) :E

    gap extension penalty [4]



114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/bio/bwa.rb', line 114

# Run the alignment for short query sequences through BWA's "aln" command.
#
# @param params [Hash] :prefix, :file_in and :file_out are required.
#   :file_out is handed over as the "f" option; :prefix and :file_in are the
#   trailing positional arguments. The boolean entries :single, :first and
#   :second are handed over as the "0", "1" and "2" options respectively.
# @return [Object] whatever call_BWA_function returns
# @raise [ArgumentError] if a required parameter is missing
def self.short_read_alignment(params={})
  # "0", "1" and "2" must be listed here: they are the names :single, :first
  # and :second are renamed to below, and build_parameters validates the
  # names it receives after that renaming.
  valid_params = %w(n o e i d l k c L R m t N M O E q f b 0 1 2 I B prefix file_in)
  mandatory_params = [:prefix,:file_in,:file_out]
  last_params = [:prefix,:file_in]
  check_mandatory(mandatory_params, params)
  params = change_arg_name(params,:file_out,:f) if params[:file_out]
  [[:single,"0"],[:first,"1"],[:second,"2"]].each do |name, flag|
    next unless params.include?(name)
    if params[name]
      params = change_arg_name(params,name,flag)
    else
      # A flag explicitly switched off must not be emitted as an option.
      params.delete(name)
    end
  end
  args = build_parameters("aln",valid_params,params,last_params)
  call_BWA_function(args)
end

+ (Object) simple_SW(params = {})

Note:

Boolean values must be set to ‘true’

Run the alignment between multiple short sequences and ONE long sequence

Parameters:

  • (Hash) params (defaults to: {})

    Options

Options Hash (params):

  • (String) :short_seq

    the short query sequence (REQUIRED)

  • (String) :long_seq

    the long database sequence (REQUIRED)

  • (String) :file_out

    the alignment output

  • (Integer) :T

    minimum score [1]

  • (Boolean) :p

    protein alignment (suppressing :r)

  • (Boolean) :f

    forward strand only

  • (Boolean) :r

    reverse strand only

  • (Boolean) :g

    global alignment



224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/bio/bwa.rb', line 224

# Run the alignment between multiple short sequences and ONE long sequence
# through BWA's "stdsw" command.
#
# @param params [Hash] :long_seq and :short_seq are required and are the
#   positional arguments, in that order. When :file_out is set, standard
#   output is redirected to that file for the duration of the call.
# @return [Object] whatever call_BWA_function returns
# @raise [ArgumentError] if a required parameter is missing
def self.simple_SW(params = {})
  valid_params = %w(g T f r p file_out long_seq short_seq)
  mandatory_params = [:long_seq,:short_seq]
  check_mandatory(mandatory_params, params)
  # :file_out is consumed here and is not a BWA option; remove it from a copy
  # so the caller's hash is left untouched.
  options = params.dup
  file_out = options.delete(:file_out)
  args = build_parameters("stdsw",valid_params,options,mandatory_params)
  return call_BWA_function(args) unless file_out

  # Remember where stdout currently points so that exact destination can be
  # restored afterwards. It may be a pipe or a file rather than a terminal,
  # so reopening "/dev/tty" would be wrong or fail.
  saved_stdout = $stdout.dup
  begin
    $stdout.reopen(file_out,"w")
    call_BWA_function(args)
  ensure
    # Runs on errors too, so stdout never stays redirected to the file.
    $stdout.flush
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end