Sha256: 2b3fbdcccbaa79fb1cc2c57b84a2ba685a2a8651330aa03b15e3ef4f8c98a8c8

Contents?: true

Size: 1.63 KB

Versions: 15

Compression:

Stored size: 1.63 KB

Contents

# Extensions to Bio::NucleicAcid for working with IUPAC nucleotide
# ambiguity codes.
class Bio::NucleicAcid

  # Two-way, lower-case lookup table for IUPAC nucleotide codes:
  # * a single-letter code maps to the bases it stands for ('y' => 'ct');
  # * an alphabetically sorted, duplicate-free string of bases maps back to
  #   its single-letter code ('ct' => 'y').
  # The plain bases a, t, g, c and u map to themselves.
  IUPAC_CODES = {

    'y' => 'ct',
    'r' => 'ag',
    'w' => 'at',
    's' => 'cg',
    'k' => 'gt',
    'm' => 'ac',

    'b' => 'cgt',
    'd' => 'agt',
    'h' => 'act',
    'v' => 'acg',

    'n' => 'acgt',

    'a' => 'a',
    't' => 't',
    'g' => 'g',
    'c' => 'c',
    'u' => 'u',

    'ct' => 'y',
    'ag' => 'r',
    'at' => 'w',
    'cg' => 's',
    'gt' => 'k',
    'ac' => 'm',

    'cgt' => 'b',
    'agt' => 'd',
    'act' => 'h',
    'acg' => 'v',

    'acgt' => 'n'
  }.freeze

  # Tests whether +base+ is exactly one unambiguous DNA base (a, c, g or t,
  # in either case).
  #
  # The check is anchored and uses a fixed character class, so +base+ is
  # never interpreted as a regular expression: '.' and '' are not reported
  # as unambiguous, and characters such as '*' do not raise RegexpError.
  #
  # Returns a MatchData (truthy) when +base+ qualifies, nil otherwise.
  def self.is_unambiguous(base)
    /\A[acgt]\z/i.match(base.to_s)
  end

  # Converts a set of bases into the corresponding upper-case IUPAC code.
  # (The method name keeps its historical spelling, "IUAPC", so existing
  # callers continue to work.)
  #
  # +bases+ is converted to a string, lower-cased, sorted and de-duplicated
  # before the lookup, so 'TC', 'ct' and 'cct' all yield 'Y'. Because
  # IUPAC_CODES is two-way, a single ambiguity letter is expanded instead
  # (e.g. 'y' yields 'CT').
  #
  # When the bases are not in the table a notice is printed and 'N' is
  # returned.
  def self.to_IUAPC(bases)
    key = bases.to_s.downcase.chars.sort.uniq.join
    code = IUPAC_CODES[key]
    if code.nil?
      # Show the offending input; the looked-up value is always nil here.
      p "Invalid base! #{bases}"
      code = 'n' # This is a patch... as one of the scripts failed here.
    end
    code.upcase
  end

  # Tests whether +base+ is one of the characters that +code+ maps to in
  # IUPAC_CODES. The comparison is case-insensitive.
  #
  # Returns false, rather than raising NoMethodError, when +code+ is not a
  # key of IUPAC_CODES.
  def self.is_valid(code, base)
    expansion = IUPAC_CODES[code.to_s.downcase]
    return false if expansion.nil?

    expansion.chars.include?(base.to_s.downcase)
  end

end

# Monkey patch on Bio::Sequence that counts the SNPs between two sequences.
# The sequences are assumed to be aligned already, and no check is made that
# a base in the first sequence is valid in the second.
class Bio::Sequence
  # Visits every index of +seq1+ and returns the number of positions at which
  # +seq1+ and +seq2+ hold different values.
  def self.snps_between(seq1, seq2)
    (0...seq1.size).count { |pos| seq1[pos] != seq2[pos] }
  end
end

class String
  # Monkey patch for nucleic-acid strings: counts the characters that are not
  # one of the unambiguous bases a, c, g, t (in either case). IUPAC ambiguity
  # codes, gaps and any other symbol are all counted.
  #
  # String#count with a negated character set is used so that no character of
  # the string is ever interpreted as a regular expression; symbols such as
  # '*' are counted instead of raising RegexpError.
  def count_ambiguities
    count('^acgtACGT')
  end

  # Counts how many characters of the string are uppercase ASCII letters
  # (A-Z), wherever they occur in the string.
  def upper_case_count
    count('A-Z')
  end
end

Version data entries

15 entries across 15 versions & 2 rubygems

Version Path
bio-samtools-wrapper-2.7.0 lib/bio/BIOExtensions.rb
bio-samtools-2.6.2 lib/bio/BIOExtensions.rb
bio-samtools-2.6.1 lib/bio/BIOExtensions.rb
bio-samtools-2.6.0 lib/bio/BIOExtensions.rb
bio-samtools-2.5.1 lib/bio/BIOExtensions.rb
bio-samtools-2.4.0 lib/bio/BIOExtensions.rb
bio-samtools-2.3.3 lib/bio/BIOExtensions.rb
bio-samtools-2.3.2 lib/bio/BIOExtensions.rb
bio-samtools-2.3.1 lib/bio/BIOExtensions.rb
bio-samtools-2.3.0 lib/bio/BIOExtensions.rb
bio-samtools-2.2.0 lib/bio/BIOExtensions.rb
bio-samtools-2.1.0 lib/bio/BIOExtensions.rb
bio-samtools-2.0.5 lib/bio/BIOExtensions.rb
bio-samtools-2.0.4 lib/bio/BIOExtensions.rb
bio-samtools-2.0.3 lib/bio/BIOExtensions.rb