Sha256: b4d1febc7c3e626a869f300171c5baf3ea6ff3455464bd0224cd6e1aa3ee574e

Contents?: true

Size: 1.95 KB

Versions: 1

Compression:

Stored size: 1.95 KB

Contents

require 'bio-faster'
require 'levenshtein'
require 'thread'

module Bio
  class Gadget < Thor
    namespace :bio

    desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
    def demlt(bcfile, tmpofs)

      ofs = tmpofs.to_i

      wells = Array.new
      bcs = Array.new
      bclens = Array.new
      open(bcfile).each do |line|
        cols = line.rstrip.split
        wells.push(cols[0])
        bcs.push(cols[1])
        bclens.push(cols[1].length)
      end

      bclens.uniq!
      if bclens.size != 1
        raise 'Inconsistent barcode sequences'
      end
      bclen = bclens[0]

      ts = Array.new
      qs = Array.new
      (wells + ['other']).each { |well|
        q = Queue.new
        t = Thread.new(well, q) do |well|
          fp = open("| gzip -c > #{well}.fq.gz", 'w')
          while vals = q.shift
            if vals == ""
              break
            else
              fp.puts(vals)
            end
          end
          fp.close()
        end
        qs.push(q)
        ts.push(t)
      }

      reads = Array.new(bcs.size+1, 0)
      tmpdist = Hash.new
      Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
        seqbc = seq[ofs, bclen]
        bcs.each_index do |i|
          tmpdist[i] = Levenshtein.distance(bcs[i], seqbc)
        end
        dists = tmpdist.sort { |a, b| a[1] <=> b[1] }
        if dists[0][1] < dists[1][1] && dists[0][1] < 2
          idx = dists[0][0]
          qs[idx].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
          reads[idx] = reads[idx]+1
        else
          qs[-1].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
          reads[-1] = reads[-1]+1
        end
      end

      qs.each { |q| q.push('') }
      ts.each { |t| t.join }

      total = 0
      bcs.each_index { |i|
        r = reads[i]
        puts "#{bcs[i]}\t#{r}\t#{wells[i]}.fq.gz"
        total = total+r
      }
      puts "Other\t#{reads[-1]}\tother.fq.gz"
      puts '===='
      puts "Total\t#{total+reads[-1]}"

    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
bio-gadget-0.1.3 lib/bio-gadget/demlt.rb