Sha256: 73cbd1c44fc5cb0f3d5e735875502a91b46649bdfd47aa685296abdac547116d

Contents?: true

Size: 728 Bytes

Versions: 17

Compression:

Stored size: 728 Bytes

Contents

require 'bio-faster'
require 'parallel'

module Bio
  class Gadget < Thor
    namespace :bio

    desc 'dedup', 'deduplicate fastq (via STDIN)'
    # Deduplicate FASTQ records read from STDIN and print the surviving
    # records to STDOUT as four-line FASTQ entries.
    #
    # A forked child reads the records with Bio::Faster and writes one
    # tab-separated line per record ("seq+qual key, id, qual, seq") into an
    # external `sort | cut` pipeline. The child's STDOUT is redirected into a
    # pipe that this process reads. Lines arrive reverse-sorted on the key
    # with the key column removed; for every run of consecutive lines that
    # share the same sequence only the first one is printed.
    def dedup
      reader, writer = IO.pipe

      # sort's -S buffer size as a percentage, divided among cores + 1.
      # Clamped so that hosts with 100 or more cores do not request 0%.
      buffer_percent = [100 / (Parallel.processor_count + 1), 1].max

      # "${TMPDIR:-/tmp}" keeps -T valid when TMPDIR is unset or empty (a bare
      # `-T` would make sort fail) and when the path contains spaces.
      pipeline = "sort -k 1 -r -S #{buffer_percent}% " \
                 "-T \"${TMPDIR:-/tmp}\" | cut -f 2-"

      child = fork do
        reader.close
        $stdout.reopen(writer)
        writer.close # STDOUT now refers to the pipe; the extra handle is unneeded
        # IO.popen instead of Kernel#open("| ..."): opening a subprocess via
        # a leading '|' is deprecated as of Ruby 3.3.
        IO.popen(pipeline, 'w') do |fp|
          Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
            fp.puts "#{seq}#{qvs}\t#{seqid}\t#{qvs}\t#{seq}"
          end
        end
      end

      # The parent must drop its write end, otherwise it never sees EOF.
      writer.close

      previous_seq = nil
      reader.each_line do |line|
        # Split strictly on tabs (keeping empty trailing fields) so that a
        # header containing spaces cannot shift the columns.
        seqid, qvs, seq = line.chomp.split("\t", -1)
        # Duplicates are adjacent after sorting; keep only the first of a run.
        next if seq == previous_seq

        previous_seq = seq
        puts "@#{seqid}\n#{seq}\n+\n#{qvs}"
      end

      reader.close
      # Reap the child so it does not linger as a zombie.
      Process.wait(child)
    end

  end
end

Version data entries

17 entries across 17 versions & 1 rubygems

Version Path
bio-gadget-0.4.8 lib/bio-gadget/dedup.rb
bio-gadget-0.4.7 lib/bio-gadget/dedup.rb
bio-gadget-0.4.6 lib/bio-gadget/dedup.rb
bio-gadget-0.4.5 lib/bio-gadget/dedup.rb
bio-gadget-0.4.4 lib/bio-gadget/dedup.rb
bio-gadget-0.4.3 lib/bio-gadget/dedup.rb
bio-gadget-0.4.2 lib/bio-gadget/dedup.rb
bio-gadget-0.4.1 lib/bio-gadget/dedup.rb
bio-gadget-0.4.0 lib/bio-gadget/dedup.rb
bio-gadget-0.3.1 lib/bio-gadget/dedup.rb
bio-gadget-0.3.0 lib/bio-gadget/dedup.rb
bio-gadget-0.2.6 lib/bio-gadget/dedup.rb
bio-gadget-0.2.5 lib/bio-gadget/dedup.rb
bio-gadget-0.2.4 lib/bio-gadget/dedup.rb
bio-gadget-0.2.3 lib/bio-gadget/dedup.rb
bio-gadget-0.2.2 lib/bio-gadget/dedup.rb
bio-gadget-0.2.1 lib/bio-gadget/dedup.rb