module Bio::PolyploidTools class Marker include Comparable #include Virgola attr_reader :template_sequence, :original, :snp attr_accessor :best_hit attr_accessor :index_90k attr_accessor :snp_id attr_accessor :snp_name attr_accessor :chr attr_accessor :coordinates_chr attr_accessor :map_order attr_accessor :chr_arm attr_accessor :distance_cm attr_accessor :sequence attr_writer :contig #after_map :parse_sequence_snp def to_fasta ">#{self.snp_name}\n#{self.template_sequence}" end def contig @contig = best_hit.target_id.chomp if best_hit @contig end def to_csv "#{index_90k},#{snp_id},#{snp_name},#{chr},#{coordinates_chr},#{map_order},#{chr_arm},#{distance_cm},#{sequence},#{contig}" end def <=>(anOter) return 0 if anOter.snp_name == @snp_name return @chr_arm <=> anOter.chr_arm if anOter.chr_arm != @chr_arm return @snp_name <=> anOter.snp_name if anOter.coordinates_chr == @coordinates_chr return @coordinates_chr <=> anOter.coordinates_chr end def initialize(line) line.chomp! @template_sequence = nil #INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE @index_90k, @snp_id, @snp_name, @chr, @coordinates_chr, @map_order, @chr_arm, @distance_cm, @sequence, @contig = line.split(',') parse_sequence_snp end def self.parse(filename) f = File.open(filename, "r").read f.each_line do |line| m = Marker.new(line) yield m if m.template_sequence end end protected def parse_sequence_snp pos = 0 @chr.upcase! match_data = /(?
\w*)\[(?[ACGT])\/(?[ACGT])\](?\w*)/.match(sequence)
      if match_data
        @position = Regexp.last_match(:pre).size + 1
        @original = Regexp.last_match(:org)
        @snp = Regexp.last_match(:snp)
        amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
        @template_sequence = "#{Regexp.last_match(:pre)}#{amb_base}#{Regexp.last_match(:pos)}"
        return @template_sequence
      end
      return nil
    end
  end


  #The map hast to come sorted. 
  class ArmMap
    attr_reader :markers , :global_reference, :reference
    attr_accessor :chromosome
    def initialize
      @markers = Hash.new
    end

    def align_markers(output)
      Bio::Blat.align(@reference.fasta_path, @fasta_markers, output) do |hit|
        marker = markers[hit.query_id]
        best = marker.best_hit
        unless marker.best_hit 
          markers[hit.query_id].best_hit = hit
        else
          marker.best_hit = hit if hit.score > marker.best_hit.score
        end
      end
    end

    def print_fasta_contigs_for_markers(contigs_file)
      
      contigs = Set.new
      markers.each do |k, marker|

        if marker.best_hit
          contigs << marker.best_hit.target_id
        end
      end
      
      fasta=File.open(contigs_file, "w")
        contigs.each do |contig_id|
             reg = @reference.index.region_for_entry(contig_id)
           fasta.puts ">#{contig_id}\n#{@reference.fetch_sequence(reg.get_full_region)}"
        end
      fasta.close
    end




    def print_fasta_markers(filename)
      @fasta_markers = filename
      fasta=File.open(filename, "w")

      markers.each do |k, marker|
        fasta.puts marker.to_fasta 
      end
      fasta.close
    end

    def global_reference(reference)
      @global_reference = Bio::DB::Fasta::FastaFile.new(reference)
      @global_reference.load_fai_entries
    end

    def reference(reference)
      @reference = Bio::DB::Fasta::FastaFile.new(reference)
      @reference.load_fai_entries
    end

    def print_fasta_contigs_from_reference(filename)
      if File.exist?(filename) 
        reference(filename)
        return
      end

      #puts "loaded"

      fasta=File.open(filename, "w")

      Bio::FlatFile.auto( @global_reference.fasta_path) do |ff|
        ff.each do |f|
          chr_reg = arm_selection_embl(f.entry_id)
          if chr_reg == chromosome
            fasta.puts f.entry
          end
        end
      end
      fasta.close
      reference(filename)
    end


    def print_map_with_contigs(filename)
      file = File.open(filename, "w")
      markers.values.sort { |x,y| x.map_order <=> y.map_order }.each do | marker |
        file.puts marker.to_csv
      end 
      file.close
    end

    protected
    def arm_selection_embl(contig_name)
      ret = contig_name.split('_')[2][0,2]
      return ret
    end
  end
end