#
# = bio/io/das.rb - BioDAS access module
#
# Copyright::	Copyright (C) 2003, 2004
#		Shuichi Kawashima <shuichi@hgc.jp>,
#		Toshiaki Katayama <k@bioruby.org>
# License::	LGPL
#
# $Id: das.rb,v 1.11 2005/11/14 02:01:54 k Exp $
#
#--
# == TODO
#
#  link, stylesheet
#
#++
#
#--
#
#  This library is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2 of the License, or (at your option) any later version.
#
#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this library; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
#
#++
#

begin
  require 'rexml/document'
rescue LoadError
end
require 'uri'
require 'net/http'
require 'bio/sequence'


module Bio

class DAS

  # Specify the DAS server to connect to.
  #
  # The +url+ is split into scheme, host, port and path.  The path, minus a
  # trailing slash, is kept as the prefix put in front of every '/das/...'
  # request path.  Only the Net::HTTP object is built here; requests are
  # issued by the get_* methods.
  #
  # An 'https' URL switches the connection to SSL and, when the URL gives no
  # explicit port, uses port 443 instead of the plain HTTP default of 80.
  def initialize(url = 'http://www.wormbase.org:80/db/')
    scheme, _userinfo, host, port, _registry, path, = URI.split(url)
    secure = scheme.to_s.downcase == 'https'
    # URI.split hands the port back as a String, or nil when it is absent.
    port = port ? port.to_i : (secure ? 443 : 80)
    @server = Net::HTTP.new(host, port)
    @server.use_ssl = true if secure
    @prefix = path ? path.chomp('/') : ''
  end

  # Returns an Array of Bio::DAS::DSN.
  #
  # Requests '<prefix>/das/dsn' and builds one DSN object per DSN element
  # found in the reply, filled from its SOURCE, MAPMASTER and DESCRIPTION
  # children.  Other child elements are ignored.
  def get_dsn
    ary = []
    # The trailing comma keeps only the first value should get return an Array.
    response, = @server.get(@prefix + '/das/dsn')
    doc = REXML::Document.new(response.body)
    doc.elements.each('/descendant::DSN') do |dsn_node|
      dsn = DSN.new
      dsn_node.elements.each do |child|
        case child.name
        when 'SOURCE'
          dsn.source = child.text
          dsn.source_id = child.attributes['id']
          dsn.source_version = child.attributes['version']
        when 'MAPMASTER'
          # The value is the element's text; its tag name is always 'MAPMASTER'.
          dsn.mapmaster = child.text
        when 'DESCRIPTION'
          dsn.description = child.text
          dsn.description_href = child.attributes['href']
        end
      end
      ary << dsn
    end
    ary
  end

  # Returns Bio::DAS::ENTRY_POINT.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  #
  # Requests '<prefix>/das/<dsn>/entry_points'.  The href and version of the
  # ENTRY_POINTS element are copied to the result, and every child element of
  # ENTRY_POINTS becomes one Bio::DAS::SEGMENT in its segments list.
  def get_entry_point(dsn)
    entry_point = ENTRY_POINT.new
    dsn = dsn.source if dsn.instance_of?(DSN)
    response, = @server.get(@prefix + '/das/' + dsn + '/entry_points')
    doc = REXML::Document.new(response.body)
    doc.elements.each('/descendant::ENTRY_POINTS') do |points_node|
      entry_point.href = points_node.attributes['href']
      entry_point.version = points_node.attributes['version']
      points_node.elements.each do |segment_node|
        segment = SEGMENT.new
        segment.entry_id = segment_node.attributes['id']
        segment.start = segment_node.attributes['start']
        segment.stop = segment_node.attributes['stop']
        # Stored in its own field so that it does not overwrite 'stop'.
        segment.orientation = segment_node.attributes['orientation']
        segment.subparts = segment_node.attributes['subparts']
        segment.description = segment_node.text
        entry_point.segments << segment
      end
    end
    entry_point
  end

  # Returns an Array of Bio::DAS::DNA.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
  # Bio::DAS::SEGMENT
  #
  # Requests '<prefix>/das/<dsn>/dna' with one 'segment=id:start,stop'
  # parameter per segment (joined by ';') and builds one DNA object per
  # SEQUENCE element of the reply.
  def get_dna(dsn, segments)
    dsn = dsn.source if dsn.instance_of?(DSN)
    segments = [segments] if segments.instance_of?(SEGMENT)

    params = []
    segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }
    request_path = @prefix + '/das/' + dsn + '/dna?' + params.join(';')

    response, = @server.get(request_path)
    found = []
    REXML::Document.new(response.body).elements.each('/descendant::SEQUENCE') do |seq_node|
      dna = DNA.new
      attrs = seq_node.attributes
      dna.entry_id = attrs['id']
      dna.start = attrs['start']
      dna.stop = attrs['stop']
      dna.version = attrs['version']
      # Each child element supplies the sequence text and its 'length'
      # attribute; if there are several, the last one wins.
      seq_node.elements.each do |dna_node|
        dna.sequence = Bio::Sequence::NA.new(dna_node.text)
        dna.length = dna_node.attributes['length'].to_i
      end
      found.push(dna)
    end
    found
  end

  # Returns an Array of Bio::DAS::SEQUENCE.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
  # Bio::DAS::SEGMENT
  #
  # Requests '<prefix>/das/<dsn>/sequence' with one 'segment=id:start,stop'
  # parameter per segment (joined by ';').  The text of each SEQUENCE element
  # is wrapped according to its 'moltype' attribute; see below.
  def get_sequence(dsn, segments)
    dsn = dsn.source if dsn.instance_of?(DSN)
    segments = [segments] if segments.instance_of?(SEGMENT)

    params = []
    segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }
    request_path = @prefix + '/das/' + dsn + '/sequence?' + params.join(';')

    response, = @server.get(request_path)
    found = []
    REXML::Document.new(response.body).elements.each('/descendant::SEQUENCE') do |seq_node|
      entry = SEQUENCE.new
      attrs = seq_node.attributes
      entry.entry_id = attrs['id']
      entry.start = attrs['start']
      entry.stop = attrs['stop']
      entry.moltype = attrs['moltype']
      entry.version = attrs['version']

      # Pick the sequence class from the molecule type; anything that is
      # neither nucleic acid nor protein is kept as the plain text.
      wrapper =
        case entry.moltype
        when /dna|rna/i then Bio::Sequence::NA   # 'DNA', 'ssRNA', 'dsRNA'
        when /protein/i then Bio::Sequence::AA   # 'Protein'
        end
      text = seq_node.text
      entry.sequence = wrapper ? wrapper.new(text) : text

      found.push(entry)
    end
    found
  end

  # Returns a Bio::DAS::TYPES object.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
  # an Array of Bio::DAS::SEGMENT
  #
  # Requests '<prefix>/das/<dsn>/types' with one 'segment=id:start,stop'
  # parameter per segment (joined by ';').  Every child of a GFF element
  # becomes a SEGMENT, and every child of that becomes one TYPE whose count
  # is the element text converted with to_i.
  def get_types(dsn, segments = [])   # argument 'type' is deprecated
    types = TYPES.new

    dsn = dsn.source if dsn.instance_of?(DSN)
    segments = [segments] if segments.instance_of?(SEGMENT)

    params = []
    segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }
    request_path = @prefix + '/das/' + dsn + '/types?' + params.join(';')

    response, = @server.get(request_path)
    REXML::Document.new(response.body).elements.each('/descendant::GFF') do |gff_node|
      types.version = gff_node.attributes['version']
      types.href = gff_node.attributes['href']

      gff_node.elements.each do |segment_node|
        segment = SEGMENT.new
        seg_attrs = segment_node.attributes
        segment.entry_id = seg_attrs['id']
        segment.start = seg_attrs['start']
        segment.stop = seg_attrs['stop']
        segment.version = seg_attrs['version']
        segment.label = seg_attrs['label']

        segment_node.elements.each do |type_node|
          type = TYPE.new
          type.entry_id = type_node.attributes['id']
          type.method = type_node.attributes['method']
          type.category = type_node.attributes['category']
          type.count = type_node.text.to_i
          segment.types << type
        end

        types.segments << segment
      end
    end
    types
  end

  # Returns a Bio::DAS::GFF object.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
  # an Array of Bio::DAS::SEGMENT
  #
  # Requests '<prefix>/das/<dsn>/features'.  The query is built, in this
  # order and joined by ';', from:
  #
  # * one 'segment=id:start,stop' per segment
  # * 'categorize=yes' when +categorize+ is true
  # * one 'feature_id=...' per entry of +feature_ids+
  # * one 'group_id=...' per entry of +group_ids+
  #
  # Every SEGMENT child of a GFF element becomes a Bio::DAS::SEGMENT, and
  # every child element of that SEGMENT becomes a Bio::DAS::FEATURE filled
  # from its TYPE, METHOD, START, STOP/END, SCORE, ORIENTATION, PHASE, NOTE,
  # LINK, TARGET and GROUP children.
  def get_features(dsn, segments = [], categorize = false, feature_ids = [], group_ids = [])
    # arguments 'type' and 'category' are deprecated
    gff = GFF.new

    dsn = dsn.source if dsn.instance_of?(DSN)
    segments = [segments] if segments.instance_of?(SEGMENT)

    opts = []
    segments.each do |s|
      opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
    end
    opts << "categorize=yes" if categorize   # default is 'no'
    feature_ids.each do |fid|
      opts << "feature_id=#{fid}"
    end
    group_ids.each do |gid|
      opts << "group_id=#{gid}"
    end
    query = opts.join(';')

    # LINK and TARGET elements are read the same way whether they sit
    # directly under a FEATURE or inside a GROUP, so both levels share
    # these two builders.
    build_link = lambda do |node|
      link = LINK.new
      link.href = node.attributes['href']
      link.text = node.text
      link
    end
    build_target = lambda do |node|
      target = TARGET.new
      target.entry_id = node.attributes['id']
      target.start = node.attributes['start']
      target.stop = node.attributes['stop']
      target.name = node.text
      target
    end

    response, = @server.get(@prefix + '/das/' + dsn + '/features?' + query)
    doc = REXML::Document.new(response.body)
    doc.elements.each('/descendant::GFF') do |gff_node|
      gff.version = gff_node.attributes['version']
      gff.href = gff_node.attributes['href']
      gff_node.elements.each('SEGMENT') do |segment_node|
        segment = SEGMENT.new
        segment.entry_id = segment_node.attributes['id']
        segment.start = segment_node.attributes['start']
        segment.stop = segment_node.attributes['stop']
        segment.version = segment_node.attributes['version']
        segment.label = segment_node.attributes['label']
        segment_node.elements.each do |feature_node|
          feature = FEATURE.new
          feature.entry_id = feature_node.attributes['id']
          feature.label = feature_node.attributes['label']
          feature_node.elements.each do |field|
            case field.name
            when 'TYPE'
              type = TYPE.new
              type.entry_id = field.attributes['id']
              type.category = field.attributes['category']
              # The attribute is spelt 'reference'.
              type.reference = field.attributes['reference']
              type.label = field.text
              feature.types << type
            when 'METHOD'
              feature.method_id = field.attributes['id']
              feature.method = field.text
            when 'START'
              feature.start = field.text
            when 'STOP', 'END'
              feature.stop = field.text
            when 'SCORE'
              feature.score = field.text
            when 'ORIENTATION'
              feature.orientation = field.text
            when 'PHASE'
              feature.phase = field.text
            when 'NOTE'
              feature.notes << field.text
            when 'LINK'
              feature.links << build_link.call(field)
            when 'TARGET'
              feature.targets << build_target.call(field)
            when 'GROUP'
              group = GROUP.new
              group.entry_id = field.attributes['id']
              group.label = field.attributes['label']
              group.type = field.attributes['type']
              field.elements.each do |member|
                case member.name
                when 'NOTE'      # in GROUP
                  group.notes << member.text
                when 'LINK'      # in GROUP
                  group.links << build_link.call(member)
                when 'TARGET'    # in GROUP
                  group.targets << build_target.call(member)
                end
              end
              feature.groups << group
            end
          end
          segment.features << feature
        end
        gff.segments << segment
      end
    end
    gff
  end


  # One data source listed by a DAS server; DAS#get_dsn returns an Array of
  # these.  All fields are plain read/write attributes that start out nil.
  class DSN
    # Values taken from the SOURCE element: its text, 'id' and 'version'.
    attr_accessor :source
    attr_accessor :source_id
    attr_accessor :source_version

    # Value recorded for the MAPMASTER element.
    attr_accessor :mapmaster

    # Values taken from the DESCRIPTION element: its text and 'href'.
    attr_accessor :description
    attr_accessor :description_href
  end

  # Holds the href and version of an entry point list together with its
  # segments; DAS#get_entry_point returns one of these.
  #
  # Enumerable is mixed in on top of #each, so the segments can be walked
  # with map, select, to_a and friends directly on this object.
  class ENTRY_POINT
    include Enumerable

    # The Array of segments; append to it with <<.
    attr_reader :segments
    attr_accessor :href, :version

    def initialize
      @segments = []
    end

    # Yields each segment in turn.  The block is handed straight to
    # Array#each, so the return value (and the behaviour when no block is
    # given) is whatever Array#each provides.
    def each(&block)
      @segments.each(&block)
    end
  end

  # A region of a reference sequence.  It is used both as a request
  # argument (see SEGMENT.region) and as a container in parsed replies.
  class SEGMENT
    attr_accessor :entry_id, :start, :stop, :orientation, :description
    attr_accessor :subparts                     # optional
    attr_accessor :features, :version, :label   # for FEATURE
    attr_accessor :types                        # for TYPE

    # Builds a segment with only entry_id, start and stop filled in.
    def self.region(entry_id, start, stop)
      region = new
      region.entry_id, region.start, region.stop = entry_id, start, stop
      region
    end

    def initialize
      @features = []   # for FEATURE
      @types = []      # for TYPE
    end
  end

  # One sequence from a 'dna' reply; DAS#get_dna returns an Array of these.
  class DNA
    # Attributes of the enclosing SEQUENCE element.
    attr_accessor :entry_id, :start, :stop, :version

    # The sequence itself and the 'length' attribute that came with it.
    attr_accessor :sequence, :length
  end

  # One sequence from a 'sequence' reply; DAS#get_sequence returns an Array
  # of these.
  class SEQUENCE
    # Attributes of the SEQUENCE element.
    attr_accessor :entry_id, :start, :stop, :moltype, :version

    # The sequence text, wrapped by get_sequence according to moltype.
    attr_accessor :sequence
  end

  # Result of DAS#get_types.  It adds nothing of its own: href, version,
  # segments and each all come from ENTRY_POINT.
  class TYPES < ENTRY_POINT
  end

  # A feature type, as found in 'types' replies and inside FEATURE elements.
  #
  # Note that the +method+ accessor replaces the usual Object#method on
  # instances of this class.
  class TYPE
    attr_accessor :entry_id, :method, :category, :count
    attr_accessor :reference, :label   # for FEATURE
  end

  # Result of DAS#get_features: the version and href of the GFF element
  # plus the list of segments parsed from it.
  class GFF
    # The Array of segments; append to it with <<.
    attr_reader :segments
    attr_accessor :version, :href

    def initialize
      @segments = []
    end
  end

  # One annotated feature inside a segment of a 'features' reply.
  #
  # Note that the +method+ accessor replaces the usual Object#method on
  # instances of this class.
  class FEATURE
    attr_accessor :entry_id, :label
    attr_accessor :method_id, :method
    attr_accessor :start, :stop, :score, :orientation, :phase

    # Lists filled while parsing; each starts out as an empty Array.
    attr_reader :notes, :links, :types, :targets, :groups

    def initialize
      @notes = []
      @links = []
      @types = []
      @targets = []
      @groups = []
    end
  end

  # A LINK element: its 'href' attribute and its text.
  class LINK
    attr_accessor :href
    attr_accessor :text
  end

  # A TARGET element: its 'id', 'start' and 'stop' attributes, with the
  # element text kept as the name.
  class TARGET
    attr_accessor :entry_id
    attr_accessor :start, :stop
    attr_accessor :name
  end

  # A GROUP element of a feature: its 'id', 'label' and 'type' attributes
  # plus the notes, links and targets nested inside it.
  class GROUP
    attr_accessor :entry_id, :label, :type

    # Lists filled while parsing; each starts out as an empty Array.
    attr_reader :notes, :links, :targets

    def initialize
      @notes = []
      @links = []
      @targets = []
    end
  end

end

end # module Bio


if __FILE__ == $0

  # Manual smoke test against two public DAS servers; it needs network
  # access and simply dumps what comes back.
  #
  # To pretty-print the dumps, enable:
  #
  #   begin
  #     require 'pp'
  #     alias p pp
  #   rescue LoadError
  #   end

  puts "### WormBase"
  wormbase = Bio::DAS.new('http://www.wormbase.org/db/')

  puts ">>> test get_dsn"
  p wormbase.get_dsn

  puts ">>> create segment obj Bio::DAS::SEGMENT.region('I', 1, 1000)"
  seg = Bio::DAS::SEGMENT.region('I', 1, 1000)
  p seg

  puts ">>> test get_dna"
  p wormbase.get_dna('elegans', seg)

  puts "### test get_features"
  p wormbase.get_features('elegans', seg)

  puts "### KEGG DAS"
  kegg_das = Bio::DAS.new("http://das.hgc.jp/cgi-bin/")

  # Names of every data source the server lists.
  sources = kegg_das.get_dsn.map { |dsn| dsn.source }

  puts ">>> dsn : entry_points"
  sources.each do |org|
    # One output line per source: its name, then the ids of its entry points.
    print "#{org} : "
    entry_points = kegg_das.get_entry_point(org)
    entry_points.segments.each { |segment| print " #{segment.entry_id}" }
    puts
  end

end