# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library

#== Outstanding issues:
# * Better documentation
# * More tests

module OboParser

  require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
  require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
  require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
  require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))

  class OboParser 
    attr_accessor :terms, :typedefs

    def initialize 
      @terms = []
      @typedefs = [] 
      true
    end

    def term_strings # :yields: Array of Strings
      @terms.collect{|t| t.name.value}.sort
    end

    # Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
    def term_hash # :yields: Hash (String => String) (name => id)
      @terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
    end

    def id_hash # :yields: Hash (String => String (id => name))
      @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
    end

    # A single line in a Stanza within an OBO file
    class Tag
      attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
    end
    
    # A collection of single lines (Tags)
    class Stanza
      # Make special reference to several specific types of tags (:name, :id), subclasses will remove additional special typs from :other_tags
      attr_accessor :name, :id, :def, :other_tags

      def initialize(tags)
        @other_tags = []

        while tags.length != 0
          t = tags.shift

          new_tag = OboParser::Tag.new
    
          new_tag.tag = t.tag
          new_tag.value = t.value
          new_tag.comment = t.comment
          new_tag.xrefs = t.xrefs 

          case new_tag.tag
          when 'id' 
            @id = new_tag
          when 'name'
            @name = new_tag
          when 'def'
            @def = new_tag
          else
            if new_tag.tag == 'relationship'
              new_tag.related_term = t.related_term
              new_tag.relation = t.relation
            end

            @other_tags.push(new_tag)
          end
        end
      end

      #=== Convienience methods

      def tags_named(tag_name = nil)
        return nil if tag_name.nil?
        result = []
        @other_tags.each do |t|
          result.push(t) if (t.tag == tag_name)
        end
        result
      end

    end

    # TODO: likely deprecate and run with one model (Stanza)
    class Term < Stanza
     attr_accessor :relationships
      def initialize(tags)
       super
       @relationships = [] 
       anonymous_tags = [] 
       # Loop through "unclaimed" tags and reference those specific to Term
       while @other_tags.size != 0
         t = @other_tags.shift
         case t.tag
        
         when 'relationship'
           @relationships.push([t.relation, t.related_term])
         else
           anonymous_tags.push(t)
         end
       end
       @other_tags = anonymous_tags
      end
    end

    class Typedef < Stanza
      def initialize(tags)
        super
        #anonymous_tags = [] 
        ## Loop through "unclaimed" tags and reference those specific to Typedef
        #while @other_tags.size != 0
        #  t = @other_tags.shift
        #  case t.tag
        #  when 'def' 
        #    @def = t
        #  else
        #    anonymous_tags.push(t)
        #  end
        #  @other_tags = anonymous_tags
        #end
      end
    end

  end

  class OboParserBuilder
    def initialize
      @of =  OboParser.new 
    end

    def add_term(tags)
      @of.terms.push OboParser::Term.new(tags)
    end

    def add_typedef(tags)
      @of.typedefs.push OboParser::Typedef.new(tags)
    end

    def obo_file
      @of
    end

  end

  class ParseError < StandardError
  end

end # end module

#= Implementation

def parse_obo_file(input)
  @input = input
  raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0

  # Comments are handled now.
  # @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n")  # strip out comments - this is a kludge, likely needs fixing!!

  builder = OboParser::OboParserBuilder.new
  lexer = OboParser::Lexer.new(@input)
  OboParser::Parser.new(lexer, builder).parse_file
  return builder.obo_file  
end