module OboParser::Tokens

  # Base class for every lexer token. A token wraps the matched text in
  # +value+; each subclass stores the pattern it matches in a class-level
  # instance variable named @regexp.
  class Token
    # Expose the per-class @regexp through a reader on the singleton class, so
    # each subclass keeps its own pattern without resorting to a class variable.
    singleton_class.send(:attr_reader, :regexp)

    # The object stored by the constructor (the matched text, for plain tokens).
    attr_reader :value

    # str:: the text captured by the token's regexp; stored unchanged.
    def initialize(str)
      @value = str
    end
  end

  # Matches a "[Term]" stanza header (case-insensitive), with optional
  # surrounding whitespace. The bracketed header is the first capture group.
  class Term < Token
    @regexp = /\A\s*(\[term\])\s*/i
  end

  # Matches a "[Typedef]" stanza header (case-insensitive), with optional
  # surrounding whitespace. The bracketed header is the first capture group.
  class Typedef < Token
    @regexp = /\A\s*(\[typedef\])\s*/i
  end

  # Token needs simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
  # tokens for individual tags that don't conform to the pattern used for def: tags.
  # The code can't presently handle escaped characters (like \,), as bizarrely found in some OBO files.
  class TagValuePair < Token
    # tag::         text before the first ':' (stripped)
    # comment::     text following a trailing " ! ", when present
    # xrefs::       array of "db:id" strings pulled from a bracketed [...] list
    #               (left nil for 'comment' tags, which return early)
    # qualifier::   a trailing {...} modifier, or the text following a quoted value
    # description:: never assigned by this class; the reader always returns nil
    attr_reader :tag, :comment, :xrefs, :qualifier, :description
    @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)

    # Splits a "tag: value" line into its parts.
    #
    # str:: the matched line; it is stripped in place and must contain a ':'
    #       (the class regexp guarantees this), otherwise a NoMethodError is
    #       raised when the missing value is stripped.
    #
    # Raises RuntimeError (message: the unparsed remainder) when part of the
    # bracketed xref list cannot be consumed.
    def initialize(str)
      str.strip!
      tag, value = str.split(':', 2)
      value.strip!

      # 'comment' tags are free text: no inline comment, qualifier or xref parsing.
      if tag == 'comment'
        @tag = tag.strip
        @value = value.strip
        return
      end

      @xrefs = []

      # Handle inline comments: everything from the first " !" to the end of the line.
      if value =~ /(\s+!\s*.+)\s*\n*\z/i
        @comment = $1
        value.gsub!(@comment, '')
        @comment.strip!
        @comment.gsub!(/\A!\s*/, '')
      end

      value.strip!

      # Qualifier for the whole tag
      if value =~ /(\{[^{]*?\})\s*\n*\z/
        @qualifier = $1
        value.gsub!(@qualifier, '')
        @qualifier.strip!
      end

      value.strip!

      # Handle a xref list TODO: Tokenize
      if value =~ /(\[.*\])/i
        xref_list = $1
        value.gsub!(xref_list, '')

        # Take the brackets off, then trim. Whitespace left just inside the
        # brackets would stop the anchored removal below from matching on the
        # first pass, and the same xref would then be pushed twice.
        xref_list = xref_list[1..-2].strip

        until xref_list.empty?
          remaining = xref_list.length

          xref_list.gsub!(/\A\s*,\s*/, '')

          xref_list =~ /\A(.+?:[^\"|\{|\,]+)/i
          v = $1

          unless v.nil? || v.empty?
            v.strip!
            r = Regexp.escape v
            xref_list.gsub!(/\A#{r}\s*/, '')
            @xrefs.push(v)
          end

          xref_list.strip!

          # A description (quoted text following the xref); consumed and discarded
          if xref_list =~ /\A(\s*".*?")/i
            d = $1
            r = Regexp.escape d
            xref_list.gsub!(/\A#{r}/, '')
            xref_list.strip!
          end

          # A optional modifier ({...} following the xref); consumed and discarded
          if xref_list =~ /\A(\s*\{[^\}]*?\})/
            m = $1
            r = Regexp.escape m
            xref_list.gsub!(/\A#{r}/, '')
            xref_list.strip!
          end

          xref_list.strip!

          # Failsafe: every step above only ever removes text, so an unchanged
          # length means nothing matched and the loop would never terminate.
          raise "#{xref_list}" if xref_list.length == remaining
        end
      end

      value.strip!

      # At this point we still might have a '"foo" QUALIFIER' combination
      if value =~ /\A(\"[^\"]*\")\s+(.*)/
        @value = $1.strip
        @qualifier = $2.strip if !$2.nil?
      else
        @value = value.strip
      end

      # Drop the surrounding double quotes from a quoted value.
      @value = @value[1..-2].strip if @value[0..0] == "\""
      @tag = tag.strip
      @value.strip!
    end
  end

  # Token for a bracketed, comma-separated list of "key:value" entries such as
  # "[GO:0001, url:http://example.org]". The regexp captures the text between
  # the brackets; +value+ is a Hash mapping each stripped key to its stripped
  # value.
  class XrefList < Token
    @regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i)

    # str:: the text captured between the brackets; it is stripped in place.
    def initialize(str)
      str.strip!
      @value = {}
      str.split(",").each do |entry|
        # Nothing between two commas: there is no key to record.
        next if entry.strip.empty?

        # Split on the FIRST colon only, so values that themselves contain a
        # colon (URLs, for instance) are kept whole instead of being truncated.
        key, val = entry.split(":", 2)

        # An entry with no colon is stored with an empty value rather than
        # failing on nil.
        @value[key.strip] = val.to_s.strip
      end
    end
  end

  # Token for a "relationship: <relation> <related_term> [! comment]" line.
  # The regexp captures everything after "relationship:".
  class RelationshipTag < Token
    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
    @regexp = /\A\s*relationship:\s*(.+)\s*\n*/i

    # str:: the text following "relationship:". The first two
    #       whitespace-separated fields become +relation+ and +related_term+;
    #       the text after a " ! " marker becomes +comment+ ("" when absent).
    def initialize(str)
      @tag = 'relationship'
      @xrefs = []

      fields = str.split(/\s/, 3)
      @relation = fields[0]
      @related_term = fields[1]

      comment_match = /\s+!\s+(.*)\s*\n*/i.match(str)
      @comment = comment_match ? comment_match[1] : ""

      # Trim all three in place.
      [@relation, @related_term, @comment].each(&:strip!)
    end
  end

  # Token for an "is_a: <related_term> [! comment]" line. It is reported with
  # tag 'relationship' and relation 'is_a'. The regexp captures everything
  # after "is_a:".
  class IsATag < Token
    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
    @regexp = /\A\s*is_a:\s*(.+)\s*\n*/i

    # str:: the text following "is_a:". The first whitespace-separated field is
    #       the related term; whatever follows is the comment, with a leading
    #       "!" marker removed ("" when there is nothing after the term).
    def initialize(str)
      @tag = 'relationship'
      @relation = 'is_a'

      term, remainder = str.split(/\s/, 2)
      @related_term = term
      @comment = remainder || ""
      @comment.gsub!(/\A!\s*/, '')

      # Trim all three in place.
      [@relation, @related_term, @comment].each(&:strip!)
      @xrefs = []
    end
  end

  # Token for a "disjoint_from: <related_term> [! comment]" line. It is
  # reported with tag 'relationship' and relation 'disjoint_from'. The regexp
  # captures everything after "disjoint_from:".
  class DisjointFromTag < Token
    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
    @regexp = /\A\s*disjoint_from:\s*(.+)\s*\n*/i

    # str:: the text following "disjoint_from:". The first whitespace-separated
    #       field is the related term; whatever follows is the comment, with a
    #       leading "!" marker removed ("" when there is nothing after the term).
    def initialize(str)
      @tag = 'relationship'
      @relation = 'disjoint_from'

      parts = str.split(/\s/, 2)
      @related_term = parts[0]
      @comment = parts[1] || ""
      @comment.gsub!(/\A!\s*/, '')

      # Trim all three in place.
      [@relation, @related_term, @comment].each(&:strip!)
      @xrefs = []
    end
  end


  # Placeholder token: its pattern only matches the literal text "fail".
  class NameValuePair < Token
    @regexp = /fail/
  end

  # Placeholder token: its pattern only matches the literal text "fail".
  class Dbxref < Token
    @regexp = /fail/
  end

  # same as ID
  class Label < Token
    @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') #  matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "

    # str:: the matched label text; it is stripped in place. One layer of
    #       single quotes, then one layer of double quotes, is peeled off when
    #       the text starts with that quote character, and the result is
    #       trimmed again before being stored as +value+.
    def initialize(str)
      str.strip!
      ["'", '"'].each do |quote|
        # Drop the first and last character when the text opens with this quote.
        str = str[1..-2] if str.start_with?(quote)
      end
      str.strip!
      @value = str
    end
  end

  # note we grab EOL and ; here 
  # Token for a "key = value" pair, where the value may be single-quoted,
  # double-quoted, parenthesised or a bare word. +value+ is a one-entry Hash
  # mapping the downcased key (as a Symbol) to the value with one layer of
  # surrounding quotes removed, e.g. "Foo = 'b a r'" gives {:foo => "b a r"}.
  class ValuePair < Token
    @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i)

    # str:: the matched "key = value" text; it is stripped in place and must
    #       contain an '=' (the class regexp guarantees this).
    def initialize(str)
      str.strip!

      # Split on the FIRST '=' only, so a value that itself contains '='
      # (e.g. foo = 'a=b') is kept whole instead of being cut at the second one.
      key, val = str.split('=', 2)
      val = val.strip

      # Peel one layer of surrounding quotes.
      val = val[1..-2] if val[0..0] == "'"
      val = val[1..-2] if val[0..0] == "\""

      @value = { key.strip.downcase.to_sym => val.strip }
    end
  end

  # Matches input that consists of nothing but whitespace/newlines up to the
  # end of the string; the whitespace run is the first capture group.
  class EndOfFile < Token
    @regexp = /\A(\s*\n*)\Z/
  end

  ## punctuation

  # Matches a single "[" with optional surrounding whitespace; the bracket is
  # the first capture group.
  class LBracket < Token
    @regexp = /\A\s*(\[)\s*/
  end

  #class LParen < Token
  #  @regexp = Regexp.new('\A\s*(\()\s*')
  #end
  
  #class RBracket < Token
  #  @regexp = Regexp.new('\A\s*(\])\s*')
  #end
  
  #class RParen < Token
  #  @regexp = Regexp.new('\A\s*(\))\s*')
  #end
  
  #class Equals < Token
  #  @regexp = Regexp.new('\A\s*(=)\s*')
  #end
  
  #class BckSlash < Token
  #  @regexp = Regexp.new('\A\s*(\/)\s*')
  #end
  
  #class Colon < Token
  #  @regexp = Regexp.new('\A\s*(:)\s*')
  #end
  
  #class SemiColon < Token
  #  @regexp = Regexp.new('\A\s*(;)\s*')
  #end
  
  #class Comma < Token
  #  @regexp = Regexp.new('\A\s*(\,)\s*')
  #end
 
  #class Number < Token
  #  @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
  #  def initialize(str)
  #    # a little oddness here, in some case we don't want to include the .0
  #    # see issues with numbers as labels
  #    if str =~ /\./
  #      @value = str.to_f
  #    else
  #      @value = str.to_i
  #    end
  #  end
  #end

  # This list defines inclusion and priority, i.e. if tokens have overlap then the earlier indexed token will match first
  # Returns a fresh Array of the token classes used for OBO files, in the
  # order given below.
  # NameValuePair and Dbxref are deliberately left out (not implemented).
  def self.obo_file_token_list
    %i[
      Term
      Typedef
      LBracket
      DisjointFromTag
      IsATag
      RelationshipTag
      TagValuePair
      XrefList
      EndOfFile
    ].map { |token_name| OboParser::Tokens.const_get(token_name) }
  end

end