module OboParser::Tokens

  # Base class of every token defined in this module.
  #
  # Each subclass keeps its matching pattern in a class-level instance variable
  # (@regexp); an instance keeps the text it was built from in +value+.
  class Token
    # Exposes the class-level @regexp as Token.regexp / Subclass.regexp without
    # resorting to a class variable (@@regexp).
    class << self
      attr_reader :regexp
    end

    attr_reader :value

    # str - the text for this token; stored as-is.
    def initialize(str)
      @value = str
    end
  end

  # Token for a "[term]" header. The bracketed text is matched
  # case-insensitively and captured; surrounding whitespace is consumed.
  class Term < Token
    @regexp = /\A\s*(\[term\])\s*/i
  end

  # Token for a "[typedef]" header. The bracketed text is matched
  # case-insensitively and captured; surrounding whitespace is consumed.
  class Typedef < Token
    @regexp = /\A\s*(\[typedef\])\s*/i
  end

  # Token for a single "tag: value" line, e.g.
  #
  #   def: "The foo that is bar." [PATO:0000001] ! a comment
  #
  # After construction:
  #   tag     - text before the first ':' (stripped)
  #   value   - text after the first ':' with the trailing comment, the xref
  #             list and one pair of enclosing quotes removed (stripped)
  #   comment - text after the first '!' in the value, or nil when absent
  #   xrefs   - Array of the stripped, comma separated entries found between
  #             '[' and ']', or nil when the value has no bracketed list
  #
  # Limitations: '!' and '[' are recognised anywhere in the value, including
  # inside quoted text.
  class TagValuePair < Token
    attr_reader :tag, :comment, :xrefs
    @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)

    # str - the raw "tag: value" text. The caller's string is not modified.
    #       A string without ':' yields the whole text as tag and an empty value.
    def initialize(str)
      # Parse a stripped copy so the argument (possibly frozen) is left untouched.
      tag, value = str.strip.split(':', 2)
      value = value.to_s.strip

      # Trailing comment: from the first '!' that is followed by at least one
      # more character up to the end of the value.
      if value =~ /(!\s*.+)\Z/i
        raw_comment = $1
        value = value.gsub(raw_comment, '')
        @comment = raw_comment.sub(/\A!\s*/, '')
      end

      # Xref list: the span from the first '[' to the last ']'. Could be made
      # more robust; assumes non-quoted, comma delimited entries in the format
      # 'foo:bar, stuff:things'.
      if value =~ /(\s*\[.*\]\s*)/i
        xref_list = $1
        value = value.gsub(xref_list, '')
        # Drop the enclosing [] and the whitespace around each entry.
        @xrefs = xref_list.strip[1..-2].split(',').map { |xref| xref.strip }
      end

      @tag = tag.to_s.strip
      # Double quotes are removed first, then single quotes, each at most once.
      @value = unquote(unquote(value.strip, '"'), "'").strip
    end

    private

    # Returns +text+ without its first and last character when both are +quote+;
    # otherwise returns +text+ unchanged, so a value such as '"foo" EXACT' does
    # not lose its final character.
    def unquote(text, quote)
      enclosed = text.length >= 2 && text.start_with?(quote) && text.end_with?(quote)
      enclosed ? text[1..-2] : text
    end
  end

  # Token for a bracketed, comma separated xref list such as
  # '[foo:bar, stuff:things]'. The regexp captures the text between the
  # brackets.
  #
  # +value+ is a Hash mapping the text before each entry's first ':' to the
  # text after it, both stripped: {'foo' => 'bar', 'stuff' => 'things'}.
  class XrefList < Token
    @regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i)

    # str - the comma separated entries. The caller's string is not modified.
    #       Blank entries are skipped; an entry without ':' maps to ''.
    def initialize(str)
      @value = {}
      str.strip.split(',').each do |entry|
        next if entry.strip.empty?

        # Split on the first ':' only, so the remainder may itself contain ':'.
        prefix, reference = entry.split(':', 2)
        @value[prefix.strip] = reference.to_s.strip
      end
    end
  end

  # Placeholder token: its pattern only matches the literal text "fail".
  # Not part of obo_file_token_list, where it is noted as not implemented.
  class NameValuePair < Token
    @regexp = /fail/
  end

  # Placeholder token: its pattern only matches the literal text "fail".
  # Not part of obo_file_token_list, where it is noted as not implemented.
  class Dbxref < Token
    @regexp = /fail/
  end

  # same as ID
  #
  # Token for a bare or quoted label. +value+ is the label text with one
  # enclosing quote character removed from each end and surrounding
  # whitespace stripped.
  class Label < Token
    # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "
    @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*')

    # str - the matched label text. The caller's string is not modified.
    def initialize(str)
      # Work on a stripped copy so the argument (possibly frozen) is left untouched.
      label = str.strip
      label = label[1..-2] if label[0..0] == "'" # get rid of quote marks
      label = label[1..-2] if label[0..0] == '"'
      @value = label.strip
    end
  end

  # note we grab EOL and ; here
  #
  # Token for 'key = value' text such as foo=bar or foo = 'b a ar'.
  # +value+ is a one-entry Hash: the key is the stripped, downcased text before
  # the first '=' as a Symbol; the value is the stripped text after it with one
  # pair of enclosing quote characters removed.
  class ValuePair < Token
    @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i)

    # str - the matched 'key = value' text. The caller's string is not modified.
    #       Text without '=' yields an empty string as the value.
    def initialize(str)
      # Split on the first '=' only, so the value may itself contain '='.
      key, val = str.strip.split('=', 2)
      val = val.to_s.strip
      val = val[1..-2] if val[0..0] == "'"
      val = val[1..-2] if val[0..0] == "\""
      @value = { key.to_s.strip.downcase.to_sym => val.strip }
    end
  end

  # Token for the end of input: matches only when nothing but whitespace and
  # newlines remains, and captures that remainder.
  class EndOfFile < Token
    @regexp = /\A(\s*\n*)\Z/
  end

  ## punctuation

  # Token for an opening '[' (captured), with any surrounding whitespace consumed.
  class LBracket < Token
    @regexp = /\A\s*(\[)\s*/
  end

  #class LParen < Token
  #  @regexp = Regexp.new('\A\s*(\()\s*')
  #end
  
  #class RBracket < Token
  #  @regexp = Regexp.new('\A\s*(\])\s*')
  #end
  
  #class RParen < Token
  #  @regexp = Regexp.new('\A\s*(\))\s*')
  #end
  
  #class Equals < Token
  #  @regexp = Regexp.new('\A\s*(=)\s*')
  #end
  
  #class BckSlash < Token
  #  @regexp = Regexp.new('\A\s*(\/)\s*')
  #end
  
  #class Colon < Token
  #  @regexp = Regexp.new('\A\s*(:)\s*')
  #end
  
  #class SemiColon < Token
  #  @regexp = Regexp.new('\A\s*(;)\s*')
  #end
  
  #class Comma < Token
  #  @regexp = Regexp.new('\A\s*(\,)\s*')
  #end
 
  #class Number < Token
  #  @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
  #  def initialize(str)
  #    # a little oddness here, in some case we don't want to include the .0
  #    # see issues with numbers as labels
  #    if str =~ /\./
  #      @value = str.to_f
  #    else
  #      @value = str.to_i
  #    end
  #  end
  #end

  # This list defines inclusion and priority: when several tokens could match the same input, the token listed earlier matches first.
  def self.obo_file_token_list
    # Returns a fresh Array of token classes on every call; the order of the
    # entries is the order of the list.
    # NameValuePair and Dbxref are not implemented and are left out.
    [Term, Typedef, LBracket, TagValuePair, XrefList, EndOfFile]
  end

end