# Simple Declarative Language (SDL) for Ruby
# Copyright 2005 Ikayzo, inc.
#
# This program is free software. You can distribute or modify it under the
# terms of the GNU Lesser General Public License version 2.1 as published by
# the Free Software Foundation.
#
# This program is distributed AS IS and WITHOUT WARRANTY. OF ANY KIND,
# INCLUDING MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, contact the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.


module SDL4R

  require File.dirname(__FILE__) + '/reader'
  require File.dirname(__FILE__) + '/token'

  class Parser

    # Tokenizer of the SDL parser
    class Tokenizer

      # Symbols naming the token types: identifiers, punctuation and literals.
      # The list is frozen so that it cannot be modified by accident at runtime.
      TOKEN_TYPES = [
        :IDENTIFIER,

        # punctuation
        :COLON, :SEMICOLON, :EQUALS, :START_BLOCK, :END_BLOCK,

        # literals
        :STRING, :CHARACTER, :BOOLEAN, :NUMBER, :DATE, :TIME, :BINARY, :NULL
      ].freeze


      # Builds a tokenizer reading SDL text from the given +IO+, which is wrapped
      # into a Parser::Reader.
      #
      # Raises an ArgumentError if +io+ is +nil+.
      def initialize(io)
        if io.nil?
          raise ArgumentError, "io == nil"
        end

        @reader = Parser::Reader.new(io)

        # initial tokenizing state
        @startEscapedQuoteLine = false
        @token_start = 0
        @tokens = nil
        @tokenText = nil
      end

      # Closes this Tokenizer by closing the +Reader+ it reads from.
      # Returns whatever the reader's +close+ returns.
      def close
        return @reader.close()
      end

      # Closes the reader, then raises a SdlParseError carrying +description+.
      #
      # +line_no+ and +position+ are zero-based. When one of them is omitted (+nil+),
      # the reader's current line number / position is used in its place.
      def parse_error(description, line_no = nil, position = nil)
        begin
          @reader.close
        rescue IOError
          # closing is best effort: there is nothing more to do if it fails
        end

        error_line = line_no.nil? ? @reader.line_no : line_no
        error_position = position.nil? ? @reader.pos : position

        # Editors typically count lines and positions from 1 rather than 0,
        # hence the "+ 1" below.
        raise SdlParseError.new(description, error_line + 1, error_position + 1, @reader.line)
      end

      # Closes the reader and raises a SdlParseError (through parse_error) whose message
      # follows the format "Was expecting X but got Y".
      #
      # +line+ and +position+ are handed over to parse_error as they are. Both are
      # optional: when omitted (+nil+), parse_error uses the reader's current line number
      # and position, which allows calling this method with only the two descriptions.
      #
      def expecting_but_got(expecting, got, line = nil, position = nil)
        parse_error("Was expecting #{expecting} but got #{got}", line, position)
      end

      # Reads logical lines until one of them yields at least one token or until no line
      # is left, skipping the lines that turn out to be empty.
      # Line continuations, within and outside String literals, are handled by
      # read_line_tokens_even_if_empty.
      #
      # Returns the logical line as a list of Tokens (also available in @tokens), or nil
      # once read_line_tokens_even_if_empty has nothing left to return.
      #
      def read_line_tokens
        read_line_tokens_even_if_empty()

        # keep going as long as we get a line (non-nil) that holds no token
        while !@tokens.nil? && @tokens.empty?
          read_line_tokens_even_if_empty()
        end

        @tokens
      end

      # Line number reported by the underlying reader.
      def line_no
        return @reader.line_no
      end

      # Position reported by the underlying reader.
      def pos
        return @reader.pos
      end

      # Line held by the underlying reader.
      def line
        return @reader.line
      end

      private

      # Reads the next logical line and turns it into tokens, which are assigned to @tokens.
      # Line continuations, both within and outside String literals, are dealt with by the
      # handle_* helpers called from here.
      #
      # Returns the logical line as a list of Tokens.
      # Returns an empty array if the line held no token (e.g. it was blank or only made
      # of comments).
      # Returns nil if the reader has no current line (presumably because the end of the
      # stream has been reached).
      #
      def read_line_tokens_even_if_empty
        # reset the per-line state
        @tokens = nil
        @tokenText = nil
        @token_start = nil

        # The previous call may have stopped in the middle of a line (after a semicolon):
        # a new line is only read when the current one has been consumed.
        @reader.read_line() if @reader.end_of_line?
        return @tokens unless @reader.line

        @tokens = []
        @token_start = @reader.pos

        while not @reader.end_of_line?
          # flush the text a handler might have left pending
          if @tokenText
            @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
            @tokenText = nil
          end

          # dispatch on the current character (and sometimes on the following one)
          c = @reader.current_char
          next_c = @reader.get_line_char(@reader.pos + 1)
          case c
          when "\""
            # handle "" style strings including line continuations
            handle_double_quote_string()

          when "'"
            handle_character_literal()

          when "{", "}", "=", ":", ";"
            # handle punctuation
            punctuation_token = Token.new(c, @reader.line_no, @reader.pos)
            @tokenText = nil

            if punctuation_token.type == :SEMICOLON
              # a semicolon ends the logical line: it is skipped and not stored as a token,
              # the rest of the physical line being left for the next call
              @reader.skip_char()
              break
            else
              @tokens << punctuation_token
            end

          when "#"
            # skip : hash comment at end of line
            @reader.skip_line()

          when "/"
            # handle // and /**/ style comments
            if next_c == "/"
              # skip : // comment
              @reader.skip_line()
            else
              handle_slash_comment()
            end

          when "`"
            # handle multiline `` style strings
            handle_back_quote_string()

          when "["
            # handle binary literals
            handle_binary_literal()

          when "\s", "\t"
            @reader.skip_whitespaces()

          when "\\"
            # line continuations (outside a string literal)
            handle_line_continuation();

          when /^[0-9\-\.]$/
            if c == "-" and next_c == "-"
              # -- comments : ignore
              @reader.skip_line()
            else
              # handle numbers, dates, and time spans
              handle_number_date_or_time_span()
            end

          when /^[a-zA-Z\$_]$/
            # FIXME Here, the Java code specifies isJavaIdentifierStart() but
            # this is not easily implemented (at least as of Ruby 1.8).
            # So, we implement a subset of these characters.
            handle_identifier()

          when "\n", "\r"
            # end of line
            @reader.skip_line()

          else
            parse_error("Unexpected character '#{c}'")
          end

          # Move on to the next character.
          # NOTE(review): the handlers seem to rely on this call to step over the last
          # character they processed - confirm against Reader before reordering anything.
          @reader.skip_char()
        end

        # flush the text possibly left pending by the last handler
        if @tokenText
          @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        end

        return @tokens
      end

      # Appends to @tokenText the character designated by an escape sequence, ((|c|))
      # being the character found right after the backslash (this method assumes that the
      # previous char was a backslash).
      #
      # The accepted escapes are \\ and \" (added as they are), \n, \r and \t.
      # Any other character is reported through parse_error.
      #
      def add_escaped_char_in_string(c)
        if c == "\\" || c == "\""
          @tokenText << c
        elsif c == "n"
          @tokenText << "\n"
        elsif c == "r"
          @tokenText << "\r"
        elsif c == "t"
          @tokenText << "\t"
        else
          parse_error("Illegal escape character in string literal: '#{c.chr}'.")
        end
      end

      # Reads a double-quoted String literal starting at the reader's current position
      # (the opening quote) and adds the corresponding Token (quotes included) to @tokens.
      #
      # Escape sequences are translated by add_escaped_char_in_string. A backslash only
      # followed by whitespaces until the end of the line continues the literal on the
      # next line, whose leading spaces and tabs are ignored.
      #
      # When the literal is complete, the reader is left on the closing quote.
      # If the line ends before the closing quote, a parse error is raised.
      #
      def handle_double_quote_string
        escaped = false
        @startEscapedQuoteLine = false

        # The token starts at the opening quote: without this assignment, the Token would
        # carry the start position of whatever was tokenized before it.
        @token_start = @reader.pos
        @tokenText = "\""
        @reader.skip_char()

        while not @reader.end_of_line?
          c = @reader.current_char

          if "\s\t".include?(c) && @startEscapedQuoteLine
            # leading whitespace of a continuation line: not part of the literal
          else
            @startEscapedQuoteLine = false

            if escaped
              add_escaped_char_in_string(c)
              escaped = false

            elsif c == "\\"
              # check for String broken across lines
              if @reader.rest_of_line =~ /^\\\s*$/
                handle_escaped_double_quoted_string()
                next # as we are at the beginning of a new line
              else
                escaped = true
              end

            else
              @tokenText << c
              if c == "\""
                # end of double-quoted string detected
                @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
                @tokenText = nil
                return
              end
            end
          end

          @reader.skip_char()
        end

        # A terminated literal returns from inside the loop: getting here means that the
        # line ended before the closing quote. The text itself is not inspected in order
        # to decide whether there is an error because it may legitimately end with an
        # escaped quote (e.g. "abc\"), which would otherwise pass for a terminated literal.
        if @tokenText == "\""
          parse_error("Orphan quote (unterminated string)", @reader.line_no, @reader.line_length)
        else
          parse_error(
            "String literal \"#{@tokenText}\" not terminated by end quote.",
            @reader.line_no, @reader.line_length)
        end
      end

      # Handles a backslash breaking a double-quoted String literal across lines: the
      # reader is moved to the next line and @startEscapedQuoteLine is set to true.
      #
      # Raises a parse error if the backslash is followed by anything else than whitespaces
      # or if there is no line left to read.
      #
      def handle_escaped_double_quoted_string
        # '\' can be followed by whitespaces, but by nothing else
        unless @reader.rest_of_line =~ /^\\\s*$/
          return parse_error(
            "Malformed string literal - escape followed by whitespace " +
              "followed by non-whitespace.")
        end

        @reader.read_line()
        if @reader.end_of_file?
          parse_error("Escape at end of file.")
        end

        @startEscapedQuoteLine = true
      end

      # Reads a character literal such as 'a' or '\n' starting at the reader's current
      # position (the opening quote) and adds the corresponding Token to @tokens.
      # For an escaped character, the Token text holds the character designated by the
      # escape sequence (e.g. a line feed for \n) between single quotes.
      #
      # Raises a parse error if the line ends before the literal is complete, if the
      # escape sequence is not one of \\, \', \n, \r, \t or if the closing quote is missing.
      #
      def handle_character_literal
        if not @reader.more_chars_in_line?
          parse_error("Got ' at end of line")
        end

        c2 = @reader.read_char()

        if c2 == "\\"
          # escaped character
          if @reader.end_of_line?
            parse_error("Got '\\ at end of line")
          end

          c3 = @reader.read_char()

          if not @reader.more_chars_in_line?
            parse_error("Got '\\#{c3} at end of line")
          end

          case c3
          when "\\"
            @tokens << Token.new("'\\'", @reader.line_no, @reader.pos)
          when "'"
            @tokens << Token.new("'''", @reader.line_no, @reader.pos)
          when "n"
            @tokens << Token.new("'\n'", @reader.line_no, @reader.pos)
          when "r"
            @tokens << Token.new("'\r'", @reader.line_no, @reader.pos)
          when "t"
            @tokens << Token.new("'\t'", @reader.line_no, @reader.pos)
          else
            parse_error("Illegal escape character #{@reader.current_char}")
          end

          @reader.skip_char()
          if @reader.current_char != "'"
            # The location is given explicitly, like in the non-escaped case below:
            # called with only two arguments, a four-parameter expecting_but_got would
            # fail with an ArgumentError instead of reporting the parse error.
            expecting_but_got(
              "single quote (')", "\"#{@reader.current_char}\"", @reader.line_no, @reader.pos)
          end
        else
          # plain character
          @tokens << Token.new("'#{c2}'", @reader.line_no, @reader.pos)
          if not @reader.more_chars_in_line?
            parse_error("Got '#{c2} at end of line")
          end
          @reader.skip_char()
          if @reader.current_char != "'"
            expecting_but_got(
              "quote (')", "\"#{@reader.current_char}\"", @reader.line_no, @reader.pos)
          end
        end
      end

      # Handles a slash that does not start a // comment (which the caller takes care of):
      # the only valid possibility left is the beginning of a /* */ comment, which is
      # skipped whether it ends on the same line or several lines below.
      # The reader is left on the final slash of the comment.
      #
      # Raises a parse error if the slash is the last character of the line, if it is not
      # followed by a star or if the comment is never closed.
      #
      def handle_slash_comment
        if not @reader.more_chars_in_line?
          parse_error("Got slash (/) at end of line.")
        end

        if @reader.get_line_char(@reader.pos + 1) == "*"
          end_index = @reader.find_next_in_line("*/")
          if end_index
            # handle comment on same line
            @reader.skip_to(end_index + 1)
          else
            # handle multiline comments
            loop do
              @reader.read_raw_line()
              if @reader.end_of_file?
                parse_error("/* comment not terminated.", @reader.line_no, -2)
              end

              end_index = @reader.find_next_in_line("*/", 0)

              if end_index
                @reader.skip_to(end_index + 1)
                break
              end
            end
          end
        else
          # A slash which starts no comment is not valid here. Testing for a second slash
          # at this point would never succeed as the caller handles "//" itself, and the
          # stray slash would be silently ignored.
          parse_error("Got slash (/) in unexpected location.")
        end
      end

      # Reads a `` style String literal starting at the reader's current position (the
      # opening back quote) and adds the corresponding Token to @tokens. The literal may
      # end on the same line or several lines below, in which case the lines are read raw.
      # The reader is left on the closing back quote.
      #
      # Raises a parse error if the closing back quote is never found.
      #
      def handle_back_quote_string
        closing_index = @reader.find_next_in_line("`")

        if closing_index
          # the whole literal is on the current line
          literal = @reader.substring(@reader.pos, closing_index)
          @tokens << Token.new(literal, @reader.line_no, @reader.pos)
          @tokenText = nil
          return @reader.skip_to(closing_index)
        end

        # the literal spans several lines: gather them until the closing back quote
        @tokenText = @reader.rest_of_line
        @token_start = @reader.pos

        loop do
          @reader.read_raw_line()
          parse_error("` quote not terminated.", @reader.line_no, -2) if @reader.end_of_file?

          closing_index = @reader.find_next_in_line("`", 0)
          unless closing_index
            @tokenText << @reader.line
            next
          end

          @tokenText << @reader.substring(0, closing_index)
          @reader.skip_to(closing_index)
          break
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

      # Reads a [base64] binary literal starting at the reader's current position (the
      # opening bracket) and adds the corresponding Token to @tokens. The literal may end
      # on the same line or several lines below, in which case the lines are read raw.
      # The reader is left on the closing bracket.
      #
      # Raises a parse error if the closing bracket is never found.
      #
      def handle_binary_literal
        closing_index = @reader.find_next_in_line("]")

        if closing_index
          # the whole literal is on the current line
          literal = @reader.substring(@reader.pos, closing_index)
          @tokens << Token.new(literal, @reader.line_no, @reader.pos)
          @tokenText = nil
          return @reader.skip_to(closing_index)
        end

        # the literal spans several lines: gather them until the closing bracket
        @tokenText = @reader.substring(@reader.pos)
        @token_start = @reader.pos

        loop do
          @reader.read_raw_line()
          if @reader.end_of_file?
            parse_error("[base64] binary literal not terminated.", @reader.line_no, -2)
          end

          closing_index = @reader.find_next_in_line("]", 0)
          unless closing_index
            @tokenText << @reader.line
            next
          end

          @tokenText << @reader.substring(0, closing_index)
          @reader.skip_to(closing_index)
          break
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

      # Handles a line continuation (not inside a string): the reader is moved to the
      # next line so that the logical line goes on.
      #
      # Raises a parse error if the backslash is followed by anything else than whitespaces
      # or if there is no line left to read.
      #
      def handle_line_continuation
        # backslash line continuation outside of a String literal
        # can only occur at the end of a line
        unless @reader.rest_of_line =~ /^\\\s*$/
          parse_error("Line continuation (\\) before end of line")
        else
          # a local variable is enough: the line read is only checked for nil here and
          # nothing in the tokenizer reads it back from an instance variable
          next_line = @reader.read_line()
          if next_line.nil?
            parse_error("Line continuation at end of file.", @reader.line_no, @reader.pos)
          end
        end
      end

      # Reads a number, a date or a time span starting at the reader's current position and
      # adds the corresponding Token to @tokens.
      #
      # The token is made of the longest run of word characters, '.', '-', '+', ':' and '/'
      # (a slash followed by a star is not included as it starts a comment).
      # When a character stops the run, the reader is moved one character back; when the
      # end of the line stops it, the reader is left there.
      #
      def handle_number_date_or_time_span
        @token_start = @reader.pos
        @tokenText = ""

        until @reader.end_of_line?
          char = @reader.current_char
          in_literal =
            char =~ /[\w\.\-+:]/ ||
            (char == "/" && @reader.get_line_char(@reader.pos + 1) != "*")

          unless in_literal
            @reader.previous_char()
            break
          end

          @tokenText << char
          @reader.skip_char()
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

      # Reads an identifier starting at the reader's current position and adds the
      # corresponding Token to @tokens.
      #
      # The identifier is made of the longest run of word characters (letters, digits,
      # underscore), '$', '-' and '.'.
      # When a character stops the run, the reader is moved one character back; when the
      # end of the line stops it, the reader is left there.
      #
      def handle_identifier
        @token_start = @reader.pos
        @tokenText = ""

        while not @reader.end_of_line?
          c = @reader.current_char

          # FIXME here we are stricter than the Java version because there is no
          # easy way to implement Character.isJavaIdentifierPart() in Ruby :)
          #
          # The dash is escaped and placed last in the character class: written between
          # '$' and '.', it would define the range of characters going from '$' to '.'
          # and let % & ' ( ) * + , in as well.
          if c =~ /[\w\$\.\-]/
            @tokenText << c
          else
            @reader.previous_char()
            break
          end

          @reader.skip_char()
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

    end

  end
  
end