# Simple Declarative Language (SDL) for Ruby
# Copyright 2005 Ikayzo, inc.
#
# This program is free software. You can distribute or modify it under the 
# terms of the GNU Lesser General Public License version 2.1 as published by	
# the Free Software Foundation.
#
# This program is distributed AS IS and WITHOUT WARRANTY. OF ANY KIND,
# INCLUDING MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, contact the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.


module SDL4R

  require 'base64'

  begin
    # Try to use the Flt library, which defines DecNum
    require "flt"
  rescue LoadError
    # Well, shouganai.
  end

  require File.dirname(__FILE__) + '/sdl_binary'
  require File.dirname(__FILE__) + '/sdl_time_span'
  require File.dirname(__FILE__) + '/sdl_parse_error'
  require File.dirname(__FILE__) + '/tokenizer'

  # The SDL parser.
  #
  # Authors: Daniel Leuck, Philippe Vosges
  #
  # In Ruby 1.8, in order to enable UTF-8 support, you may have to declare the following lines:
  #
  #   $KCODE = 'u'
  #   require 'jcode'
  #
  # This will give you correct input and output and correct UTF-8 "general" sorting.
  # Alternatively you can use the following options when launching the Ruby interpreter:
  #
  #   /path/to/ruby -Ku -rjcode
  #
  class Parser

    # Passed to parse_error() in order to specify an error that occurred on no specific position
    # (column).
    #
    # NOTE(review): parse_error() is called by several methods of this class but is not defined
    # in the part of the class visible here -- confirm that it is provided elsewhere.
    UNKNOWN_POSITION = -2

    # Creates an SDL parser on the specified +IO+.
    #
    # Raises an ArgumentError if +io+ is nil.
    def initialize(io)
      raise ArgumentError, "io == nil" if io.nil?

      @tokenizer = Tokenizer.new(io)
    end

    # Parses the underlying +IO+ and returns an +Array+ of +Tag+.
    #
    # The tokenizer is closed once the whole input has been read successfully.
    #
    # ==Errors
    # [IOError] If a problem is encountered with the IO
    # [SdlParseError] If the document is malformed
    def parse
      tags = []

      while tokens = @tokenizer.read_line_tokens()
        if tokens.last.type == :START_BLOCK
          # tag with a block: the trailing "{" is not part of the tag itself
          tag = construct_tag(tokens[0...-1])
          add_children(tag)
          tags << tag

        elsif tokens.first.type == :END_BLOCK
          # we found a block end token that should have been consumed by
          # add_children() normally
          parse_error(
            "No opening block ({) for close block (}).",
            tokens.first.line,
            tokens.first.position)
        else
          # tag without block
          tags << construct_tag(tokens)
        end
      end

      @tokenizer.close()

      tags
    end

    private

    # Parses the children tags of +parent+ until an end of block is found.
    #
    # Reports a parse error if the input ends before the block is closed.
    def add_children(parent)
      while tokens = @tokenizer.read_line_tokens()
        if tokens.first.type == :END_BLOCK
          return

        elsif tokens.last.type == :START_BLOCK
          # found a child with a block
          tag = construct_tag(tokens[0...-1])
          add_children(tag)
          parent.add_child(tag)

        else
          parent.add_child(construct_tag(tokens))
        end
      end

      parse_error("No close block (}).", @tokenizer.line_no, UNKNOWN_POSITION)
    end

    # Construct a Tag (but not its children) from a string of tokens
    #
    # If the first token is a literal, the tag is anonymous: a "content" identifier token is
    # inserted at the head of +tokens+ (the array is modified in place).
    #
    # Throws SdlParseError if some bad syntax is found.
    def construct_tag(tokens)
      raise ArgumentError, "tokens == nil" if tokens.nil?
      if tokens.empty?
        parse_error("Internal Error: empty token list", @tokenizer.line_no, UNKNOWN_POSITION)
      end

      first_token = tokens.first
      if first_token.literal?
        first_token = Token.new("content")
        tokens.insert(0, first_token)

      elsif first_token.type != :IDENTIFIER
        expecting_but_got(
          "IDENTIFIER",
          "#{first_token.type} (#{first_token.text})",
          first_token.line,
          first_token.position)
      end

      return Tag.new(first_token.text) if tokens.size == 1

      values_start_index = 1
      second_token = tokens[1]

      if second_token.type == :COLON
        # namespace:name
        if tokens.size == 2 || tokens[2].type != :IDENTIFIER
          parse_error(
            "Colon (:) encountered in unexpected location.",
            second_token.line,
            second_token.position)
        end

        tag = Tag.new(tokens[2].text, first_token.text)
        values_start_index = 3

      else
        tag = Tag.new(first_token.text)
      end

      # read values
      attribute_start_index = add_tag_values(tag, tokens, values_start_index)

      # read attributes
      if attribute_start_index < tokens.size
        add_tag_attributes(tag, tokens, attribute_start_index)
      end

      tag
    end

    # Adds to +tag+ the values found in +tokens+ from index +start+ on.
    # Reading stops at the first IDENTIFIER token (start of the attributes) or at the end of
    # +tokens+.
    #
    # @return The position at the end of the value list
    #
    def add_tag_values(tag, tokens, start)
      size = tokens.size
      i = start

      while i < size
        token = tokens[i]

        if token.literal?
          # if a DATE token is followed by a TIME token combine them
          next_token = tokens[i + 1] # nil when +token+ is the last one
          if token.type == :DATE && next_token && next_token.type == :TIME
            date = token.object_for_literal
            time_span_with_zone = next_token.object_for_literal

            if time_span_with_zone.day != 0
              # as there are days specified, it can't be a full precision date
              tag.add_value(date)
              tag.add_value(
                SdlTimeSpan.new(
                  time_span_with_zone.day,
                  time_span_with_zone.hour,
                  time_span_with_zone.min,
                  time_span_with_zone.sec))

              if time_span_with_zone.time_zone_offset
                parse_error(
                  "TimeSpan cannot have a timeZone", next_token.line, next_token.position)
              end

            else
              tag.add_value(combine(date, time_span_with_zone))
            end

            i += 1 # the TIME token has been consumed as well

          else
            value = token.object_for_literal
            value = time_span_from(value, token) if value.is_a?(TimeSpanWithZone)
            tag.add_value(value)
          end
        elsif token.type == :IDENTIFIER
          break
        else
          expecting_but_got(
            "LITERAL or IDENTIFIER", token.type, token.line, token.position)
        end

        i += 1
      end

      i
    end

    #
    # Add attributes to the given tag
    #
    # Reads attributes of the form +name=value+ or +namespace:name=value+ in +tokens+ from index
    # +start+ up to the end of +tokens+.
    #
    def add_tag_attributes(tag, tokens, start)
      i = start
      size = tokens.size

      while i < size
        token = tokens[i]
        if token.type != :IDENTIFIER
          expecting_but_got("IDENTIFIER", token.type, token.line, token.position)
        end
        name_or_namespace = token.text

        if i == (size - 1)
          expecting_but_got(
            "\":\" or \"=\" \"LITERAL\"",
            "END OF LINE.",
            token.line,
            token.position)
        end

        i += 1
        token = tokens[i]
        if token.type == :COLON
          # namespace:name=value
          if i == (size - 1)
            expecting_but_got(
              "IDENTIFIER", "END OF LINE", token.line, token.position)
          end

          i += 1
          token = tokens[i]
          if token.type != :IDENTIFIER
            expecting_but_got(
              "IDENTIFIER", token.type, token.line, token.position)
          end
          name = token.text

          if i == (size - 1)
            expecting_but_got("\"=\"", "END OF LINE", token.line, token.position)
          end

          i += 1
          token = tokens[i]
          if token.type != :EQUALS
            expecting_but_got("\"=\"", token.type, token.line, token.position)
          end

          if i == (size - 1)
            expecting_but_got("LITERAL", "END OF LINE", token.line, token.position)
          end

          i += 1
          value, i = read_attribute_value(tokens, i)
          tag.set_attribute(name, value, name_or_namespace)

        elsif token.type == :EQUALS
          # name=value
          if i == (size - 1)
            expecting_but_got("LITERAL", "END OF LINE", token.line, token.position)
          end

          i += 1
          value, i = read_attribute_value(tokens, i)
          tag.set_attribute(name_or_namespace, value)

        else
          expecting_but_got(
            "\":\" or \"=\"", token.type, token.line, token.position)
        end

        i += 1
      end
    end

    # Reads the attribute value starting at +index+ in +tokens+.
    #
    # A DATE token immediately followed by a TIME token is combined into one date/time value (the
    # TIME must not specify days). A time literal alone is converted to a time span.
    #
    # Returns a two-element Array: the value and the index of the last token consumed.
    def read_attribute_value(tokens, index)
      token = tokens[index]
      unless token.literal?
        expecting_but_got("LITERAL", token.type, token.line, token.position)
      end

      next_token = tokens[index + 1] # nil when +token+ is the last one
      if token.type == :DATE && next_token && next_token.type == :TIME
        date = token.object_for_literal
        time_span_with_zone = next_token.object_for_literal

        if time_span_with_zone.day != 0
          expecting_but_got(
            "TIME (component of date/time) in attribute value",
            "TIME SPAN",
            token.line,
            token.position)
        end

        return [combine(date, time_span_with_zone), index + 1]
      end

      value = token.object_for_literal
      value = time_span_from(value, token) if value.is_a?(TimeSpanWithZone)
      [value, index]
    end

    # Converts +time_span_with_zone+ (read from +token+) into a SdlTimeSpan.
    #
    # A time span can't have a time zone: an error is reported at the position of +token+ if a
    # time zone offset was specified.
    def time_span_from(time_span_with_zone, token)
      if time_span_with_zone.time_zone_offset
        expecting_but_got(
          "TIME SPAN",
          "TIME (component of date/time)",
          token.line,
          token.position)
      end

      SdlTimeSpan.new(
        time_span_with_zone.day,
        time_span_with_zone.hour,
        time_span_with_zone.min,
        time_span_with_zone.sec)
    end

    # Combines a simple Date with a TimeSpanWithZone to create a DateTime
    #
    # If no time zone offset was specified, the UTC offset of the current machine is used.
    def combine(date, time_span_with_zone)
      time_zone_offset = time_span_with_zone.time_zone_offset
      time_zone_offset = TimeSpanWithZone.default_time_zone_offset if time_zone_offset.nil?

      DateTime.new(
        date.year,
        date.month,
        date.day,
        time_span_with_zone.hour,
        time_span_with_zone.min,
        time_span_with_zone.sec,
        time_zone_offset)
    end

    # An intermediate object used to store a timeSpan or the time
    # component of a date/time instance. The types are disambiguated at a later stage.
    #
    # +seconds+ can have a fraction
    # +time_zone_offset+ is a fraction of a day (equal to nil if not specified)
    class TimeSpanWithZone

      SECONDS_IN_DAY = 24 * 60 * 60

      def initialize(day, hour, minute, second, time_zone_offset)
        @day = day
        @hour = hour
        @min = minute
        @sec = second
        @time_zone_offset = time_zone_offset
      end

      attr_reader :day, :hour, :min, :sec, :time_zone_offset

      # Returns the UTC offset as a fraction of a day on the current machine
      def self.default_time_zone_offset
        Rational(Time.now.utc_offset, SECONDS_IN_DAY)
      end
    end

    ############################################################################
    ## Parsers for types
    ##
    ## These are class methods: the "private" keyword above does not apply to
    ## them.
    ############################################################################

    # Returns the content of the given string +literal+, i.e. +literal+ without its enclosing
    # double quotes or backquotes.
    #
    # Raises an ArgumentError if +literal+ is not enclosed in " or `.
    def self.parse_string(literal)
      unless literal =~ /(^`.*`$)|(^\".*\"$)/m
        raise ArgumentError,
          "Malformed string <#{literal}>." +
          "	Strings must start and end with \" or `"
      end

      literal[1..-2]
    end

    # Returns the character following the opening quote of the given character +literal+.
    #
    # Raises an ArgumentError if +literal+ is not enclosed in single quotes.
    def self.parse_character(literal)
      unless literal =~ /(^'.*'$)/
        raise ArgumentError,
          "Malformed character <#{literal}>." +
          "	Character must start and end with single quotes"
      end

      literal[1]
    end

    # Parses the given number +literal+ according to its suffix:
    # - "L": Integer
    # - "BD": Flt::DecNum if the Flt library is available, Float otherwise
    # - "F" or "D": Float
    # - no suffix: Integer if +literal+ contains neither "." nor "e", Float otherwise
    #
    # Raises an ArgumentError if +literal+ is not a well-formed number.
    def self.parse_number(literal)
      # we use the fact that Kernel.Integer() and Kernel.Float() raise ArgumentErrors
      if literal =~ /(.*)(L)$/i
        Integer($1)
      elsif literal =~ /([^BDF]*)(BD)$/i
        (defined? Flt::DecNum) ? Flt::DecNum($1) : Float($1)
      elsif literal =~ /([^BDF]*)(F|D)$/i
        Float($1)
      elsif literal.count(".e") == 0
        Integer(literal)
      else
        Float(literal)
      end
    end

    # Parses the given literal into a returned array
    # [days, hours, minutes, seconds, time_zone_offset].
    # 'days', 'hours' and 'minutes' are integers.
    # 'seconds' is an integer, or a rational number if a fraction was specified.
    # 'time_zone_offset' is a fraction of a day.
    # 'days' and 'seconds' are equal to 0 if they're not specified in ((|literal|)).
    # 'time_zone_offset' is equal to nil if not specified.
    #
    # ((|allowDays|)) indicates whether the specification of days is allowed
    # in ((|literal|))
    # ((|allowTimeZone|)) indicates whether the specification of the timeZone is
    # allowed in ((|literal|))
    #
    # All components are returned disregarding the values of ((|allowDays|)) and
    # ((|allowTimeZone|)).
    #
    # Raises an ArgumentError if ((|literal|)) has a bad format.
    def self.parse_time_span_and_time_zone(literal, allowDays, allowTimeZone)
      overall_sign = (literal =~ /^-/) ? -1 : +1

      if literal =~ /^(([+\-]?\d+)d:)/
        unless allowDays
          # detected a day specification in a pure time literal
          raise ArgumentError, "unexpected day specification in #{literal}"
        end

        # to_i and not Integer(): a leading zero must not be read as an octal prefix
        days = $2.to_i
        days_specified = true
        time_part = literal[($1.length)..-1]
      else
        days = 0
        days_specified = false
        time_part = literal
      end

      # We have to parse the string ourselves because AFAIK :
      #	- strptime() can't parse milliseconds
      #	- strptime() can't parse the time zone custom offset (CET+02:30)
      #	- strptime() accepts trailing chars
      #		(e.g. "12:24-xyz@" ==> "xyz@" is obviously wrong but strptime()
      #		 won't mind)
      unless time_part =~ /^([+-]?\d+):(\d+)(?::(\d+)(?:\.(\d+))?)?(?:(?:-([a-zA-Z]+))?(?:([\+\-]\d+)(?::(\d+))?)?)?$/i
        raise ArgumentError, "bad time component : #{literal}"
      end

      # keep the captures in local variables: $1, $2, etc. are overwritten by any later match
      hour_text, minute_text, second_text, fraction_text = $1, $2, $3, $4
      time_zone_code = $5 # might be nil
      zone_hour_text, zone_minute_text = $6, $7

      hours = hour_text.to_i
      minutes = minute_text.to_i
      # seconds and milliseconds are implemented as one rational number
      # unless there are no milliseconds
      if fraction_text
        millisecond_part = fraction_text.ljust(3, "0")
        seconds =
          Rational((second_text + millisecond_part).to_i, 10 ** millisecond_part.length)
      else
        # to_i and not Integer(): "08" and "09" are not valid octal numbers
        seconds = second_text ? second_text.to_i : 0
      end

      if (time_zone_code || zone_hour_text) && !allowTimeZone
        raise ArgumentError, "unexpected time zone specification in #{literal}"
      end

      zone_custom_minute_offset = nil
      if zone_hour_text
        zone_custom_minute_offset = zone_hour_text.to_i * 60
        if zone_minute_text
          # the sign is read from the text: the hour part can be zero (e.g. "+00:30")
          if zone_hour_text[0, 1] == "-"
            zone_custom_minute_offset -= zone_minute_text.to_i
          else
            zone_custom_minute_offset += zone_minute_text.to_i
          end
        end
      end

      time_zone_offset = get_time_zone_offset(time_zone_code, zone_custom_minute_offset)

      if !allowDays && hour_text =~ /^[+-]/
        # unexpected timeSpan syntax
        raise ArgumentError, "unexpected sign on hours : #{literal}"
      end

      # take the sign into account
      hours *= overall_sign if days_specified # otherwise the sign is already applied to the hours
      minutes *= overall_sign
      seconds *= overall_sign

      [days, hours, minutes, seconds, time_zone_offset]
    end

    # Parses the given literal (String) into a returned DateTime object (or a Date object if
    # ((|literal|)) has no time part).
    #
    # If no time zone is specified, the UTC offset of the current machine is used.
    #
    # Raises an ArgumentError if ((|literal|)) has a bad format.
    def self.parse_date_time(literal)
      raise ArgumentError, "date literal is nil" if literal.nil?

      begin
        parts = literal.split(" ")
        return parse_date(literal) if parts.length == 1

        date = parse_date(parts[0])

        _days, hours, minutes, seconds, time_zone_offset =
          parse_time_span_and_time_zone(parts[1], false, true)
        # same default as in combine(): DateTime doesn't accept a nil offset
        time_zone_offset = TimeSpanWithZone.default_time_zone_offset if time_zone_offset.nil?

        DateTime.civil(
          date.year,
          date.month,
          date.day,
          hours,
          minutes,
          seconds,
          time_zone_offset)

      rescue ArgumentError
        raise ArgumentError, "Bad date/time #{literal} : #{$!.message}"
      end
    end

    ##
    # Returns the time zone offset (Rational) corresponding to the provided parameters as a fraction
    # of a day. This method adds the two offsets if they are both provided.
    # Returns nil if neither parameter is provided.
    #
    # +time_zone_code+: can be nil
    # +custom_minute_offset+: can be nil
    #
    def self.get_time_zone_offset(time_zone_code, custom_minute_offset)
      return nil unless time_zone_code || custom_minute_offset

      time_zone_offset = custom_minute_offset ? Rational(custom_minute_offset, 60 * 24) : 0

      return time_zone_offset unless time_zone_code

      # we have to provide some bogus year/month/day in order to parse our time zone code
      d = DateTime.strptime("1999/01/01 #{time_zone_code}", "%Y/%m/%d %Z")
      # the offset is a fraction of a day
      d.offset + time_zone_offset
    end

    # Parses the +literal+ into a returned Date object.
    #
    # Raises an ArgumentError if +literal+ has a bad format.
    def self.parse_date(literal)
      # here, we're being stricter than strptime() alone as we forbid trailing chars
      unless literal =~ /^(\d+)\/(\d+)\/(\d+)$/
        raise ArgumentError, "Malformed Date <#{literal}>"
      end

      begin
        Date.strptime(literal, "%Y/%m/%d")
      rescue ArgumentError
        raise ArgumentError, "Malformed Date <#{literal}> : #{$!.message}"
      end
    end

    # Returns the result of SdlBinary.decode64 for the content of ((|literal|)).
    #
    # ((|literal|)) : a base-64 encoded literal (e.g.
    # "[V2hvIHdhbnRzIHRvIGxpdmUgZm9yZXZlcj8=]")
    def self.parse_binary(literal)
      clean_literal = literal[1..-2] # remove square brackets
      SdlBinary.decode64(clean_literal)
    end

    # Parses +literal+ (String) into the corresponding SdlTimeSpan, which is then
    # returned.
    #
    # Time spans use the format (d:)hh:mm:ss(.xxx). If the day component is included, it must be
    # suffixed with a lower case "d".
    #
    # Raises an ArgumentError if the literal is not a correct timeSpan literal.
    def self.parse_time_span(literal)
      days, hours, minutes, seconds, _time_zone_offset =
        parse_time_span_and_time_zone(literal, true, false)

      # split the (possibly fractional) seconds into whole seconds and milliseconds
      milliseconds = ((seconds - seconds.to_i) * 1000).to_i
      seconds = seconds.to_i

      SdlTimeSpan.new(days, hours, minutes, seconds, milliseconds)
    end

    private

    # Close the reader and throw a SdlParseError using the format
    # Was expecting X but got Y.
    #
    # This is delegated to the tokenizer.
    def expecting_but_got(expecting, got, line, position)
      @tokenizer.expecting_but_got(expecting, got, line, position)
    end
  end
end