# Simple Declarative Language (SDL) for Ruby
# Copyright 2005 Ikayzo, inc.
#
# This program is free software. You can distribute or modify it under the
# terms of the GNU Lesser General Public License version 2.1 as published by
# the Free Software Foundation.
#
# This program is distributed AS IS and WITHOUT WARRANTY. OF ANY KIND,
# INCLUDING MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, contact the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

module SDL4R

  require File.dirname(__FILE__) + '/reader'
  require File.dirname(__FILE__) + '/token'

  class Parser

    # Tokenizer of the SDL parser.
    #
    # Reads logical lines from a Parser::Reader and turns each of them into an
    # array of Token objects (see #read_line_tokens).
    class Tokenizer

      TOKEN_TYPES = [
        :IDENTIFIER,

        # punctuation
        :COLON, :SEMICOLON, :EQUALS, :START_BLOCK, :END_BLOCK,

        # literals
        :STRING, :CHARACTER, :BOOLEAN, :NUMBER, :DATE, :TIME, :BINARY, :NULL
      ].freeze

      # Creates an SDL tokenizer on the specified +IO+.
      #
      # Raises an ArgumentError if +io+ is nil.
      def initialize(io)
        raise ArgumentError, "io == nil" if io.nil?

        @reader = Parser::Reader.new(io)
        @token_start = 0
        @startEscapedQuoteLine = false
        @tokens = nil
        @tokenText = nil
      end

      # Closes this Tokenizer and its underlying +Reader+.
      def close
        @reader.close
      end

      # Closes the reader and raises a SdlParseError.
      #
      # +description+:: the error message
      # +line_no+:: 0-based line number (defaults to the reader's current line)
      # +position+:: 0-based position in the line (defaults to the reader's
      #              current position)
      def parse_error(description, line_no = nil, position = nil)
        begin
          @reader.close()
        rescue IOError
          # no recourse
        end

        line_no = @reader.line_no if line_no.nil?
        position = @reader.pos if position.nil?

        # We add one because editors typically start with line 1 and position 1
        # rather than 0...
        raise SdlParseError.new(description, line_no + 1, position + 1, @reader.line)
      end

      # Closes the reader and raises a SdlParseError using the format
      # "Was expecting X but got Y".
      #
      # +line+ and +position+ are optional: when omitted (nil), #parse_error
      # falls back to the reader's current location.
      def expecting_but_got(expecting, got, line = nil, position = nil)
        parse_error("Was expecting #{expecting} but got #{got}", line, position)
      end

      # Returns the next line as tokens or nil if the end of the stream has been
      # reached. Empty logical lines are skipped.
      # This method handles line continuations both within and outside String
      # literals. The line of tokens is assigned to @tokens.
      #
      # Returns a logical line as a list of Tokens.
      def read_line_tokens
        loop do
          read_line_tokens_even_if_empty()
          break if @tokens.nil? || !@tokens.empty?
        end
        @tokens
      end

      # Current line number, as reported by the underlying reader.
      def line_no
        @reader.line_no
      end

      # Current position in the line, as reported by the underlying reader.
      def pos
        @reader.pos
      end

      # Current line, as reported by the underlying reader.
      def line
        @reader.line
      end

      private

      # Appends the pending token text (if any) to @tokens as a Token starting
      # at @token_start, then clears the pending text.
      def flush_pending_token
        return unless @tokenText

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

      # Returns the next line as tokens or nil if the end of the stream has been
      # reached.
      # This method handles line continuations both within and outside String
      # literals. The line of tokens is assigned to @tokens.
      #
      # Returns a logical line as a list of Tokens.
      # Returns an empty array if the line was empty.
      def read_line_tokens_even_if_empty
        @tokens = nil
        @tokenText = nil
        @token_start = nil

        @reader.read_line() if @reader.end_of_line?
        return @tokens unless @reader.line # no line available: @tokens is still nil

        @tokens = []
        @token_start = @reader.pos

        until @reader.end_of_line?
          flush_pending_token

          c = @reader.current_char
          next_c = @reader.get_line_char(@reader.pos + 1)

          case c
          when "\""
            # handle "" style strings including line continuations
            handle_double_quote_string()

          when "'"
            handle_character_literal()

          when "{", "}", "=", ":", ";"
            # handle punctuation
            punctuation_token = Token.new(c, @reader.line_no, @reader.pos)
            @tokenText = nil

            if punctuation_token.type == :SEMICOLON
              # a semicolon ends the logical line: it is consumed but not
              # emitted as a token
              @reader.skip_char()
              break
            else
              @tokens << punctuation_token
            end

          when "#"
            # skip : hash comment at end of line
            @reader.skip_line()

          when "/"
            # handle // and /**/ style comments
            if next_c == "/"
              # skip : // comment
              @reader.skip_line()
            else
              handle_slash_comment()
            end

          when "`"
            # handle multiline `` style strings
            handle_back_quote_string()

          when "["
            # handle binary literals
            handle_binary_literal()

          when "\s", "\t"
            @reader.skip_whitespaces()

          when "\\"
            # line continuations (outside a string literal)
            handle_line_continuation()

          when /^[0-9\-\.]$/
            if c == "-" && next_c == "-"
              # -- comments : ignore
              @reader.skip_line()
            else
              # handle numbers, dates, and time spans
              handle_number_date_or_time_span()
            end

          when /^[a-zA-Z\$_]$/
            # FIXME Here, the Java code specifies isJavaIdentifierStart() but
            # this is not easily implemented (at least as of Ruby 1.8).
            # So, we implement a subset of these characters.
            handle_identifier()

          when "\n", "\r"
            # end of line
            @reader.skip_line()

          else
            parse_error("Unexpected character '#{c}'")
          end

          @reader.skip_char()
        end

        flush_pending_token

        @tokens
      end

      # Adds the current escaped character (represented by ((|c|))) to @tokenText.
      # This method assumes the previous char was a backslash.
      #
      # Raises a SdlParseError (through #parse_error) if +c+ is not one of the
      # supported escapes: \\, \", \n, \r, \t.
      def add_escaped_char_in_string(c)
        case c
        when "\\", "\""
          @tokenText << c
        when "n"
          @tokenText << "\n"
        when "r"
          @tokenText << "\r"
        when "t"
          @tokenText << "\t"
        else
          parse_error("Illegal escape character in string literal: '#{c}'.")
        end
      end

      # Reads a double-quoted string literal starting at the current character
      # (the opening quote) and appends the resulting Token to @tokens.
      #
      # Handles backslash escapes and literals broken across lines with a
      # trailing backslash (leading whitespace of the continued line is
      # ignored).
      #
      # Raises a SdlParseError (through #parse_error) if the literal is not
      # terminated.
      def handle_double_quote_string
        escaped = false
        @startEscapedQuoteLine = false

        @tokenText = "\""
        @reader.skip_char()

        until @reader.end_of_line?
          c = @reader.current_char

          if "\s\t".include?(c) && @startEscapedQuoteLine
            # leading whitespace of a continued line: we continue
          else
            @startEscapedQuoteLine = false

            if escaped
              add_escaped_char_in_string(c)
              escaped = false

            elsif c == "\\"
              # check for String broken across lines
              if @reader.rest_of_line =~ /^\\\s*$/
                handle_escaped_double_quoted_string()
                next # as we are at the beginning of a new line
              else
                escaped = true
              end

            else
              @tokenText << c
              if c == "\""
                # end of double-quoted string detected
                @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
                @tokenText = nil
                return
              end
            end
          end

          @reader.skip_char()
        end

        # A properly terminated literal returns from inside the loop, so getting
        # here always means the line ended before the closing quote. We do not
        # inspect the last character of @tokenText to decide: it may be an
        # escaped quote (e.g. "abc\") that does not terminate the literal.
        if @tokenText == "\""
          parse_error("Orphan quote (unterminated string)",
                      @reader.line_no, @reader.line_length)
        else
          parse_error(
            "String literal \"#{@tokenText}\" not terminated by end quote.",
            @reader.line_no, @reader.line_length)
        end
      end

      # Handles a backslash ending a line inside a double-quoted literal: reads
      # the next line and flags that we are at the start of a continued line.
      #
      # Raises a SdlParseError (through #parse_error) if the backslash is
      # followed by anything other than whitespace, or if the end of file is
      # reached.
      def handle_escaped_double_quoted_string
        # '\' can be followed by whitespaces
        unless @reader.rest_of_line =~ /^\\\s*$/
          parse_error(
            "Malformed string literal - escape followed by whitespace " +
            "followed by non-whitespace.")
        end

        @reader.read_line()
        parse_error("Escape at end of file.") if @reader.end_of_file?

        @startEscapedQuoteLine = true
      end

      # Reads a character literal ('x' or an escaped form such as '\n') starting
      # at the current character (the opening quote) and appends the resulting
      # Token to @tokens.
      #
      # Raises a SdlParseError (through #parse_error) if the literal is
      # truncated, uses an unknown escape or lacks its closing quote.
      def handle_character_literal
        parse_error("Got ' at end of line") unless @reader.more_chars_in_line?

        c2 = @reader.read_char()

        if c2 == "\\"
          parse_error("Got '\\ at end of line") if @reader.end_of_line?

          c3 = @reader.read_char()

          unless @reader.more_chars_in_line?
            parse_error("Got '\\#{c3} at end of line")
          end

          case c3
          when "\\"
            @tokens << Token.new("'\\'", @reader.line_no, @reader.pos)
          when "'"
            @tokens << Token.new("'''", @reader.line_no, @reader.pos)
          when "n"
            @tokens << Token.new("'\n'", @reader.line_no, @reader.pos)
          when "r"
            @tokens << Token.new("'\r'", @reader.line_no, @reader.pos)
          when "t"
            @tokens << Token.new("'\t'", @reader.line_no, @reader.pos)
          else
            parse_error("Illegal escape character #{@reader.current_char}")
          end

          @reader.skip_char()

          if @reader.current_char != "'"
            expecting_but_got(
              "single quote (')",
              "\"#{@reader.current_char}\"",
              @reader.line_no,
              @reader.pos)
          end

        else
          @tokens << Token.new("'#{c2}'", @reader.line_no, @reader.pos)

          parse_error("Got '#{c2} at end of line") unless @reader.more_chars_in_line?

          @reader.skip_char()

          if @reader.current_char != "'"
            expecting_but_got(
              "quote (')",
              "\"#{@reader.current_char}\"",
              @reader.line_no,
              @reader.pos)
          end
        end
      end

      # Skips a /* ... */ comment starting at the current character, reading
      # further raw lines if the comment spans several lines.
      #
      # Raises a SdlParseError (through #parse_error) if the slash is the last
      # character of the line or if the comment is never closed.
      def handle_slash_comment
        unless @reader.more_chars_in_line?
          parse_error("Got slash (/) at end of line.")
        end

        if @reader.get_line_char(@reader.pos + 1) == "*"
          end_index = @reader.find_next_in_line("*/")

          if end_index
            # handle comment on same line
            @reader.skip_to(end_index + 1)
          else
            # handle multiline comments
            loop do
              @reader.read_raw_line()
              if @reader.end_of_file?
                parse_error("/* comment not terminated.", @reader.line_no, -2)
              end

              end_index = @reader.find_next_in_line("*/", 0)
              if end_index
                @reader.skip_to(end_index + 1)
                break
              end
            end
          end

        elsif @reader.get_line_char(@reader.pos + 1) == "/"
          # NOTE(review): the only caller visible in this class already handles
          # "//" itself, so this branch looks unreachable from there; a slash
          # followed by any other character is silently skipped. Kept as is.
          parse_error("Got slash (/) in unexpected location.")
        end
      end

      # Reads a back-quoted (`...`) string literal starting at the current
      # character, reading further raw lines if the literal spans several
      # lines, and appends the resulting Token to @tokens.
      #
      # Raises a SdlParseError (through #parse_error) if the literal is never
      # closed.
      def handle_back_quote_string
        end_index = @reader.find_next_in_line("`")

        if end_index
          # handle end quote on same line
          @tokens << Token.new(@reader.substring(@reader.pos, end_index),
                               @reader.line_no, @reader.pos)
          @tokenText = nil
          @reader.skip_to(end_index)

        else
          @tokenText = @reader.rest_of_line
          @token_start = @reader.pos

          # handle multiline quotes
          loop do
            @reader.read_raw_line()
            if @reader.end_of_file?
              parse_error("` quote not terminated.", @reader.line_no, -2)
            end

            end_index = @reader.find_next_in_line("`", 0)
            if end_index
              @tokenText << @reader.substring(0, end_index)
              @reader.skip_to(end_index)
              break
            else
              @tokenText << @reader.line
            end
          end

          @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
          @tokenText = nil
        end
      end

      # Reads a binary literal ([...]) starting at the current character,
      # reading further raw lines if the literal spans several lines, and
      # appends the resulting Token to @tokens.
      #
      # Raises a SdlParseError (through #parse_error) if the literal is never
      # closed.
      def handle_binary_literal
        end_index = @reader.find_next_in_line("]")

        if end_index
          # handle end bracket on same line
          @tokens << Token.new(@reader.substring(@reader.pos, end_index),
                               @reader.line_no, @reader.pos)
          @tokenText = nil
          @reader.skip_to(end_index)

        else
          @tokenText = @reader.substring(@reader.pos)
          @token_start = @reader.pos

          # handle multiline literals
          loop do
            @reader.read_raw_line()
            if @reader.end_of_file?
              parse_error("[base64] binary literal not terminated.",
                          @reader.line_no, -2)
            end

            end_index = @reader.find_next_in_line("]", 0)
            if end_index
              @tokenText << @reader.substring(0, end_index)
              @reader.skip_to(end_index)
              break
            else
              @tokenText << @reader.line
            end
          end

          @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
          @tokenText = nil
        end
      end

      # Handles a line continuation (not inside a string).
      #
      # Raises a SdlParseError (through #parse_error) if the backslash is not
      # the last non-whitespace character of the line, or if there is no next
      # line to continue on.
      def handle_line_continuation
        # backslash line continuation outside of a String literal
        # can only occur at the end of a line
        unless @reader.rest_of_line =~ /^\\\s*$/
          parse_error("Line continuation (\\) before end of line")
        end

        next_line = @reader.read_line()
        if next_line.nil?
          parse_error("Line continuation at end of file.",
                      @reader.line_no, @reader.pos)
        end
      end

      # Reads a number, date or time span starting at the current character and
      # appends it to @tokens as a single Token. Which of these it actually is
      # gets decided elsewhere (this method only gathers the characters).
      #
      # Leaves the reader on the last character of the token.
      def handle_number_date_or_time_span
        @token_start = @reader.pos
        @tokenText = ""

        until @reader.end_of_line?
          c = @reader.current_char

          if c =~ /[\w\.\-+:]/
            @tokenText << c
          elsif c == "/" && @reader.get_line_char(@reader.pos + 1) != "*"
            # a slash is part of the token (e.g. in dates) unless it opens a
            # /* comment
            @tokenText << c
          else
            @reader.previous_char()
            break
          end

          @reader.skip_char()
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end

      # Reads an identifier starting at the current character and appends it to
      # @tokens as a single Token.
      #
      # Leaves the reader on the last character of the identifier.
      def handle_identifier
        @token_start = @reader.pos
        @tokenText = ""

        until @reader.end_of_line?
          c = @reader.current_char

          # FIXME here we are stricter than the Java version because there is no
          # easy way to implement Character.isJavaIdentifierPart() in Ruby :)
          #
          # Accepted: word characters, '$', '.' and '-'. Each one is listed
          # explicitly: an unescaped "$-." inside a character class is a *range*
          # that would also let through % & ' ( ) * + and ,
          if c =~ /[\w\$\.\-]/
            @tokenText << c
          else
            @reader.previous_char()
            break
          end

          @reader.skip_char()
        end

        @tokens << Token.new(@tokenText, @reader.line_no, @token_start)
        @tokenText = nil
      end
    end
  end
end