# encoding: utf-8

require "strscan"
require "antelope/ace/scanner/argument"
require "antelope/ace/scanner/first"
require "antelope/ace/scanner/second"
require "antelope/ace/scanner/third"

module Antelope
  module Ace

    # Scans a given input.  The input should be a properly formatted
    # ACE file; see the Ace module for more information.  This scanner
    # uses the StringScanner class internally; see the ruby
    # documentation for more on that.  This scanner separates scanning
    # into three separate stages: First, Second, and Third, for each
    # section of the file, respectively.
    #
    # @see Ace
    # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
    class Scanner

      # Pattern source for an identifier.  Kept as a string (not a
      # Regexp) and frozen so that it cannot be mutated by accident.
      #
      # @return [String]
      IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*".freeze

      include First
      include Second
      include Third

      # The string scanner that we're using to scan the string with.
      #
      # @return [StringScanner]
      attr_reader :scanner

      # An array of the tokens that the scanner scanned.  It starts
      # out empty; presumably the scan_*_part methods provided by the
      # included modules append to it.
      #
      # @return [Array]
      attr_reader :tokens

      # The boundary between each section.  Placed here to be easily
      # modifiable.  **MUST** be a regular expression.
      #
      # @return [Regexp]
      CONTENT_BOUNDRY = /%%/

      # The value regular expression.  It should match values; for
      # example, things quoted in strings or word letters without
      # quotes.  Must respond to #to_s, since it is embedded within
      # other regular expressions.  Group 1 captures the opening
      # quote; the contents of the value are placed in group 2
      # (quoted) or group 3 (unquoted).
      #
      # @return [#to_s]
      VALUE = %q{(?: (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1) | ([A-Za-z0-9_.<>*-]+) )}.freeze

      # Scans a file.  It returns the tokens resulting from scanning.
      #
      # @param source [String] the source to scan.  This should be
      #   compatible with StringScanner.
      # @param name [String] the name of the source file.  This is
      #   primarily used in backtrace information.
      # @return [Array] the scanned tokens.
      # @see #tokens
      def self.scan(source, name = "(ace file)")
        new(source, name).scan_file
      end

      # Initialize the scanner with the input.
      #
      # @param input [String] The source to scan.
      # @param source [String] the source file.  This is primarily
      #   used in backtrace information.
      def initialize(input, source = "(ace file)")
        @source  = source
        @scanner = StringScanner.new(input)
        @tokens  = []
      end

      # Scans the file in parts.  If a SyntaxError is raised while
      # scanning, it is re-raised with an extra leading backtrace
      # entry of the form "<source>:<line>: unexpected <char> (near
      # <snippet>)" describing where in the input scanning stopped.
      #
      # @raise [SyntaxError] if the source is malformed in some way.
      # @return [Array] the tokens that were scanned in this file.
      # @see #scan_first_part
      # @see #scan_second_part
      # @see #scan_third_part
      # @see #tokens
      def scan_file
        @line = 1
        scan_first_part
        scan_second_part
        scan_third_part
        tokens
      rescue SyntaxError => e
        input = @scanner.string
        # String#[] indexes by character, whereas StringScanner#pos is
        # a byte offset; use the character position so that multibyte
        # input does not skew the reported character and snippet.
        pos   = @scanner.charpos
        start = [pos - 8, 0].max
        stop  = [pos + 8, input.length].min
        snip  = input[start..stop].strip.inspect
        char  = input[pos]
        char  = char ? char.inspect : "EOF"

        new_line = "#{@source}:#{@line}: unexpected #{char} " \
                   "(near #{snip})"

        raise e, e.message, [new_line, *e.backtrace]
      end

      # Scans for whitespace.  If the next character is whitespace, it
      # will consume all whitespace until the next non-whitespace
      # character, advancing the line counter by the number of
      # newlines consumed.
      #
      # @return [Integer, nil] the updated line number (truthy) if any
      #   whitespace was matched, nil otherwise.
      def scan_whitespace
        if @scanner.scan(/(\s+)/)
          @line += @scanner[1].count("\n")
        end
      end

      # Scans for a comment.  If the next token is a number sign (#),
      # it will consume all characters up to and including the next
      # newline.  A comment that runs to the end of the input without
      # a trailing newline is consumed as well.
      #
      # @return [Integer, nil] the updated line number (truthy) if a
      #   comment was matched, nil otherwise.
      def scan_comment
        if @scanner.scan(/\#.*\n?/)
          # Only a comment terminated by a newline advances the line.
          @line += @scanner.matched.count("\n")
        end
      end

      private

      # Raises an error.
      #
      # @raise [SyntaxError] always.
      # @return [void]
      def error!
        raise SyntaxError, "invalid syntax"
      end
    end
  end
end