lib/antelope/ace/scanner.rb in antelope-0.3.2 vs lib/antelope/ace/scanner.rb in antelope-0.4.0

- old
+ new

# encoding: utf-8

require "strscan"
require "antelope/ace/scanner/argument"
require "antelope/ace/scanner/first"
require "antelope/ace/scanner/second"
require "antelope/ace/scanner/third"

module Antelope
  module Ace

    # Scans a given input.  The input should be a properly formatted
    # ACE file; see the Ace module for more information.  This scanner
    # uses the StringScanner class internally; see the ruby
    # documentation for more on that.  This scanner separates scanning
    # into three separate stages: First, Second, and Third, one for
    # each section of the file, respectively.
    #
    # @see Ace
    # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
    class Scanner

      # Regular expression source for an identifier.  Kept as a String
      # (presumably so that it can be embedded within other regular
      # expressions, like {VALUE} -- confirm against the stage
      # modules).
      #
      # @return [String]
      IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"

      include First
      include Second
      include Third

      # The string scanner that we're using to scan the string with.
      #
      # @return [StringScanner]
      attr_reader :scanner

      # An array of the tokens that the scanner scanned.
      #
      # @return [Array<Array<(Symbol, Object, ...)>>]
      attr_reader :tokens

      # The boundary between each section.  Placed here to be easily
      # modifiable.  **MUST** be a regular expression.
      #
      # @return [Regexp]
      CONTENT_BOUNDRY = /%%/

      # The value regular expression.  It should match values; for
      # example, things quoted in strings or word letters without
      # quotes.  Must respond to #to_s, since it is embedded within
      # other regular expressions.  The regular expression should
      # place the contents of the value in the groups 2 or 3.
      #
      # @return [#to_s]
      VALUE = %q{(?:
                  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
                  | ([A-Za-z0-9_.<>*-]+)
                )}

      # Scans a file.  It returns the tokens resulting from scanning.
      #
      # @param source [String] the source to scan.  This should be
      #   compatible with StringScanner.
      # @param name [String] the name of the source file.  This is
      #   primarily used in backtrace information.
      # @raise [SyntaxError] if the source is malformed in some way.
      # @return [Array<Array<(Symbol, Object, ...)>>]
      # @see #tokens
      def self.scan(source, name = "(ace file)")
        new(source, name).scan_file
      end

      # Initialize the scanner with the input.
      #
      # @param input [String] The source to scan.
      # @param source [String] the source file.  This is primarily
      #   used in backtrace information.
      def initialize(input, source = "(ace file)")
        @source  = source
        @scanner = StringScanner.new(input)
        @tokens  = []
        # Start the line counter here as well as in #scan_file, so the
        # public #scan_whitespace and #scan_comment helpers can be
        # called on a fresh scanner without hitting `nil + 1`.
        @line    = 1
      end

      # Scans the file in parts.
      #
      # If a SyntaxError is raised while scanning, it is re-raised
      # with an extra backtrace entry prepended that names the source,
      # the current line, the offending character (or "EOF") and a
      # short snippet of the surrounding input.
      #
      # @raise [SyntaxError] if the source is malformed in some way.
      # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
      #   were scanned in this file.
      # @see #scan_first_part
      # @see #scan_second_part
      # @see #scan_third_part
      # @see #tokens
      def scan_file
        @line = 1
        scan_first_part
        scan_second_part
        scan_third_part
        tokens
      rescue SyntaxError => e
        # Work with the *character* position, not the byte position:
        # String#[] and String#length below are character based, so a
        # byte offset would point at the wrong place as soon as the
        # input contains multibyte characters.
        input = @scanner.string
        pos   = @scanner.charpos
        start = [pos - 8, 0].max
        stop  = [pos + 8, input.length].min
        snip  = input[start..stop].strip.inspect
        char  = input[pos]
        char  = char ? char.inspect : "EOF"

        new_line = "#{@source}:#{@line}: unexpected #{char} " \
          "(near #{snip})"

        raise e, e.message, [new_line, *e.backtrace]
      end

      # Scans for whitespace.  If the next character is whitespace, it
      # will consume all whitespace until the next non-whitespace
      # character, counting the newlines that were consumed.
      #
      # @return [Integer, nil] the (truthy) current line number if any
      #   whitespace was matched, `nil` otherwise.
      def scan_whitespace
        if @scanner.scan(/(\s+)/)
          @line += @scanner[1].count("\n")
        end
      end

      # Scans for a comment.  If the next token is a number sign (#),
      # it will consume all characters up to and including the next
      # newline, or up to the end of the input if the comment is on
      # the last line and that line has no trailing newline.
      #
      # @return [Integer, nil] the (truthy) current line number if a
      #   comment was matched, `nil` otherwise.
      def scan_comment
        return unless @scanner.scan(/\#(.*)(\n|\z)/)

        # Group 2 is empty when the comment was ended by the end of
        # the input instead of a newline; no line was finished then.
        @line += 1 unless @scanner[2].empty?
        @line
      end

      private

      # Raises an error.
      #
      # @raise [SyntaxError] always.
      # @return [void]
      def error!
        raise SyntaxError, "invalid syntax"
      end
    end
  end
end