lib/rouge/reader.rb in rouge-lang-0.0.7 vs lib/rouge/reader.rb in rouge-lang-0.0.8

- old
+ new

@@ -1,11 +1,13 @@ # encoding: utf-8 require 'rouge/wrappers' class Rouge::Reader class UnexpectedCharacterError < StandardError; end + class NumberFormatError < StandardError; end class EndOfDataError < StandardError; end + class EOFError < StandardError; end attr_accessor :ns @@gensym_counter = 0 @@ -15,58 +17,183 @@ @n = 0 @gensyms = [] end def lex - r = - case peek - when MAYBE_NUMBER - number - when /:/ - keyword - when /"/ - string - when /\(/ - Rouge::Seq::Cons[*list(')')] - when /\[/ - list ']' - when /#/ - dispatch - when SYMBOL - # SYMBOL after \[ and #, because it includes both - symbol_or_number - when /{/ - map - when /'/ - quotation - when /`/ - syntaxquotation - when /~/ - dequotation - when /\^/ - metadata - when /@/ - deref - when nil - reader_raise EndOfDataError, "in #lex" + case peek + when MAYBE_NUMBER + number + when /:/ + keyword + when /"/ + string + when /\(/ + Rouge::Seq::Cons[*list(')')] + when /\[/ + list ']' + when /#/ + dispatch + when SYMBOL + # SYMBOL after \[ and #, because it includes both + symbol_or_number + when /{/ + map + when /'/ + quotation + when /`/ + syntaxquotation + when /~/ + dequotation + when /\^/ + metadata + when /@/ + deref + when nil + reader_raise EOFError, "in #lex" + else + reader_raise UnexpectedCharacterError, "#{peek.inspect} in #lex" + end + end + + private + + # Loose expression for a possible numeric literal. + MAYBE_NUMBER = /^[+\-]?\d[\da-fA-FxX._+\-\/]*/ + + # Ruby integer. + INT = /\d+(?:_\d+)*/ + + # Strict expression for a numeric literal. + NUMBER = / + [+\-]? + (?: + (?:#{INT}(?:(?:\.#{INT})?(?:[eE][+\-]?#{INT})?)?) (?# Integers and floats) + | (?:0 + (?: + (?:[xX][\da-fA-F]+) (?# Hexadecimal integer) + | (?:[bB][01]+) (?# Binary integer) + | (?:[0-7]+) (?# Octal integer) + )? + ) + ) + /ox + + RATIONAL = /#{NUMBER}\/#{NUMBER}/o + + SYMBOL = / + ^(\.\[\]) + |(\.?[-+]@) + |([a-zA-Z0-9\-_!&\?\*\/\.\+\|=%$<>#]+) + /x + + # Advances the current character position by n characters and returns the + # updated character position. + def advance! n = 1 + @n += n.abs + end + + # Retracts the current character position by n characters and returns the + # updated character position. + def retract! n = 1 + pos = @n - n.abs + + if pos > 0 + @n = pos + else + @n = 0 + end + end + + # Returns the character currently beneath the cursor. + def current_char + @src[@n] + end + + # Returns the string of characters matching the regular expression re relative + # to the cursor position. The cursor position is then advanced by n characters + # where n is the length of the returned string. + def slurp re + re.match(@src[@n..-1]) + + if $& + advance!($&.length) + $& + else + reader_raise UnexpectedCharacterError, "#{current_char} in #slurp #{re}" + end + end + + # Advances the cursor position beyond whitespace and comments and returns the + # resulting character. + def peek + while /[\s,;]/.match(current_char) + if $& == ";" + while /[^\n]/.match(current_char) + advance! + end else - reader_raise UnexpectedCharacterError, "#{peek.inspect} in #lex" + advance! end + end - r + current_char end - private + # Returns the result of peek and advances the cursor position by character. + def consume + c = peek + advance! + c + end - def number - read_number(slurp(MAYBE_NUMBER)) + # Raises the exception ex with the message msg including line information + # where the error occured. If the optional string cause is given, a more + # detailed report will be displayed. + def reader_raise ex, msg, cause = nil + # Locate the beginning of the line. + n = @n + until n == 0 || @src[n - 1] == "\n" + n -= 1 + end + + lines = @src[n..-1].lines.to_a + line = lines.first + line_no = (@src.lines.to_a.index(line) || 0) + 1 + + if cause + error_position = line.index(cause) + indicator = (" " * error_position) << ("^" * cause.length) + info = "on line #{line_no} at char #{error_position}" + parts = [msg, line.chomp, indicator, info] + else + info = "on line #{line_no}" + parts = [msg, info] + end + + raise ex, parts.join("\n") end + def number s = slurp(MAYBE_NUMBER) + if /\A#{NUMBER}\z/o.match(s) + # Match decimal numbers but not hexadecimal numbers. + if /[.eE]/.match(s) && /[^xX]/.match(s) + Float(s) + else + Integer(s) + end + elsif /\A#{RATIONAL}\z/o.match(s) + numerator, denominator = s.split("/").map {|s| number(s) } + Rational(numerator, denominator) + else + reader_raise NumberFormatError, "Invalid number #{s}", s + end + end + def keyword begin slurp(/:"/) - @n -= 1 + retract! s = string s.intern rescue UnexpectedCharacterError slurp(/^:[a-zA-Z0-9\-_!\?\*\/]+/)[1..-1].intern end @@ -74,110 +201,116 @@ def string s = "" t = consume while true - c = @src[@n] + c = current_char if c.nil? - reader_raise EndOfDataError, "in string, got: #{s}" + reader_raise EndOfDataError, "in string, got: \"#{s}" end - @n += 1 + advance! if c == t break end if c == ?\\ c = consume case c when nil - reader_raise EndOfDataError, "in escaped string, got: #{s}" + reader_raise EndOfDataError, "in escaped string, got: \"#{s}" when /[abefnrstv]/ - c = {?a => ?\a, - ?b => ?\b, - ?e => ?\e, - ?f => ?\f, - ?n => ?\n, - ?r => ?\r, - ?s => ?\s, - ?t => ?\t, - ?v => ?\v}[c] + c = { + ?a => ?\a, + ?b => ?\b, + ?e => ?\e, + ?f => ?\f, + ?n => ?\n, + ?r => ?\r, + ?s => ?\s, + ?t => ?\t, + ?v => ?\v + }[c] else # Just leave it be. end end - s += c + s << c end s.freeze end def list(ending) consume r = [] - while true - if peek == ending - break - end + until peek == ending r << lex end consume r.freeze + rescue EOFError + reader_raise EndOfDataError, "in #list" end def symbol_or_number s = slurp(SYMBOL) - if (s[0] == ?- or s[0] == ?+) and s[1..-1] =~ NUMBER - read_number(s) + + if MAYBE_NUMBER.match(s) + number(s) else Rouge::Symbol[s.intern] end end def map consume r = {} - while true - if peek == '}' - break - end - k = lex - v = lex + until peek == '}' + k, v = lex, lex r[k] = v end consume r.freeze + rescue EOFError + reader_raise EndOfDataError, "in #map" end def quotation consume Rouge::Seq::Cons[Rouge::Symbol[:quote], lex] + rescue EOFError + reader_raise EndOfDataError, "in #quotation" end def syntaxquotation consume @gensyms.unshift(@@gensym_counter += 1) r = dequote(lex) @gensyms.shift r + rescue EOFError + reader_raise EndOfDataError, "in #syntaxquotation" end def dequotation consume if peek == ?@ consume Rouge::Splice[lex].freeze else Rouge::Dequote[lex].freeze end + rescue EOFError + reader_raise EndOfDataError, "in #dequotation" end def dequote form case form when Rouge::Seq::ISeq, Array @@ -248,17 +381,17 @@ def regexp expression = "" terminator = '"' while true - char = @src[@n] + char = current_char if char.nil? reader_raise EndOfDataError, "in regexp, got: #{expression}" end - @n += 1 + advance! if char == terminator break end @@ -279,16 +412,17 @@ def set s = Set.new until peek == '}' - el = lex - s.add el + s.add(lex) end consume s.freeze + rescue EOFError + reader_raise EndOfDataError, "in #set" end def dispatch consume case peek @@ -312,10 +446,12 @@ consume regexp else reader_raise UnexpectedCharacterError, "#{peek.inspect} in #dispatch" end + rescue EOFError + reader_raise EndOfDataError, "in #dispatch" end def dispatch_rewrite_fn form, count case form when Rouge::Seq::Cons, Array @@ -368,87 +504,18 @@ else attach.meta = extant.merge(meta) end attach + rescue EOFError + reader_raise EndOfDataError, "in #meta" end def deref consume Rouge::Seq::Cons[Rouge::Symbol[:"rouge.core/deref"], lex] + rescue EOFError + reader_raise EndOfDataError, "in #deref" end - - def slurp re - @src[@n..-1] =~ re - reader_raise UnexpectedCharacterError, "#{@src[@n]} in #slurp #{re}" if !$& - @n += $&.length - $& - end - - def peek - while @src[@n] =~ /[\s,;]/ - if $& == ";" - while @src[@n] =~ /[^\n]/ - @n += 1 - end - else - @n += 1 - end - end - - @src[@n] - end - - def consume - c = peek - @n += 1 - c - end - - def reader_raise ex, m - around = - "#{@src[[@n - 3, 0].max...[@n, 0].max]}" + - "#{@src[@n]}" + - "#{(@src[@n + 1..@n + 3] || "").gsub(/\n.*$/, '')}" - - line = @src[0...@n].count("\n") + 1 - char = @src[0...@n].reverse.index("\n") || 0 + 1 - - raise ex, - "around: #{around}\n" + - " ^\n" + - "line #{line} char #{char}: #{m}" - end - - def read_number s - if NUMBER.match s - if s =~ /[.eE]/ - Float(s) - else - Integer(s) - end - else - reader_raise UnexpectedCharacterError, "#{s} in #read_number" - end - end - - # Loose expression for a possible numeric literal. - MAYBE_NUMBER = /^[+-]?\d[\da-fA-FxX\._+-]*/ - - # Ruby integer. - INT = /\d+(?:_\d+)*/ - - # Strict expression for a numeric literal. - NUMBER = / - ^[+-]? - (?: - (?:0[xX][\da-fA-F]+) (?# Hexadecimal integer) - | (?:0[bB][01]+) (?# Binary integer) - | (?:0\d+) (?# Octal integer) - | (?:#{INT}(?:(?:\.#{INT})?(?:[eE][+-]?#{INT})?)?) (?# Integers and floats) - )\z - /ox - - SYMBOL = /^(\.\[\])|(\.?[-+]@)|([a-zA-Z0-9\-_!&\?\*\/\.\+\|=%$<>#]+)/ end # vim: set sw=2 et cc=80: