lib/turmali/grammar.y in turmali-0.0.1 vs lib/turmali/grammar.y in turmali-0.0.2

- old
+ new

@@ -1,9 +1,7 @@ class Parser -# We need to tell the parser what tokens to expect. So each type of token produced -# by our lexer needs to be declared here. token IF token DEF token CLASS token NEWLINE token NUMBER @@ -11,13 +9,10 @@ token TRUE FALSE NIL token IDENTIFIER token CONSTANT token INDENT DEDENT -# Here is the Operator Precedence Table. As presented before, it tells the parser in -# which order to parse expressions containing operators. -# This table is based on the [C and C++ Operator Precedence Table](http://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B#Operator_precedence). prechigh left '.' right '!' left '*' '/' left '+' '-' @@ -27,53 +22,24 @@ left '||' right '=' left ',' preclow -# In the following `rule` section, we define the parsing rules. -# All rules are declared using the following format: -# -# RuleName: -# OtherRule TOKEN AnotherRule { result = Node.new } -# | OtherRule { ... } -# ; -# -# In the action section (inside the `{...}` on the right), you can do the following: -# -# * Assign to `result` the value returned by the rule, usually a node for the AST. -# * Use `val[index of expression]` to get the `result` of a matched -# expressions on the left. + rule - # First, parsers are dumb, we need to explicitly tell it how to handle empty - # programs. This is what the first rule does. Note that everything between `/* ... */` is - # a comment. Program: /* nothing */ { result = Nodes.new([]) } | Expressions { result = val[0] } ; - # Next, we define what a list of expressions is. Simply put, it's series of expressions separated by a - # terminator (a new line or `;` as defined later). But once again, we need to explicitly - # define how to handle trailing and orphans line breaks (the last two lines). - # - # One very powerful trick we'll use to define variable rules like this one - # (rules which can match any number of tokens) is *left-recursion*. Which means we reference - # the rule itself, directly or indirectly, on the left side **only**. This is true for the current - # type of parser we're using (LR). For other types of parsers like ANTLR (LL), it's the opposite, - # you can only use right-recursion. - # - # As you'll see bellow, the `Expressions` rule references `Expressions` itself. - # In other words, a list of expressions can be another list of expressions followed by - # another expression. Expressions: Expression { result = Nodes.new(val) } | Expressions Terminator Expression { result = val[0] << val[2] } | Expressions Terminator { result = val[0] } | Terminator { result = Nodes.new([]) } ; - # Every type of expression supported by our language is defined here. Expression: Literal | Call | Operator | GetConstant @@ -84,40 +50,23 @@ | Class | If | '(' Expression ')' { result = val[1] } ; - # Notice how we implement support for parentheses using the previous rule. - # `'(' Expression ')'` will force the parsing of `Expression` in its - # entirety first. Parentheses will then be discarded leaving only the fully parsed expression. - # - # Terminators are tokens that can terminate an expression. - # When using tokens to define rules, we simply reference them by their type which we defined in - # the lexer. Terminator: NEWLINE | ";" ; - - # Literals are the hard-coded values inside the program. If you want to add support - # for other literal types, such as arrays or hashes, this it where you'd do it. + Literal: NUMBER { result = NumberNode.new(val[0]) } | STRING { result = StringNode.new(val[0]) } | TRUE { result = TrueNode.new } | FALSE { result = FalseNode.new } | NIL { result = NilNode.new } ; - - # Method calls can take three forms: - # - # * Without a receiver (`self` is assumed): `method(arguments)`. - # * With a receiver: `receiver.method(arguments)`. - # * And a hint of syntactic sugar so that we can drop - # the `()` if no arguments are given: `receiver.method`. - # - # Each one of those is handled by the following rule. + Call: IDENTIFIER Arguments { result = CallNode.new(nil, val[0], val[1]) } | Expression "." IDENTIFIER Arguments { result = CallNode.new(val[0], val[2], val[3]) } | Expression "." IDENTIFIER { result = CallNode.new(val[0], val[2], []) } @@ -131,17 +80,10 @@ ArgList: Expression { result = val } | ArgList "," Expression { result = val[0] << val[2] } ; - - # In our language, like in Ruby, operators are converted to method calls. - # So `1 + 2` will be converted to `1.+(2)`. - # `1` is the receiver of the `+` method call, passing `2` - # as an argument. - # Operators need to be defined individually for the Operator Precedence Table to take - # action. Operator: Expression '||' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } | Expression '&&' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } | Expression '==' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } | Expression '!=' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } @@ -153,11 +95,10 @@ | Expression '-' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } | Expression '*' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } | Expression '/' Expression { result = CallNode.new(val[0], val[1], [val[2]]) } ; - # Then we have rules for getting and setting values of constants and local variables. GetConstant: CONSTANT { result = GetConstantNode.new(val[0]) } ; SetConstant: @@ -170,24 +111,14 @@ SetLocal: IDENTIFIER "=" Expression { result = SetLocalNode.new(val[0], val[2]) } ; - # Our language uses indentation to separate blocks of code. But the lexer took care of all - # that complexity for us and wrapped all blocks in `INDENT ... DEDENT`. A block - # is simply an increment in indentation followed by some code and closing with an equivalent - # decrement in indentation. - # - # If you'd like to use curly brackets or `end` to delimit blocks instead, you'd - # simply need to modify this one rule. - # You'll also need to remove the indentation logic from the lexer. Block: INDENT Expressions DEDENT { result = val[1] } ; - - # The `def` keyword is used for defining methods. Once again, we're introducing - # a bit of syntactic sugar here to allow skipping the parentheses when there are no parameters. + Def: DEF IDENTIFIER Block { result = DefNode.new(val[1], [], val[2]) } | DEF IDENTIFIER "(" ParamList ")" Block { result = DefNode.new(val[1], val[3], val[5]) } ; @@ -195,27 +126,22 @@ ParamList: /* nothing */ { result = [] } | IDENTIFIER { result = val } | ParamList "," IDENTIFIER { result = val[0] << val[2] } ; - - # Class definition is similar to method definition. - # Class names are also constants because they start with a capital letter. + Class: CLASS CONSTANT Block { result = ClassNode.new(val[1], val[2]) } ; - - # Finally, `if` is similar to `class` but receives a *condition*. + If: IF Expression Block { result = IfNode.new(val[1], val[2]) } ; end -# The final code at the bottom of this Racc file will be put as-is in the generated `Parser` class. -# You can put some code at the top (`header`) and some inside the class (`inner`). ---- header - require "lexer" - require "nodes" + require "turmali/lexer" + require "turmali/nodes" ---- inner def parse(code, show_tokens=false) @tokens = Lexer.new.tokenize(code) # Tokenize the code using our lexer puts @tokens.inspect if show_tokens \ No newline at end of file