lib/rus3/parser/scheme_parser.rb in rus3-0.1.2 vs lib/rus3/parser/scheme_parser.rb in rus3-0.2.0

- old
+ new

@@ -1,368 +1,657 @@ # frozen_string_literal: true -module Rus3::Parser +module Rus3 - # A simple parser to read a s-expression for Scheme. - class SchemeParser < Parser + module Parser - # Indicates the version of the parser class - PARSER_VERSION = "0.1.1" + # A simple parser to read a s-expression for Scheme. + class SchemeParser < Parser - # Constructs the version string. + # Indicates the version of the parser class + PARSER_VERSION = "0.2.0" - def version - vmsg = "(scheme-parser-version . #{PARSER_VERSION})" - vmsg += " (scheme-lexer-version . #{Lexer::LEXER_VERSION})" - super + " (#{vmsg})" - end + # Constructs the version string. - def initialize - super - end + def version + vmsg = "(scheme-parser :version #{PARSER_VERSION})" + vmsg += " #{Lexer.version}" + super + " (#{vmsg})" + end - # Set the prompt which includes "scheme" label. + def initialize + super + @lexer = nil + @curr_token = @peek_token = nil + end - def prompt=(str) - index = str.index(">") - @prompt = str[0...index] + "(scheme)" + str[index..-1] - end + # Parses a portion of or the whole program source then returns a + # AST structure. - # Parses a s-expression then translates it to a expression for Ruby. - # - # Converts a s-expression (scheme-expression or just S expression) - # to an i-expression (intermediate-expression), then translate an - # i-expression into a r-expression (ruby-expression). An - # i-expression is represented with an Array object in Ruby. A - # r-expression is a String object which could be directly - # evaluated by `Kernel.eval`. - # - # Supported S-expression type: - # - # - primitive expression (s_exp -> r_exp) - # - empty list - # - "()" -> "[]" - # - # - variable reference - # - identifier (symbol) -> "foo" - # - # - literal expression - # - boolean: #f or #t -> `false` or `true` - # - string -> "hoge", "bogo", ... - # - number -> 1.23, Rational(4, 5), Complex(6, 7), ... - # - # - procedure call (s_exp -> i_exp -> r_exp) - # - (+ (* 3 4) (/ 5 6)) ... s_exp - # -> ["add", ["mult", "3", "4"], - # ["div", "5", "6"]] ... i_exp - # -> "add(mul(3, 4), div(5, 6))" ... r_exp - # - # - ((lambda (x) (+ x x)) 4) ... s_exp - # -> [["lambda", ["x"], ["add", "x", "x"]], 4] ... i_exp - # -> "lambda { |x| add(x, x) }.call(4)" ... r_exp - # - # - procedure (s_exp -> i_exp -> r_exp) - # - (lambda (x) (+ x x)) ... s_exp - # -> ["lambda", ["x"], ["add", "x", "x"]] ... i_exp - # -> "lambda { |x| add(x, x) }" ... r_exp - # - # - conditionals (s_exp -> i_exp -> r_exp) - # - (if (= n 0) 1 (* n (- n 1))) ... s_exp - # -> ["if", ["eqv?", "n", "0"], - # "1" ["mul", "n", ["subtract", "n", "1"]]] ... i_exp - # -> "if eqv?(n, 0); - # 1; - # else; - # mul(n, subtract(n, 1)); - # end" ... r_exp - # - # - assignment (s_exp -> i_exp -> r_exp) - # - (set! x 2) ... s_exp - # -> ["set!", "x", "2"] ... i_exp - # -> "x = 2" ... r_exp - # - # - define procedure (s_exp -> i_exp -> r_exp) - # - (define (fact n) (if (= n 0) 1 (* n (fact (- n 1))))) ... s_exp - # -> ["define", ["fact", "n"], - # ["if", ["eqv?", "n", "0"], "1", - # ["mul", "n", ["fact", ["subtract", "n", "1"]]]]] ... i_exp - # -> "def fact(n); - # if n == 0; - # 1; - # else; - # n * fact((n - 1)); - # end" ... r_exp - # - # - derived expression - # - conditionals (s_exp -> i_exp -> r_exp) - # - (cond ((> 3 2) "greater") - # ((< 3 2) "less") - # (else "equal")) ... s_exp - # -> ["cond", [[["gt?", "3", "2"], "\"greater\""], - # [["lt?", "3", "2"], "\"less\""], - # ["else", "\"equal\""]]] ... i_exp - # -> "if gt?(3,2); - # 'greater'; - # elsif lt?(3,2); - # 'less'; - # else; - # 'equal'; - # end" ... r_exp - # - # - building construct (s_exp -> i_exp -> r_exp) - # - (let ((x 2) (y 3)) - # (* x y)) ... s_exp - # -> ["let", [["x", "2"], ["y", "3"]], - # ["mul", "x", "y"]] ... i_exp - # -> "lambda { |x, y| mul(x, y) }.call(2, 3)" ... r_exp - # - # - vector (s_exp -> r_exp) - # - #(1 2 #(3 4) 5) - # -> "vector(1, 2, vector(3, 4), 5)" - # - # - list (s_exp -> r_exp) - # - (1 2 3 (4 5) (6 7 8) 9 0) - # -> "[1, 2, 3, [4, 5], [6, 7, 8], 9, 0]" + def parse(program_source) + @lexer = Lexer.new(program_source) + parse_program + end - def parse(s_exp) - parse_tokens(Lexer.new(s_exp)) - end + # :stopdoc: - def parse_tokens(lexer) # :nodoc: - r_exps = [] - loop { r_exps << parse_s_exp(lexer) } - r_exps.join("\n") - end + private - def parse_s_exp(lexer) - r_exp = nil - token = lexer.next - case token.type - when :lparen - i_exp = parse_compound(lexer) - r_exp = translate(i_exp) - when :vec_lparen - r_exp = parse_vector(lexer) - else - r_exp = parse_primitive(token) + def next_token + if @peek_token + @curr_token = @peek_token + @peek_token = nil + else + @curr_token = @lexer.next + end + @curr_token end - r_exp - end - def parse_primitive(token) - r_exp = nil - case token.type - when *Lexer::KEYWORDS.values - r_exp = translate_ident(token.literal) - when :string - r_exp = token.literal - when :ident, :boolean, :char, :number, :op_proc - trans_method_name = "translate_#{token.type}".intern - r_exp = self.send(trans_method_name, token.literal) - else - raise Rus3::SchemeSyntaxError, token.literal + def peek_token + if @peek_token.nil? + @peek_token = @lexer.next + end + @peek_token end - r_exp - end - def parse_vector(lexer) - r_exp = "vector(" - Kernel.loop { - token = lexer.peek - case token.type - when :lparen - lexer.next - i_exp = parse_compound(lexer) - r_exp += translate(i_exp) - when :vec_lparen - lexer.next - r_exp += parse_vector(lexer) - when :rparen - lexer.next - r_exp = r_exp.sub(/,\s*\Z/, "") + ")" - break + def current_token + @curr_token + end + + def push_back_token(token) + @peek_token = token + end + + # <program> -> <expression>* + def parse_program + program = Rus3::AST.instantiate(:program) + Kernel.loop { + node = parse_expression + program << node + } + program + end + + TOKEN_START_DELIMITERS = [ # :nodoc: + :lparen, # list: ( ... ) + :vec_lparen, # vector: #( ... ) + :bytevec_lparen, # bytevector: #u8( ... ) + :quotation, # quotation: '<something> + :backquote, # quasiquote: `<something> + :comma, # used in quasiquote + :comma_at, # used in quasiquote + :comment_lparen, # comment start + ] + + def start_delimiter?(token) + TOKEN_START_DELIMITERS.include?(token.type) + end + + # <expression> -> <simple expression> | <compound expression> + # <compound expression> -> <start delimiter> <expression>* <end delimiter> + # <start delimiter> -> <lparen> | <vec lparen> | <bytevec lparen> | + # <quotation> | + # <backquote> | <comma> | <comma at> | + # <comment lparen> + # <end delimier> -> <rparen> | <comment_rparen> + def parse_expression + if start_delimiter?(peek_token) + parse_compound_expression else - r_exp += parse_s_exp(lexer) + parse_simple_expression end - r_exp += ", " - } - r_exp - end + end - def parse_compound(lexer) - i_exp = [] - Kernel.loop { - token = lexer.next + # <simple expression> -> <identifier> | <self evaluating> + # <self evaluating> -> <boolean> | <number> | <character> | <string> + def parse_simple_expression + type, literal = *next_token + type = :peculiar_identifier if type == :op_proc + Rus3::AST.instantiate(type, literal) + end + + def parse_identifier + Rus3::AST.instantiate(:identifier, next_token.literal) + end + + # <compound expression> -> <quotation> | + # <vector> | + # <list expression> + def parse_compound_expression + node = nil + token = peek_token case token.type + when :vec_lparen + node = parse_vector when :lparen - i_exp << parse_compound(lexer) - when :rparen - break + node = parse_list_expression + when :quotation + node = parse_quotation else - i_exp << parse_primitive(token) + raise Rus3::SchemeSynaxError, token.to_a end - } - i_exp - end + node + end - def translate_ident(s_exp_literal) - "#{s_exp_literal}" - end + # <list expression> -> + # <procedure call> | + # <lambda expression> | + # <conditional> | + # <assignment> | + # <macro use> | + # <macro block> | + # <definition> | + # <derived expression> | + # <includer> + def parse_list_expression + node = nil + next_token # read :lparen + case peek_token.type + when :rparen + # an empty list + node = Rus3::AST.instantiate(:list) + next_token # skip :rparen + when :identifier + case peek_token.literal + when "lambda" # lambda expression + node = parse_lambda_expression + when "if" # conditional + node = parse_conditional + when "set!" # assignment + node = parse_assignment + when "let-syntax", "letrec-syntax" # macro block + node = parse_macro_block + when "define", "define-syntax", "define-values", "define-record-type", "begin" + node = parse_definition + else + node = parse_derived_expression + node = parse_macro_use if node.nil? + end + end + node || parse_procedure_call + end - def translate_boolean(s_exp_literal) - # literal == "#f" or #t" - (s_exp_literal[1] == "f") ? "false" : "true" - end + def parse_macro_use + nil + end - def translate_char(s_exp_literal) - c = s_exp_literal[2..-1] - case c - when "space" - c = " " - when "newline" - c = "\n" + # <procedure call> -> ( <operator> <operand>* ) + def parse_procedure_call + proc_call_node = Rus3::AST.instantiate(:procedure_call, + current_token.literal) + proc_call_node.operator = parse_operator + + Kernel.loop { + if peek_token.type == :rparen + next_token # skip :rparen + break + end + proc_call_node.add_operand(parse_operand) + } + proc_call_node end - "Char.new(\"#{c}\")" - end - def translate_number(s_exp_literal) - if s_exp_literal.include?("/") # rational? - denominator, numerator = s_exp_literal.split("/").map{|s| Kernel.eval(s)} - "Rational(#{denominator}, #{numerator})" - else - Kernel.eval(s_exp_literal).to_s + # <operator> -> <expression> + def parse_operator + parse_expression end - end - OP_PROCS = { - "+" => "add", - "-" => "subtract", - "*" => "mul", - "/" => "div", - "%" => "mod", - "<" => "lt?", - "<=" => "le?", - ">" => "gt?", - ">=" => "ge?", - "==" => "eqv?", - } + # <operand> -> <expression> + def parse_operand + parse_expression + end - def translate_op_proc(s_exp_literal) - OP_PROCS[s_exp_literal] - end + # <lambda expression> -> ( lambda <formals> <body> ) + # <sequence> -> <command>* <expression> + # <command> -> <expression> + # <definition> ... see parse_definition + def parse_lambda_expression + lambda_node = Rus3::AST.instantiate(:lambda_expression, + next_token.literal) + lambda_node.formals = parse_formals + lambda_node.body = read_body + next_token # skip :rparen + lambda_node + end - def translate(i_exp) - r_exp = nil + # <formals> -> ( <identifier>* ) | <identifier> | + # ( <identifier>+ . <identifier> ) + def parse_formals + token = next_token + formals = nil + if token.type == :lparen + formals = Rus3::AST.instantiate(:list) + Kernel.loop { + if peek_token.type == :rparen + next_token + break + end + formals << Rus3::AST.instantiate(:identifier, next_token.literal) + } + else + formals = Rus3::AST.instantiate(:identifier, token.literal) + end + formals + end - if i_exp.instance_of?(Array) - return "[]" if i_exp.empty? # an empty list + # <body> -> <definition>* <sequence> + def read_body + body = [] + Kernel.loop { + break if peek_token.type == :rparen # end of <lambda expression> + body << parse_expression + } + body + end - case i_exp[0] - when "lambda", "if", "set!", "define", "cond", "let" - keyword = i_exp[0] - trans_method_name = "translate_#{keyword}".intern - r_exp = self.send(trans_method_name, i_exp) - else # procedure call - r_exp = translate_proc_call(i_exp) + # <conditional> -> ( if <test> <consequent> <alternamte> ) + def parse_conditional + if_node = Rus3::AST.instantiate(:conditional, next_token.literal) + if_node.test = parse_test + if_node.consequent = parse_consequent + if peek_token.type != :rparen + if_node.alternate = parse_alternate end - else - r_exp = i_exp + next_token # skip :rparen + if_node end - r_exp - end - def translate_proc_call(i_exp) - proc = i_exp[0] + # <test> -> <expression> + def parse_test + parse_expression + end - if proc.instance_of?(Array) - raise Rus3::SchemeSyntaxError, i_exp if i_exp[0][0] != "lambda" - lambda_proc = translate_lambda(proc) - proc = "#{lambda_proc}.call" + # <consequent> -> <expression> + def parse_consequent + parse_expression end - args = i_exp[1..-1].map {|e| translate(e) } + # <alternate> -> <expression> | <empty> + # <empty> -> "" + def parse_alternate + parse_expression + end - "#{proc}(#{args.join(', ')})" - end + # <assignment> -> ( set! <identifier> <expression> ) + def parse_assignment + assignment_node = Rus3::AST.instantiate(:assignment, next_token.literal) + assignment_node.identifier = parse_identifier + assignment_node.expression = parse_expression + next_token # skip :rparen + assignment_node + end - def translate_lambda(i_exp) - formals = i_exp[1] - body = i_exp[2..-1] + def parse_macro_block + nil + end - if body.instance_of?(Array) - body = translate_body(body) + # <definition> -> ( define <identifier> <expression> ) | + # ( define ( <identifier> <def formals> ) <body> ) | + # <syntax definition> | + # ( define-values <formals> <body> ) | + # ( define-record-type <identifier> + # <constructor> <identifier> <field spec>* ) | + # ( begin <definition>* ) + # + # <def formals> -> <identifier>* | + # <identifier>* . <identifier> + # <constructor> -> ( <identifier> <field name>* ) + # <field spec> -> ( <field name> <accessor> ) | + # ( <field name> <accessor> <mutator> ) + # <field name> -> <identifier> + # <accessor> -> <identifier> + # <mutator> -> <identifier> + # <syntax definition> -> ( define-syntax <keyword> <transformer spec> ) + # <transformer spec> -> ... + # : + def parse_definition + case peek_token.literal + when "define" + parse_identifier_definition + when "define-syntax" + nil # not implemented yet + when "define-values" + nil # not implemented yet + when "define-record-type" + nil # not implemented yet + when "begin" + nil # not implemented yet + else + raise Rus3::SchemeSynaxError, token.to_a + end end - "lambda {|#{formals.join(', ')}| #{body}}" - end + # ( define <identifier> <expression> ) + # ( define ( <identifier> <def formals> ) <body> ) + # + # ( define foo 3 ) + # ( define bar ( lambda ( x y ) ( + x y ))) + # ( deifne ( hoge n m ) ( * n m )) - def translate_if(i_exp) - test = translate(i_exp[1]) - consequent = translate(i_exp[2]) - alternate = translate(i_exp[3]) + def parse_identifier_definition + token = next_token # define + define_node = Rus3::AST.instantiate(:identifier_definition, token.literal) + case peek_token.type + when :lparen + next_token # skip :lparen + # procedure name + define_node.identifier = parse_identifier - if_exp = "if #{test}; #{consequent}" - if_exp += "; else; #{alternate}" if alternate - if_exp += "; end" + def_formals = Rus3::AST.instantiate(:list) + Kernel.loop { + if peek_token.type == :rparen + next_token + break + end + def_formals << parse_identifier + } - if_exp - end + lambda_node = Rus3::AST.instantiate(:lambda_expression, nil) + lambda_node.formals = def_formals + lambda_node.body = read_body + next_token # skip :rparen - def translate_set!(i_exp) - ident = i_exp[1] - value = translate(i_exp[2]) - "#{ident} = #{value}" - end + define_node.expression = lambda_node + when :identifier + define_node.identifier = parse_identifier + define_node.expression = parse_expression + next_token + else + raise Rus3::SchemeSynaxError, current_token.to_a + end + define_node + end - def translate_define(i_exp) - if i_exp[1].instance_of?(Array) - name = i_exp[1][0] - params = i_exp[1][1..-1] - body = translate_body(i_exp[2..-1]) + # <quotation> -> '<datum> | ( quote <datum> ) + def parse_quotation + token = next_token + quote_node = Rus3::AST.instantiate(:quotation, token.literal) + quote_node << parse_datum + quote_node + end - "def #{name}(#{params.join(', ')}); #{body}; end" - else - ident = i_exp[1] - value = translate(i_exp[2]) - "#{ident} = #{value}" + # <vetor> -> #( <datum>* ) + def parse_vector + parse_data_to_rparen end - end - def translate_cond(i_exp) - test = translate(i_exp[1][0]) - exp = translate(i_exp[1][1]) - r_exp = "if #{test}; #{exp}" + def parse_data_to_rparen + token = next_token + ast_type = nil + case token.type + when :lparen + ast_type = :list + when :vec_lparen + ast_type = :vector + else + ast_type = :list + end + node = Rus3::AST.instantiate(ast_type, nil) + raise Rus3::SchemeSyntaxError, token.to_a unless node.branch? + Kernel.loop { + token = peek_token + break if token.type == :rparen + node << parse_datum + } + next_token # skip :rparen + node + end - i_exp[2..-1].each { |clause| - exp = translate(clause[1]) - if clause[0] == "else" - r_exp += "; else; #{exp}" + # <datum> -> <simple datum> | <compound datum> + def parse_datum + if start_delimiter?(peek_token) + parse_compound_datum else - test = translate(clause[0]) - r_exp += "; elsif #{test}; #{exp}" + parse_simple_datum end - } - r_exp += "; end" + end - r_exp - end + # <simple datum> -> <boolean> | <number> | <character> | + # <string> | <symbol> + # <symbol> -> <identifier> + # + # See `parse_simple_expression`. + def parse_simple_datum + parse_simple_expression + end - def translate_let(i_exp) - bindings = i_exp[1].to_h - body = translate(i_exp[2]) + # <compound datum> -> <list> | <vector> | <bytevector> | <abbreviation> + # <abbreviation> -> <abbrev prefix> <datum> + def parse_compound_datum + case peek_token.type + when :lparen + parse_list + when :vec_lparen + parse_vector + else + raise Rus3::SchemeSyntaxError, peek_token.to_a + end + end - params = bindings.keys.join(", ") - args = bindings.values.map{|e| translate(e)}.join(", ") + # <list> -> ( <datum>* ) | ( <datum>+ . <datum> ) + def parse_list + parse_data_to_rparen + end - "lambda {|#{params}| #{body}}.call(#{args})" - end + DERIVED_IDENTIFIERS = [ + "cond", "case", "and", "or", "when", "unless", + "let", "let*", "letrec", "letrec*", + "let-values", "let*-values", + "begin", "do", + "delay", "delay-force", + "parameterize", + "guard", + "case-lambda", + ] - def translate_body(i_exps) - r_exps = [] - i_exps.map { |i_exp| - r_exps << translate(i_exp) + # <derived expression> -> + # ( cond <cond clause>+ ) | + # ( cond <cond cluase>* ( else <sequence> ) ) | + # ( case <expression> <case caluse>+ ) | + # ( case <expression> <case caluse>* ( else <sequence> ) ) | + # ( case <expression> <case caluse>* ( else => <recipient> ) ) | + # ( and <test>* ) | + # ( or <test>* ) | + # ( when <test> <sequence> ) | + # ( unless <test> <sequence> ) | + # ( let ( <binding spec>* ) <body> ) | + # ( let <identifier> ( <binding spec>* ) <body> ) | + # ( let* ( <binding spec>* ) <body> ) | + # ( letrec ( <binding spec>* ) <body> ) | + # ( letrec* ( <binding spec>* ) <body> ) | + # ( let-values ( <my binding spec>* ) <body> ) | + # ( let*-values ( <my binding spec>* ) <body> ) | + # ( begin <sequence> ) | + # ( do ( <iteration spec>* ) ( <test> <do result> ) <command>* ) | + # ( delay <expression> ) | + # ( delay-force <expression> ) | + # ( parameterize ( ( <expression> <expression> )* ) <body> ) | + # ( guard ( <identifier> <cond clause>* ) <body> ) | + # ( case-lambda <case-lambda clause>* ) | + # <quasiquotation> + def parse_derived_expression + node = nil + token = next_token + if token.type == :identifier + if DERIVED_IDENTIFIERS.include?(token.literal) + method_name = compose_method_name("parse_", token.literal).intern + method = self.method(method_name) + node = method.call + else + node = parse_quasiquotation + end + end + push_back_token(token) if node.nil? + node + end + + SCM_CHAR_TO_RB_MAP = { + "*" => "_star", + "-" => "_", } - r_exps.join(";") - end + def compose_method_name(prefix, type_name) + converted_name = type_name.gsub(/[*\-]/, SCM_CHAR_TO_RB_MAP) + prefix + converted_name + end + + def not_implemented_yet(feature) + raise Rus3::NotImplementedYetError, feature + end + + # ( cond <cond clause>+ ) + # ( cond <cond cluase>* ( else <sequence> ) ) + def parse_cond + cond_node = Rus3::AST.instantiate(:cond, current_token.literal) + + Kernel.loop { + if peek_token.type == :rparen + next_token # skip :rparen + break + end + cond_node.add_clause(parse_cond_clause) + } + cond_node + end + + # <cond cluase> -> ( <test> <sequence> ) | + # ( <test> ) | + # ( <test> => <recipient> ) + # <test> -> <expression> + # <recipient> -> <expression> + # <sequence> -> <command>* <expression> + # <command> -> <expression> + def parse_cond_clause + clause_node = Rus3::AST.instantiate(:cond_clause) + next_token # skip :lparen + + clause_node.test = parse_test + + Kernel.loop { + if peek_token.type == :rparen + next_token + break + end + clause_node.add_expression(parse_expression) + } + clause_node + end + + def prase_test + parse_expression + end + + def parse_case + not_implemented_yet("case") + end + + def parse_and + not_implemented_yet("and") + end + + def parse_or + not_implemented_yet("or") + end + + def parse_when + not_implemented_yet("when") + end + + def parse_unless + not_implemented_yet("unless") + end + + # ( let ( <binding spec>* ) <body> ) | + # <bind spec> -> ( <identifier> <expression> ) + # + # `Named let` has not supported yet. + # ( let <identifier> ( <binding spec>* ) <body> ) + def parse_let + let_node = Rus3::AST.instantiate(:let, current_token.literal) + + let_node.bind_specs = parse_bind_specs + let_node.body = read_body + next_token # skip :rparen + + let_node + end + + def parse_bind_specs + specs_node = Rus3::AST.instantiate(:list) + next_token # skip :lparen + + Kernel.loop { + if peek_token.type == :rparen + next_token # skip :rparen + break + end + specs_node << parse_bind_spec + } + specs_node + end + + def parse_bind_spec + spec_node = Rus3::AST::instantiate(:bind_spec) + next_token # skip :lpraren + spec_node.identifier = parse_identifier + spec_node.expression = parse_expression + next_token # skip :rparen + spec_node + end + + def parse_let_star + not_implemented_yet("let*") + end + + def parse_letrec + not_implemented_yet("letrec") + end + + def parse_letrec_star + not_implemented_yet("letrec*") + end + + def parse_let_values + not_implemented_yet("let-values") + end + + def parse_let_star_values + not_implemented_yet("let*-values") + end + + def parse_begin + not_implemented_yet("begin") + end + + def parse_do + not_implemented_yet("do") + end + + def parse_delay + not_implemented_yet("delay") + end + + def parse_delay_force + not_implemented_yet("delay-force") + end + + def parse_parameterize + not_implemented_yet("parameterize") + end + + def parse_guard + not_implemented_yet("guard") + end + + def parse_case_lambda + not_implemented_yet("case-lambda") + end + + def parse_quasiquotation + nil + end + + # :startdoc: + + end end end