# -*- encoding : utf-8 -*-
require 'bychar'

module Tracksperanto::ShakeGrammar
  # Raised by the lexer when the atom buffer or the nesting depth exceeds its limit,
  # which means the input is not a Shake script.
  class WrongInputError < RuntimeError; end

  # Since Shake uses a C-like language for its scripts we rig up a very sloppy
  # but concise C-like lexer to cope.
  #
  # All of the lexing happens in the constructor: once +new+ returns, the parsed
  # result is available via +stack+.
  class Lexer
    # Parsed stack
    attr_reader :stack

    # Access to the sentinel object
    attr_reader :sentinel

    STOP_TOKEN = :__stop #:nodoc:
    MAX_BUFFER_SIZE = 32000
    MAX_STACK_DEPTH = 127

    INT_ATOM = /^(\d+)$/
    FLOAT_ATOM = /^([\-\d\.]+)$/
    STR_ATOM = /^\"/
    # The frame number after the "@" may carry a fractional part ("@1.5")
    AT_FRAME = /^@(-?\d+(?:\.\d+)?)/

    # The first argument is the IO handle to the data of the Shake script.
    # The second argument is a "sentinel" that is going to be passed
    # to the downstream lexers instantiated for nested data structures.
    # You can use the sentinel to collect data from child nodes for example.
    # +limit_to_one_stmt+ makes the lexer stop at the first "," or ";" it meets,
    # and +stack_depth+ is the nesting level of this lexer (0 for the outermost one).
    #
    # Raises WrongInputError when the atom buffer grows past MAX_BUFFER_SIZE
    # or when the nesting level goes past MAX_STACK_DEPTH.
    def initialize(with_io, sentinel = nil, limit_to_one_stmt = false, stack_depth = 0)
      # We parse byte by byte, but reading byte by byte is very slow. We therefore use a buffering reader
      # that will cache in chunks, and then read from there byte by byte.
      # This yields a substantial speedup (4.9 seconds for the test
      # as opposed to 7.9 without this). We do check for the proper class only once so that when we use nested lexers
      # we only wrap the passed IO once, and only if necessary.
      with_io = Bychar.wrap(with_io) unless with_io.respond_to?(:read_one_char)

      @io = with_io
      @stack = []
      @buf = ''
      @sentinel = sentinel
      @limit_to_one_stmt = limit_to_one_stmt
      @stack_depth = stack_depth
      @in_comment = false

      # parse throws STOP_TOKEN when the IO runs out or when this lexer's scope is closed
      catch(STOP_TOKEN) do
        loop { parse }
      end

      # Flush whatever is still sitting in the buffer
      @in_comment ? consume_comment! : consume_atom!
    end

    private

    # Pushes the buffer contents on the stack as a [:comment, text] node
    def push_comment
      push [:comment, @buf.gsub(/(\s+?)\/\/{1}/, '')]
    end

    # Pushes the accumulated comment and resets the buffer
    def consume_comment!
      push_comment
      erase_buffer
    end

    # Reads one character from the IO and acts on it. Throws STOP_TOKEN
    # when the IO is exhausted or when the current scope ends.
    def parse
      if @buf.length > MAX_BUFFER_SIZE # Wrong format and the buffer is filled up, bail
        raise WrongInputError, "Atom buffer overflow at #{MAX_BUFFER_SIZE} bytes, this is definitely not a Shake script"
      end

      if @stack_depth > MAX_STACK_DEPTH # Wrong format - parentheses overload
        raise WrongInputError, "Stack overflow at level #{MAX_STACK_DEPTH}, this is probably a LISP program uploaded by accident"
      end

      c = @io.read_one_char
      throw STOP_TOKEN if c.nil? # IO has run out

      if c == '/' && @buf.end_with?('/') # Comment start
        # If some other data from this line has been accumulated we first consume that
        @buf = @buf[0..-2] # everything except the opening slash of the comment
        consume_atom! # also resets the buffer
        @in_comment = true
      elsif @in_comment && c == "\n" # Comment end
        consume_comment!
        @in_comment = false
      elsif @in_comment
        @buf << c
      elsif !@buf.empty? && c == '(' # Funcall
        funcall_name = @buf.strip
        push([:funcall, funcall_name] + nested_stack(false))
        erase_buffer
      elsif c == '{' # OFX curly braces or a subexpression in a node's knob
        # The subexpression still has to be read off the IO, but its stack is discarded
        nested_stack(true)
        push(:expr)
      elsif c == '[' # Array
        # The nesting level is carried over so that deeply nested arrays hit MAX_STACK_DEPTH too.
        # NOTE(review): array lexers have never received the sentinel - confirm that this is intended
        push([:arr, nested_stack(false, nil)])
      elsif c == '}'
        throw STOP_TOKEN
      elsif c == ']' || c == ')' || (@limit_to_one_stmt && (c == ';' || c == ','))
        # Bailing out of a subexpression
        consume_atom!
        throw STOP_TOKEN
      elsif c == ','
        consume_atom!
      elsif c == '@'
        # The value that precedes the "@" goes on the stack first, consume_atom! pops it
        # back off when it assembles the :value_at node
        consume_atom!
        @buf << c
      elsif c == ';' || c == "\n"
        # Skip these - the subexpression already is expanded anyway
      elsif c == '='
        vardef_atom = vardef(@buf.strip)
        push [:assign, vardef_atom, nested_stack(true).shift]
        erase_buffer
      else
        @buf << c
      end
    end

    # Lexes a nested structure off the shared IO using a fresh lexer of the same class,
    # one nesting level deeper than this one, and returns the stack it has parsed.
    def nested_stack(limit_to_one_stmt, sentinel = @sentinel)
      self.class.new(@io, sentinel, limit_to_one_stmt, @stack_depth + 1).stack
    end

    # Grab the minimum atomic value: turns the buffer into an integer, a float,
    # an unquoted string, a [:value_at, frame, value] node or a generic [:atom, text]
    # node and pushes that. Does nothing when the buffer holds only whitespace.
    def consume_atom!
      at = @buf.strip
      erase_buffer
      return if at.empty?

      the_atom = case at
      when INT_ATOM
        at.to_i
      when STR_ATOM
        unquote_s(at)
      when FLOAT_ATOM
        at.to_f
      when AT_FRAME
        frame = Regexp.last_match(1)
        frame_number = frame.include?('.') ? frame.to_f : frame.to_i
        # The value this frame number applies to is the atom that has been pushed last
        [:value_at, frame_number, @stack.pop]
      else
        [:atom, at]
      end

      push(the_atom)
    end

    # Strips the surrounding double quotes and unescapes the quotes inside the string
    def unquote_s(string)
      string.strip.gsub(/^\"/, '').gsub(/\"$/, '').gsub(/\\\"/, '"')
    end

    # In the default implementation this just puts things on the stack. However,
    # if you want to unwrap structures as they come along (which you do for big files)
    # you have to override this
    def push(atom_array)
      @stack << atom_array
    end

    # Builds a [:vardef, type, name] node out of the text on the left side of an assignment
    def vardef(var_specifier)
      # Since we can have two-word pointers as typedefs (char *) we only use the last
      # part of the thing as varname. Nodes return the :image type implicitly.
      varname_re = /\w+$/
      varname = var_specifier.scan(varname_re).flatten.join
      typedef = var_specifier.gsub(varname_re, '').strip
      typedef = :image if typedef.empty?
      [:vardef, typedef, varname]
    end

    # Resets the atom buffer
    def erase_buffer
      @buf = ''
    end
  end
end