lib/rouge/regex_lexer.rb in rouge-0.4.0 vs lib/rouge/regex_lexer.rb in rouge-0.5.0

- old
+ new

@@ -163,10 +163,11 @@ # reset this lexer to its initial state. This runs all of the # start_procs. def reset! @stack = nil + @current_stream = nil self.class.start_procs.each do |pr| instance_eval(&pr) end end @@ -184,19 +185,21 @@ # # @see #step #step (where (2.) is implemented) def stream_tokens(str, &b) stream = StringScanner.new(str) + @current_stream = stream + until stream.eos? debug { "lexer: #{self.class.tag}" } debug { "stack: #{stack.map(&:name).inspect}" } debug { "stream: #{stream.peek(20).inspect}" } success = step(get_state(state), stream, &b) if !success debug { " no match, yielding Error" } - b.call(Token['Error'], stream.getch) + b.call(Token::Tokens::Error, stream.getch) end end end # Runs one step of the lex. Rules in the current state are tried @@ -229,13 +232,11 @@ # @private def run_callback(stream, callback, &output_stream) with_output_stream(output_stream) do @group_count = 0 - @last_match = stream instance_exec(stream, &callback) - @last_match = nil end end # The number of successive scans permitted without consuming # the input stream. If this is exceeded, the match fails. @@ -272,24 +273,26 @@ # the token type # @param val # (optional) the string value to yield. If absent, this defaults # to the entire last match. def token(tok, val=:__absent__) - val = @last_match[0] if val == :__absent__ - val ||= '' - - raise 'no output stream' unless @output_stream - - @output_stream << [Token[tok], val] unless val.empty? + val = @current_stream[0] if val == :__absent__ + yield_token(tok, val) end # Yield a token with the next matched group. Subsequent calls # to this method will yield subsequent groups. def group(tok) - token(tok, @last_match[@group_count += 1]) + yield_token(tok, @current_stream[@group_count += 1]) end + def groups(*tokens) + tokens.each_with_index do |tok, i| + yield_token(tok, @current_stream[i+1]) + end + end + # Delegate the lex to another lexer. The #lex method will be called # with `:continue` set to true, so that #reset! will not be called. # In this way, a single lexer can be repeatedly delegated to while # maintaining its own internal state stack. # @@ -297,11 +300,11 @@ # The lexer or lexer class to delegate to # @param [String] text # The text to delegate. This defaults to the last matched string. def delegate(lexer, text=nil) debug { " delegating to #{lexer.inspect}" } - text ||= @last_match[0] + text ||= @current_stream[0] lexer.lex(text, :continue => true) do |tok, val| debug { " delegated token: #{tok.inspect}, #{val.inspect}" } token(tok, val) end @@ -334,20 +337,29 @@ stack.pop(times) nil end + # replace the head of the stack with the given state + def goto(state_name) + raise 'empty stack!' if stack.empty? + stack[-1] = get_state(state_name) + end + # reset the stack back to `[:root]`. def reset_stack debug { ' resetting stack' } stack.clear stack.push get_state(:root) end # Check if `state_name` is in the state stack. def in_state?(state_name) - stack.map(&:name).include? state_name.to_s + state_name = state_name.to_s + stack.any? do |state| + state.name == state_name.to_s + end end # Check if `state_name` is the state on top of the state stack. def state?(state_name) state_name.to_s == state.name @@ -355,16 +367,21 @@ private def with_output_stream(output_stream, &b) old_output_stream = @output_stream @output_stream = Enumerator::Yielder.new do |tok, val| - debug { " yielding #{tok.to_s.inspect}, #{val.inspect}" } - output_stream.call(Token[tok], val) + debug { " yielding #{tok.qualname}, #{val.inspect}" } + output_stream.call(tok, val) end yield ensure @output_stream = old_output_stream + end + + def yield_token(tok, val) + return if val.nil? || val.empty? + @output_stream.yield(tok, val) end end end