lib/rouge/lexer.rb in rouge-0.1.0.rc1 vs lib/rouge/lexer.rb in rouge-0.1.0

- old
+ new

@@ -212,123 +212,10 @@ StateDSL.new(rules).instance_eval(&@defn) self end end - class ScanState - def self.delegate(m, target) - define_method(m) do |*a, &b| - send(target).send(m, *a, &b) - end - end - - attr_accessor :scanner - attr_accessor :stack - attr_accessor :lexer - def initialize(lexer, scanner=nil, stack=nil) - @lexer = lexer - @scanner = scanner - @stack = stack || [lexer.get_state(:root)] - end - - def pop! - raise 'empty stack!' if stack.empty? - - debug { " popping stack" } - stack.pop - end - - def push(state_name=nil, &b) - # use the top of the stack by default - if state_name || b - push_state = state.relative_state(state_name, &b) - else - push_state = self.state - end - - debug { " pushing #{push_state.name}" } - stack.push(push_state) - end - - def in_state?(state_name) - stack.map(&:name).include? state_name.to_s - end - - def state?(state_name) - state_name.to_s == state.name - end - - delegate :debug, :lexer - - delegate :[], :scanner - delegate :captures, :scanner - delegate :peek, :scanner - delegate :eos?, :scanner - - def run_callback(&callback) - Enumerator.new do |y| - @output_stream = y - @group_count = 0 - instance_exec(self, &callback) - @output_stream = nil - end - end - - def token(tok, val=:__absent__) - val = scanner[0] if val == :__absent__ - val ||= '' - - raise 'no output stream' unless @output_stream - - @output_stream << [Token[tok], val] - end - - def group(tok) - token(tok, scanner[@group_count += 1]) - end - - def delegate(lexer, text=nil) - debug { " delegating to #{lexer.inspect}" } - text ||= scanner[0] - - lexer.lex(text, :continue => true) do |tok, val| - debug { " delegated token: #{tok.inspect}, #{val.inspect}" } - token(tok, val) - end - end - - def state - raise 'empty stack!' if stack.empty? - stack.last - end - - MAX_NULL_STEPS = 5 - def scan(re, &b) - @null_steps ||= 0 - - if @null_steps >= MAX_NULL_STEPS - debug { " too many scans without consuming the string!" } - return false - end - - scanner.scan(re) - - if scanner.matched? - if scanner.matched_size == 0 - @null_steps += 1 - else - @null_steps = 0 - end - - yield self - return true - end - - return false - end - end - class StateDSL attr_reader :rules def initialize(rules) @rules = rules end @@ -389,64 +276,151 @@ def get_state(name) self.class.get_state(name) end - def scan_state - @scan_state ||= ScanState.new(self) + def stack + @stack ||= [get_state(:root)] end + def state + stack.last or raise 'empty stack!' + end + def reset! @scan_state = nil self.class.start_procs.each do |pr| - scan_state.instance_eval(&pr) + instance_eval(&pr) end end def stream_tokens(stream, &b) - scan_state.scanner = stream - - until scan_state.eos? + until stream.eos? debug { "lexer: #{self.class.tag}" } - debug { "stack: #{scan_state.stack.map(&:name).inspect}" } - debug { "stream: #{scan_state.scanner.peek(20).inspect}" } - success = step(get_state(scan_state.state), scan_state, &b) + debug { "stack: #{stack.map(&:name).inspect}" } + debug { "stream: #{stream.peek(20).inspect}" } + success = step(get_state(state), stream, &b) if !success debug { " no match, yielding Error" } - b.call(Token['Error'], scan_state.scanner.getch) + b.call(Token['Error'], stream.getch) end end end - def step(state, scan_state, &b) + def step(state, stream, &b) state.rules.each do |rule| - return true if run_rule(rule, scan_state, &b) + return true if run_rule(rule, stream, &b) end false end - private - def run_rule(rule, scan_state, &b) + def run_rule(rule, stream, &b) case rule when String debug { " entering mixin #{rule}" } - res = step(get_state(rule), scan_state, &b) + res = step(get_state(rule), stream, &b) debug { " exiting mixin #{rule}" } res when Rule debug { " trying #{rule.inspect}" } - scan_state.scan(rule.re) do |match| - debug { " got #{match[0].inspect}" } + scan(stream, rule.re) do + debug { " got #{stream[0].inspect}" } - scan_state.run_callback(&rule.callback).each do |tok, res| + run_callback(stream, &rule.callback).each do |tok, res| debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" } b.call(Token[tok], res) end end end + end + + def run_callback(stream, &callback) + Enumerator.new do |y| + @output_stream = y + @group_count = 0 + @last_matches = stream + instance_exec(stream, &callback) + @last_matches = nil + @output_stream = nil + end + end + + MAX_NULL_STEPS = 5 + def scan(scanner, re, &b) + @null_steps ||= 0 + + if @null_steps >= MAX_NULL_STEPS + debug { " too many scans without consuming the string!" } + return false + end + + scanner.scan(re) + + if scanner.matched? + if scanner.matched_size == 0 + @null_steps += 1 + else + @null_steps = 0 + end + + yield self + return true + end + + return false + end + + def token(tok, val=:__absent__) + val = @last_matches[0] if val == :__absent__ + val ||= '' + + raise 'no output stream' unless @output_stream + + @output_stream << [Token[tok], val] + end + + def group(tok) + token(tok, @last_matches[@group_count += 1]) + end + + def delegate(lexer, text=nil) + debug { " delegating to #{lexer.inspect}" } + text ||= @last_matches[0] + + lexer.lex(text, :continue => true) do |tok, val| + debug { " delegated token: #{tok.inspect}, #{val.inspect}" } + token(tok, val) + end + end + + def push(state_name=nil, &b) + # use the top of the stack by default + if state_name || b + push_state = state.relative_state(state_name, &b) + else + push_state = self.state + end + + debug { " pushing #{push_state.name}" } + stack.push(push_state) + end + + def pop! + raise 'empty stack!' if stack.empty? + + debug { " popping stack" } + stack.pop + end + + def in_state?(state_name) + stack.map(&:name).include? state_name.to_s + end + + def state?(state_name) + state_name.to_s == state.name end end end