require 'strscan'

module Nagoro
  # Event-driven scanner for tag-based markup built on StringScanner.
  #
  # The scanner walks the input from left to right and reports what it finds
  # to a callback object, which must respond to:
  #
  #   doctype(string)
  #   instruction(name, body)
  #   tag_start(name, original_attrs, value_attrs)
  #   tag_end(name)
  #   text(string)
  class Scanner < StringScanner
    # Any run of characters that cannot start or end a tag.
    TEXT = /[^<>]+/m

    # Document type declaration; group 1 is everything between
    # "<!DOCTYPE" and the closing ">".
    DOCTYPE = /<!DOCTYPE([^>]+)>/m

    # Opening of a tag; group 1 is the tag name. The name is matched lazily
    # and must be followed by whitespace, ">", "/>" or end of input, so the
    # slash of a compact self-closing tag ("<br/>") is not swallowed into the
    # name and stays available for TAG_CLOSING_END.
    TAG_START = /<([^\s>]+?)(?=\s|\/?>|\z)/
    TAG_END = /<\/([^>]*)>/
    TAG_OPEN_END = /\s*>/
    TAG_CLOSING_END = /\s*\/>/

    # One name=value attribute. Group 1 is the name, group 2 the value
    # including its quotes, group 4 the value without quotes. Angle brackets
    # are excluded from the name so the pattern cannot read past the end of
    # the tag and mistake following text such as `>a="b"` for an attribute.
    TAG_PARAMETER = /\s*([^\s<>]*)=((['"])(.*?)\3)/um

    INSTRUCTION_START = /<\?(\S+)/
    INSTRUCTION_END = /(.*?)(\?>)/um

    RUBY_INTERP_START = /\s*#\{/m
    RUBY_INTERP_TEXT = /[^\{\}]+/m
    RUBY_INTERP_NEST = /\{[^\}]*\}/m
    # Zero-width: detects the closing brace without consuming it.
    RUBY_INTERP_END = /(?=\})/

    # string::   the markup to scan
    # callback:: object that receives the scan events listed on the class
    def initialize(string, callback)
      @callback = callback
      super(string)
    end

    # Scans the whole input, emitting events until the end of the string.
    #
    # Raises Stuck when a pass over the patterns consumes nothing, which
    # would otherwise spin forever on input no pattern recognises.
    def stream
      until eos?
        pos = self.pos
        run
        raise(Stuck, "Scanner didn't move: %p" % self) if pos == self.pos
      end
    end

    # Tries each top-level pattern at the current position, in priority
    # order, and dispatches to the handler of the first one that matches.
    # Does nothing when no pattern matches.
    def run
      if    scan(DOCTYPE);           doctype(self[1])
      elsif scan(INSTRUCTION_START); instruction(self[1])
      elsif scan(TAG_END);           tag_end(self[1])
      elsif scan(RUBY_INTERP_START); ruby_interp(matched)
      elsif scan(TAG_START);         tag_start(self[1])
      elsif scan(TEXT);              text(matched)
      end
    end

    # Consumes the remainder of a processing instruction and reports it.
    # The body passed to the callback is nil when no closing "?>" is found.
    def instruction(name)
      scan(INSTRUCTION_END)
      @callback.instruction(name, self[1])
    end

    # Collects the contents of a "#{ ... }" interpolation, including nested
    # brace pairs, and reports it as text. +string+ is the already consumed
    # opener and is appended to in place.
    #
    # The closing brace itself is not consumed here (RUBY_INTERP_END is a
    # lookahead); it is left in the input for the next #run pass.
    #
    # Raises Stuck when the interpolation is never closed, instead of looping
    # forever without advancing.
    def ruby_interp(string)
      done = false

      until done
        if scan(RUBY_INTERP_TEXT)
          string << matched
        elsif scan(RUBY_INTERP_NEST)
          string << matched
        elsif scan(RUBY_INTERP_END)
          done = true
        else
          # End of input, or a "{" that is never closed: none of the patterns
          # can make progress from here.
          raise(Stuck, "Unterminated interpolation: %p" % self)
        end
      end

      @callback.text(string)
    end

    # Reads the attributes of a tag whose name was just scanned and reports
    # the tag. A self-closing tag ("/>") additionally triggers tag_end.
    def tag_start(name)
      original_attrs = {}
      value_attrs = {}

      while scan(TAG_PARAMETER)
        original_attrs[self[1]] = self[2] # gives 'href'=>'"foo"'
        value_attrs[self[1]] = self[4]    # gives 'href'=>'foo'
      end

      @callback.tag_start(name, original_attrs, value_attrs)
      return @callback.tag_end(name) if scan(TAG_CLOSING_END)
      scan(TAG_OPEN_END)
    end

    # Reports a closing tag.
    def tag_end(name)
      @callback.tag_end(name)
    end

    # Reports a run of plain text.
    def text(string)
      @callback.text(string)
    end

    # Reports a document type declaration.
    def doctype(string)
      @callback.doctype(string)
    end

    # Raised when the scanner cannot make progress on its input.
    # The superclass is found by ordinary constant lookup: a Nagoro::Error if
    # one is defined elsewhere in the project, otherwise StringScanner::Error.
    class Stuck < Error; end
  end
end