# # _easyscanner.rb - simple scanner for LangScan # # Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org> # All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # You can redistribute it and/or modify it under the terms of # the GNU General Public License version 2. # require 'langscan/_common' module LangScan class EasyScanner def initialize(pattern, types, keywords) @pattern = pattern # build regexp regexp = "(\n)|" + @pattern.map {|v| "(" + v[1] + ")"}.join("|") @regexp = Regexp.new(regexp) # build type hash @type = {} types.each {|k| @type[k] = true } # build keyword hash @keyword = {} keywords.each {|k| @keyword[k] = true } end def scan(input, &block) lineno = 0 offset = 0 while match = @regexp.match(input[offset..-1]) if match[1] # newline lineno += 1 offset += match.end(0) else for i in 2..match.size-1 if match[i] type = @pattern[i-2][0] byteno = offset + match.begin(0) if @pattern[i-2][2] # pattern with terminator start = offset + match.end(0) end_match = input[start..-1].match(@pattern[i-2][2]) if end_match offset = start + end_match.end(0) else # not terminated! what should we do? offset = start end else # simple pattern offset += match.end(0) end text = input[byteno..offset-1] if type == :ident case true when @type[text] type = :type when @keyword[text] type = :keyword end end yield(Fragment.new(type, text, lineno, byteno)) lineno += text.count("\n") break end end end end end end end