#!/usr/bin/ruby =begin legalia rubylexer - a ruby lexer written in ruby Copyright (C) 2004,2005,2008, 2011 Caleb Clausen This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA =end $Debug=true require "rubylexer" require "getoptlong" require "pp" class RubyLexer class Token def verify_offset(fd,lexer); false end def check_for_error; end end class LexerError'%x', '['=>'%w', '{'=>'%W', '"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'} def verify_offset(fd,lexer) fd.read(open.size)==open or return false # str=fd.read(2) # @char==str[0,1] or FANCY_QUOTE_BEGINNINGS[@char]===str or return false verify_subtoken_offsets(fd,lexer) end def verify_subtoken_offsets(fd,lexer) #verify offsets of subtokens @elems.each{|elem| case elem when String #get string data to compare against, #translating dos newlines to unix. #(buffer mgt is a PITA) goal=elem.size saw=fd.read(goal) saw.gsub!("\r\n","\n") now_at=nil loop do now_at=saw.size saw.chomp!("\r") and fd.pos-=1 and now_at-=1 break if now_at>=goal more=fd.read([goal-now_at,2].max) more.gsub!("\r\n","\n") saw<= #{tok.offset} token #{tok.to_s.gsub("\n","\n ")}:#{tok.class}" end file.pos=tok.offset unless tok.verify_offset(file,self) @offset_failures ||= 0 @offset_failures += 1 @offset_first_failure ||= tok end case tok when RubyLexer::StringToken,RubyLexer::NumberToken, RubyLexer::HereBodyToken,RubyLexer::SymbolToken, RubyLexer::HerePlaceholderToken, RubyLexer::FileAndLineToken #do nothing else file.pos==endpos or allow_ooo or $stderr.puts "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok.to_s.gsub("\n","\n ") }" end file.pos=oldpos return end end def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout) input ||= File.open(name) if output!=$stdout output=File.open(output,'w') end input=input.read if IO===input and not File===input fd=input #File.open(name) {|fd| lxr=lexertype.new(name,fd,1) begin tok=lxr.get1token lxr.check_offset(tok) tok.check_for_error pprinter.pprint(tok,output) end until RubyLexer::EoiToken===tok if lxr.offset_failures first=lxr.offset_first_failure $stderr.puts "failed to check offset in #{lxr.offset_failures} cases. first=#{first.class}: #{first.to_s.gsub("\n","\n ")} at #{first.offset}" end #hack for SimpleTokenPrinter.... print "\n" if RubyLexer::NewlineToken===lxr.last_operative_token and RubyLexer::SimpleTokenPrinter===pprinter # unless lxr.balanced_braces? # raise "unbalanced braces at eof" # end #} output.close unless output==$stdout end #$ShowImplicit=false if __FILE__==$0 sep,line,showzw='',1,0 # lexertype= RumaLexer if defined? RumaLexer lexertype=RubyLexer insertnils=fd=name=loop=nil pprinter=RubyLexer::SimpleTokenPrinter opts=GetoptLong.new \ ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT], # ["--ruby","-r", GetoptLong::NO_ARGUMENT], ["--keepws","-k", GetoptLong::NO_ARGUMENT], ["--maxws","-m", GetoptLong::NO_ARGUMENT], ["--implicit","-i", GetoptLong::NO_ARGUMENT], ["--implicit-all", GetoptLong::NO_ARGUMENT], ["--loop", GetoptLong::NO_ARGUMENT] saweval=nil opts.each do|opt,arg| case opt when '--eval' then tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg) saweval=arg # when '--ruby' then lexertype=RubyLexer when '--keepws' then pprinter= RubyLexer::KeepWsTokenPrinter when '--maxws' then pprinter= RubyLexer::KeepWsTokenPrinter;sep=' ' when '--implicit' then showzw=1 when '--implicit-all' then showzw=2 when '--loop' then loop=true else raise :impossible end end pprinter =pprinter.new(sep,line,showzw) begin if ARGV.empty? saweval ? tokentest('-e',lexertype,pprinter,saweval) : tokentest('-',lexertype,pprinter,$stdin) else ARGV.each{|fn| tokentest(fn,lexertype,pprinter) } end # ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer #filename with _rb are special hack end while loop end