module Rouge
  module Lexers
    # Regex-driven lexer for Ruby source code.
    #
    # The lexer is a set of named states, each a list of rules tried in
    # order.  Besides the state stack it keeps one piece of instance state,
    # @heredoc_queue: the heredocs announced on the current line whose bodies
    # have not been consumed yet.
    class Ruby < RegexLexer
      desc "The Ruby programming language (ruby-lang.org)"
      tag 'ruby'
      aliases 'rb'
      filenames '*.rb', '*.ruby', '*.rbw', '*.rake', '*.gemspec',
                'Rakefile', 'Guardfile', 'Gemfile', 'Capfile',
                'Vagrantfile', '*.ru', '*.prawn'

      mimetypes 'text/x-ruby', 'application/x-ruby'

      # Returns 1 when the text starts with a ruby shebang, nil otherwise.
      def self.analyze_text(text)
        return 1 if text.shebang? 'ruby'
      end

      # Symbols, quoted strings and %-style literals.
      state :strings do
        # symbols
        rule %r(
          :  # initial :
          @{0,2} # optional ivar, for :@foo and :@@foo
          [a-z_]\w*[!?]? # the symbol
        )xi, 'Literal.String.Symbol'

        # special symbols
        rule %r(:(?:\*\*|[-+]@|[/\%&\|^`~]|\[\]=?|<<|>>|<=?>|<=?|===?)),
          'Literal.String.Symbol'

        rule /:'(\\\\|\\'|[^'])*'/, 'Literal.String.Symbol'
        rule /\b[a-z_]\w*?:\s+/, 'Literal.String.Symbol'
        rule /'(\\\\|\\'|[^'])*'/, 'Literal.String.Single'
        rule /:"/, 'Literal.String.Symbol', :simple_sym
        rule /"/, 'Literal.String.Double', :simple_string
        rule /(?<!\.)`/, 'Literal.String.Backtick', :simple_backtick

        # %-style delimiters
        # %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
        delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
        rule /%([rqswQWxiI])?([^\w\s])/ do |m|
          open = Regexp.escape(m[2])
          close = Regexp.escape(delimiter_map[m[2]] || m[2])
          interp = /[rQWxI]/ === m[1]
          toktype = 'Literal.String.Other'

          debug { " open: #{open.inspect}" }
          debug { " close: #{close.inspect}" }

          # regexes
          if m[1] == 'r'
            toktype = 'Literal.String.Regex'
            # pushed first, so it is entered once the literal's own
            # (anonymous) state below has been popped
            push :regex_flags
          end

          token toktype

          # anonymous state built for this particular delimiter pair
          push do
            rule /\\[##{open}#{close}\\]/, 'Literal.String.Escape'
            # nesting rules only with asymmetric delimiters
            if open != close
              rule /#{open}/ do
                token toktype
                push
              end
            end
            rule /#{close}/, toktype, :pop!

            if interp
              mixin :string_intp_escaped
              rule /#/, toktype
            else
              rule /[\\#]/, toktype
            end

            rule /[^##{open}#{close}\\]+/m, toktype
          end
        end
      end

      # Optional flag letters following a regex literal; always pops,
      # because the pattern also matches the empty string.
      state :regex_flags do
        rule /[mixounse]*/, 'Literal.String.Regex', :pop!
      end

      # double-quoted string and symbol
      [[:string, 'Literal.String.Double', '"'],
       [:sym, 'Literal.String.Symbol', '"'],
       [:backtick, 'Literal.String.Backtick', '`']].each do |name, tok, fin|
        state :"simple_#{name}" do
          mixin :string_intp_escaped
          rule /[^\\#{fin}#]+/m, tok
          rule /[\\#]/, tok
          rule /#{fin}/, tok, :pop!
        end
      end

      keywords = %w(
        BEGIN END alias begin break case defined\? do else elsif end
        ensure for if in next redo rescue raise retry return super then
        undef unless until when while yield
      )

      keywords_pseudo = %w(
        initialize new loop include extend raise attr_reader attr_writer
        attr_accessor attr catch throw private module_function public
        protected true false nil __FILE__ __LINE__
      )

      builtins_g = %w(
        Array Float Integer String __id__ __send__ abort ancestors
        at_exit autoload binding callcc caller catch chomp chop
        class_eval class_variables clone const_defined\? const_get
        const_missing const_set constants display dup eval exec exit
        extend fail fork format freeze getc gets global_variables gsub
        hash id included_modules inspect instance_eval instance_method
        instance_methods instance_variable_get instance_variable_set
        instance_variables lambda load local_variables loop method
        method_missing methods module_eval name object_id open p
        print printf private_class_method private_instance_methods
        private_methods proc protected_instance_methods
        protected_methods public_class_method public_instance_methods
        public_methods putc puts raise rand readline readlines require
        scan select self send set_trace_func singleton_methods sleep
        split sprintf srand sub syscall system taint test throw to_a
        to_s trace_var trap untaint untrace_var warn
      )

      # matched with a trailing `?`
      builtins_q = %w(
        autoload block_given const_defined eql equal frozen
        include instance_of is_a iterator kind_of method_defined
        nil private_method_defined protected_method_defined
        public_method_defined respond_to tainted
      )

      # matched with a trailing `!`
      builtins_b = %w(chomp chop exit gsub sub)

      # Every lex starts in expression position with no pending heredocs.
      start do
        push :expr_start
        @heredoc_queue = []
      end

      state :root do
        rule /\n\s*/m, 'Text', :expr_start
        rule /\s+/, 'Text' # NB: NOT /m
        rule /#.*$/, 'Comment.Single'

        # A block comment runs until an `=end` that begins a line, not
        # until the first word "end" inside the comment text.
        rule %r(=begin\b.*?\n=end\b)m, 'Comment.Multiline'
        rule /(?:#{keywords.join('|')})\b/, 'Keyword', :expr_start
        rule /(?:#{keywords_pseudo.join('|')})\b/, 'Keyword.Pseudo', :expr_start
        rule %r(
          (module)
          (\s+)
          ([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
        )x do
          group 'Keyword'
          group 'Text'
          group 'Name.Namespace'
        end

        rule /def\s+/, 'Keyword', :funcname
        rule /class\s+/, 'Keyword', :classname

        rule /(?:#{builtins_q.join('|')})\?/, 'Name.Builtin', :expr_start
        rule /(?:#{builtins_b.join('|')})!/, 'Name.Builtin', :expr_start
        rule /(?<!\.)(?:#{builtins_g.join('|')})\b/,
          'Name.Builtin', :method_call

        rule /__END__/, 'Comment.Preproc', :end_part

        rule /0_?[0-7]+(?:_[0-7]+)*/, 'Literal.Number.Oct'
        rule /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/, 'Literal.Number.Hex'
        rule /0b[01]+(?:_[01]+)*/, 'Literal.Number.Bin'
        rule /[\d]+(?:_\d+)*/, 'Literal.Number.Integer'

        # names
        rule /@@[a-z_]\w*/i, 'Name.Variable.Class'
        rule /@[a-z_]\w*/i, 'Name.Variable.Instance'
        rule /\$\w+/, 'Name.Variable.Global'
        rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), 'Name.Variable.Global'
        rule /\$-[0adFiIlpvw]/, 'Name.Variable.Global'
        rule /::/, 'Operator'

        mixin :strings

        # char operator.  ?x evaluates to "x", unless there's a digit
        # beforehand like x>=0?n[x]:""
        rule %r(
          \?(\\[MC]-)*     # modifiers
          (\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
          (?!\w)
        )x, 'Literal.String.Char'

        mixin :has_heredocs

        # `*` rather than `+`, so one-letter constants (A, T, ...) count too
        rule /[A-Z][a-zA-Z0-9_]*/, 'Name.Constant', :method_call

        # The hyphen is escaped so that `+-/` is not read as a character
        # range; as a range it also covered `,` and `.`, which made `..`
        # lex as a method name instead of reaching the operator rule below.
        rule /(\.|::)([a-z_]\w*[!?]?|[*%&^`~+\-\/\[<>=])/,
          'Name.Function', :expr_start
        rule /[a-zA-Z_]\w*[?!]/, 'Name', :expr_start
        rule /[a-zA-Z_]\w*/, 'Name', :method_call
        rule /\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\.{1,3}/,
          'Operator', :expr_start
        rule /[-+\/*%=<>&!^|~]=?/, 'Operator', :expr_start
        rule %r<[({,?:\\;/]>, 'Punctuation', :expr_start
        rule %r<[)}]>, 'Punctuation'
      end

      # Heredoc openers.  Each one is queued as [tolerant, name], where
      # tolerant is true for the `<<-` form; the bodies are consumed later
      # by :resolve_heredocs.
      state :has_heredocs do
        rule /(?<!\w)(<<-?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
          token 'Operator', m[1]
          token 'Name.Constant', "#{m[2]}#{m[3]}#{m[4]}"
          @heredoc_queue << [m[1] == '<<-', m[3]]
          push :heredoc_queue unless state? :heredoc_queue
        end

        # heredoc with an empty name, e.g. <<""
        rule /(<<-?)(["'])(\2)/ do |m|
          token 'Operator', m[1]
          # only three groups exist here, so just the two quotes are emitted
          token 'Name.Constant', "#{m[2]}#{m[3]}"
          @heredoc_queue << [m[1] == '<<-', '']
          push :heredoc_queue unless state? :heredoc_queue
        end
      end

      # Lex the remainder of the opener's line as ordinary code, then switch
      # to heredoc bodies just before the newline.
      state :heredoc_queue do
        rule /(?=\n)/ do
          pop!; push :resolve_heredocs
        end

        mixin :root
      end

      # Consumes heredoc bodies line by line until every queued heredoc has
      # met its terminator.
      state :resolve_heredocs do
        mixin :string_intp_escaped

        rule /(\n)([^#\\\n]*)$/ do |m|
          tolerant, heredoc_name = @heredoc_queue.first
          # `<<-` allows an indented terminator; plain `<<` does not
          check = tolerant ? m[2].strip : m[2].rstrip

          group 'Literal.String.Heredoc'

          # check if we found the end of the heredoc
          if check == heredoc_name
            group 'Name.Constant'
            @heredoc_queue.shift
            # if there's no more, we're done looking.
            pop! if @heredoc_queue.empty?
          else
            group 'Literal.String.Heredoc'
          end
        end

        rule /[#\\\n]/, 'Literal.String.Heredoc'
        rule /[^#\\\n]+/, 'Literal.String.Heredoc'
      end

      # What follows `def`: an optional `receiver.` and the method name,
      # which may be an operator.
      state :funcname do
        rule /\s+/, 'Text'
        rule /\(/, 'Punctuation', :defexpr
        rule %r(
          (?:([a-zA-Z_][\w_]*)(\.))?
          (
            [a-zA-Z_][\w_]*[!?]? |
            \*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
            << | >> | <=?> | <=? | >=? | ===?
          )
        )x do |m|
          debug { "matches: #{[m[0], m[1], m[2], m[3]].inspect}" }
          group 'Name.Class'
          group 'Operator'
          group 'Name.Function'
          pop!
        end

        rule(//) { pop! }
      end

      # What follows `class`: a class name, or `<<` for a singleton class.
      state :classname do
        rule /\s+/, 'Text'
        rule /\(/, 'Punctuation', :defexpr

        # class << expr
        rule /<</, 'Operator', :pop!
        rule /[A-Z_]\w*/, 'Name.Class'

        rule(//) { pop! }
      end

      # Parenthesised expression after `def` / `class`; nests on `(`.
      state :defexpr do
        rule /(\))(\.|::)?/ do
          group 'Punctuation'
          group 'Operator'
          pop!
        end
        rule /\(/, 'Operator', :defexpr
        mixin :root
      end

      # Code inside #{...}; ends at the closing brace.
      state :in_interp do
        rule /}/, 'Literal.String.Interpol', :pop!
        mixin :root
      end

      # Interpolation forms: #{expr}, #@ivar, #@@cvar, #$gvar.
      state :string_intp do
        rule /\#{/, 'Literal.String.Interpol', :in_interp
        rule /#(@@?|\$)[a-z_]\w*/i, 'Literal.String.Interpol'
      end

      # Interpolation plus backslash escapes.
      state :string_intp_escaped do
        mixin :string_intp
        rule /\\([\\abefnrstv#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})/,
          'Literal.String.Escape'
        rule /\\./, 'Literal.String.Escape'
      end

      # Entered after a name that may be a method call, to decide whether a
      # following `/` starts a regex argument or is the division operator.
      state :method_call do
        rule %r((\s+)(/)(?=\S|\s*/)) do
          group 'Text'
          group 'Literal.String.Regex'
          pop!
          push :slash_regex
        end

        # `/` seen but not in regex position: leave it for :root
        rule(%r((?=\s*/))) { pop! }

        rule(//) { pop!; push :expr_start }
      end

      # Entered wherever an expression may begin; here a `/` always opens a
      # regex literal.
      state :expr_start do
        rule %r((\s*)(/)) do
          group 'Text'
          group 'Literal.String.Regex'
          pop!
          push :slash_regex
        end

        # special case for using a single space.  Ruby demands that
        # these be in a single line, otherwise it would make no sense.
        rule /(\s*)(%[rqswQWxiI]? \S* )/ do
          group 'Text'
          group 'Literal.String.Other'
          pop!
        end

        rule(//) { pop! }
      end

      # Body of a /.../ regex literal, up to the closing slash and its flags.
      state :slash_regex do
        mixin :string_intp
        rule %r(\\\\), 'Literal.String.Regex'
        rule %r(\\/), 'Literal.String.Regex'
        rule %r([\\#]), 'Literal.String.Regex'
        rule %r([^\\/#]+)m, 'Literal.String.Regex'
        rule %r(/) do
          token 'Literal.String.Regex'
          pop!; push :regex_flags
        end
      end

      state :end_part do
        # eat up the rest of the stream as Comment.Preproc
        rule /.+/m, 'Comment.Preproc', :pop!
      end
    end
  end
end