module CodeRay module Scanners module Ruby::Patterns # :nodoc: RESERVED_WORDS = %w[ and def end in or unless begin defined? ensure module redo super until BEGIN break do next rescue then when END case else for retry while alias class elsif if not return undef yield ] DEF_KEYWORDS = %w[ def ] UNDEF_KEYWORDS = %w[ undef ] ALIAS_KEYWORDS = %w[ alias ] MODULE_KEYWORDS = %w[class module] DEF_NEW_STATE = WordList.new(:initial). add(DEF_KEYWORDS, :def_expected). add(UNDEF_KEYWORDS, :undef_expected). add(ALIAS_KEYWORDS, :alias_expected). add(MODULE_KEYWORDS, :module_expected) PREDEFINED_CONSTANTS = %w[ nil true false self DATA ARGV ARGF __FILE__ __LINE__ ] IDENT_KIND = WordList.new(:ident). add(RESERVED_WORDS, :reserved). add(PREDEFINED_CONSTANTS, :pre_constant) IDENT = /[a-z_][\w_]*/i METHOD_NAME = / #{IDENT} [?!]? /ox METHOD_NAME_OPERATOR = / \*\*? # multiplication and power | [-+~]@? # plus, minus, tilde with and without @ | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system` | \[\]=? # array getter and setter | << | >> # append or shift left, shift right | <=?>? | >=? # comparison, rocket operator | ===? | =~ # simple equality, case equality, match | ![~=@]? # negation with and without @, not-equal and not-match /ox METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox INSTANCE_VARIABLE = / @ #{IDENT} /ox CLASS_VARIABLE = / @@ #{IDENT} /ox OBJECT_VARIABLE = / @@? #{IDENT} /ox GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox QUOTE_TO_TYPE = { '`' => :shell, '/'=> :regexp, } QUOTE_TO_TYPE.default = :string REGEXP_MODIFIERS = /[mixounse]*/ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ DECIMAL = /\d+(?:_\d+)*/ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ BINARY = /0b[01]+(?:_[01]+)*/ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox SYMBOL = / : (?: #{METHOD_NAME_EX} | #{PREFIX_VARIABLE} | ['"] ) /ox METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox SIMPLE_ESCAPE = / [abefnrstv] | [0-7]{1,3} | x[0-9A-Fa-f]{1,2} | .? /mx CONTROL_META_ESCAPE = / (?: M-|C-|c ) (?: \\ (?: M-|C-|c ) )* (?: [^\\] | \\ #{SIMPLE_ESCAPE} )? /mox ESCAPE = / #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE} /mox CHARACTER = / \? (?: [^\s\\] | \\ #{ESCAPE} ) /mox # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / << (-)? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | ( ["'`\/] ) # $3 = quote, type ( [^\n]*? ) \3 # $4 = delim ) /mx RUBYDOC = / =begin (?!\S) .*? (?: \Z | ^=end (?!\S) [^\n]* ) /mx DATA = / __END__$ .*? (?: \Z | (?=^\#CODE) ) /mx # Checks for a valid value to follow. This enables # value_expected in method calls without parentheses. VALUE_FOLLOWS = / (?>[ \t\f\v]+) (?: [%\/][^\s=] | <<-?\S | [-+] \d | #{CHARACTER} ) /x RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x # FIXME: \s and = are only a workaround, they are still allowed # as delimiters. FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx FancyStringType = { 'q' => [:string, false], 'Q' => [:string, true], 'r' => [:regexp, true], 's' => [:symbol, false], 'x' => [:shell, true] } FancyStringType['w'] = FancyStringType['q'] FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] class StringState < Struct.new :type, :interpreted, :delim, :heredoc, :paren, :paren_depth, :pattern, :next_state CLOSING_PAREN = Hash[ *%w[ ( ) [ ] < > { } ] ] CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << OPENING_PAREN = CLOSING_PAREN.invert STRING_PATTERN = Hash.new { |h, k| delim, interpreted = *k delim_pattern = Regexp.escape(delim) if closing_paren = CLOSING_PAREN[delim] delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix delim_pattern << Regexp.escape(closing_paren) end special_escapes = case interpreted when :regexp_symbols '| ' + REGEXP_SYMBOLS.source when :words '| \s' end h[k] = if interpreted and not delim == '#' / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx else / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx end } HEREDOC_PATTERN = Hash.new { |h, k| delim, interpreted, indented = *k delim_pattern = Regexp.escape(delim.dup) delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x h[k] = if interpreted / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc else / (?= #{delim_pattern}() | \\ ) /mx end } def initialize kind, interpreted, delim, heredoc = false if heredoc pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] delim = nil else pattern = STRING_PATTERN[ [delim, interpreted] ] if paren = CLOSING_PAREN[delim] delim, paren = paren, delim paren_depth = 1 end end super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial end end unless defined? StringState end end end