*.groovy
//
/*
*/
\\(
# A literal backslash has been matched above; exactly one of the
# alternatives in this group must follow it.
# character escape
[nrtbf"'\\$] |
# unicode escape: the letter u followed by exactly four hexadecimal digits
u[0-9A-Fa-f]{4} |
# octal escape: one to three digits when the first digit is 0-3, otherwise
# one or two digits, so the largest value that can be matched is octal 377
[0-3] ([0-7] [0-7]?)? | [4-7] [0-7]?
)
[a-zA-Z\x{c0}-\x{d6}\x{d8}-\x{f6}\x{f8}-\x{ff}\x{100}-\x{fffe}_]
\%{escaped-character}
\\
(\$)
# one or more dollarless identifiers separated by dots
# first identifier: a letter followed by any number of letters or digits
\%{letter} (\%{letter} | \d)*
# zero or more further identifiers, each introduced by a literal dot
(\. \%{letter} (\%{letter} | \d)*)*
\$\{
\}
\$
'''
'''
'
'
"""
"""
"
"
# The initial slash must not be preceded by a token that can end an expression.
# Otherwise, it's interpreted as a division operator, not the start of a slashy string.
# It'd be complicated to verify whole tokens with a regex, but the last non-space
# character is a good indicator by itself. We can use a negative lookbehind assertion
# to verify that it's not a character that an expression-ending token can end with.
# Trouble is, a lookbehind assertion has to consist of fixed-length alternatives, so
# we can't have it match an arbitrary amount of whitespace. Thus, we do an approximate
# check, only trying zero and one spaces.
# First row of alternatives: the slash directly follows an increment or decrement
# operator, a closing bracket, parenthesis or brace, a double quote, a digit, or
# a letter.
(?<! \+\+ | -- | [])}"\d] | \%{letter} |
# Second row: the same endings followed by exactly one whitespace character.
\+\+\s | --\s | [])}"\d]\s | \%{letter}\s )
/
/
\\/
\$/
# Dollar slashy strings can't be empty (a would-be empty one is parsed
# as a dollar followed by a single-line comment instead).
# The negative lookahead below therefore rejects the match when the text
# that follows immediately is a slash and a dollar sign.
(?! /\$)
/\$
\$[$/]
\b (
# floating-point
# Every digit run, including the one in the exponent, must begin and end
# with a digit; underscores are only accepted between digits.
\d ([\d_]* \d)? (
# fractional part, optional exponent, optional type suffix
\. \d ([\d_]* \d)? ([eE] [+-]? \d ([\d_]* \d)?)? [dDfFgG]? |
# no fractional part: mandatory exponent, optional type suffix
[eE] [+-]? \d ([\d_]* \d)? [dDfFgG]? |
# no fractional part and no exponent: the suffix alone marks a float
[dDfF]
) |
# integer
(
0 | # decimal zero
0[bB] [01] ([01_]* [01])? | # binary
0 [0-7] ([0-7_]* [0-7])? | # octal
[1-9] ([\d_]* \d)? | # decimal
0[xX] [\da-fA-F] ([\da-fA-F_]* [\da-fA-F])? # hexadecimal
) [iIlLgG]?
) \b
import
package
class
enum
extends
implements
interface
native
throws
trait
boolean
byte
char
def
double
float
int
long
short
void
abstract
const
final
static
strictfp
synchronized
transient
volatile
private
protected
public
assert
break
case
catch
continue
default
do
else
finally
for
goto
if
return
throw
switch
try
while
as
in
instanceof
new
super
this
null
false
true