lib/rouge/lexers/rust.rb in rouge-3.26.1 vs lib/rouge/lexers/rust.rb in rouge-3.27.0
- old
+ new
@@ -20,14 +20,14 @@
return true if text.shebang? 'rustc'
end
def self.keywords
@keywords ||= %w(
- as assert async await break const continue copy do drop else enum extern
- fail false fn for if impl let log loop match mod move mut priv pub pure
- ref return self static struct true trait type unsafe use where
- while box
+ as assert async await break crate const continue copy do drop dyn else enum extern
+ fail false fn for if impl let log loop macro match mod move mut priv pub pure
+ ref return self Self static struct super true try trait type union unsafe use
+ where while yield box
)
end
def self.builtins
@builtins ||= Set.new %w(
@@ -38,10 +38,13 @@
i16 i32 i64 i8 isize Index ino_t int intptr_t Left mode_t Modulo Mul
Neg Nil None Num off_t Ok Option Ord Owned pid_t Ptr ptrdiff_t
Right Send Shl Shr size_t Some ssize_t str Sub Success time_t
u16 u32 u64 u8 usize uint uintptr_t
Box Vec String Gc Rc Arc
+ u128 i128 Result Sync Pin Unpin Sized Drop drop Fn FnMut FnOnce
+ Clone PartialEq PartialOrd AsMut AsRef From Into Default
+ DoubleEndedIterator ExactSizeIterator Extend IntoIterator Iterator
)
end
def macro_closed?
@macro_delims.values.all?(&:zero?)
@@ -52,16 +55,16 @@
push :bol
}
delim_map = { '[' => ']', '(' => ')', '{' => '}' }
- id = /[a-z_]\w*/i
+ id = /[\p{XID_Start}_]\p{XID_Continue}*/
hex = /[0-9a-f]/i
escapes = %r(
- \\ ([nrt'"\\0] | x#{hex}{2} | u#{hex}{4} | U#{hex}{8})
+ \\ ([nrt'"\\0] | x#{hex}{2} | u\{(#{hex}_*){1,6}\})
)x
- size = /8|16|32|64/
+ size = /8|16|32|64|128|size/
# Although not officially part of Rust, the rustdoc tool allows code in
# comments to begin with `#`. Code like this will be evaluated but not
# included in the HTML output produced by rustdoc. So that code intended
# for these comments can be higlighted with Rouge, the Rust lexer needs
@@ -80,14 +83,68 @@
rule id, Name::Decorator
end
state :whitespace do
rule %r/\s+/, Text
- rule %r(//[^\n]*), Comment
- rule %r(/[*].*?[*]/)m, Comment::Multiline
+ mixin :comments
end
+ state :comments do
+ # Only 3 slashes are doc comments, `////` and beyond become normal
+ # comments again (for some reason), so match this before the
+ # doc line comments rather than figure out a
+ rule %r(////+[^\n]*), Comment::Single
+ # doc line comments — either inner (`//!`), or outer (`///`).
+ rule %r(//[/!][^\n]*), Comment::Doc
+ # otherwise, `//` is just a plain line comme
+ rule %r(//[^\n]*), Comment::Single
+ # /**/ and /***/ are self-closing block comments, not doc. Because this
+ # is self-closing, it doesn't enter the states for nested comments
+ rule %r(/\*\*\*?/), Comment::Multiline
+ # 3+ stars and it's a normal non-doc block comment.
+ rule %r(/\*\*\*+), Comment::Multiline, :nested_plain_block
+ # `/*!` and `/**` begin doc comments. These nest and can have internal
+ # block/doc comments, but they're still part of the documentation
+ # inside.
+ rule %r(/[*][*!]), Comment::Doc, :nested_doc_block
+ # any other /* is a plain multiline comment
+ rule %r(/[*]), Comment::Multiline, :nested_plain_block
+ end
+
+ # Multiline/block comments fully nest. This is true for ones that are
+ # marked as documentation too. The behavior here is:
+ #
+ # - Anything inside a block doc comment is still included in the
+ # documentation, even if it's a nested non-doc block comment. For
+ # example: `/** /* still docs */ */`
+ # - Anything inside of a block non-doc comment is still just a normal
+ # comment, even if it's a nested block documentation comment. For
+ # example: `/* /** not docs */ */`
+ #
+ # This basically means: if (on the outermost level) the comment starts as
+ # one kind of block comment (either doc/non-doc), then everything inside
+ # of it, including nested block comments of the opposite type, needs to
+ # stay that type.
+ #
+ # Also note that single line comments do nothing anywhere inside of block
+ # comments, thankfully.
+ #
+ # We just define this as two states, because this seems easier than
+ # tracking it with instance vars.
+ [
+ [:nested_plain_block, Comment::Multiline],
+ [:nested_doc_block, Comment::Doc]
+ ].each do |state_name, comment_token|
+ state state_name do
+ rule %r(\*/), comment_token, :pop!
+ rule %r(/\*), comment_token, state_name
+ # We only want to eat at most one `[*/]` at a time,
+ # but we can skip past non-`[*/]` in bulk.
+ rule %r([^*/]+|[*/]), comment_token
+ end
+ end
+
state :root do
rule %r/\n/, Text, :bol
mixin :whitespace
rule %r/#!?\[/, Name::Decorator, :attribute
rule %r/\b(?:#{Rust.keywords.join('|')})\b/, Keyword
@@ -108,10 +165,11 @@
# macros
rule %r/\bmacro_rules!/, Name::Decorator, :macro_rules
rule %r/#{id}!/, Name::Decorator, :macro
+ rule %r/'static\b/, Keyword
rule %r/'#{id}/, Name::Variable
rule %r/#{id}/ do |m|
name = m[0]
if self.class.builtins.include? name
token Name::Builtin
@@ -153,35 +211,36 @@
end
state :has_literals do
# constants
rule %r/\b(?:true|false|nil)\b/, Keyword::Constant
- # characters
+ # characters/bytes
rule %r(
- ' (?: #{escapes} | [^\\] ) '
+ b?' (?: #{escapes} | [^\\] ) '
)x, Str::Char
- rule %r/"/, Str, :string
- rule %r/r(#*)".*?"\1/m, Str
+ rule %r/b?"/, Str, :string
+ rule %r/b?r(#*)".*?"\1/m, Str
# numbers
- dot = /[.][0-9_]+/
- exp = /e[-+]?[0-9_]+/
+ dot = /[.][0-9][0-9_]*/
+ exp = /[eE][-+]?[0-9_]+/
flt = /f32|f64/
rule %r(
- [0-9_]+
+ [0-9][0-9_]*
(#{dot} #{exp}? #{flt}?
|#{dot}? #{exp} #{flt}?
|#{dot}? #{exp}? #{flt}
+ |[.](?![._\p{XID_Start}])
)
)x, Num::Float
rule %r(
( 0b[10_]+
| 0x[0-9a-fA-F_]+
- | 0o[0-7]+
- | [0-9_]+
+ | 0o[0-7_]+
+ | [0-9][0-9_]*
) (u#{size}?|i#{size})?
)x, Num::Integer
end