# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Regex-driven lexer for Rust source.
    #
    # Lexing begins in :bol (pushed by the `start` hook) on top of :root.
    # Macro invocations get their own states (:macro / :macro_rules), which
    # stay active until the per-delimiter counters kept in @macro_delims are
    # all back at zero.
    class Rust < RegexLexer
      title "Rust"
      desc 'The Rust programming language (rust-lang.org)'
      tag 'rust'
      aliases 'rs',
        # So that directives from https://github.com/budziq/rust-skeptic
        # do not prevent highlighting.
        'rust,no_run', 'rs,no_run',
        'rust,ignore', 'rs,ignore',
        'rust,should_panic', 'rs,should_panic'
      filenames '*.rs'
      mimetypes 'text/x-rust'

      class << self
        # Returns true when `text.shebang?('rustc')` holds, nil otherwise.
        def detect?(text)
          true if text.shebang?('rustc')
        end

        # Words the :root state tags as Keyword. Built once and memoized on
        # the class.
        def keywords
          @keywords ||= %w[
            as async await break const continue crate dyn else enum extern false
            fn for if impl in let log loop match mod move mut pub ref return self
            Self static struct super trait true type unsafe use where while
            abstract become box do final macro
            override priv typeof unsized virtual
            yield try
            union
          ]
        end

        # Identifiers the :root state tags as Name::Builtin rather than Name.
        # Built once and memoized on the class.
        def builtins
          @builtins ||= Set.new(%w[
            Add BitAnd BitOr BitXor bool c_char c_double c_float char c_int
            clock_t c_long c_longlong Cons Const Copy c_schar c_short c_uchar
            c_uint c_ulong c_ulonglong c_ushort c_void dev_t DIR dirent Div
            Either Eq Err f32 f64 Failure FILE float fpos_t
            i16 i32 i64 i8 isize Index ino_t int intptr_t Left mode_t Modulo Mul
            Neg Nil None Num off_t Ok Option Ord Owned pid_t Ptr ptrdiff_t
            Right Send Shl Shr size_t Some ssize_t str Sub Success time_t
            u16 u32 u64 u8 usize uint uintptr_t
            Box Vec String Gc Rc Arc
            u128 i128 Result Sync Pin Unpin Sized Drop drop Fn FnMut FnOnce
            Clone PartialEq PartialOrd AsMut AsRef From Into Default
            DoubleEndedIterator ExactSizeIterator Extend IntoIterator Iterator
          ])
        end
      end

      # True when every counter in @macro_delims is zero, i.e. each opening
      # delimiter counted by the :macro state has been matched by its closer.
      def macro_closed?
        @macro_delims.each_value.all?(&:zero?)
      end

      # Reset the delimiter counters (keyed by closing delimiter) and begin
      # in the beginning-of-line state.
      start do
        @macro_delims = { ']' => 0, ')' => 0, '}' => 0 }
        push :bol
      end

      # Opening delimiter => the closing delimiter used as counter key.
      closer_for = { '[' => ']', '(' => ')', '{' => '}' }

      # Shared regex fragments, interpolated into the rules below.
      ident = /[\p{XID_Start}_]\p{XID_Continue}*/
      hex_digit = /[0-9a-f]/i
      escape_seq = %r(
        \\ ([nrt'"\\0] | x#{hex_digit}{2} | u\{(#{hex_digit}_*){1,6}\})
      )x

      int_width = /8|16|32|64|128|size/

      # Although not officially part of Rust, the rustdoc tool allows lines of
      # code in comments to begin with `#`. Such code is evaluated but left
      # out of the HTML output produced by rustdoc. So that code intended for
      # these comments can be highlighted with Rouge, this state is entered at
      # the start of input and after each newline matched by :root: a leading
      # `# ...` line becomes Comment::Special, and anything else falls through
      # the empty pattern, which always matches and pops the state.
      state :bol do
        mixin :whitespace
        rule %r/#\s[^\n]*/, Comment::Special
        rule(//) { pop! }
      end

      # Inside `#[...]` / `#![...]`: everything except literals and whitespace
      # is a decorator, and the closing `]` returns to the previous state.
      state :attribute do
        mixin :whitespace
        mixin :has_literals
        rule %r/[(,)=:]/, Name::Decorator
        rule %r/\]/, Name::Decorator, :pop!
        rule ident, Name::Decorator
      end

      state :whitespace do
        rule %r/\s+/, Text
        mixin :comments
      end

      state :comments do
        # Only exactly three slashes start a doc comment; `////` and longer
        # runs are normal comments again. This rule has to come before the
        # doc-comment rule below, which would otherwise match the first three
        # characters of such a run.
        rule %r(////+[^\n]*), Comment::Single

        # doc line comments: either inner (`//!`) or outer (`///`).
        rule %r(//[/!][^\n]*), Comment::Doc

        # otherwise, `//` is just a plain line comment.
        rule %r(//[^\n]*), Comment::Single

        # `/**/` and `/***/` are self-closing block comments, not doc.
        # Because they close immediately, no nested-comment state is entered.
        rule %r(/\*\*\*?/), Comment::Multiline

        # three or more stars open a normal, non-doc block comment.
        rule %r(/\*\*\*+), Comment::Multiline, :nested_plain_block

        # `/*!` and `/**` begin doc comments. These nest and may contain
        # block/doc comments, which still count as documentation.
        rule %r(/[*][*!]), Comment::Doc, :nested_doc_block

        # any other `/*` is a plain multiline comment.
        rule %r(/[*]), Comment::Multiline, :nested_plain_block
      end

      # Block comments nest fully, documentation ones included, and the
      # outermost comment decides the token type of everything inside it:
      #
      # - Inside a block doc comment everything is documentation, even a
      #   nested non-doc block comment: `/** /* still docs */ */`
      # - Inside a plain block comment everything is a plain comment, even a
      #   nested doc block comment: `/* /** not docs */ */`
      #
      # Line comments have no effect inside block comments.
      #
      # One state per kind is generated here, which is simpler than tracking
      # the kind in instance variables.
      {
        nested_plain_block: Comment::Multiline,
        nested_doc_block: Comment::Doc
      }.each do |state_name, comment_token|
        state state_name do
          rule %r(\*/), comment_token, :pop!
          # A nested opener re-enters the same state, keeping the token type.
          rule %r(/\*), comment_token, state_name
          # Consume at most one `*` or `/` at a time so that the two rules
          # above get a chance to match; everything else is taken in bulk.
          rule %r([^*/]+|[*/]), comment_token
        end
      end

      state :root do
        # Every newline re-enters :bol so that `# ` lines can be recognized.
        rule %r/\n/, Text, :bol
        mixin :whitespace
        rule %r/#!?\[/, Name::Decorator, :attribute
        rule %r/\b(?:#{Rust.keywords.join('|')})\b/, Keyword
        mixin :has_literals

        rule %r([=-]>), Keyword
        rule %r(<->), Keyword
        rule %r/[()\[\]{}|,:;]/, Punctuation
        rule %r/[*\/!@~&+%^<>=\?-]|\.{2,3}/, Operator

        # An identifier (optionally after a dot) followed by `(` is a call.
        rule %r/([.]\s*)?#{ident}(?=\s*[(])/m, Name::Function
        rule %r/[.]\s*await\b/, Keyword
        rule %r/[.]\s*#{ident}/, Name::Property
        rule %r/[.]\s*\d+/, Name::Attribute
        rule %r/(#{ident})(::)/m do
          groups Name::Namespace, Punctuation
        end

        # macros
        rule %r/\bmacro_rules!/, Name::Decorator, :macro_rules
        rule %r/#{ident}!/, Name::Decorator, :macro

        rule %r/'static\b/, Keyword
        rule %r/'#{ident}/, Name::Variable
        # Remaining identifiers: builtin if listed in Rust.builtins.
        rule %r/#{ident}/ do |m|
          token(self.class.builtins.include?(m[0]) ? Name::Builtin : Name)
        end
      end

      state :macro do
        mixin :has_literals

        # Opening delimiter: bump the counter of its matching closer.
        rule %r/[\[{(]/ do |m|
          @macro_delims[closer_for[m[0]]] += 1
          puts " macro_delims: #{@macro_delims.inspect}" if @debug
          token Punctuation
        end

        # Closing delimiter: decrement its counter and leave the macro state
        # once all counters are back at zero.
        rule %r/[\]})]/ do |m|
          @macro_delims[m[0]] -= 1
          puts " macro_delims: #{@macro_delims.inspect}" if @debug
          pop! if macro_closed?
          token Punctuation
        end

        # same as the rule in root, but don't push another macro state
        rule %r/#{ident}!/, Name::Decorator
        mixin :root

        # No syntax errors in macros: any leftover character is plain text.
        rule %r/./, Text
      end

      # `macro_rules!` bodies: `$name`, `$name:kind` and bare `$` are
      # variables; everything else is lexed as in :macro.
      state :macro_rules do
        rule %r/[$]#{ident}(:#{ident})?/, Name::Variable
        rule %r/[$]/, Name::Variable

        mixin :macro
      end

      state :has_literals do
        # constants
        rule %r/\b(?:true|false)\b/, Keyword::Constant

        # characters/bytes
        rule %r(
          b?' (?: #{escape_seq} | [^\\] ) '
        )x, Str::Char

        rule %r/b?"/, Str, :string
        # raw strings: the closing quote must carry the same number of `#`
        rule %r/b?r(#*)".*?"\1/m, Str

        # numbers
        fraction = /[.][0-9][0-9_]*/
        exponent = /[eE][-+]?[0-9_]+/
        float_suffix = /f32|f64/

        rule %r(
          [0-9][0-9_]*
          (#{fraction}  #{exponent}? #{float_suffix}?
          |#{fraction}? #{exponent}  #{float_suffix}?
          |#{fraction}? #{exponent}? #{float_suffix}
          |[.](?![._\p{XID_Start}])
          )
        )x, Num::Float

        rule %r(
          (
            0b[10_]+
            | 0x[0-9a-fA-F_]+
            | 0o[0-7_]+
            | [0-9][0-9_]*
          ) (u#{int_width}?|i#{int_width})?
        )x, Num::Integer
      end

      # Body of a (byte) string literal, up to the closing quote.
      state :string do
        rule %r/"/, Str, :pop!
        rule escape_seq, Str::Escape
        rule %r/[^"\\]+/m, Str
      end
    end
  end
end