lib/rouge/lexers/python.rb in rouge-3.17.0 vs lib/rouge/lexers/python.rb in rouge-3.18.0

- old
+ new

@@ -67,26 +67,30 @@ ) end identifier = /[a-z_][a-z0-9_]*/i dotted_identifier = /[a-z_.][a-z0-9_.]*/i + + def current_string + @string_register ||= StringRegister.new + end + state :root do rule %r/\n+/m, Text rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do groups Punctuation, Text, Str::Doc end rule %r/[^\S\n]+/, Text rule %r(#(.*)?\n?), Comment::Single - rule %r/[\[\]{}:(),;]/, Punctuation + rule %r/[\[\]{}:(),;.]/, Punctuation rule %r/\\\n/, Text rule %r/\\/, Text rule %r/(in|is|and|or|not)\b/, Operator::Word rule %r/(<<|>>|\/\/|\*\*)=?/, Operator rule %r/[-~+\/*%=<>&^|@]=?|!=/, Operator - rule %r/\.(?![0-9])/, Operator # so it doesn't match float literals rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(import)/ do groups Keyword::Namespace, Text, Name::Namespace, @@ -108,18 +112,15 @@ push :classname end # TODO: not in python 3 rule %r/`.*?`/, Str::Backtick - rule %r/(?:r|ur|ru)"""/i, Str, :raw_tdqs - rule %r/(?:r|ur|ru)'''/i, Str, :raw_tsqs - rule %r/(?:r|ur|ru)"/i, Str, :raw_dqs - rule %r/(?:r|ur|ru)'/i, Str, :raw_sqs - rule %r/u?"""/i, Str, :tdqs - rule %r/u?'''/i, Str, :tsqs - rule %r/u?"/i, Str, :dqs - rule %r/u?'/i, Str, :sqs + rule %r/([rfbu]{0,2})('''|"""|['"])/i do |m| + token Str + current_string.register type: m[1].downcase, delim: m[2] + push :generic_string + end rule %r/@#{dotted_identifier}/i, Name::Decorator # using negative lookbehind so we don't match property names rule %r/(?<!\.)#{identifier}/ do |m| @@ -171,78 +172,86 @@ state :yield do mixin :raise end - state :strings do - rule %r/%(\([a-z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?/i, Str::Interpol - end + state :generic_string do + rule %r/[^'"\\{]+/, Str + rule %r/{{/, Str - state :strings_double do - rule %r/[^\\"%\n]+/, Str - mixin :strings - end + rule %r/'''|"""|['"]/ do |m| + token Str + if current_string.delim? m[0] + current_string.remove + pop! + end + end - state :strings_single do - rule %r/[^\\'%\n]+/, Str - mixin :strings - end + rule %r/\\/ do |m| + if current_string.type? "r" + token Str + else + token Str::Interpol + end + push :generic_escape + end - state :nl do - rule %r/\n/, Str + rule %r/{/ do |m| + if current_string.type? "f" + token Str::Interpol + push :generic_interpol + else + token Str + end + end end - state :escape do - rule %r(\\ + state :generic_escape do + rule %r( ( [\\abfnrtv"'] | \n | N{[a-zA-Z][a-zA-Z ]+[a-zA-Z]} | u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | x[a-fA-F0-9]{2} | [0-7]{1,3} ) - )x, Str::Escape + )x do + if current_string.type? "r" + token Str + else + token Str::Escape + end + pop! + end end - state :raw_escape do - rule %r/\\./, Str + state :generic_interpol do + rule %r/[^{}]+/ do |m| + recurse m[0] + end + rule %r/{/, Str::Interpol, :generic_interpol + rule %r/}/, Str::Interpol, :pop! end - state :dqs do - rule %r/"/, Str, :pop! - mixin :escape - mixin :strings_double - end + class StringRegister < Array + def delim?(delim) + self.last[1] == delim + end - state :sqs do - rule %r/'/, Str, :pop! - mixin :escape - mixin :strings_single - end + def register(type: "u", delim: "'") + self.push [type, delim] + end - state :tdqs do - rule %r/"""/, Str, :pop! - rule %r/"/, Str - mixin :escape - mixin :strings_double - mixin :nl - end + def remove + self.pop + end - state :tsqs do - rule %r/'''/, Str, :pop! - rule %r/'/, Str - mixin :escape - mixin :strings_single - mixin :nl - end - - %w(tdqs tsqs dqs sqs).each do |qtype| - state :"raw_#{qtype}" do - mixin :raw_escape - mixin :"#{qtype}" + def type?(type) + self.last[0].include? type end end + private_constant :StringRegister end end end