coq.rb in rouge-4.0.1

- old
+ new

@@ -23,11 +23,11 @@
           Ltac Require Import Export Module Section End Variable
           Context Polymorphic Monomorphic Universe Universes
           Variables Class Instance Global Local Include
           Printing Notation Infix Arguments Hint Rewrite Immediate
           Qed Defined Opaque Transparent Existing
-          Compute Eval Print SearchAbout Search About Check
+          Compute Eval Print SearchAbout Search About Check Admitted
         )
       end
 
       def self.ltac
         @ltac ||= Set.new %w(
@@ -54,20 +54,10 @@
           omega solve congruence reflexivity exact
           assumption eassumption
         )
       end
 
-      def self.keyopts
-        @keyopts ||= Set.new %w(
-          := => -> /\\ \\/ _ ; :> : ⇒ → ↔ ⇔ ≔ ≡ ∀ ∃ ∧ ∨ ¬ ⊤ ⊥ ⊢ ⊨ ∈
-        )
-      end
-
-      def self.end_sentence
-        @end_sentence ||= Punctuation::Indicator
-      end
-
       def self.classify(x)
         if self.coq.include? x
           return Keyword
         elsif self.gallina.include? x
           return Keyword::Reserved
@@ -80,112 +70,186 @@
         else
           return Name::Constant
         end
       end
 
-      operator = %r([\[\];,{}_()!$%&*+./:<=>?@^|~#-]+)
-      id = /(?:[a-z][\w']*)|(?:[_a-z][\w']+)/i
-      dot_id = /\.((?:[a-z][\w']*)|(?:[_a-z][\w']+))/i
+      # https://github.com/coq/coq/blob/110921a449fcb830ec2a1cd07e3acc32319feae6/clib/unicode.ml#L67
+      # https://coq.inria.fr/refman/language/core/basic.html#grammar-token-ident
+      id_first = /\p{L}/
+      id_first_underscore = /(?:\p{L}|_)/
+      id_subsequent = /(?:\p{L}|\p{N}|_|')/ # a few missing? some mathematical ' primes and subscripts
+      id = /(?:#{id_first}#{id_subsequent}*)|(?:#{id_first_underscore}#{id_subsequent}+)/i
+      dot_id = /\.(#{id})/i
       dot_space = /\.(\s+)/
-      module_type = /Module(\s+)Type(\s+)/
-      set_options = /(Set|Unset)(\s+)(Universe|Printing|Implicit|Strict)(\s+)(Polymorphism|All|Notations|Arguments|Universes|Implicit)(\s*)\./m
 
       state :root do
-        rule %r/[(][*](?![)])/, Comment, :comment
+        mixin :begin_proof
+        mixin :sentence
+      end
+
+      state :sentence do
+        mixin :comment_whitespace
+        mixin :module_setopts
+        # After parsing the id, end up in sentence_postid
+        rule id do |m|
+          @name = m[0]
+          @id_dotted = false
+          push :sentence_postid
+          push :continue_id
+        end
+      end
+
+      state :begin_proof do
+        rule %r/(Proof)(\s*)(\.)(\s+)/i do
+          groups Keyword, Text::Whitespace, Punctuation::Indicator, Text::Whitespace
+          push :proof_mode
+        end
+      end
+
+      state :proof_mode do
+        mixin :comment_whitespace
+        mixin :module_setopts
+        mixin :begin_proof
+
+        rule %r/(Qed|Defined|Save|Admitted)(\s*)(\.)(\s+)/i do
+          groups Keyword, Text::Whitespace, Punctuation::Indicator, Text::Whitespace
+          pop!
+        end
+        # the whole point of parsing Proof/Qed, normally some of these will be operators
+        rule %r/(?:\-+|\++|\*+)/, Punctuation
+        rule %r/[{}]/, Punctuation
+        # toplevel_selector
+        rule %r/(!|all|par)(:)/ do
+          groups Keyword::Pseudo, Punctuation
+        end
+        # numbered goals 1: {} 1,2: {}
+        rule %r/\d+/, Num::Integer, :numeric_labels
+        # [named_goal]: { ... }
+        rule %r/(\[)(\s*)(#{id})(\s*)(\])(\s*)(:)/ do
+          groups Punctuation, Text::Whitespace, Name::Constant, Text::Whitespace, Punctuation, Text::Whitespace, Punctuation
+        end
+        # After parsing the id, end up in sentence_postid
+        rule id do |m|
+          @name = m[0]
+          @id_dotted = false
+          push :sentence_postid
+          push :continue_id
+        end
+      end
+
+      state :numeric_labels do
+        mixin :whitespace
+        rule %r/(,)(\s*)(\d+)/ do
+          groups Punctuation, Text::Whitespace, Num::Integer
+        end
+
+        rule %r(:), Punctuation, :pop!
+      end
+
+      state :whitespace do
         rule %r/\s+/m, Text::Whitespace
-        rule module_type do |m|
-          token Keyword , 'Module'
-          token Text::Whitespace , m[1]
-          token Keyword , 'Type'
-          token Text::Whitespace , m[2]
+      end
+
+      state :comment_whitespace do
+        rule %r/[(][*](?![)])/, Comment, :comment
+        mixin :whitespace
+      end
+
+      state :module_setopts do
+        rule %r/(Module)(\s+)(Type)(\s+)/ do
+          groups Keyword, Text::Whitespace, Keyword, Text::Whitespace
         end
-        rule set_options do |m|
-          token Keyword , m[1]
-          i = 2
-          while m[i] != ''
-            token Text::Whitespace , m[i]
-            token Keyword , m[i+1]
-            i += 2
-          end
-          token self.class.end_sentence , '.'
+
+        rule %r(
+          (Set|Unset)(\s+)
+          (Universe|Printing|Implicit|Strict)(\s+)
+          (Polymorphism|All|Notations|Arguments|Universes|Implicit)?(\s*)(\.)
+        )x do
+          groups Keyword, Text::Whitespace, Keyword, Text::Whitespace, Keyword, Text::Whitespace, Punctuation::Indicator
         end
+      end
+
+      state :sentence_postid do
+        mixin :comment_whitespace
+        mixin :module_setopts
+
+        # up here to beat the id rule for lambda
+        rule %r(:=|=>|;|:>|:|::|_), Punctuation
+        rule %r(->|/\\|\\/|;|:>|[⇒→↔⇔≔≡∀∃∧∨¬⊤⊥⊢⊨∈λ]), Operator
+
         rule id do |m|
           @name = m[0]
-          @continue = false
+          @id_dotted = false
           push :continue_id
         end
-        rule %r(/\\), Operator
-        rule %r/\\\//, Operator
 
+        # must be followed by whitespace, so that we don't match notations like sym.(a + b)
+        rule %r/\.(?=\s)/, Punctuation::Indicator, :pop! # :sentence_postid
+
         rule %r/-?\d[\d_]*(.[\d_]*)?(e[+-]?\d[\d_]*)/i, Num::Float
-        rule %r/\d[\d_]*/, Num::Integer
+        rule %r/-?\d[\d_]*/, Num::Integer
 
         rule %r/'(?:(\\[\\"'ntbr ])|(\\[0-9]{3})|(\\x\h{2}))'/, Str::Char
         rule %r/'/, Keyword
         rule %r/"/, Str::Double, :string
         rule %r/[~?]#{id}/, Name::Variable
 
-        rule %r/./ do |m|
-          match = m[0]
-          if self.class.keyopts.include? match
-            token Punctuation
-          elsif match =~ operator
-            token Operator
-          else
-            token Error
-          end
-        end
+        rule %r(`{|[{}\[\]()?|;,.]), Punctuation
+        rule %r([!@^|~#.%/]+), Operator
+        # any other combo of S (symbol), P (punctuation) and some extras just to be sure
+        rule %r((?:\p{S}|\p{Pc}|[./\:\<=>\-+*])+), Operator
+
+        rule %r/./, Error
       end
 
       state :comment do
         rule %r/[^(*)]+/, Comment
         rule(/[(][*]/) { token Comment; push }
         rule %r/[*][)]/, Comment, :pop!
         rule %r/[(*)]/, Comment
       end
 
       state :string do
-        rule %r/(?:\\")+|[^"]/, Str::Double
-        mixin :escape_sequence
-        rule %r/\\\n/, Str::Double
+        rule %r/[^"]+/, Str::Double
+        rule %r/""/, Str::Double
         rule %r/"/, Str::Double, :pop!
       end
 
-      state :escape_sequence do
-        rule %r/\\[\\"'ntbr]/, Str::Escape
-      end
-
       state :continue_id do
         # the stream starts with an id (stored in @name) and continues here
         rule dot_id do |m|
-          token Name::Namespace , @name
-          token Punctuation , '.'
-          @continue = true
+          token Name::Namespace, @name
+          token Punctuation, '.'
+          @id_dotted = true
           @name = m[1]
         end
+
         rule dot_space do |m|
-          if @continue
-            token Name::Constant , @name
+          if @id_dotted
+            token Name::Constant, @name
           else
-            token self.class.classify(@name) , @name
+            token self.class.classify(@name), @name
           end
-          token self.class.end_sentence , '.'
-          token Text::Whitespace , m[1]
+
+          token Punctuation::Indicator, '.'
+          token Text::Whitespace, m[1]
           @name = false
-          @continue = false
-          pop!
+          @id_dotted = false
+          pop! # :continue_id
+          pop! # :sentence_postid
         end
+
         rule %r// do
-          if @continue
-            token Name::Constant , @name
+          if @id_dotted
+            token Name::Constant, @name
           else
-            token self.class.classify(@name) , @name
+            token self.class.classify(@name), @name
           end
           @name = false
-          @continue = false
-          pop!
+          @id_dotted = false
+          # we finished parsing an id, drop back into the sentence_postid that was pushed first.
+          pop! # :continue_id
         end
       end
-
     end
   end
 end