lib/rouge/lexers/coq.rb in rouge-4.0.0 vs lib/rouge/lexers/coq.rb in rouge-4.0.1
- old
+ new
@@ -23,11 +23,11 @@
Ltac Require Import Export Module Section End Variable
Context Polymorphic Monomorphic Universe Universes
Variables Class Instance Global Local Include
Printing Notation Infix Arguments Hint Rewrite Immediate
Qed Defined Opaque Transparent Existing
- Compute Eval Print SearchAbout Search About Check
+ Compute Eval Print SearchAbout Search About Check Admitted
)
end
def self.ltac
@ltac ||= Set.new %w(
@@ -54,20 +54,10 @@
omega solve congruence reflexivity exact
assumption eassumption
)
end
- def self.keyopts
- @keyopts ||= Set.new %w(
- := => -> /\\ \\/ _ ; :> : ⇒ → ↔ ⇔ ≔ ≡ ∀ ∃ ∧ ∨ ¬ ⊤ ⊥ ⊢ ⊨ ∈
- )
- end
-
- def self.end_sentence
- @end_sentence ||= Punctuation::Indicator
- end
-
def self.classify(x)
if self.coq.include? x
return Keyword
elsif self.gallina.include? x
return Keyword::Reserved
@@ -80,112 +70,186 @@
else
return Name::Constant
end
end
- operator = %r([\[\];,{}_()!$%&*+./:<=>?@^|~#-]+)
- id = /(?:[a-z][\w']*)|(?:[_a-z][\w']+)/i
- dot_id = /\.((?:[a-z][\w']*)|(?:[_a-z][\w']+))/i
+ # https://github.com/coq/coq/blob/110921a449fcb830ec2a1cd07e3acc32319feae6/clib/unicode.ml#L67
+ # https://coq.inria.fr/refman/language/core/basic.html#grammar-token-ident
+ id_first = /\p{L}/
+ id_first_underscore = /(?:\p{L}|_)/
+ id_subsequent = /(?:\p{L}|\p{N}|_|')/ # a few missing? some mathematical ' primes and subscripts
+ id = /(?:#{id_first}#{id_subsequent}*)|(?:#{id_first_underscore}#{id_subsequent}+)/i
+ dot_id = /\.(#{id})/i
dot_space = /\.(\s+)/
- module_type = /Module(\s+)Type(\s+)/
- set_options = /(Set|Unset)(\s+)(Universe|Printing|Implicit|Strict)(\s+)(Polymorphism|All|Notations|Arguments|Universes|Implicit)(\s*)\./m
state :root do
- rule %r/[(][*](?![)])/, Comment, :comment
+ mixin :begin_proof
+ mixin :sentence
+ end
+
+ state :sentence do
+ mixin :comment_whitespace
+ mixin :module_setopts
+ # After parsing the id, end up in sentence_postid
+ rule id do |m|
+ @name = m[0]
+ @id_dotted = false
+ push :sentence_postid
+ push :continue_id
+ end
+ end
+
+ state :begin_proof do
+ rule %r/(Proof)(\s*)(\.)(\s+)/i do
+ groups Keyword, Text::Whitespace, Punctuation::Indicator, Text::Whitespace
+ push :proof_mode
+ end
+ end
+
+ state :proof_mode do
+ mixin :comment_whitespace
+ mixin :module_setopts
+ mixin :begin_proof
+
+ rule %r/(Qed|Defined|Save|Admitted)(\s*)(\.)(\s+)/i do
+ groups Keyword, Text::Whitespace, Punctuation::Indicator, Text::Whitespace
+ pop!
+ end
+ # the whole point of parsing Proof/Qed, normally some of these will be operators
+ rule %r/(?:\-+|\++|\*+)/, Punctuation
+ rule %r/[{}]/, Punctuation
+ # toplevel_selector
+ rule %r/(!|all|par)(:)/ do
+ groups Keyword::Pseudo, Punctuation
+ end
+ # numbered goals 1: {} 1,2: {}
+ rule %r/\d+/, Num::Integer, :numeric_labels
+ # [named_goal]: { ... }
+ rule %r/(\[)(\s*)(#{id})(\s*)(\])(\s*)(:)/ do
+ groups Punctuation, Text::Whitespace, Name::Constant, Text::Whitespace, Punctuation, Text::Whitespace, Punctuation
+ end
+ # After parsing the id, end up in sentence_postid
+ rule id do |m|
+ @name = m[0]
+ @id_dotted = false
+ push :sentence_postid
+ push :continue_id
+ end
+ end
+
+ state :numeric_labels do
+ mixin :whitespace
+ rule %r/(,)(\s*)(\d+)/ do
+ groups Punctuation, Text::Whitespace, Num::Integer
+ end
+
+ rule %r(:), Punctuation, :pop!
+ end
+
+ state :whitespace do
rule %r/\s+/m, Text::Whitespace
- rule module_type do |m|
- token Keyword , 'Module'
- token Text::Whitespace , m[1]
- token Keyword , 'Type'
- token Text::Whitespace , m[2]
+ end
+
+ state :comment_whitespace do
+ rule %r/[(][*](?![)])/, Comment, :comment
+ mixin :whitespace
+ end
+
+ state :module_setopts do
+ rule %r/(Module)(\s+)(Type)(\s+)/ do
+ groups Keyword, Text::Whitespace, Keyword, Text::Whitespace
end
- rule set_options do |m|
- token Keyword , m[1]
- i = 2
- while m[i] != ''
- token Text::Whitespace , m[i]
- token Keyword , m[i+1]
- i += 2
- end
- token self.class.end_sentence , '.'
+
+ rule %r(
+ (Set|Unset)(\s+)
+ (Universe|Printing|Implicit|Strict)(\s+)
+ (Polymorphism|All|Notations|Arguments|Universes|Implicit)?(\s*)(\.)
+ )x do
+ groups Keyword, Text::Whitespace, Keyword, Text::Whitespace, Keyword, Text::Whitespace, Punctuation::Indicator
end
+ end
+
+ state :sentence_postid do
+ mixin :comment_whitespace
+ mixin :module_setopts
+
+ # up here to beat the id rule for lambda
+ rule %r(:=|=>|;|:>|:|::|_), Punctuation
+ rule %r(->|/\\|\\/|;|:>|[⇒→↔⇔≔≡∀∃∧∨¬⊤⊥⊢⊨∈λ]), Operator
+
rule id do |m|
@name = m[0]
- @continue = false
+ @id_dotted = false
push :continue_id
end
- rule %r(/\\), Operator
- rule %r/\\\//, Operator
+ # must be followed by whitespace, so that we don't match notations like sym.(a + b)
+ rule %r/\.(?=\s)/, Punctuation::Indicator, :pop! # :sentence_postid
+
rule %r/-?\d[\d_]*(.[\d_]*)?(e[+-]?\d[\d_]*)/i, Num::Float
- rule %r/\d[\d_]*/, Num::Integer
+ rule %r/-?\d[\d_]*/, Num::Integer
rule %r/'(?:(\\[\\"'ntbr ])|(\\[0-9]{3})|(\\x\h{2}))'/, Str::Char
rule %r/'/, Keyword
rule %r/"/, Str::Double, :string
rule %r/[~?]#{id}/, Name::Variable
- rule %r/./ do |m|
- match = m[0]
- if self.class.keyopts.include? match
- token Punctuation
- elsif match =~ operator
- token Operator
- else
- token Error
- end
- end
+ rule %r(`{|[{}\[\]()?|;,.]), Punctuation
+ rule %r([!@^|~#.%/]+), Operator
+ # any other combo of S (symbol), P (punctuation) and some extras just to be sure
+ rule %r((?:\p{S}|\p{Pc}|[./\:\<=>\-+*])+), Operator
+
+ rule %r/./, Error
end
state :comment do
rule %r/[^(*)]+/, Comment
rule(/[(][*]/) { token Comment; push }
rule %r/[*][)]/, Comment, :pop!
rule %r/[(*)]/, Comment
end
state :string do
- rule %r/(?:\\")+|[^"]/, Str::Double
- mixin :escape_sequence
- rule %r/\\\n/, Str::Double
+ rule %r/[^"]+/, Str::Double
+ rule %r/""/, Str::Double
rule %r/"/, Str::Double, :pop!
end
- state :escape_sequence do
- rule %r/\\[\\"'ntbr]/, Str::Escape
- end
-
state :continue_id do
# the stream starts with an id (stored in @name) and continues here
rule dot_id do |m|
- token Name::Namespace , @name
- token Punctuation , '.'
- @continue = true
+ token Name::Namespace, @name
+ token Punctuation, '.'
+ @id_dotted = true
@name = m[1]
end
+
rule dot_space do |m|
- if @continue
- token Name::Constant , @name
+ if @id_dotted
+ token Name::Constant, @name
else
- token self.class.classify(@name) , @name
+ token self.class.classify(@name), @name
end
- token self.class.end_sentence , '.'
- token Text::Whitespace , m[1]
+
+ token Punctuation::Indicator, '.'
+ token Text::Whitespace, m[1]
@name = false
- @continue = false
- pop!
+ @id_dotted = false
+ pop! # :continue_id
+ pop! # :sentence_postid
end
+
rule %r// do
- if @continue
- token Name::Constant , @name
+ if @id_dotted
+ token Name::Constant, @name
else
- token self.class.classify(@name) , @name
+ token self.class.classify(@name), @name
end
@name = false
- @continue = false
- pop!
+ @id_dotted = false
+ # we finished parsing an id, drop back into the sentence_postid that was pushed first.
+ pop! # :continue_id
end
end
-
end
end
end