ext/linguist/tokenizer.l in github-linguist-7.21.0 vs ext/linguist/tokenizer.l in github-linguist-7.22.0
- old
+ new
@@ -49,14 +49,12 @@
} while (0)
%}
%option never-interactive yywrap reentrant nounput warn nodefault header-file="lex.linguist_yy.h" extra-type="VALUE*" prefix="linguist_yy"
-%x c_comment xml_comment haskell_comment ocaml_comment python_dcomment python_scomment roff_comment punct
+%x c_comment xml_comment haskell_comment ocaml_comment python_dcomment python_scomment roff_comment
-not_punct [^-!#$%&*+,./:;<=>?@\\^_`|~]
-
%%
^#![ \t]*([[:alnum:]_\/]*\/)?env([ \t]+([^ \t=]*=[^ \t]*))*[ \t]+[[:alpha:]_]+ {
const char *off = strrchr(yytext, ' ');
if (!off)
@@ -90,10 +88,11 @@
^[ \t]*[%]+(" ".*|\n) { /* line-leading blanks, one or more '%', then a space plus the rest of the line, or a bare newline */ FEED_STATIC("COMMENT%"); return 1; }
^[ \t]*\"(" ".*|\n) { /* same shape with a single double-quote character as the marker */ FEED_STATIC("COMMENT\""); return 1; }
^[ \t]*;+(" ".*|\n) { /* same shape with one or more ';' as the marker */ FEED_STATIC("COMMENT;"); return 1; }
^[.][ \t]*\\\"(.*|\n) { /* '.' in column 0, optional blanks, backslash + double quote, then the rest of the line or a newline */ FEED_STATIC("COMMENT.\\\""); return 1; }
^['][ \t]*\\\"(.*|\n) { /* as the previous rule, but the line starts with an apostrophe */ FEED_STATIC("COMMENT'\\\""); return 1; }
+^"$! "(.*|\n) { /* literal dollar, bang, space in column 0, then the rest of the line or a newline */ FEED_STATIC("COMMENT$!"); return 1; }
"/**/" { /* empty block comment: fed with the plain block-comment marker, start condition unchanged */ FEED_STATIC("COMMENT/*"); return 1; }
"/**" { /* opener with two stars: feed its own marker, then switch to the exclusive c_comment start condition */ FEED_STATIC("COMMENT/**"); BEGIN(c_comment); return 1; }
"/*!" { /* opener followed by '!': feed its own marker, then switch to c_comment */ FEED_STATIC("COMMENT/*!"); BEGIN(c_comment); return 1; }
"/*" { /* plain opener: shortest of the three, so it only wins when the longer openers do not match */ FEED_STATIC("COMMENT/*"); BEGIN(c_comment); return 1; }
@@ -116,21 +115,59 @@
\"\"|'' { /* empty string literal in either quote style: nothing is fed */ }
\" { /* opening double quote; eat_until_unescaped is defined outside this hunk - presumably it consumes input up to the next unescaped quote (TODO confirm) */ eat_until_unescaped('"'); }
' { /* opening single quote, handled the same way with the apostrophe as terminator */ eat_until_unescaped('\''); }
(0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* hex or decimal literal (dots allowed), then either u/U with up to two l/L, or an optional exponent (sign is mandatory) with any f/F/l/L suffixes: nothing is fed */ }
-[.@#]?[[:alnum:]_]+ { FEED(); return 1; }
-"()"|"{}"|"[]" { FEED(); return 1; }
-[({\[]|[)}\]] { FEED(); return 1; }
+[.@#$]?[[:alnum:]_]+ { /* run of alphanumerics and underscores with an optional leading '.', '@', '#' or '$' */ FEED(); return 1; }
-<INITIAL,punct>[-!#$%&*+,.:;=>?@\\^_`|~]+/{not_punct} { FEED(); BEGIN(INITIAL); return 1; }
-<INITIAL,punct>[-!#$%&*+,.:;=>?@\\^_`|~]+/("<!--"|"/*") { FEED(); BEGIN(INITIAL); return 1; }
-<INITIAL,punct>[</]+/{not_punct} { FEED(); BEGIN(INITIAL); return 1; }
-<INITIAL,punct>[</]+/"<!--" { FEED(); BEGIN(INITIAL); return 1; }
-<INITIAL,punct>[</]+/"/*" { FEED(); BEGIN(INITIAL); return 1; }
-<INITIAL,punct>[-!#$%&*+,.:;=>?@\\^_`|~]+ { yymore(); BEGIN(punct); }
-<INITIAL,punct>[</] { yymore(); BEGIN(punct); }
-<punct><<EOF>> { FEED2(yytext, yyleng - 1); BEGIN(INITIAL); return 1; }
+[(]+[)]+ { /* run of opening parentheses directly followed by a run of closing ones, e.g. () or (()) */ FEED(); return 1; }
+[{]+[}]+ { /* same for curly braces */ FEED(); return 1; }
+[\[]+[\]]+ { /* same for square brackets */ FEED(); return 1; }
+[(]+|[)]+ { /* run of only opening or only closing parentheses; shorter than the paired rule, so it wins only when no pair follows */ FEED(); return 1; }
+[{]+|[}]+ { /* run of only opening or only closing curly braces */ FEED(); return 1; }
+[\[]+|[\]]+ { /* run of only opening or only closing square brackets */ FEED(); return 1; }
+[$]([(]+|[{]+|[\[]+) { /* '$' directly followed by a run of one kind of opening bracket: parentheses, curly braces or square brackets. The square-bracket branch is a plain class-plus-repeat like its siblings; a stray ']' after the class would instead demand closing brackets and never match a bare dollar + opening square bracket. */ FEED(); return 1; }
+
+"(...)"|"{...}"|"[...]" { /* literal three-dot ellipsis wrapped in one matching bracket pair */ FEED(); return 1; }
+
+"&>"|"<&"|"<&-"|"&>>"|">&" { /* fixed operators built from '&' with '<' or '>' (they look like shell redirections - not confirmed here) */ FEED(); return 1; }
+"|&"|"&|" { /* the two orderings of pipe and ampersand */ FEED(); return 1; }
+
+[-]+[>]+ { /* right arrows: one or more '-' then one or more '>' */ FEED(); return 1; }
+[<]+[-]+ { /* left arrows: one or more '<' then one or more '-' */ FEED(); return 1; }
+
+[!]+[=]+ { /* one or more '!' then one or more '=', e.g. != and !== */ FEED(); return 1; }
+[<>]*[=]+[<>]* { /* run of '=' with optional angle brackets on either side: =, ==, <=, >=, =>, <=> */ FEED(); return 1; }
+[<][/]?[?%!#@] { /* '<', optional '/', then one of ? % ! # @ */ FEED(); return 1; }
+[?%!][>] { /* one of ? % ! followed by '>' */ FEED(); return 1; }
+[<>/]+ { /* any mixed run of '<', '>' and '/'; the longer, more specific rules above win when they apply */ FEED(); return 1; }
+[-+*/%&|^~:][=]+ { /* a single operator character followed by one or more '=', e.g. += or := */ FEED(); return 1; }
+[!=][~] { /* '!' or '=' followed by '~' */ FEED(); return 1; }
+":-" { /* literal colon-dash */ FEED(); return 1; }
+
+[.][*]+[?]? { /* '.', one or more stars, optional trailing '?' */ FEED(); return 1; }
+[.][+]+[?]? { /* '.', one or more '+', optional trailing '?' */ FEED(); return 1; }
+"(?:" { /* literal open-paren, question mark, colon */ FEED(); return 1; }
+
+[-]+ { /* this rule and every rule down to the catch-all: a run of one repeated punctuation character is a single match; flex prefers the longest match, so these only fire when no multi-character operator rule matches more text */ FEED(); return 1; }
+[!]+ { FEED(); return 1; }
+[#]+ { FEED(); return 1; }
+[$]+ { FEED(); return 1; }
+[%]+ { FEED(); return 1; }
+[&]+ { FEED(); return 1; }
+[*]+ { FEED(); return 1; }
+[+]+ { FEED(); return 1; }
+[,]+ { FEED(); return 1; }
+[.]+ { FEED(); return 1; }
+[:]+ { FEED(); return 1; }
+[;]+ { FEED(); return 1; }
+[?]+ { FEED(); return 1; }
+[@]+ { FEED(); return 1; }
+[\\]+ { FEED(); return 1; }
+[\^]+ { FEED(); return 1; }
+[`]+ { FEED(); return 1; }
+[|]+ { FEED(); return 1; }
+[~]+ { FEED(); return 1; }
.|\n { /* catch-all: any other single character, newline included, is dropped; needed because %option nodefault removes the built-in default rule */ }
%%