tokenizer.l in github-linguist-5.3.3

- old (line removed from the original tokenizer.l)
+ new (line added in the patched tokenizer.l)

@@ -7,29 +7,29 @@
     yyextra->type = (typ); \
   } while (0)
 
 #define eat_until_eol() do { \
     int c; \
-    while ((c = input(yyscanner)) != '\n' && c != EOF); \
-    if (c == EOF) \
-      yyterminate(); \
+    while ((c = input(yyscanner)) != '\n' && c != EOF && c); \
+    if (c == EOF || !c) \
+      return 0; \
   } while (0)
 
 #define eat_until_unescaped(q) do { \
     int c; \
-    while ((c = input(yyscanner)) != EOF) { \
+    while ((c = input(yyscanner)) != EOF && c) { \
       if (c == '\n') \
         break; \
       if (c == '\\') { \
         c = input(yyscanner); \
-        if (c == EOF) \
-          yyterminate(); \
+        if (c == EOF || !c) \
+          return 0; \
       } else if (c == q) \
         break; \
     } \
-    if (c == EOF) \
-      yyterminate(); \
+    if (c == EOF || !c) \
+      return 0; \
   } while (0)
 
 %}
 
 %option never-interactive yywrap reentrant nounput warn nodefault header-file="lex.linguist_yy.h" extra-type="struct tokenizer_extra *" prefix="linguist_yy"
@@ -82,20 +82,20 @@
 
 \"\"|''                           { /* nothing */ }
 \"                                { eat_until_unescaped('"'); }
 '                                 { eat_until_unescaped('\''); }
 (0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* nothing */ }
-\<[^ \t\n\r<>]+/>|" "[^<>\n]{0,2048}>               {
+\<[[:alnum:]_!./?-]+              {
     if (strcmp(yytext, "<!--") == 0) {
      BEGIN(xml_comment);
     } else {
       feed_token(strdup(yytext), SGML_TOKEN);
       BEGIN(sgml);
       return 1;
     }
   }
-<sgml>[[:alnum:]_]+=/\"           { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('"'); return 1; }
-<sgml>[[:alnum:]_]+=/'            { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('\''); return 1; }
+<sgml>[[:alnum:]_]+=\"            { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('"'); return 1; }
+<sgml>[[:alnum:]_]+='             { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('\''); return 1; }
 <sgml>[[:alnum:]_]+=[[:alnum:]_]* { feed_token(strdup(yytext), REGULAR_TOKEN); *(strchr(yyextra->token, '=') + 1) = 0; return 1; }
 <sgml>[[:alnum:]_]+               { feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }
 <sgml>\>                          { BEGIN(INITIAL); }
 <sgml>.|\n                        { /* nothing */ }
 ;|\{|\}|\(|\)|\[|\]               { feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }