ext/linguist/tokenizer.l in github-linguist-5.3.2 vs ext/linguist/tokenizer.l in github-linguist-5.3.3

- old
+ new

@@ -7,29 +7,29 @@ yyextra->type = (typ); \ } while (0) #define eat_until_eol() do { \ int c; \ - while ((c = input(yyscanner)) != '\n' && c != EOF); \ - if (c == EOF) \ - yyterminate(); \ + while ((c = input(yyscanner)) != '\n' && c != EOF && c); \ + if (c == EOF || !c) \ + return 0; \ } while (0) #define eat_until_unescaped(q) do { \ int c; \ - while ((c = input(yyscanner)) != EOF) { \ + while ((c = input(yyscanner)) != EOF && c) { \ if (c == '\n') \ break; \ if (c == '\\') { \ c = input(yyscanner); \ - if (c == EOF) \ - yyterminate(); \ + if (c == EOF || !c) \ + return 0; \ } else if (c == q) \ break; \ } \ - if (c == EOF) \ - yyterminate(); \ + if (c == EOF || !c) \ + return 0; \ } while (0) %} %option never-interactive yywrap reentrant nounput warn nodefault header-file="lex.linguist_yy.h" extra-type="struct tokenizer_extra *" prefix="linguist_yy" @@ -82,20 +82,20 @@ \"\"|'' { /* nothing */ } \" { eat_until_unescaped('"'); } ' { eat_until_unescaped('\''); } (0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* nothing */ } -\<[^ \t\n\r<>]+/>|" "[^<>\n]{0,2048}> { +\<[[:alnum:]_!./?-]+ { if (strcmp(yytext, "<!--") == 0) { BEGIN(xml_comment); } else { feed_token(strdup(yytext), SGML_TOKEN); BEGIN(sgml); return 1; } } -<sgml>[[:alnum:]_]+=/\" { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('"'); return 1; } -<sgml>[[:alnum:]_]+=/' { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('\''); return 1; } +<sgml>[[:alnum:]_]+=\" { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('"'); return 1; } +<sgml>[[:alnum:]_]+=' { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('\''); return 1; } <sgml>[[:alnum:]_]+=[[:alnum:]_]* { feed_token(strdup(yytext), REGULAR_TOKEN); *(strchr(yyextra->token, '=') + 1) = 0; return 1; } <sgml>[[:alnum:]_]+ { feed_token(strdup(yytext), REGULAR_TOKEN); return 1; } <sgml>\> { BEGIN(INITIAL); } <sgml>.|\n { /* nothing */ } ;|\{|\}|\(|\)|\[|\] { feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }