Sha256: 4ecae0b08d3c4aacef7234a8f9ec84c2a41ccb81c3931243f3977ca84c0ee73d
Contents?: true
Size: 1.42 KB
Versions: 2
Compression:
Stored size: 1.42 KB
Contents
#include "ruby.h" #include "linguist.h" #include "lex.linguist_yy.h" int linguist_yywrap(yyscan_t yyscanner) { return 1; } static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) { YY_BUFFER_STATE buf; yyscan_t scanner; struct tokenizer_extra extra; VALUE ary, s; long len; int r; Check_Type(rb_data, T_STRING); len = RSTRING_LEN(rb_data); if (len > 100000) len = 100000; linguist_yylex_init_extra(&extra, &scanner); buf = linguist_yy_scan_bytes(RSTRING_PTR(rb_data), (int) len, scanner); ary = rb_ary_new(); do { extra.type = NO_ACTION; extra.token = NULL; r = linguist_yylex(scanner); switch (extra.type) { case NO_ACTION: break; case REGULAR_TOKEN: rb_ary_push(ary, rb_str_new2(extra.token)); free(extra.token); break; case SHEBANG_TOKEN: s = rb_str_new2("SHEBANG#!"); rb_str_cat2(s, extra.token); rb_ary_push(ary, s); free(extra.token); break; case SGML_TOKEN: s = rb_str_new2(extra.token); rb_str_cat2(s, ">"); rb_ary_push(ary, s); free(extra.token); break; } } while (r); linguist_yy_delete_buffer(buf, scanner); linguist_yylex_destroy(scanner); return ary; } __attribute__((visibility("default"))) void Init_linguist() { VALUE rb_mLinguist = rb_define_module("Linguist"); VALUE rb_cTokenizer = rb_define_class_under(rb_mLinguist, "Tokenizer", rb_cObject); rb_define_method(rb_cTokenizer, "extract_tokens", rb_tokenizer_extract_tokens, 1); }
Version data entries
2 entries across 2 versions & 1 rubygem
Version | Path
---|---
github-linguist-5.3.3 | ext/linguist/linguist.c
github-linguist-5.3.2 | ext/linguist/linguist.c