Sha256: 0c5b2f68a3b5451ebff0cca7d938f7d0d935ea4cbab8d1fb2d8cffe5ae91a319
Contents?: true
Size: 484 Bytes
Versions: 56
Compression:
Stored size: 484 Bytes
Contents
require 'strscan'
require 'linguist/linguist'

module Linguist
  # Generic tokenizer for programming-language source text.
  #
  # The tokens it yields are meant to feed the language bayes classifier:
  # data strings and comments are stripped out, while significant
  # language symbols are kept.
  class Tokenizer
    class << self
      # Public: Tokenize a chunk of source text.
      #
      # data - String to tokenize.
      #
      # Returns Array of token Strings.
      def tokenize(data)
        # NOTE(review): `extract_tokens` is not defined in this file;
        # presumably it is supplied by the 'linguist/linguist' require
        # above - confirm.
        tokenizer = new
        tokenizer.extract_tokens(data)
      end
    end
  end
end
Version data entries
56 entries across 56 versions & 1 rubygems