Sha256: cdc99660c3c9d20640907fc88f086547a4d9ba4e8f972666af55a40685fe7a05
Contents?: true
Size: 605 Bytes
Versions: 2
Compression:
Stored size: 605 Bytes
Contents
module TextRank
  module Tokenizer
    ##
    # Tokenizer that keeps punctuation marks as standalone tokens rather than
    # discarding them. This is useful, for example, to filters such as
    # [TokenFilter::PartOfSpeechBase].
    #
    # The pattern handed to the superclass has three alternatives:
    #
    # 1. a captured word: a letter followed by one or more letters, digits or
    #    hyphens
    # 2. a captured single punctuation character
    # 3. a run of whitespace (not captured)
    #
    # Matching is case-insensitive. How the pattern is applied to the text is
    # defined by the Regex superclass, which is not shown in this file.
    #
    # = Example
    #
    #  WordsAndPunctuation.new.tokenize("i should:like to know:which is worse.")
    #  => ["i", "should", ":", "like", "to", "know", ":", "which", "is", "worse", "."]
    ##
    class WordsAndPunctuation < Regex
      # Builds the tokenizer by passing the word/punctuation/whitespace pattern
      # to the Regex superclass constructor.
      def initialize
        # Extended mode (x) ignores the literal spaces inside the pattern;
        # the i flag makes the letter classes case-insensitive.
        pattern = / ([a-z][a-z0-9-]+) | ([\p{Punct}]) | \s+ /xi
        super(pattern)
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
text_rank-1.1.1 | lib/text_rank/tokenizer/words_and_punctuation.rb |
text_rank-1.1.0 | lib/text_rank/tokenizer/words_and_punctuation.rb |