lib/picky/tokenizer.rb in picky-4.18.0 vs lib/picky/tokenizer.rb in picky-4.19.0
- old
+ new
@@ -96,18 +96,20 @@
# We allow Strings, Regexps, and things that respond to #split.
#
# Note: We do not test against to_str since symbols do not work with String#split.
#
def splits_text_on thing
# Guard: anything that is neither a Regexp nor responds to #split is
# rejected with an ArgumentError naming the offending class.
# The old/new pair below differs only in the wording of the message
# (the "or String" part was dropped); the condition is unchanged.
- raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
+ raise ArgumentError.new "#{__method__} takes a Regexp or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
# Objects that already respond to #split are stored as they are.
# Everything else that passed the guard (i.e. a Regexp without #split)
# is wrapped in a RegexpWrapper.
# NOTE(review): RegexpWrapper is defined elsewhere; presumably it exposes
# a #split(text) interface around the Regexp - confirm.
@splits_text_on = if thing.respond_to? :split
thing
else
RegexpWrapper.new thing
end
end
def split text
+ # Does not create a new string if nothing is split.
+ #
# Delegates to the splitter object held in @splits_text_on (assigned by
# splits_text_on), passing the text as the argument to its #split.
@splits_text_on.split text
end
# Normalizing.
#
@@ -231,12 +233,12 @@
#
# Returns:
# [[:token1, :token2], ["Original1", "Original2"]]
#
def tokenize text
# The input is converted with #to_s before it is handed to preprocess.
- text = preprocess text.to_s # processing the text
+ text = preprocess text.to_s # processing the text
# Short-circuit: nothing left after preprocessing.
return empty_tokens if text.empty? # TODO blank?
- words = pretokenize text # splitting and preparations for tokenizing
+ words = pretokenize text # splitting and preparations for tokenizing
# Short-circuit: pretokenizing produced no words.
return empty_tokens if words.empty?
tokens = tokens_for words # creating tokens / strings
# Result is a pair: the tokens first, then the words they were created from.
[tokens, words]
end
\ No newline at end of file