lib/pragmatic_tokenizer/processor.rb in pragmatic_tokenizer-0.3.1 vs lib/pragmatic_tokenizer/processor.rb in pragmatic_tokenizer-0.3.2
- old
+ new
@@ -135,10 +135,10 @@
cleaned_tokens[-1] = $1
cleaned_tokens.push '.'
end
cleaned_tokens
else
- tokens.flat_map { |t| t =~ /\.\z/ && !@language::ABBREVIATIONS.include?(Unicode::downcase(t.split(".")[0])) && t.length > 2 ? t.split(".").flatten + ["."] : t }
+ tokens.flat_map { |t| t =~ /\.\z/ && !@language::ABBREVIATIONS.include?(Unicode::downcase(t.split(".")[0] == nil ? '' : t.split(".")[0])) && t.length > 2 ? t.split(".").flatten + ["."] : t }
end
end
def separate_other_ending_punc(tokens)
cleaned_tokens = []
\ No newline at end of file