lib/pragmatic_tokenizer/processor.rb in pragmatic_tokenizer-0.4.2 vs lib/pragmatic_tokenizer/processor.rb in pragmatic_tokenizer-0.5.0
- old
+ new
@@ -22,10 +22,10 @@
shift_beginning_hyphen(text)
shift_ending_hyphen(text)
tokens = separate_full_stop(text.squeeze(' ')
.split
.flat_map { |t| (t[0] == '‚' || t[0] == ',') && t.length > 1 ? t.split(/(,|‚)/).flatten : t }
- .flat_map { |t| (t[-1] == '’' || t[-1] == "'") && t.length > 1 ? t.split(/(’|')/).flatten : t }
+ .flat_map { |t| (t[-1] == '’' || t[-1] == "'" || t[-1] == '‘' || t[-1] == '`') && t.length > 1 ? t.split(/(’|'|‘|`)/).flatten : t }
.map { |t| convert_sym_to_punct(t) })
separate_other_ending_punc(tokens)
end
private
\ No newline at end of file