lib/blingfire.rb in blingfire-0.1.5 vs lib/blingfire.rb in blingfire-0.1.6

- old
+ new

@@ -36,12 +36,12 @@
   class << self
     def lib_version
       FFI.GetBlingFireTokVersion
     end

-    def load_model(path)
-      Model.new(path)
+    def load_model(path, **options)
+      Model.new(path, **options)
     end

     def text_to_words(text)
       text_to(text, " ") do |t, out|
         FFI.TextToWords(t, t.bytesize, out, out.size)
@@ -124,10 +124,16 @@
       out_size = FFI.NormalizeSpaces(text, text.bytesize, out, out.size, u_space)
       check_status out_size, out
       encode_utf8(out.to_str(out_size))
     end

+    def change_settings_dummy_prefix(model, value)
+      # use opposite of value
+      ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
+      raise Error, "Bad status: #{ret}" if ret != 1
+    end
+
     private

     def check_status(ret, ptr)
       raise Error, "Not enough memory allocated" if ret == -1 || ret > ptr.size
     end
@@ -169,10 +175,10 @@
       # convert byte offsets to character offsets
       # TODO see if more efficient to store next_pos in variable
       pos = 0
       text.each_char.with_index do |c, i|
-        while pos == start_bytes[starts.size]
+        while pos == start_bytes[starts.size] || start_bytes[starts.size] == -1
           starts << i
         end
         pos += c.bytesize
         while pos - 1 == end_bytes[ends.size]
           ends << i + 1