lib/blingfire.rb in blingfire-0.1.5 vs lib/blingfire.rb in blingfire-0.1.6
- old
+ new
@@ -36,12 +36,12 @@
class << self
# Returns whatever FFI.GetBlingFireTokVersion reports
# (presumably the version number of the loaded native library -- confirm
# against the FFI binding).
def lib_version
  # Explicit parentheses make it obvious that this capitalized name is a
  # method call on FFI and not a constant lookup.
  FFI.GetBlingFireTokVersion()
end
# Constructs a Model from +path+. On the new (+) side of this hunk the method
# also accepts arbitrary keyword options and forwards them unchanged to
# Model.new; the old (-) side took only +path+.
- def load_model(path)
- Model.new(path)
+ def load_model(path, **options)
+ Model.new(path, **options)
end
def text_to_words(text)
text_to(text, " ") do |t, out|
FFI.TextToWords(t, t.bytesize, out, out.size)
@@ -124,10 +124,16 @@
out_size = FFI.NormalizeSpaces(text, text.bytesize, out, out.size, u_space)
check_status out_size, out
encode_utf8(out.to_str(out_size))
end
# Changes the dummy-prefix setting of +model+ through FFI.SetNoDummyPrefix.
# The native call takes a "no dummy prefix" flag, so +value+ is inverted
# before it is passed: a truthy +value+ sends 0, a falsy +value+ sends 1.
# Raises Error ("Bad status: <ret>") when the call returns anything but 1.
+ def change_settings_dummy_prefix(model, value)
+ # use opposite of value
+ ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
+ raise Error, "Bad status: #{ret}" if ret != 1
+ end
+
private
# Validates the value returned by an FFI call that writes into a buffer.
#
# ret - Integer returned by the FFI call (e.g. the reported output size).
# ptr - the output buffer; only consulted through +size+.
#
# Raises Error when +ret+ is -1 or exceeds +ptr.size+. Returns nil otherwise.
def check_status(ret, ptr)
  # -1 is tested first so ptr.size is never consulted in that case.
  return unless ret == -1 || ret > ptr.size

  raise Error, "Not enough memory allocated"
end
@@ -169,10 +175,10 @@
# convert byte offsets to character offsets
# TODO see if more efficient to store next_pos in variable
pos = 0
text.each_char.with_index do |c, i|
- while pos == start_bytes[starts.size]
+ while pos == start_bytes[starts.size] || start_bytes[starts.size] == -1
starts << i
end
pos += c.bytesize
while pos - 1 == end_bytes[ends.size]
ends << i + 1