lib/roseflow/tiktoken/tokenizer.rb in roseflow-tiktoken-0.1.0 vs lib/roseflow/tiktoken/tokenizer.rb in roseflow-tiktoken-0.2.0
- old
+ new
@@ -1,14 +1,13 @@
-require "pycall"
+require "tiktoken_ruby"
module Roseflow
module Tiktoken
class Tokenizer
# Builds a tokenizer, optionally bound to a model.
#
# model - optional keyword, defaults to nil; stored in @model.
#         Presumably a model-name String such as "gpt-4" — confirm against callers.
#
# Diff note: the removed ("-") lines imported the "tiktoken" module through
# PyCall and assigned @encoding only when a model was given. The added ("+")
# line assigns @encoding unconditionally from determine_encoding(model).
def initialize(model: nil)
- @tokenizer = PyCall.import_module("tiktoken")
@model = model
- @encoding = @tokenizer.encoding_for_model(@model) if @model
+ @encoding = determine_encoding(model)
end
def encode(input)
@encoding.encode(input)
rescue
@@ -38,9 +37,14 @@
token_count += 3 # Every reply is primed with assistant
return token_count
end
private
+
# Picks the encoding object used by this tokenizer.
#
# model - value passed to ::Tiktoken.encoding_for_model when truthy; when it is
#         nil/false the "cl100k_base" encoding is requested instead.
#
# Returns the looked-up encoding if it is a ::Tiktoken::Encoding; otherwise
# returns the "cl100k_base" encoding as a fallback.
#
# NOTE(review): the is_a? guard presumably covers encoding_for_model returning
# nil for a model it does not recognize — confirm against tiktoken_ruby.
+ def determine_encoding(model)
+ encoding = model ? ::Tiktoken.encoding_for_model(model) : ::Tiktoken.get_encoding("cl100k_base")
+ encoding.is_a?(::Tiktoken::Encoding) ? encoding : ::Tiktoken.get_encoding("cl100k_base")
+ end
def tokens_per_message_for_model(model)
case model
when "gpt-4"
tokens_per_message_for_model("gpt-4-0314")