lib/roseflow/tiktoken/tokenizer.rb in roseflow-tiktoken-0.1.0 vs lib/roseflow/tiktoken/tokenizer.rb in roseflow-tiktoken-0.2.0

- old
+ new

@@ -1,14 +1,13 @@
-require "pycall"
+require "tiktoken_ruby"
 
 module Roseflow
   module Tiktoken
     class Tokenizer
       def initialize(model: nil)
-        @tokenizer = PyCall.import_module("tiktoken")
         @model = model
-        @encoding = @tokenizer.encoding_for_model(@model) if @model
+        @encoding = determine_encoding(model)
       end
 
       def encode(input)
         @encoding.encode(input)
       rescue
@@ -38,9 +37,14 @@
         token_count += 3 # Every reply is primed with assistant
         return token_count
       end
       private
+
+      def determine_encoding(model)
+        encoding = model ? ::Tiktoken.encoding_for_model(model) : ::Tiktoken.get_encoding("cl100k_base")
+        encoding.is_a?(::Tiktoken::Encoding) ? encoding : ::Tiktoken.get_encoding("cl100k_base")
+      end
 
       def tokens_per_message_for_model(model)
         case model
         when "gpt-4"
           tokens_per_message_for_model("gpt-4-0314")