Sha256: 6fa0c031569c9b1a92651b6af8dbc8606db1275a38392e875db2c269bc925505
Contents?: true
Size: 1.92 KB
Versions: 1
Compression:
Stored size: 1.92 KB
Contents
require "pycall" module Roseflow module Tiktoken class Tokenizer def initialize(model: nil) @tokenizer = PyCall.import_module("tiktoken") @model = model @encoding = @tokenizer.encoding_for_model(@model) if @model end def encode(input) @encoding.encode(input) rescue raise ::Roseflow::Tiktoken::NoEncodingError, "No encoding found for model #{@model}" end def decode(input) @encoding.decode(input) rescue raise ::Roseflow::Tiktoken::NoEncodingError, "No encoding found for model #{@model}" end def count_tokens(messages) token_count = 0 messages.each do |message| token_count += tokens_per_message_for_model(@model) message.each do |key, value| token_count += encode(value).count if key == "name" token_count += tokens_per_message_for_model(@model) end end end token_count += 3 # Every reply is primed with assistant return token_count end private def tokens_per_message_for_model(model) case model when "gpt-4" tokens_per_message_for_model("gpt-4-0314") when "gpt-3.5-turbo" tokens_per_message_for_model("gpt-3.5-turbo-0301") when "gpt-4-0314" 3 when "gpt-3.5-turbo-0301" 4 else raise NotImplementedError, "Model #{model} is not supported." end end def tokens_per_name_for_model(model) case model when "gpt-4" tokens_per_message_for_model("gpt-4-0314") when "gpt-3.5-turbo" tokens_per_message_for_model("gpt-3.5-turbo-0301") when "gpt-4-0314" 1 when "gpt-3.5-turbo-0301" -1 else raise NotImplementedError, "Model #{model} is not supported." end end end end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
roseflow-tiktoken-0.1.0 | lib/roseflow/tiktoken/tokenizer.rb |