Sha256: 5b8798231750164cf657e00cff677c06a0bd7f1a95adc36ba863a797b3ffcd69

Contents?: true

Size: 1.88 KB

Versions: 30

Compression:

Stored size: 1.88 KB

Contents

# frozen_string_literal: true

# Ruby-facing wrapper around a BPE tokenizer object obtained from
# Tiktoken::BpeFactory. Every encode/decode call is delegated to that object.
# Instances are normally created through {.for_name} or {.for_name_cached}.
class Tiktoken::Encoding
  # Guards the class-level cache populated by {.for_name_cached}.
  CACHE_MUTEX = Mutex.new

  # @return [Symbol] the name supplied when this encoding was created
  #   (always a Symbol when built through {.for_name})
  attr_reader :name

  # Builds a new Tiktoken::Encoding instance for the requested encoding.
  # A fresh instance is created on every call; see {.for_name_cached} to reuse one.
  # @param encoding [Symbol, String] The name of the encoding to load; it must match
  #   a public method on Tiktoken::BpeFactory
  # @return [Tiktoken::Encoding] The encoding instance
  # @raise [NoMethodError] if Tiktoken::BpeFactory has no public method with that name
  def self.for_name(encoding)
    encoding_name = encoding.to_sym
    # public_send (rather than send) restricts dispatch to the factory's public
    # methods, so a caller-supplied name can never reach private methods such as
    # Kernel#exit, Kernel#system or Kernel#eval.
    bpe = Tiktoken::BpeFactory.public_send(encoding_name)
    Tiktoken::Encoding.new(bpe, encoding_name)
  end

  # Returns a Tiktoken::Encoding instance for the requested encoding, reusing a
  # previously loaded instance when one exists for the same name.
  # The lookup and the load both happen while holding CACHE_MUTEX.
  # @param encoding [Symbol, String] The name of the encoding to load
  # @return [Tiktoken::Encoding] The encoding instance
  def self.for_name_cached(encoding)
    CACHE_MUTEX.synchronize do
      @encodings ||= {}
      @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
    end
  end

  # Encodes the text as a list of integer tokens by delegating to the underlying
  # tokenizer's encode_ordinary.
  # NOTE(review): the original author describes this variant as "unescaped" with
  # respect to special (non-text) tokens; the exact handling is implemented by the
  # underlying tokenizer object, not here — confirm against the extension.
  # @param text [String] The text to encode
  # @return [Array<Integer>] The encoded tokens
  def encode_ordinary(text)
    @ext_base_bpe.encode_ordinary(text)
  end

  # Encodes the text as a list of integer tokens by delegating to the underlying
  # tokenizer's encode, passing allowed_special through as a positional argument.
  # NOTE(review): the original author describes special (non-text) tokens as being
  # treated as plain text unless listed in allowed_special; that logic lives in the
  # underlying tokenizer object — confirm against the extension.
  # @param text [String] The text to encode
  # @param allowed_special [Array<String>] An array of special tokens to allow
  # @return [Array<Integer>] The encoded tokens
  def encode(text, allowed_special: [])
    @ext_base_bpe.encode(text, allowed_special)
  end

  # Decodes the tokens back into text by delegating to the underlying tokenizer.
  # @param tokens [Array<Integer>] The tokens to decode
  # @return [String] The decoded text
  def decode(tokens)
    @ext_base_bpe.decode(tokens)
  end

  private

  # @param ext_base_bpe [Object] tokenizer object that responds to encode_ordinary,
  #   encode and decode (as produced by Tiktoken::BpeFactory)
  # @param name [Symbol] the name exposed through #name
  def initialize(ext_base_bpe, name)
    @ext_base_bpe = ext_base_bpe
    @name = name
  end
end

Version data entries

30 entries across 30 versions & 1 rubygem

Version Path
tiktoken_ruby-0.0.11.1 lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-x86_64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-x86_64-linux-musl lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-x86_64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-x64-mingw-ucrt lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-arm64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-arm-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11.1-aarch64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11 lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-x86_64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-x86_64-linux-musl lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-x86_64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-x64-mingw-ucrt lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-arm64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-arm-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.11-aarch64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.10 lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.10-x86_64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.10-x86_64-linux-musl lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.10-x86_64-darwin lib/tiktoken_ruby/encoding.rb