Sha256: f3f848eb21361fc2969bb7e32f00e059a485957e8287a45aab3e8993fd783d18

Contents?: true

Size: 1.91 KB

Versions: 9

Compression:

Stored size: 1.91 KB

Contents

# frozen_string_literal: true

# Wraps a BPE object obtained from Tiktoken::BpeFactory and exposes
# encode/decode helpers together with the name the encoding was loaded under.
class Tiktoken::Encoding
    # @return [Symbol] the encoding name (a Symbol when built via .for_name)
    attr_reader :name

    # This returns a new Tiktoken::Encoding instance for the requested encoding.
    # The name is dispatched as a method call on Tiktoken::BpeFactory.
    # @param encoding [Symbol, String] The name of the encoding to load
    # @return [Tiktoken::Encoding] The encoding instance
    # @raise [NoMethodError] if Tiktoken::BpeFactory has no public method with that name
    def self.for_name(encoding)
        encoding_name = encoding.to_sym
        # public_send (not send) so an arbitrary name cannot reach private
        # methods such as Kernel#exit or Kernel#system on the factory module.
        Tiktoken::Encoding.new(Tiktoken::BpeFactory.public_send(encoding_name), encoding_name)
    end

    # This returns a Tiktoken::Encoding instance for the requested encoding.
    # It will reuse an existing instance if that encoding has already been loaded
    # through this method; instances are kept in a class-level hash keyed by the
    # symbolized name.
    # @param encoding [Symbol, String] The name of the encoding to load
    # @return [Tiktoken::Encoding] The encoding instance
    def self.for_name_cached(encoding)
        @encodings ||= {}
        @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
    end

    # Encodes the text as a list of integer tokens by delegating to the wrapped
    # BPE object's encode_ordinary.
    # NOTE(review): the original comment describes this as the "unescaped" variant
    # with respect to special (non-text) tokens; the exact handling lives in the
    # wrapped object and should be confirmed there.
    # @param text [String] The text to encode
    # @return [Array<Integer>] The encoded tokens
    def encode_ordinary(text)
        @ext_base_bpe.encode_ordinary(text)
    end

    # Encodes the text as a list of integer tokens. Per the original notes, special
    # non-text tokens are treated as plain text unless they are listed in
    # allowed_special, i.e. the text behaves as if it had been escaped.
    # The list is forwarded positionally to the wrapped BPE object.
    # @param text [String] The text to encode
    # @param allowed_special [Array<String>] An array of special tokens to allow
    # @return [Array<Integer>] The encoded tokens
    def encode(text, allowed_special: [])
        @ext_base_bpe.encode(text, allowed_special)
    end

    # Decodes the tokens back into text
    # @param tokens [Array<Integer>] The tokens to decode
    # @return [String] The decoded text
    def decode(tokens)
        @ext_base_bpe.decode(tokens)
    end

    private

    # @param ext_base_bpe [Object] The BPE object that performs the actual
    #   encode_ordinary / encode / decode work
    # @param name [Symbol] The name of the encoding
    def initialize(ext_base_bpe, name)
        @ext_base_bpe = ext_base_bpe
        @name = name
    end
end

Version data entries

9 entries across 9 versions & 1 rubygems

Version Path
tiktoken_ruby-0.0.3-x86_64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-x86_64-linux-musl lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-x86_64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-x64-mingw32 lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-x64-mingw-ucrt lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-arm64-darwin lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-arm-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3-aarch64-linux lib/tiktoken_ruby/encoding.rb
tiktoken_ruby-0.0.3 lib/tiktoken_ruby/encoding.rb