lib/ollama/documents.rb in ollama-ruby-0.11.0 vs lib/ollama/documents.rb in ollama-ruby-0.12.0

- old
+ new

@@ -2,48 +2,34 @@ require 'digest' require 'kramdown/ansi' class Ollama::Documents end -class Ollama::Documents::Cache +module Ollama::Documents::Cache end +require 'ollama/documents/cache/records' require 'ollama/documents/cache/memory_cache' require 'ollama/documents/cache/redis_cache' require 'ollama/documents/cache/redis_backed_memory_cache' +require 'ollama/documents/cache/sqlite_cache' module Ollama::Documents::Splitters end require 'ollama/documents/splitters/character' require 'ollama/documents/splitters/semantic' class Ollama::Documents include Kramdown::ANSI::Width + include Ollama::Documents::Cache - class Record < JSON::GenericObject - def to_s - my_tags = tags_set - my_tags.empty? or my_tags = " #{my_tags}" - "#<#{self.class} #{text.inspect}#{my_tags} #{similarity || 'n/a'}>" - end + Record = Class.new Ollama::Documents::Cache::Records::Record - def tags_set - Ollama::Utils::Tags.new(tags, source:) - end - - def ==(other) - text == other.text - end - - alias inspect to_s - end - - def initialize(ollama:, model:, model_options: nil, collection: nil, cache: MemoryCache, redis_url: nil, debug: false) + def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false) collection ||= default_collection - @ollama, @model, @model_options, @collection = - ollama, model, model_options, collection.to_sym - @redis_url = redis_url - @cache = connect_cache(cache) - @debug = debug + @ollama, @model, @model_options, @collection, @debug = + ollama, model, model_options, collection.to_sym, debug + database_filename ||= ':memory:' + @cache = connect_cache(cache, redis_url, embedding_length, database_filename) end def default_collection :default end @@ -108,44 +94,23 @@ def size @cache.size end def clear(tags: nil) - if tags - tags = Ollama::Utils::Tags.new(Array(tags)).to_a - @cache.each do |key, record| - if (tags & record.tags).size >= 1 - @cache.delete(@cache.unpre(key)) - end - end - else - @cache.clear - end + @cache.clear(tags:) self end - def find(string, tags: nil, prompt: nil) - needle = convert_to_vector(string, prompt:) - needle_norm = @cache.norm(needle) - records = @cache - if tags - tags = Ollama::Utils::Tags.new(tags).to_a - records = records.select { |_key, record| (tags & record.tags).size >= 1 } - end - records = records.sort_by { |key, record| - record.key = key - record.similarity = @cache.cosine_similarity( - a: needle, - b: record.embedding, - a_norm: needle_norm, - b_norm: record.norm, - ) - } - records.transpose.last&.reverse.to_a + def find(string, tags: nil, prompt: nil, max_records: nil) + needle = convert_to_vector(string, prompt:) + @cache.find_records(needle, tags:, max_records: nil) end def find_where(string, text_size: nil, text_count: nil, **opts) + if text_count + opts[:max_records] = text_count + end records = find(string, **opts) size, count = 0, 0 records.take_while do |record| if text_size and (size += record.text.size) > text_size next false @@ -160,33 +125,41 @@ def collections ([ default_collection ] + @cache.collections('%s-' % self.class)).uniq end def tags - @cache.each_with_object(Ollama::Utils::Tags.new) do |(_, record), t| - record.tags.each do |tag| - t.add(tag, source: record.source) - end - end + @cache.tags end private - def connect_cache(cache_class) + def connect_cache(cache_class, redis_url, embedding_length, database_filename) cache = nil - if cache_class.instance_method(:redis) + if (cache_class.instance_method(:redis) rescue nil) begin - cache = cache_class.new(prefix:, url: @redis_url, object_class: Record) + cache = cache_class.new(prefix:, url: redis_url, object_class: Record) cache.size rescue Redis::CannotConnectError STDERR.puts( - "Cannot connect to redis URL #{@redis_url.inspect}, "\ + "Cannot connect to redis URL #{redis_url.inspect}, "\ "falling back to MemoryCache." ) end + elsif cache_class == SQLiteCache + cache = cache_class.new( + prefix:, + embedding_length:, + filename: database_filename, + debug: @debug + ) end ensure - cache ||= MemoryCache.new(prefix:) + cache ||= MemoryCache.new(prefix:,) + cache.respond_to?(:find_records) or cache.extend(Records::FindRecords) + cache.extend(Records::Tags) + if cache.respond_to?(:redis) + cache.extend(Records::RedisFullEach) + end return cache end def convert_to_vector(input, prompt: nil) if prompt