lib/ollama/documents.rb in ollama-ruby-0.11.0 vs lib/ollama/documents.rb in ollama-ruby-0.12.0
- old
+ new
@@ -2,48 +2,34 @@
require 'digest'
require 'kramdown/ansi'
class Ollama::Documents
end
-class Ollama::Documents::Cache
+module Ollama::Documents::Cache
end
+require 'ollama/documents/cache/records'
require 'ollama/documents/cache/memory_cache'
require 'ollama/documents/cache/redis_cache'
require 'ollama/documents/cache/redis_backed_memory_cache'
+require 'ollama/documents/cache/sqlite_cache'
module Ollama::Documents::Splitters
end
require 'ollama/documents/splitters/character'
require 'ollama/documents/splitters/semantic'
class Ollama::Documents
include Kramdown::ANSI::Width
+ include Ollama::Documents::Cache
- class Record < JSON::GenericObject
- def to_s
- my_tags = tags_set
- my_tags.empty? or my_tags = " #{my_tags}"
- "#<#{self.class} #{text.inspect}#{my_tags} #{similarity || 'n/a'}>"
- end
+ Record = Class.new Ollama::Documents::Cache::Records::Record
- def tags_set
- Ollama::Utils::Tags.new(tags, source:)
- end
-
- def ==(other)
- text == other.text
- end
-
- alias inspect to_s
- end
-
- def initialize(ollama:, model:, model_options: nil, collection: nil, cache: MemoryCache, redis_url: nil, debug: false)
+ def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false)
collection ||= default_collection
- @ollama, @model, @model_options, @collection =
- ollama, model, model_options, collection.to_sym
- @redis_url = redis_url
- @cache = connect_cache(cache)
- @debug = debug
+ @ollama, @model, @model_options, @collection, @debug =
+ ollama, model, model_options, collection.to_sym, debug
+ database_filename ||= ':memory:'
+ @cache = connect_cache(cache, redis_url, embedding_length, database_filename)
end
def default_collection
:default
end
@@ -108,44 +94,23 @@
def size
@cache.size
end
def clear(tags: nil)
- if tags
- tags = Ollama::Utils::Tags.new(Array(tags)).to_a
- @cache.each do |key, record|
- if (tags & record.tags).size >= 1
- @cache.delete(@cache.unpre(key))
- end
- end
- else
- @cache.clear
- end
+ @cache.clear(tags:)
self
end
- def find(string, tags: nil, prompt: nil)
- needle = convert_to_vector(string, prompt:)
- needle_norm = @cache.norm(needle)
- records = @cache
- if tags
- tags = Ollama::Utils::Tags.new(tags).to_a
- records = records.select { |_key, record| (tags & record.tags).size >= 1 }
- end
- records = records.sort_by { |key, record|
- record.key = key
- record.similarity = @cache.cosine_similarity(
- a: needle,
- b: record.embedding,
- a_norm: needle_norm,
- b_norm: record.norm,
- )
- }
- records.transpose.last&.reverse.to_a
+ def find(string, tags: nil, prompt: nil, max_records: nil)
+ needle = convert_to_vector(string, prompt:)
+ @cache.find_records(needle, tags:, max_records:) # forward the caller's limit; it was hard-coded to nil
end
def find_where(string, text_size: nil, text_count: nil, **opts)
+ if text_count
+ opts[:max_records] = text_count
+ end
records = find(string, **opts)
size, count = 0, 0
records.take_while do |record|
if text_size and (size += record.text.size) > text_size
next false
@@ -160,33 +125,41 @@
def collections
([ default_collection ] + @cache.collections('%s-' % self.class)).uniq
end
def tags
- @cache.each_with_object(Ollama::Utils::Tags.new) do |(_, record), t|
- record.tags.each do |tag|
- t.add(tag, source: record.source)
- end
- end
+ @cache.tags
end
private
- def connect_cache(cache_class)
+ def connect_cache(cache_class, redis_url, embedding_length, database_filename)
cache = nil
- if cache_class.instance_method(:redis)
+ if (cache_class.instance_method(:redis) rescue nil)
begin
- cache = cache_class.new(prefix:, url: @redis_url, object_class: Record)
+ cache = cache_class.new(prefix:, url: redis_url, object_class: Record)
cache.size
rescue Redis::CannotConnectError
STDERR.puts(
- "Cannot connect to redis URL #{@redis_url.inspect}, "\
+ "Cannot connect to redis URL #{redis_url.inspect}, "\
"falling back to MemoryCache."
)
end
+ elsif cache_class == SQLiteCache
+ cache = cache_class.new(
+ prefix:,
+ embedding_length:,
+ filename: database_filename,
+ debug: @debug
+ )
end
ensure
- cache ||= MemoryCache.new(prefix:)
+ cache ||= MemoryCache.new(prefix:,)
+ cache.respond_to?(:find_records) or cache.extend(Records::FindRecords)
+ cache.extend(Records::Tags)
+ if cache.respond_to?(:redis)
+ cache.extend(Records::RedisFullEach)
+ end
return cache
end
def convert_to_vector(input, prompt: nil)
if prompt