Sha256: 8f2fa956d7e354f38b20498a1644e14775484423e786aa81d8bd5f8813dcb78e

Contents?: true

Size: 1.65 KB

Versions: 1

Compression:

Stored size: 1.65 KB

Contents

# frozen_string_literal: true

require 'hnswlib'

module Boxcars
  module VectorStores
    # Runs a nearest-neighbour lookup for a text query against a vector store.
    #
    # The query string is embedded through Boxcars::VectorStores::EmbedViaOpenAI
    # and the resulting vector is handed to a store-specific search object.
    # Only ::Hnswlib::HierarchicalNSW stores are currently supported.
    class SimilaritySearch
      # Neighbour count used when the caller does not ask for a specific one.
      DEFAULT_NUM_NEIGHBORS = 2

      # @param embeddings forwarded to the search backend as its :json_doc_path option
      # @param vector_store [::Hnswlib::HierarchicalNSW] index to search
      # @param openai_connection client used to embed the query; when nil one is
      #   built from +openai_access_token+
      # @param openai_access_token [String, nil] only used when no connection is given
      # @param num_neighbors [Integer] how many neighbours the backend is asked for
      # @raise [ArgumentError] if +num_neighbors+ is not a positive Integer or the
      #   vector store type is not supported
      def initialize(embeddings:, vector_store:, openai_connection: nil, openai_access_token: nil,
                     num_neighbors: DEFAULT_NUM_NEIGHBORS)
        validate_num_neighbors(num_neighbors)
        @embeddings = embeddings
        @vector_store = vector_store
        @num_neighbors = num_neighbors
        # Build the backend before the OpenAI client so an unsupported store is
        # rejected without creating a connection first.
        @similarity_search_instance = create_similarity_search_instance
        @openai_connection = openai_connection || default_connection(openai_access_token: openai_access_token)
      end

      # Embeds +query+ and searches the vector store with the resulting vector.
      #
      # @param query [String] non-empty text to search for
      # @return whatever the backend search object's #call returns
      # @raise [ArgumentError] if +query+ is not a String or is empty
      def call(query:)
        validate_query(query)
        query_vector = convert_query_to_vector(query)
        @similarity_search_instance.call(query_vector)
      end

      private

      attr_reader :embeddings, :vector_store, :openai_connection, :num_neighbors

      # Builds the fallback OpenAI client used when none was injected.
      def default_connection(openai_access_token: nil)
        Openai.open_ai_client(openai_access_token: openai_access_token)
      end

      # Rejects neighbour counts that are not positive Integers.
      def validate_num_neighbors(value)
        return if value.is_a?(Integer) && value.positive?

        raise_error 'num_neighbors must be a positive integer'
      end

      # Rejects anything that is not a non-empty String.
      def validate_query(query)
        raise_error 'query must be a string' unless query.is_a?(String)
        raise_error 'query must not be empty' if query.empty?
      end

      # Embeds the single query text and returns the :embedding of the first result.
      def convert_query_to_vector(query)
        Boxcars::VectorStores::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
      end

      # Picks the search backend matching the class of the vector store.
      def create_similarity_search_instance
        case vector_store
        when ::Hnswlib::HierarchicalNSW
          Boxcars::VectorStores::Hnswlib::HnswlibSearch.new(
            vector_store: vector_store,
            options: { json_doc_path: embeddings, num_neighbors: num_neighbors }
          )
        else
          raise_error 'Unsupported vector store provided'
        end
      end

      # All validation failures in this class surface as ArgumentError.
      def raise_error(message)
        raise ArgumentError, message
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
boxcars-0.2.10 lib/boxcars/boxcar/vector_stores/similarity_search.rb