Sha256: a02b6ddb72318552ab78fa7656f9ad36f6684da58f289b440f65f568eaaa0f24

Contents?: true

Size: 1.98 KB

Versions: 18

Compression:

Stored size: 1.98 KB

Contents

require 'spec_helper'

# Specs for Ollama::Documents::Splitters::Semantic.
#
# The Ollama client is a test double whose #embed is stubbed to return an
# object exposing the embeddings parsed from the embeddings.json asset.
RSpec.describe Ollama::Documents::Splitters::Semantic do
  let :ollama do
    double('Ollama::Client')
  end

  # Splitter built without an explicit chunk_size.
  let :splitter do
    described_class.new ollama:, model: 'mxbai-embed-large'
  end

  # `asset` is not defined in this file; presumably provided via spec_helper.
  let :embeddings do
    JSON(File.read(asset('embeddings.json')))
  end

  # Declared ahead of the examples: the hook applies to every example in the
  # group regardless of where it is written, so keeping it at the top makes
  # the shared setup visible before the examples are read.
  before do
    allow(ollama).to receive(:embed).and_return(double(embeddings:))
  end

  # Builds the sample text: each [letter, count] pair contributes `count`
  # sentences made of that letter repeated ten times; all sentences are
  # joined with ". ".
  def sample_text(*runs)
    runs.flat_map { |letter, count| [ letter * 10 ] * count } * ". "
  end

  # Asserts that the joined chunks contain exactly as many "A" and "B"
  # characters as the original text.
  def expect_letters_preserved(result, text)
    joined = result.to_a.join('')
    expect(joined.count(?A)).to eq text.count(?A)
    expect(joined.count(?B)).to eq text.count(?B)
  end

  it 'can be instantiated' do
    expect(splitter).to be_a described_class
  end

  it 'can split with breakpoint :percentile' do
    text = sample_text([ 'A', 3 ], [ 'B', 3 ], [ 'A', 3 ])
    result = splitter.split(text, breakpoint: :percentile, percentile: 75)
    expect(result.count).to eq 3
    expect_letters_preserved(result, text)
  end

  # Previously this example shared its description with the one above and
  # created the chunk_size: 50 splitter without using it, so it ran against
  # the default `splitter`. It now has its own description and actually
  # exercises the splitter it builds.
  # NOTE(review): the expected count of 4 was written while the example ran
  # against the default splitter — confirm it still holds with chunk_size: 50.
  it 'can split with breakpoint :percentile and chunk_size: 50' do
    chunked_splitter = described_class.new ollama:, model: 'mxbai-embed-large', chunk_size: 50
    text = sample_text([ 'A', 6 ], [ 'B', 3 ], [ 'A', 3 ])
    result = chunked_splitter.split(text, breakpoint: :percentile, percentile: 75)
    expect(result.count).to eq 4
    expect_letters_preserved(result, text)
  end

  it 'can split with breakpoint :standard_deviation' do
    text = sample_text([ 'A', 3 ], [ 'B', 3 ], [ 'A', 3 ])
    result = splitter.split(text, breakpoint: :standard_deviation, percentage: 100)
    expect(result.count).to eq 3
    expect_letters_preserved(result, text)
  end

  it 'can split with breakpoint :interquartile' do
    text = sample_text([ 'A', 3 ], [ 'B', 3 ], [ 'A', 3 ])
    result = splitter.split(text, breakpoint: :interquartile, percentage: 75)
    expect(result.count).to eq 3
    expect_letters_preserved(result, text)
  end
end

Version data entries

18 entries across 18 versions & 1 rubygem

Version Path
ollama-ruby-0.12.1 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.12.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.11.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.10.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.9.3 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.9.2 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.9.1 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.9.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.8.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.7.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.6.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.5.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.4.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.3.2 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.3.1 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.3.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.2.0 spec/ollama/documents/splitters/semantic_spec.rb
ollama-ruby-0.1.0 spec/ollama/documents/splitters/semantic_spec.rb