Sha256: 49b0f9a71663120099418efbd39b1440e39accae2e1239dfeedb41919e6132d1

Contents?: true

Size: 1.34 KB

Versions: 5

Compression:

Stored size: 1.34 KB

Contents

#!/usr/bin/env ruby
# -*- encoding: utf-8 -*-
# Copyright Steffie Dorn <mail@muflax.com>, 2019
# License: GNU APGLv3 (or later) <http://www.gnu.org/copyleft/gpl.html>

require "forwardable"
require "set"

# Bounded random sample of a stream of objects (reservoir sampling).
#
# The first +sample_size+ objects offered are stored as-is. After that, each new
# object draws a random slot index from 0..total and overwrites that slot only
# when the index falls inside the reservoir, so the chance of a newcomer being
# kept shrinks as more objects are seen.
#
# Iteration (+each+, and through it Enumerable), +size+, +empty?+ and +uniq+
# are forwarded to the underlying storage.
class Reservoir
  extend Forwardable
  include Enumerable

  attr_accessor :sample_size, :total

  def_delegators :@reservoir, :each, :size, :empty?, :uniq

  # sample_size - number of slots the reservoir fills before it starts
  #               replacing stored elements.
  def initialize(sample_size)
    @reservoir   = []
    @total       = 0
    @sample_size = sample_size
  end

  # Offers +obj+ to the reservoir and counts it in +total+.
  #
  # Returns the updated total (not the reservoir).
  def <<(obj)
    seen = @total

    if seen < @sample_size
      # still room: take the object unconditionally
      @reservoir << obj
    else
      # pick a slot among everything seen so far (inclusive of this object);
      # only indices inside the reservoir lead to a replacement
      slot = rand(0..seen)
      @reservoir[slot] = obj if slot < @sample_size
    end

    @total = seen + 1
  end

  # Offers every element yielded by +list.each+, in order.
  #
  # Returns whatever +list.each+ returns.
  def add(list)
    list.each { |item| self << item }
  end

  # Drops all stored elements and resets the counter. Returns 0.
  def clear
    @reservoir.clear
    @total = 0
  end

  # True once the storage holds at least +sample_size+ elements.
  def full?
    @reservoir.size >= @sample_size
  end
end

# Reservoir variant that keeps the first +sample_size+ distinct elements it is
# offered and never replaces them afterwards. It does no random selection of
# its own, so the input is assumed to be pre-randomized.
#
# Storage is a Set (standard "set" library), which is what guarantees that all
# retained elements are unique.
class HardReservoir < Reservoir
  # sample_size - maximum number of distinct elements to retain.
  def initialize(sample_size)
    super
    # replace the parent's Array storage with a Set so duplicates are dropped
    @reservoir = Set.new
  end

  # Offers +obj+ to the reservoir. It is stored only while fewer than
  # +sample_size+ elements are held (an element already present leaves the set
  # unchanged); the offer is counted in +total+ either way.
  #
  # Returns the updated total (not the reservoir).
  def <<(obj)
    # *only* fill empty slots, never replace
    @reservoir << obj if @reservoir.size < @sample_size

    @total += 1
  end

  # Returns the stored elements as an Array run through Array#uniq; any
  # arguments and block are handed on to it unchanged.
  def uniq(*args, &block)
    @reservoir.to_a.uniq(*args, &block)
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
muflax-0.5.5 lib/muflax/reservoir.rb
muflax-0.5.3 lib/muflax/reservoir.rb
muflax-0.5.2 lib/muflax/reservoir.rb
muflax-0.5.1 lib/muflax/reservoir.rb
muflax-0.5.0 lib/muflax/reservoir.rb