lib/dimsum.rb in dimsum-0.0.1 vs lib/dimsum.rb in dimsum-0.0.2
- old
+ new
@@ -1,5 +1,38 @@
require "dimsum/version"
module Dimsum
  # Draws a random sample of at most +sample_size+ lines from a file using
  # reservoir sampling, reading the file one line at a time.
  #
  # @param filename [String] path of the file to sample from
  # @param sample_size [Integer] maximum number of lines to return
  # @return [Array<String>] the sampled lines, line terminators included;
  #   holds every line of the file when it has fewer than +sample_size+ lines
  # @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
  def reservoir_file(filename, sample_size)
    # File.foreach closes the file handle once iteration is finished,
    # unlike a bare File.open without a block.
    reservoir_sample(File.foreach(filename), sample_size)
  end

  # Draws a random sample of at most +sample_size+ lines from standard input
  # using reservoir sampling.
  #
  # @param sample_size [Integer] maximum number of lines to return
  # @return [Array<String>] the sampled lines, line terminators included
  def reservoir_stdin(sample_size)
    # Stream the input line by line so only the reservoir is held in memory.
    reservoir_sample(STDIN.each_line, sample_size)
  end

  # Decides whether the line at a given 0-based position enters the reservoir
  # and, if so, which slot it occupies.
  #
  # The first +sample_size+ lines (positions 0...sample_size) fill the
  # reservoir in order. Every later line draws a slot uniformly from
  # 0..input_line_number and replaces that slot only when the draw lands
  # inside the reservoir.
  #
  # @param input_line_number [Integer] 0-based position of the line
  # @param sample_size [Integer] capacity of the reservoir
  # @param random_number_generator [#rand] object whose +rand(range)+ returns
  #   an integer within the given inclusive range (e.g. a Random instance)
  # @return [Array(Boolean, Integer)] +[keep, index]+; +index+ is only a valid
  #   reservoir slot when +keep+ is true
  def keep_line_in_index(input_line_number, sample_size, random_number_generator)
    # Strictly less-than: positions are 0-based, so position == sample_size
    # is already the first line that has to compete for a slot.
    return [true, input_line_number] if input_line_number < sample_size

    slot = random_number_generator.rand(0..input_line_number)
    [slot < sample_size, slot]
  end

  private

  # Runs the reservoir-sampling loop over any enumerable of lines.
  def reservoir_sample(lines, sample_size)
    random = Random.new

    lines.each_with_index.each_with_object([]) do |(line, line_number), reservoir|
      keep, index = keep_line_in_index(line_number, sample_size, random)
      reservoir[index] = line if keep
    end
  end
end