Sha256: 6e5d261ee44dc47c1b2c5b912a00a51ee7664a164893f3fbe8f2470678f948d3
Contents?: true
Size: 1.49 KB
Versions: 2
Compression:
Stored size: 1.49 KB
Contents
# encoding: utf-8
#
module Indexers

  # Uses a number of categories, a source, and a tokenizer to index data.
  #
  # The tokenizer is taken from each category if specified, from the index, if not.
  #
  class Parallel < Base

    # Number of source objects processed between two flushes of the
    # in-memory caches to the prepared index files.
    #
    BATCH_SIZE = 100_000

    delegate :categories, :source, :to => :@index

    # index - The index whose categories and source are processed. It must
    #         respond to #categories, #source and #name.
    #
    def initialize(index)
      @index = index
    end

    # Runs every object of the source through each category's tokenizer and
    # writes one "id,token\n" row per token to that category's prepared
    # index file.
    #
    # Rows are buffered in memory and flushed every BATCH_SIZE objects. The
    # prepared index files are closed even if the source, a tokenizer or a
    # write raises.
    #
    # Returns the array of [category, cache, file, tokenizer] tuples.
    #
    def process
      comma   = ','
      newline = "\n"

      # One tuple per category: the category, its row cache, its output file
      # and the tokenizer to use.
      #
      # NOTE(review): the fallback `tokenizer` is not defined in this file;
      # presumably Base provides it - confirm.
      #
      combined = categories.map do |category|
        [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
      end

      begin
        pending = 0

        source.each do |object|
          id = object.id

          # This needs to be rewritten.
          #
          # Is it a good idea that the tokenizer has no control over when it
          # gets the next text?
          #
          combined.each do |category, cache, _, category_tokenizer|
            text = object.send(category.from).to_s

            category_tokenizer.tokenize(text).each do |token_text|
              next unless token_text
              cache << id << comma << token_text << newline
            end
          end

          # Count the object first so that every batch, including the first
          # one, holds exactly BATCH_SIZE objects.
          #
          pending += 1
          next if pending < BATCH_SIZE

          flush(combined)
          pending = 0
        end

        # Write whatever is left over from the last, partial batch.
        #
        flush(combined)
      ensure
        # Close the files on failure too, so no handles are leaked.
        #
        combined.each { |_, _, file, _| file.close }
      end

      combined
    end

    # Writes each category's cached rows to its file and empties the cache.
    #
    # combined - Array of [category, cache, file, tokenizer] tuples as built
    #            by #process.
    #
    # Returns combined.
    #
    def flush(combined) # :nodoc:
      combined.each do |_, cache, file, _|
        file.write(cache.join)
        cache.clear
      end
    end

    # Announces the start of indexing for this index via timed_exclaim.
    #
    def indexing_message # :nodoc:
      timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
    end

  end

end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
picky-2.4.2 | lib/picky/internals/indexers/parallel.rb |
picky-2.4.1 | lib/picky/internals/indexers/parallel.rb |