Sha256: 388b88141c540999271b17067470d99224d19ad1acea4d30c57821bd467db61e

Contents?: true

Size: 1.7 KB

Versions: 68

Compression:

Stored size: 1.7 KB

Contents

# encoding: utf-8
#
module Picky

  module Indexers

    # Uses a category to index its data.
    #
    # Note: It is called serial since it indexes each category separately.
    #
    class Serial < Base

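      # Harvest the data from the source, tokenize it,
      # and write it to an intermediate "prepared index" file.
      #
      # Parameters:
      #  * source_for_prepare: Handed to reset before each category is harvested.
      #  * categories: An enumerable of Category-s.
      #  * scheduler: Optional, defaults to Scheduler.new; not referenced in this method.
      #
      # Yields each category's prepared index file once that category is processed.
      #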
      def process source_for_prepare, categories, scheduler = Scheduler.new
        # Each category is handled on its own: its rows are harvested,
        # tokenized in batches, and written to its prepared index file,
        # which is then yielded to the caller's block.
        #
        # Note: reset and source are not defined in this class;
        # presumably they come from Base (to be confirmed there).
        #
        categories.each do |category|
          category.prepared_index_file do |prepared_file|
            # Rows waiting to be tokenized, and the output
            # fragments collected by index_flush.
            #
            batch     = []
            buffer    = []
            tokenizer = category.tokenizer

            reset source_for_prepare

            source.harvest(category) do |*row|
              batch << row

              # Once a full batch has been collected, tokenize and
              # write it, then start collecting the next batch.
              #
              if batch.size >= 10_000
                index_flush batch, prepared_file, buffer, tokenizer
                batch.clear
              end
            end

            # Handle the rows left over from the last, partial batch.
            #
            index_flush batch, prepared_file, buffer, tokenizer

            yield prepared_file
          end
        end
      end

      # Tokenizes a batch of harvested rows, appends one
      # "id,token\n" entry per token to the cache, and then
      # flushes the cache to the given file.
      #
      # Parameters:
      #  * datas: An enumerable of [indexed_id, text] pairs.
      #  * file: Passed on to flush as the write target.
      #  * cache: Array collecting the output fragments.
      #  * tokenizer: Responds to tokenize(text); only the first
      #    element of its result (the tokens) is used.
      #
      # Returns whatever flush returns.
      #
      def index_flush datas, file, cache, tokenizer
        separator  = ','
        terminator = "\n"

        datas.each do |indexed_id, text|
          tokens, _originals = tokenizer.tokenize text # Originals are not needed.
          tokens.each do |token_text|
            # Falsy tokens (nil/false) produce no entry.
            cache.push indexed_id, separator, token_text, terminator if token_text
          end
        end

        flush file, cache
      end

      # Writes the joined cache to the prepared file in one call.
      #
      # The cache is emptied only if write returns a truthy value;
      # in that case the (now empty) cache is returned, otherwise
      # the falsy result of write is returned and the cache is kept.
      #
      def flush prepared_file, cache
        written = prepared_file.write cache.join
        written && cache.clear
      end

    end
  end

end

Version data entries

68 entries across 68 versions & 1 rubygems

Version Path
picky-4.13.1 lib/picky/indexers/serial.rb
picky-4.13.0 lib/picky/indexers/serial.rb
picky-4.12.13 lib/picky/indexers/serial.rb
picky-4.12.12 lib/picky/indexers/serial.rb
picky-4.12.11 lib/picky/indexers/serial.rb
picky-4.12.10 lib/picky/indexers/serial.rb
picky-4.12.8 lib/picky/indexers/serial.rb
picky-4.12.7 lib/picky/indexers/serial.rb
picky-4.12.6 lib/picky/indexers/serial.rb
picky-4.12.5 lib/picky/indexers/serial.rb
picky-4.12.4 lib/picky/indexers/serial.rb
picky-4.12.3 lib/picky/indexers/serial.rb
picky-4.12.2 lib/picky/indexers/serial.rb
picky-4.12.1 lib/picky/indexers/serial.rb
picky-4.12.0 lib/picky/indexers/serial.rb
picky-4.11.3 lib/picky/indexers/serial.rb
picky-4.11.2 lib/picky/indexers/serial.rb
picky-4.11.1 lib/picky/indexers/serial.rb
picky-4.11.0 lib/picky/indexers/serial.rb
picky-4.10.0 lib/picky/indexers/serial.rb