Sha256: eea2c1ca3d953c19d5c1c0dce2833d151a92285db086b02c0647777919dd7cf0

Contents?: true

Size: 1.17 KB

Versions: 3

Compression:

Stored size: 1.17 KB

Contents

# encoding: utf-8
#
module Indexers

  # Indexer that works through the data of one single category.
  #
  # Note: The original explanation of the name "serial" was cut off
  # ("since it indexes each ..."); presumably each category is
  # indexed on its own, one after the other - TODO confirm.
  #
  class Serial < Base

    attr_reader :category

    # The data source is looked up on the category.
    #
    delegate :source, to: :category

    # Remembers the category this indexer works on.
    #
    def initialize(category)
      @category = category
    end

    # Returns the category's tokenizer.
    #
    # The tokenizer is fetched from the category on first use
    # and memoized on this indexer afterwards.
    #
    def tokenizer
      @tokenizer ||= category.tokenizer
    end

    # Harvests the category's data from the source, tokenizes each
    # text and writes one "indexed_id,token_text\n" line per token
    # to the intermediate "prepared index" file.
    #
    # Lines are collected in a buffer that is written out whenever
    # it has grown beyond 100_000 elements (each token adds four
    # elements), and once more after harvesting has finished.
    #
    def process
      separator  = ','
      terminator = "\n"

      # Look the tokenizer up once, not once per harvested record.
      #
      cached_tokenizer = tokenizer

      category.prepared_index_file do |file|
        buffer = []

        source.harvest(category) do |indexed_id, text|
          cached_tokenizer.tokenize(text).each do |token_text|
            # Tokens that are nil/false are left out.
            #
            buffer << indexed_id << separator << token_text << terminator if token_text
          end

          next unless buffer.size > 100_000

          # The buffer is only emptied if the write returned a truthy value.
          #
          file.write(buffer.join) && buffer.clear
        end

        # Write whatever is still left in the buffer.
        #
        file.write(buffer.join)
      end
    end

    # Hands the "starting serial indexing" notice, tagged with the
    # category's identifier, to timed_exclaim.
    #
    def indexing_message
      timed_exclaim "\"#{@category.identifier}\": Starting serial indexing."
    end

  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
picky-2.4.0 lib/picky/internals/indexers/serial.rb
picky-2.3.0 lib/picky/internals/indexers/serial.rb
picky-2.2.1 lib/picky/internals/indexers/serial.rb