Sha256: c5b6cc6952425fa85fc59d54ac91b5318f1e47e9f6700693db98458a0693595e

Contents?: true

Size: 1.2 KB

Versions: 7

Compression:

Stored size: 1.2 KB

Contents

# encoding: utf-8
#
module Indexers

  # Uses a category to index its data.
  #
  # Note: It is called serial since it indexes each category separately.
  #
  class Serial < Base

    attr_reader :category

    # Calls to #source are forwarded to the category.
    #
    delegate :source, to: :category

    # Stores the category that this indexer works on.
    #
    def initialize(category)
      @category = category
    end

    # Returns the category's tokenizer, cached after the first lookup.
    #
    def tokenizer
      @tokenizer ||= category.tokenizer
    end

    # Harvests (id, text) pairs from the source, tokenizes each text,
    # and writes one "id,token\n" line per token to the category's
    # prepared index file.
    #
    # Lines are collected in a buffer which is written out whenever it
    # holds more than 100_000 elements (each line occupies four of them);
    # whatever is left over is written once harvesting has finished.
    #
    def process
      separator  = ','
      terminator = "\n"

      cached_tokenizer = tokenizer # Looked up once, outside the harvest loop.
      category.prepared_index_file do |file|
        buffer = []
        source.harvest(category) do |indexed_id, text|
          cached_tokenizer.tokenize(text).each do |token_text|
            # Falsy tokens (nil/false) do not produce a line.
            buffer.push(indexed_id, separator, token_text, terminator) if token_text
          end
          # The buffer is only emptied if the write returned a truthy value.
          flush_due = buffer.size > 100_000
          buffer.clear if flush_due && file.write(buffer.join)
        end
        file.write(buffer.join)
      end
    end

    # Announces that serial indexing is starting for this category.
    #
    def indexing_message # :nodoc:
      timed_exclaim "\"#{@category.identifier}\": Starting serial indexing."
    end

  end
end

Version data entries

7 entries across 7 versions & 1 rubygem

Version Path
picky-2.6.0 lib/picky/indexers/serial.rb
picky-2.5.2 lib/picky/internals/indexers/serial.rb
picky-2.5.1 lib/picky/internals/indexers/serial.rb
picky-2.5.0 lib/picky/internals/indexers/serial.rb
picky-2.4.3 lib/picky/internals/indexers/serial.rb
picky-2.4.2 lib/picky/internals/indexers/serial.rb
picky-2.4.1 lib/picky/internals/indexers/serial.rb