Sha256: d643fd564be19a90cd35482a0cfb32922f34ed2a9765130bf45b1fa0f5c9144c

Contents?: true

Size: 1.54 KB

Versions: 1

Compression:

Stored size: 1.54 KB

Contents

# encoding: utf-8
#
module Indexers

  # Uses a category to index its data.
  #
  # Note: It is called serial since it indexes each harvested (id, text)
  # pair of its category one after the other.
  #
  # FIXME Giving the serial a category would be enough, since it already contains an index!
  #
  # Indexes the data of a single category: every text harvested from the
  # category's source is tokenized, and each token is written to the
  # category's prepared index file as one "<id>,<token>" line.
  #
  class Serial < Base

    attr_reader :category

    # The source is obtained from the category.
    #
    delegate :source, :to => :category

    # category - The category whose data this indexer processes. It is asked
    #            for its source, its tokenizer, its prepared index file and
    #            its identifier by the methods below.
    #
    def initialize category
      @category = category
    end

    # The tokenizer used is a cached tokenizer from the category.
    #
    def tokenizer
      @tokenizer ||= category.tokenizer
    end

    # Harvests (id, text) pairs from the source, tokenizes each text and
    # writes one "<id>,<token>\n" record per token into the file yielded by
    # category.prepared_index_file.
    #
    # Records are collected in an in-memory buffer (four array elements per
    # record) which is written out and emptied whenever it holds more than
    # 100_000 elements after a harvested entry has been processed. Whatever
    # is left in the buffer is written once harvesting has finished.
    #
    def process
      # Plain string literals instead of ?x character literals.
      #
      comma   = ','
      newline = "\n"

      # TODO Move open to config?
      #
      # @category.prepared_index do |file|
      #   source.harvest(@index, @category) do |indexed_id, text|
      #     tokenizer.tokenize(text).each do |token_text|
      #       next unless token_text
      #       file.buffer indexed_id << comma << token_text << newline
      #     end
      #     file.write_maybe
      #   end
      # end
      #
      # Held in a local, presumably to avoid going through the memoizing
      # method for every harvested entry.
      #
      local_tokenizer = tokenizer
      category.prepared_index_file do |file|
        buffer = []
        source.harvest(category) do |indexed_id, text|
          local_tokenizer.tokenize(text).each do |token_text|
            # Skip tokens that are nil or false.
            #
            next unless token_text
            buffer << indexed_id << comma << token_text << newline
          end
          # Flush as two separate steps so that emptying the buffer does not
          # depend on the value returned by file.write.
          #
          if buffer.size > 100_000
            file.write buffer.join
            buffer.clear
          end
        end
        file.write buffer.join
      end
    end

    # Announces the start of serial indexing for this category by passing
    # the message to timed_exclaim (not defined in this class).
    #
    def indexing_message
      timed_exclaim %Q{"#{category.identifier}": Starting serial indexing.}
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
picky-2.2.0 lib/picky/internals/indexers/serial.rb