Sha256: f5e1adf3aa81c1d552a07052c467da03c31746d84fa8d2381db2ef0c9aeeafd7

Contents?: true

Size: 1.34 KB

Versions: 1

Compression:

Stored size: 1.34 KB

Contents

# encoding: utf-8
#
module Indexers

  # Indexer that prepares the data of the given categories.
  #
  # Note: "Serial" refers to the fact that each category is
  # processed separately, one after the other.
  #
  class Serial < Base

    # Harvests the data from the source, tokenizes it, and writes it
    # to an intermediate "prepared index" file, emitting one line of
    # the form "<indexed id>,<token>\n" per token.
    #
    # Output is buffered in an array and flushed to the file whenever
    # the buffer holds more than 100_000 elements (each emitted line
    # occupies four elements). Whatever remains is written at the end.
    #
    # Parameters:
    #  * categories: An enumerable of Category-s.
    #
    def process(categories)
      separator = ','
      line_end  = "\n"

      categories.each do |category|
        # Fetched once per category, before its prepared index file is opened.
        tokenizer = category.tokenizer

        category.prepared_index_file do |file|
          buffer = []

          source.harvest(category) do |indexed_id, text|
            tokenizer.tokenize(text).each do |token_text|
              # Falsy tokens (nil/false) produce no output line.
              buffer.push(indexed_id, separator, token_text, line_end) if token_text
            end

            # Intermediate flush; the buffer is only emptied if the
            # write returned a truthy value.
            if buffer.size > 100_000
              file.write(buffer.join) && buffer.clear
            end
          end

          timed_exclaim "\"#{@index_or_category.identifier}\":   => #{file.path}."

          # Write out whatever is still buffered.
          file.write(buffer.join)
        end
      end
    end

    # Announces the start of the serial data preparation.
    #
    def start_indexing_message # :nodoc:
      identifier = @index_or_category.identifier
      timed_exclaim "\"#{identifier}\": Starting serial data preparation."
    end

    # Announces the end of the serial data preparation.
    #
    def finish_indexing_message # :nodoc:
      identifier = @index_or_category.identifier
      timed_exclaim "\"#{identifier}\": Finished serial data preparation."
    end

  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
picky-2.7.0 lib/picky/indexers/serial.rb