Sha256: 6292a64dd14a432e170dba83c7f2ec4371e7dd33b1286f2bc50a4e376bf14e16

Contents?: true

Size: 1.5 KB

Versions: 23

Compression:

Stored size: 1.5 KB

Contents

# encoding: utf-8
#
module Picky

  module Indexers

    # Prepares the index data of the given categories, one category at a time.
    #
    # Note: It is called serial since the categories are processed
    # separately, one after another.
    #
    class Serial < Base

      # Prepares every given category: pulls its data from the source,
      # tokenizes each text, and writes one "id,token" line per token
      # to the category's intermediate "prepared index" file.
      #
      # Lines are buffered in memory and flushed to the file whenever the
      # buffer holds more than 100_000 entries (four entries per line),
      # and once more after the category has been fully harvested.
      #
      # Parameters:
      #  * categories: An enumerable of Category-s.
      #
      def process(categories)
        separator  = ','
        line_break = "\n"

        categories.each do |category|
          tokenizer = category.tokenizer

          category.prepared_index_file do |file|
            buffer = []

            source.harvest(category) do |id, text|
              # Only the tokens are used; the second return value is ignored.
              tokens, _originals = tokenizer.tokenize(text)

              tokens.each do |token|
                buffer.push(id, separator, token, line_break) if token
              end

              # Flush a large buffer; it is only emptied if the write
              # returned a truthy value.
              if buffer.size > 100_000
                file.write(buffer.join) && buffer.clear
              end
            end

            timed_exclaim %Q{"#{@index_or_category.identifier}":   => #{file.path}.}

            # Write out whatever is still buffered.
            file.write buffer.join
          end
        end
      end

      # Announces, via timed_exclaim, that the serial data preparation
      # for the current index or category is starting.
      #
      def start_indexing_message # :nodoc:
        identifier = @index_or_category.identifier
        timed_exclaim "\"#{identifier}\": Starting serial data preparation."
      end
      # Announces, via timed_exclaim, that the serial data preparation
      # for the current index or category has finished.
      #
      def finish_indexing_message # :nodoc:
        identifier = @index_or_category.identifier
        timed_exclaim "\"#{identifier}\": Finished serial data preparation."
      end

    end
  end

end

Version data entries

23 entries across 23 versions & 1 rubygems

Version Path
picky-3.4.3 lib/picky/indexers/serial.rb
picky-3.4.2 lib/picky/indexers/serial.rb
picky-3.4.1 lib/picky/indexers/serial.rb
picky-3.4.0 lib/picky/indexers/serial.rb
picky-3.3.3 lib/picky/indexers/serial.rb
picky-3.3.2 lib/picky/indexers/serial.rb
picky-3.3.1 lib/picky/indexers/serial.rb
picky-3.3.0 lib/picky/indexers/serial.rb
picky-3.2.0 lib/picky/indexers/serial.rb
picky-3.1.13 lib/picky/indexers/serial.rb
picky-3.1.12 lib/picky/indexers/serial.rb
picky-3.1.11 lib/picky/indexers/serial.rb
picky-3.1.10 lib/picky/indexers/serial.rb
picky-3.1.9 lib/picky/indexers/serial.rb
picky-3.1.8 lib/picky/indexers/serial.rb
picky-3.1.7 lib/picky/indexers/serial.rb
picky-3.1.6 lib/picky/indexers/serial.rb
picky-3.1.5 lib/picky/indexers/serial.rb
picky-3.1.4 lib/picky/indexers/serial.rb
picky-3.1.3 lib/picky/indexers/serial.rb