Sha256: 852ee2c9ab2c6ba4cb7d616bd22ca7280f4aafad4bff8615907e888d640cc76e

Contents?: true

Size: 1.13 KB

Versions: 21

Compression:

Stored size: 1.13 KB

Contents

# encoding: utf-8
#
module Picky

  module Indexers

    # Uses a category to index its data.
    #
    # Note: It is called serial since it indexes each category separately.
    #
    class Serial < Base

      # Harvest the data from the source, tokenize,
      # and write to an intermediate "prepared index" file.
      #
      # Every non-nil token is emitted as one "<id>,<token>\n" line.
      # Lines are buffered in memory (four buffer elements per line) and
      # flushed to the file once the buffer holds more than 100_000 elements.
      #
      # Parameters:
      #  * categories: An enumerable of Category-s.
      #
      # Yields (optional): each category's prepared index file, once that
      # category's data has been harvested. Note that the lines still held
      # in the buffer are written only after the block has returned.
      #
      # Returns whatever categories.each returns.
      #
      def process categories
        comma   = ?,
        newline = ?\n

        categories.each do |category|
          tokenizer = category.tokenizer

          category.prepared_index_file do |file|
            buffer = []

            source.harvest(category) do |indexed_id, text|
              tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
              tokens.each do |token_text|
                next unless token_text
                buffer << indexed_id << comma << token_text << newline
              end

              # Flush periodically so the buffer does not grow without bound.
              if buffer.size > 100_000
                file.write buffer.join
                buffer.clear
              end
            end

            # The block is optional. An unguarded yield raised LocalJumpError
            # here when no block was given, i.e. after all the harvesting work
            # and before the remaining buffered lines were written.
            yield file if block_given?

            file.write buffer.join
          end
        end
      end

    end
  end

end

Version data entries

21 entries across 21 versions & 1 rubygems

Version Path
picky-3.6.16 lib/picky/indexers/serial.rb
picky-3.6.15 lib/picky/indexers/serial.rb
picky-3.6.14 lib/picky/indexers/serial.rb
picky-3.6.13 lib/picky/indexers/serial.rb
picky-3.6.12 lib/picky/indexers/serial.rb
picky-3.6.11 lib/picky/indexers/serial.rb
picky-3.6.10 lib/picky/indexers/serial.rb
picky-3.6.9 lib/picky/indexers/serial.rb
picky-3.6.8 lib/picky/indexers/serial.rb
picky-3.6.7 lib/picky/indexers/serial.rb
picky-3.6.6 lib/picky/indexers/serial.rb
picky-3.6.4 lib/picky/indexers/serial.rb
picky-3.6.3 lib/picky/indexers/serial.rb
picky-3.6.2 lib/picky/indexers/serial.rb
picky-3.6.1 lib/picky/indexers/serial.rb
picky-3.6.0 lib/picky/indexers/serial.rb
picky-3.5.4 lib/picky/indexers/serial.rb
picky-3.5.3 lib/picky/indexers/serial.rb
picky-3.5.2 lib/picky/indexers/serial.rb
picky-3.5.1 lib/picky/indexers/serial.rb