Sha256: bc28d238101d0617c7751d09ea8f42d5993e723c573263f92cfd1bfe5ea65cfc

Contents?: true

Size: 909 Bytes

Versions: 5

Compression:

Stored size: 909 Bytes

Contents

# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

class String
  # Summarizes this string sentence by sentence.
  #
  # The string is cut into pieces with #split_sentences, the pieces are fed to
  # a Classifier::LSI index, and the pieces returned by
  # <tt>highest_relative_content(count)</tt> are stripped and joined with
  # +separator+.
  #
  # count::     passed through to <tt>highest_relative_content</tt>
  # separator:: string placed between the selected pieces
  #
  # Returns the joined summary as a String.
  def summary(count = 10, separator = ' [...] ')
    perform_lsi(split_sentences, count, separator)
  end

  # Same as #summary, but the string is cut into pieces with
  # #split_paragraphs instead of #split_sentences.
  def paragraph_summary(count = 1, separator = ' [...] ')
    perform_lsi(split_paragraphs, count, separator)
  end

  # Splits the string on <tt>.</tt>, <tt>!</tt> and <tt>?</tt>.
  #
  # The pattern contains a capture group, so String#split also returns each
  # matched delimiter as its own array element:
  #
  #   "a b.c d".split_sentences  # => ["a b", ".", "c d"]
  def split_sentences
    split(/(\.|!|\?)/) # TODO: make this less primitive
  end

  # Splits the string on blank lines (<tt>\n\n</tt>, <tt>\r\r</tt> or
  # <tt>\r\n\r\n</tt>).
  #
  # As with #split_sentences, the capture group makes String#split return the
  # matched separators as array elements too.
  def split_paragraphs
    split(/(\n\n|\r\r|\r\n\r\n)/) # TODO: make this less primitive
  end

  private

  # Indexes +chunks+ with Classifier::LSI and joins the chunks it selects.
  #
  # chunks::    array of strings to choose from
  # count::     passed to <tt>highest_relative_content</tt>
  # separator:: string placed between the selected chunks
  #
  # Returns the selected chunks, each stripped, joined with +separator+, in
  # the order in which <tt>highest_relative_content</tt> returned them.
  def perform_lsi(chunks, count, separator)
    # The index is built once, explicitly, after every chunk has been added.
    # NOTE(review): auto_rebuild: false presumably stops the index from being
    # rebuilt on each <<; confirm against Classifier::LSI.
    lsi = Classifier::LSI.new(auto_rebuild: false)

    chunks.each do |chunk|
      # Only chunks with at least two whitespace-separated words are indexed.
      # This skips blank chunks and single-word chunks, which also drops the
      # delimiters captured by the split patterns ("." or "\n\n", for example).
      lsi << chunk if chunk.strip.split.size > 1
    end

    lsi.build_index
    lsi.highest_relative_content(count).map(&:strip).join(separator)
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
classifier-1.4.4 lib/classifier/lsi/summary.rb
classifier-1.4.3 lib/classifier/lsi/summary.rb
classifier-1.4.2 lib/classifier/lsi/summary.rb
classifier-1.4.1 lib/classifier/lsi/summary.rb
classifier-1.4.0 lib/classifier/lsi/summary.rb