Sha256: 5c0249628fc8e9e1b9956bf8017e368a72dd4f27a16dc4625da3ee000509fbbc

Contents?: true

Size: 1.02 KB

Versions: 5

Compression:

Stored size: 1.02 KB

Contents

# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module ClassifierReborn
  # Extractive text summarisation helpers built on ClassifierReborn::LSI.
  #
  # The module extends itself, so every method can be called directly on the
  # module (ClassifierReborn::Summarizer.summary(text)) or mixed into another
  # object.
  module Summarizer
    extend self

    # Summarises +str+ sentence by sentence.
    #
    # str::       the text to summarise
    # count::     maximum number of sentences requested from the index
    # separator:: string placed between the selected sentences
    #
    # Returns a single String of the selected sentences joined by +separator+.
    def summary(str, count = 10, separator = " [...] ")
      perform_lsi(split_sentences(str), count, separator)
    end

    # Summarises +str+ paragraph by paragraph.
    #
    # str::       the text to summarise
    # count::     maximum number of paragraphs requested from the index
    # separator:: string placed between the selected paragraphs
    #
    # Returns a single String of the selected paragraphs joined by +separator+.
    def paragraph_summary(str, count = 1, separator = " [...] ")
      perform_lsi(split_paragraphs(str), count, separator)
    end

    # Splits +str+ on ".", "!" and "?".
    #
    # Because the pattern contains a capture group, String#split also returns
    # each matched delimiter as its own array element, e.g.
    # "Hi. Bye!" => ["Hi", ".", " Bye", "!"].
    def split_sentences(str)
      str.split(/(\.|\!|\?)/) # TODO: make this less primitive
    end

    # Splits +str+ on blank-line paragraph breaks ("\n\n", "\r\r" or
    # "\r\n\r\n").
    #
    # As with split_sentences, the capture group means the matched separators
    # are returned as elements of the result alongside the paragraphs.
    def split_paragraphs(str)
      str.split(/(\n\n|\r\r|\r\n\r\n)/) # TODO: make this less primitive
    end

    # Indexes +chunks+ with LSI and joins the chunks the index reports as
    # having the highest relative content.
    #
    # chunks::    Array of Strings (sentences or paragraphs)
    # count::     passed to LSI#highest_relative_content as the chunk limit
    # separator:: string placed between the selected chunks
    #
    # Returns a String; chunks appear in the order the index returns them,
    # each stripped of surrounding whitespace.
    def perform_lsi(chunks, count, separator)
      lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

      chunks.each do |chunk|
        words = chunk.strip.split
        # Skip blank chunks and one-word chunks. This also discards the bare
        # delimiters ("." or "\n\n") that the split_* helpers leave in the list.
        next if words.size <= 1

        lsi << chunk
      end

      # auto_rebuild is off, so the index is built once after all chunks are added.
      lsi.build_index

      # The original code additionally rejected every summary that was not
      # included in the summaries themselves, which can never remove anything;
      # that no-op pass has been dropped.
      lsi.highest_relative_content(count).map(&:strip).join(separator)
    end
  end
end

Version data entries

5 entries across 5 versions & 2 rubygems

Version Path
scoot-0.0.4 .bundle/gems/ruby/2.2.0/gems/classifier-reborn-2.0.3/lib/classifier-reborn/lsi/summarizer.rb
classifier-reborn-2.0.3 lib/classifier-reborn/lsi/summarizer.rb
classifier-reborn-2.0.2 lib/classifier-reborn/lsi/summarizer.rb
classifier-reborn-2.0.1 lib/classifier-reborn/lsi/summarizer.rb
classifier-reborn-2.0.0 lib/classifier-reborn/lsi/summarizer.rb