Sha256: 5c0249628fc8e9e1b9956bf8017e368a72dd4f27a16dc4625da3ee000509fbbc
Contents?: true
Size: 1.02 KB
Versions: 5
Compression:
Stored size: 1.02 KB
Contents
# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module ClassifierReborn
  # Builds a short summary of a text by splitting it into chunks (sentences
  # or paragraphs), indexing the chunks with ClassifierReborn::LSI, and
  # joining the chunks the index reports as having the highest relative
  # content.
  #
  # The module extends itself, so every method can be called directly on the
  # module (ClassifierReborn::Summarizer.summary(text)) or mixed into
  # another object.
  module Summarizer
    extend self

    # Summarizes +str+ at sentence granularity.
    #
    # str::       the text to summarize
    # count::     number of chunks requested from the LSI index (default 10)
    # separator:: string placed between the selected chunks
    #
    # Returns the selected sentences, stripped and joined with +separator+.
    def summary(str, count = 10, separator = " [...] ")
      perform_lsi(split_sentences(str), count, separator)
    end

    # Summarizes +str+ at paragraph granularity.
    #
    # str::       the text to summarize
    # count::     number of chunks requested from the LSI index (default 1)
    # separator:: string placed between the selected chunks
    #
    # Returns the selected paragraphs, stripped and joined with +separator+.
    def paragraph_summary(str, count = 1, separator = " [...] ")
      perform_lsi(split_paragraphs(str), count, separator)
    end

    # Splits +str+ on '.', '!' and '?'.
    #
    # The pattern contains a capture group, so String#split also returns each
    # matched delimiter as its own array element, e.g.
    # "A b. C d?" => ["A b", ".", " C d", "?"].
    # perform_lsi later drops those single-token elements.
    def split_sentences(str)
      # TODO: make this less primitive
      str.split(/(\.|\!|\?)/)
    end

    # Splits +str+ on blank-line separators ("\n\n", "\r\r" or "\r\n\r\n").
    #
    # As with split_sentences, the capture group causes the separators
    # themselves to appear as elements of the returned array.
    def split_paragraphs(str)
      # TODO: make this less primitive
      str.split(/(\n\n|\r\r|\r\n\r\n)/)
    end

    # Indexes +chunks+ with LSI and joins the top +count+ chunks.
    #
    # chunks::    array of strings produced by one of the split_* methods
    # count::     passed through to LSI#highest_relative_content
    # separator:: string placed between the selected chunks
    #
    # Chunks that are blank or consist of a single whitespace-delimited token
    # are not indexed; this also discards the delimiter elements emitted by
    # the split_* methods.
    #
    # The result keeps whatever order LSI#highest_relative_content returns.
    # NOTE(review): presumably that is relevance order rather than document
    # order -- confirm against the LSI implementation.
    def perform_lsi(chunks, count, separator)
      # Auto-rebuild is disabled so the index is built once, explicitly,
      # after all chunks have been added.
      lsi = ClassifierReborn::LSI.new :auto_rebuild => false

      chunks.each do |chunk|
        stripped = chunk.strip
        next if stripped.empty? || stripped.split.size == 1
        lsi << chunk
      end

      lsi.build_index
      summaries = lsi.highest_relative_content count

      # The previous implementation also ran
      #   summaries.reject { |chunk| !summaries.include? chunk }
      # which filters the collection against itself and therefore never
      # removes anything; it has been dropped without changing the result.
      summaries.map { |chunk| chunk.strip }.join(separator)
    end
  end
end
Version data entries
5 entries across 5 versions & 2 rubygems