Sha256: 094b8513c69c9ea6d74dc6290bc0ee848f5578f8488ba14f39e398447de144d6

Contents?: true

Size: 1.04 KB

Versions: 1

Compression:

Stored size: 1.04 KB

Contents

# frozen_string_literal: true

# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module ClassifierReborn
  # Text summarization helpers built on top of ClassifierReborn::LSI.
  #
  # A text is split into chunks (sentences or paragraphs), the chunks are
  # indexed with an LSI instance, and the chunks the index reports through
  # +highest_relative_content+ are joined into a single summary string.
  #
  # All methods are exposed via +module_function+, so they can be called
  # directly on the module (e.g. <tt>Summarizer.summary(text)</tt>).
  module Summarizer
    module_function

    # Summarizes +str+ sentence by sentence.
    #
    # str::       the text to summarize
    # count::     maximum number of sentences requested from the index
    # separator:: string placed between the selected sentences
    #
    # Returns the selected sentences, stripped and joined by +separator+.
    def summary(str, count = 10, separator = ' [...] ')
      perform_lsi(split_sentences(str), count, separator)
    end

    # Summarizes +str+ paragraph by paragraph.
    #
    # str::       the text to summarize
    # count::     maximum number of paragraphs requested from the index
    # separator:: string placed between the selected paragraphs
    #
    # Returns the selected paragraphs, stripped and joined by +separator+.
    def paragraph_summary(str, count = 1, separator = ' [...] ')
      perform_lsi(split_paragraphs(str), count, separator)
    end

    # Splits +str+ on '.', '!' and '?'.
    #
    # The pattern uses a capture group, so the delimiters themselves are
    # returned as separate elements between the sentence fragments.
    def split_sentences(str)
      str.split(/([.!?])/) # TODO: make this less primitive
    end

    # Splits +str+ on blank lines (LF, CR or CRLF line endings).
    #
    # The pattern uses a capture group, so the separators themselves are
    # returned as separate elements between the paragraphs.
    def split_paragraphs(str)
      str.split(/(\n\n|\r\r|\r\n\r\n)/) # TODO: make this less primitive
    end

    # Indexes +chunks+ with a fresh LSI and joins the chunks it selects.
    #
    # chunks::    array of strings (sentences or paragraphs)
    # count::     passed to +highest_relative_content+ as the chunk limit
    # separator:: string placed between the selected chunks
    #
    # Returns a String; the chunks appear in whatever order the index
    # returns them.
    def perform_lsi(chunks, count, separator)
      lsi = ClassifierReborn::LSI.new(auto_rebuild: false)
      chunks.each do |chunk|
        # Blank chunks and single-word chunks are not indexed; this also
        # drops the delimiters captured by the split_* methods.
        lsi << chunk if chunk.strip.split.size > 1
      end
      lsi.build_index
      # The result is used as returned: filtering it against itself would
      # keep every element while costing a quadratic membership scan.
      lsi.highest_relative_content(count).map(&:strip).join(separator)
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
classifier-reborn-2.3.0 lib/classifier-reborn/lsi/summarizer.rb