Sha256: aa7842424b6dad8c740fdd0188b683519a04729c0e40822154749ae40ad058ec

Contents?: true

Size: 1.2 KB

Versions: 3

Compression:

Stored size: 1.2 KB

Contents

module Semantic
  # Builds a vector space model from a set of documents and rates its columns
  # against a stored document (#related) or against a query (#search).
  class Search

    # Builds the document matrix and runs the configured transforms over it.
    #
    # documents - handed unchanged to VectorSpace::Builder#build_document_matrix.
    # options   - Hash of overrides merged over these defaults:
    #             :transforms        - passed to MatrixTransformer.new
    #                                  (default: [:TFIDF, :LSA])
    #             :verbose           - truthy sets the logger level to
    #                                  Logger::INFO, otherwise Logger::ERROR
    #                                  (default: false)
    #             :filter_stop_words - forwarded to the builder (default: true)
    #             :stem_words        - forwarded to the builder (default: true)
    #
    # NOTE: this assigns the level on Semantic.logger, a module-level logger,
    # so the setting presumably outlives this instance and affects every other
    # user of that logger.
    def initialize(documents, options = {})
      options = {
        :transforms        => [:TFIDF, :LSA],
        :verbose           => false,
        :filter_stop_words => true,
        :stem_words        => true
      }.merge(options)

      Semantic.logger.level = options[:verbose] ? Logger::INFO : Logger::ERROR

      @builder = VectorSpace::Builder.new(
        :filter_stop_words => options[:filter_stop_words],
        :stem_words        => options[:stem_words]
      )
      @matrix_transformer = MatrixTransformer.new(options[:transforms])

      # The untransformed matrix is logged before the transforms replace it.
      @vector_space_model = @builder.build_document_matrix(documents)
      Semantic.logger.info(@vector_space_model)

      @vector_space_model = @matrix_transformer.apply_transforms(@vector_space_model)
    end

    # Rates every column of the model against the column at +document_id+.
    #
    # document_id - column index passed to the model's #column.
    #
    # Returns an Array holding one Compare.similarity result per column, in
    # #each_column order. The referenced column is rated against itself too.
    def related(document_id)
      # Looked up once; the reference column is the same for every comparison.
      similarities_to(@vector_space_model.column(document_id))
    end

    # Rates every column of the model against a query.
    #
    # search_list - handed unchanged to the builder's #build_query_vector.
    #
    # Returns an Array holding one Compare.similarity result per column, in
    # #each_column order.
    def search(search_list)
      query_vector = @builder.build_query_vector(search_list)
      # Converted once; the query is the same for every comparison.
      similarities_to(query_vector.col)
    end

    private

    # Collects Compare.similarity(vector, column) for each column of the model.
    #
    # Assumes Compare.similarity does not mutate its arguments, so a single
    # +vector+ can be reused across comparisons -- TODO confirm against Compare.
    def similarities_to(vector)
      ratings = []
      # Block form kept on purpose: whether #each_column returns an enumerator
      # without a block is not visible from this file.
      @vector_space_model.each_column do |column|
        ratings << Compare.similarity(vector, column)
      end
      ratings
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
rsemantic-0.2.1 lib/semantic/search.rb
rsemantic-0.2.0 lib/semantic/search.rb
rsemantic-0.1.4 lib/semantic/search.rb