Sha256: 20d3c63ea4449f91f0181c2baa7103b85d5024d1013cd212d5d8385579501b09

Contents?: true

Size: 1.55 KB

Versions: 3

Compression:

Stored size: 1.55 KB

Contents

require 'stringio'

require 'ftsearch/fragment_writer'
require 'ftsearch/analysis/simple_identifier_analyzer'
#require 'ftsearchrt'

# In-memory full-text search built on FTSearch.
#
# Usage follows three phases: create the index (#initialize), feed it
# documents (#add_document), then call #finish! once to build the readers
# that #find_all queries. #find_all relies on the readers created by
# #finish!, so it must not be called before it.
class Shoes::Search
  include FTSearch

  # The underlying FTSearch::FragmentWriter.
  attr_reader :index

  # Creates a fragment writer with one field per entry in +fields+, each
  # analyzed with FTSearch::Analysis::SimpleIdentifierAnalyzer.
  #
  # fields - Array of field names (default: [:uri, :body]).
  def initialize(fields = [:uri, :body])
    field_infos = FTSearch::FieldInfos.new
    fields.each do |name|
      field_infos.add_field :name => name,
        :analyzer => FTSearch::Analysis::SimpleIdentifierAnalyzer.new
    end
    # :path => nil -- presumably keeps the index in memory rather than on
    # disk (the writers' #data is read back in #finish!); confirm in FTSearch.
    @index = FTSearch::FragmentWriter.new :path => nil, :field_infos => field_infos
  end

  # Adds one document to the index.
  #
  # hsh - the document, passed straight through to the fragment writer
  #       (presumably a Hash keyed by field name -- confirm against FTSearch).
  def add_document(hsh)
    @index.add_document hsh
  end

  # Finalizes the writer and opens fulltext, suffix-array and document-map
  # readers over the data each writer produced, wrapped in StringIO objects.
  def finish!
    @index.finish!

    @ft = FulltextReader.new :io => StringIO.new(@index.fulltext_writer.data)
    @sa = SuffixArrayReader.new @ft, nil, :io => StringIO.new(@index.suffix_array_writer.data)
    @dm = DocumentMapReader.new :io => StringIO.new(@index.doc_map_writer.data)
  end

  # Searches the index and returns the best-ranked documents.
  #
  # terms     - the query, handed to SuffixArrayReader#find_all.
  # show      - maximum number of results to return (default: 20).
  # prob_sort - when true and there are more than 10000 hits, rank with
  #             DocumentMapReader#rank_offsets_probabilistic instead of
  #             #rank_offsets.
  #
  # Returns an Array of [uri, count] pairs, at most +show+ long.
  def find_all(terms, show = 20, prob_sort = false)
    # One weight per field id; with the default fields id 0 is :uri and
    # id 1 is :body. NOTE(review): only two weights are supplied even if
    # the index was created with a different number of fields.
    weights = [10_000_000, 10_000_000]

    hits = @sa.find_all terms
    size = hits.size
    offsets = @sa.lazyhits_to_offsets(hits)

    sorted =
      if prob_sort && size > 10000
        iterations = 50 * Math.sqrt(size)
        @dm.rank_offsets_probabilistic(offsets, weights, iterations)
      else
        @dm.rank_offsets(offsets, weights)
      end

    # Exclusive range: an inclusive 0..show would yield show + 1 results.
    sorted[0...show].map do |doc_id, count|
      [@dm.document_id_to_uri(doc_id), count]
    end
  end
end

Version data entries

3 entries across 3 versions & 2 rubygems

Version Path
shoes-3.0.1 lib/shoes/search.rb
shoesgem-0.1514.0 shoes/lib/shoes/search.rb
shoesgem-0.1424.0 shoes/lib/shoes/search.rb