Sha256: db1c620ccaf6fbf0805042254b1665f1ce99f843b1c5e4958501e0d0b8bb0c8b

Contents?: true

Size: 1.4 KB

Versions: 1

Compression:

Stored size: 1.4 KB

Contents

require "open-uri"
require "json"

module Sakuru
  # A minimal in-memory full-text index.
  #
  # Each registered source gets an integer id equal to its position in
  # {#files}. The inverted index maps a whitespace-delimited word to a posting
  # list holding one id per occurrence of that word, so a file id appears in a
  # posting list as many times as the word occurs in that file.
  class Database
    # Matches strings that start with a URL scheme such as "http://".
    URL_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
    private_constant :URL_PATTERN

    # @return [Array<String>] registered sources; the array index is the id
    #   stored in the posting lists.
    attr_reader :files

    def initialize
      @inverted_index = {}
      @files = []
    end

    # Indexes the words of +path+. Adding a path that is already registered
    # replaces its previous postings and keeps its id.
    #
    # The source is read completely before the index is touched, so a failed
    # read (missing file, network error, ...) leaves the database unchanged.
    #
    # @param path [String] a local file path or a URL that open-uri can open.
    # @return [self]
    # @raise [SystemCallError] when a local file cannot be opened.
    def add(path)
      words = read_words(path)

      id = @files.index(path)
      if id
        remove_postings(id)
      else
        id = @files.size
        @files << path
      end

      # TODO: add position
      words.each do |word|
        (@inverted_index[word] ||= []) << id
      end
      self
    end

    # Looks up a single word exactly as it was indexed.
    #
    # @param query [String] the word to look up.
    # @return [Hash{String => Integer}] occurrence count keyed by source; empty
    #   when the word is unknown.
    def search(query)
      results = {}
      # TODO: normalize and tokenize.
      ids = @inverted_index[query]
      return results unless ids

      ids.each do |id|
        file = @files[id]
        results[file] = results.fetch(file, 0) + 1
      end
      results
    end

    # Writes the file list and the inverted index to +output_path+ as JSON.
    #
    # @param output_path [String] destination file; it is overwritten.
    def save(output_path)
      data = {
        "files" => @files,
        "inverted_index" => @inverted_index,
      }
      File.open(output_path, "w") do |file|
        JSON.dump(data, file)
      end
    end

    # Replaces the current state with the one stored by {#save}.
    #
    # @param saved_file_path [String] a JSON file produced by {#save}.
    # @raise [JSON::ParserError] when the file does not contain valid JSON.
    def load(saved_file_path)
      # JSON.parse never instantiates arbitrary classes, unlike JSON.load on
      # older json versions, so a tampered index file stays plain data.
      data = JSON.parse(File.read(saved_file_path))
      @files = data["files"]
      @inverted_index = data["inverted_index"]
    end

    private

    # Reads +path+ and returns every non-empty whitespace-delimited word, in
    # order of appearance.
    def read_words(path)
      words = []
      open_source(path) do |io|
        io.each_line do |line|
          # TODO: normalize and tokenize.
          line.split(/\s+/).each do |word|
            # A line with leading whitespace yields an empty first token.
            words << word unless word.empty?
          end
        end
      end
      words
    end

    # Opens +path+ for reading and yields the IO.
    #
    # Kernel#open is deliberately avoided: it spawns a subprocess for strings
    # starting with "|", and since Ruby 3.0 open-uri no longer makes it accept
    # URLs. URLs go through the URI object's own #open (provided by open-uri);
    # everything else is treated as a plain file.
    def open_source(path, &block)
      name = path.to_s
      if URL_PATTERN =~ name
        uri = URI.parse(name)
        return uri.open(&block) if uri.respond_to?(:open)
      end
      File.open(path, &block)
    end

    # Removes every occurrence of +id+ from all posting lists and drops the
    # lists that become empty, so stale words are not kept or saved.
    def remove_postings(id)
      @inverted_index.delete_if do |_word, ids|
        ids.delete(id)
        ids.empty?
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
sakuru-0.0.1 lib/sakuru/database.rb