lib/soulmate/loader.rb in soulmate-0.0.4 vs lib/soulmate/loader.rb in soulmate-0.0.5

- old
+ new

@@ -17,29 +17,43 @@
       # everything will work itself out as soon as the cache expires again.

       # delete the data stored for this type
       Soulmate.redis.del(database)

-      items_loaded = 0
       items.each_with_index do |item, i|
-        id = item["id"]
-        term = item["term"]
-        score = item["score"]
+        add(item, :skip_duplicate_check => true)
+      end
+    end

-        if id and term
-          # store the raw data in a separate key to reduce memory usage
-          Soulmate.redis.hset(database, id, JSON.dump(item))
+    # "id", "term", "score", "aliases", "data"
+    def add(item, opts = {})
+      opts = { :skip_duplicate_check => false }.merge(opts)
+      raise ArgumentError unless item["id"] && item["term"]
+
+      # kill any old items with this id
+      remove("id" => item["id"]) unless opts[:skip_duplicate_check]
+
+      # store the raw data in a separate key to reduce memory usage
+      Soulmate.redis.hset(database, item["id"], MultiJson.encode(item))
+      phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
+      prefixes_for_phrase(phrase).each do |p|
+        Soulmate.redis.sadd(base, p) # remember this prefix in a master set
+        Soulmate.redis.zadd("#{base}:#{p}", item["score"], item["id"]) # store the id of this term in the index
+      end
+    end

-          phrase = ([term] + (item["aliases"] || [])).join(' ')
-          prefixes_for_phrase(phrase).uniq.each do |p|
-            Soulmate.redis.sadd(base, p) # remember this prefix in a master set
-            Soulmate.redis.zadd("#{base}:#{p}", score, id) # store the id of this term in the index
-          end
-          items_loaded += 1
+    # remove only cares about an item's id, but for consistency takes an object
+    def remove(item)
+      prev_item = Soulmate.redis.hget(database, item["id"])
+      if prev_item
+        prev_item = MultiJson.decode(prev_item)
+        # undo the operations done in add
+        Soulmate.redis.hdel(database, prev_item["id"])
+        phrase = ([prev_item["term"]] + (prev_item["aliases"] || [])).join(' ')
+        prefixes_for_phrase(phrase).each do |p|
+          Soulmate.redis.srem(base, p)
+          Soulmate.redis.zrem("#{base}:#{p}", prev_item["id"])
         end
-        puts "added #{i} entries" if i % 100 == 0 and i != 0
       end
-
-      items_loaded
     end
   end
 end
\ No newline at end of file