lib/code_zauker.rb in code_zauker-0.0.8 vs lib/code_zauker.rb in code_zauker-0.0.9

- old
+ new

@@ -211,15 +211,13 @@ # From 5.8 # to 7.6 Files per sec # changing multi into pipielined @redis.pipelined do s.each do | trigram | - @redis.sadd "trigram:#{trigram}",fid - @redis.sadd "fscan:trigramsOnFile:#{fid}", trigram - # Add the case-insensitive-trigram begin @redis.sadd "trigram:ci:#{trigram.downcase}",fid + @redis.sadd "fscan:trigramsOnFile:#{fid}", trigram rescue ArgumentError error=true end end end # multi/pipelined @@ -325,10 +323,13 @@ # = Do a case-insenitive search # using the special set of trigrams # "trigram:ci:*" # all downcase def isearch(term) + if term.length < GRAM_SIZE + raise "FATAL: #{term} is shorter then the minimum size of #{GRAM_SIZE} character" + end termLowercase=term.downcase() trigramInAnd=split_in_trigrams(termLowercase,"trigram:ci") if trigramInAnd.length==0 return [] end @@ -373,24 +374,14 @@ # = search # Find a list of file candidates to a search string # The search string is padded into trigrams + # Starting from 0.0.9 is case insensitive and + # equal to isearch def search(term) - if term.length < GRAM_SIZE - raise "FATAL: #{term} is shorter then the minimum size of #{GRAM_SIZE} character" - end - #puts " ** Searching: #{term}" - trigramInAnd=split_in_trigrams(term,"trigram") - #puts "Trigam conversion /#{term}/ into #{trigramInAnd}" - if trigramInAnd.length==0 - return [] - end - fileIds= @redis.sinter(*trigramInAnd) - fileNames=map_ids_to_files(fileIds) - #puts "DEBUG #{fileIds} #{fileNames}" - return fileNames + return self.isearch(term) end def reindex(fileList) #puts "Reindexing... #{fileList.length} files..." fileList.each do |current_file | @@ -429,11 +420,10 @@ trigramsToExpurge=@redis.smembers "fscan:trigramsOnFile:#{fid}" if trigramsToExpurge.length==0 puts "?Nothing to do on #{filename}" end puts "#{filename} id=#{fid} Trigrams: #{trigramsToExpurge.length} Expurging..." - trigramsToExpurge.each do | ts | - @redis.srem "trigram:#{ts}", fid + trigramsToExpurge.each do | ts | begin @redis.srem "trigram:ci:#{ts.downcase}",fid #putc "." rescue ArgumentError # Ignore "ArgumentError: invalid byte sequence in UTF-8"