lib/picky/cacher/partial/subtoken.rb in picky-0.0.9 vs lib/picky/cacher/partial/subtoken.rb in picky-0.1.0

- old
+ new

@@ -1,9 +1,32 @@ module Cacher module Partial + # Generates the right subtokens for use in the subtoken strategy. + # + class SubtokenGenerator + + attr_reader :down_to, :starting_at + + def initialize down_to, starting_at + @down_to, @starting_at = down_to, starting_at + + if @starting_at.zero? + def each_subtoken token, &block + token.each_subtoken @down_to, &block + end + else + def each_subtoken token, &block + token[0..@starting_at].intern.each_subtoken @down_to, &block + end + end + + end + + end + # The subtoken partial strategy. # # If given # "florian" # will index @@ -15,52 +38,56 @@ # "f" # Depending on what the given down_to value is. (Example with down_to == 1) # class Subtoken < Strategy - attr_reader :down_to, :starting_at - # Down to is how far it will go down in generating the subtokens. # # Examples: - # With :hello, and starting_at 0 + # With :hello, and starting_at -1 # * down to == 1: [:hello, :hell, :hel, :he, :h] # * down to == 4: [:hello, :hell] # - # With :hello, and starting_at -1 + # With :hello, and starting_at -2 # * down to == 1: [:hell, :hel, :he, :h] # * down to == 4: [:hell] # def initialize options = {} - @down_to = options[:down_to] || 1 - starting_at = options[:starting_at] || 0 - @starting_at = starting_at.zero? ? 0 : starting_at - 1 + down_to = options[:down_to] || 1 + starting_at = options[:starting_at] || -1 + @generator = SubtokenGenerator.new down_to, starting_at end + def down_to + @generator.down_to + end + def starting_at + @generator.starting_at + end # Generates a partial index from the given index. # def generate_from index result = {} # Generate for each key token the subtokens. # - i = 5000 + i = 0 index.each_key do |token| - i -= 1 - if i == 0 + i += 1 + if i == 5000 puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens." - i = 5000 + i = 0 end generate_for token, index, result end # Remove duplicate ids. # # TODO If it is unique for a subtoken, it is # unique for all derived longer tokens. # - result.each_value &:uniq! # Removed because of the set combination operation below + result.each_value &:uniq! result end private @@ -72,20 +99,19 @@ # "token" here means just text. # # TODO Could be improved by appending the aforegoing ids? # def generate_for token, index, result - clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym - clipped_token.subtokens(down_to).each do |subtoken| + @generator.each_subtoken(token) do |subtoken| if result[subtoken] result[subtoken] += index[token] # unique else - result[subtoken] = index[token].dup + result[subtoken] = index[token].dup # TODO Spec this dup end end end - + end - + end - + end \ No newline at end of file