module RubyLabs =begin rdoc == HashLab The HashLab module has definitions of classes and methods used in the projects for Chapter 6 of Explorations in Computing. The module has methods used to compute hash functions, simple stand-alone methods named +insert+ and +lookup+ to demonsstrate how hash functions work, and a HashTable class for larger experiments with hash tables. =end module HashLab # When a hash table is drawn on the screen the +drawing+ attribute is set to a # TableView struct that describes the drawing. TableView = Struct.new(:cells, :buckets, :nrows, :options) # Default options for drawing a hash table on the canvas @@tableOptions = { :tableX => 20, :tableY => 20, :cellHeight => 15, :cellYSpace => 2, :cellWidth => 30, :cellColor => :lightgray, :textX => 70, :maxRows => 26, } # Compute the wighted sum of the ordinal values of characters in a string, where the # weight for a character is defined by its position in the string: the weight for the # character +i+ positions from the right is 26**+i+. The numeric value of a character # is determined by Fixnum#ord, an extension to the Fixnum class, which is defined in # rubylabs.rb. Upper and lower case letters are mapped to integer from 0 to 25, while # other characters are unchanged. # # Examples: # >> radix26("be") # => 30 # >> radix26("bee") # => 784 # >> radix26("beetle") # => 13792718 # >> radix26("beethoven") # => 242419173737 # #-- # :begin :radix26 def radix26(s) x = 0 s.each_char do |b| x = x * 26 + b.iord end return x end # :end :radix26 # Trival hash function that uses only the first letter in the input string. +s+ is the # string to hash, +n+ is the size of the hash table. # # Example: # >> ?b.iord # => 1 # >> h0("beer", 10) # => 1 # #-- # :begin :h0 def h0(s, n) return s[0].iord % n end # :end :h0 # A hash function that uses the first two letters in the input string, weighing the # first letter by 26**1 = 26 and the second by 26**0 = 1. +s+ is the # string to hash, +n+ is the size of the hash table. # # Example: # >> ?b.iord * 26 + ?e.iord # => 30 # >> h1("beer", 10) # => 0 # #-- # :begin :h1 def h1(s, n) return ((s[0].iord * 26) + s[1].iord) % n end # :end :h1 # A hash function based on the radix-26 representation of the full input string. +s+ is the # string to hash, +n+ is the size of the hash table. # # Example: # >> radix26("beer") # => 20401 # >> hr("beer", 10) # => 1 # #-- # :begin :hr def hr(s, n) return radix26(s) % n end # :end :hr # Simple hash function used in the first project, to introduce the idea of hashing. # Uses the first letter in tring +s+ to find where it would go in a table of # size +n+ (which has a default value of 10). # # Example: # >> ?z.iord # => 25 # >> h("zymurgy") # => 5 # >> h("zymurgy", 15) # => 10 # #-- # :begin :h def h(s, n = 10) return s[0].iord % n end # :end :h # Insert string +s+ into array +t+. The location for +s+ # is determined by the hash function implemented in method +h+. # If +s+ can be added to +t+ (i.e. if there is no # collision) return the location where +s+ is stored, otherwise return +nil+. # # This method is intended to be used to demonstrate how hash fucnctions # work; for a more complete implementation see HashTable#insert. # # Example: # >> t = Array.new(10) # => [nil, nil, nil, nil, nil, nil, nil, nil, nil, nil] # >> insert("delta", t) # => 3 # >> t # => [nil, nil, nil, "delta", nil, nil, nil, nil, nil, nil] # >> insert("derp", t) # => nil # #-- # :begin :insert def insert(s, t) i = h(s, t.length) if t[i].nil? t[i] = s return i else return nil end end # :end :insert # Look for string +s+ in array +t+, returning the location where +s+ is stored # if it is in the array, otherwise +nil+. If +s+ is in +t+ it will be in the # row computed by the hash function implmeneted in method +h+. # # This method is for demonstrations of simple hash tables; for experiments # use tHashTable#insert and #HashTable#lookup. # # Example: # >> t # => [nil, nil, nil, "delta", nil, nil, nil, nil, nil, nil] # >> lookup("delta", t) # => 3 # >> lookup("epsilon", t) # => nil # #-- # :begin :lookup def lookup(s, t) i = h(s, t.length) if t[i] == s return i else return nil end end # :end :lookup # Print a nicely formatted representation of table +t. The argument +t+ can be # an array or a HashTable object. To make the table structure easier to see only # non-empty rows are printed, one row per line. The row number is printed at the # front of the line. The optional parameter +max+ # is the number of rows to print. # # Example: # >> t # => ["upsilon", nil, nil, "delta", "omega", nil, nil, nil, nil, nil] # >> print_table(t) # 0: "upsilon" # 3: "delta" # 4: "omega" # => nil # def print_table(t, max = nil) t = t.table if t.class == HashTable max = t.length unless max max.times { |i| print_row(i, t[i] ) unless t[i].nil? } return nil end # Helper method called by print_table. Print a row number followed # by the contents of the row. def print_row(n, row) printf "%4d: %s\n", n, row.inspect end # Initialize the RubyLabs Canvas and draw a picture of hash table +t+. # Subsequent calls to this table's +insert+ method will update the drawing # to show where the new item is placed. # # Table drawing parameters can be passed as optional arguments. The defaults are: # :tableX => 20 # :tableY => 20 # :cellHeight => 15 # :cellYSpace => 2 # :cellWidth => 30 # :cellColor => :lightgray # :textX => 70 # :maxRows => 26 # # Example: to make a drawing of a table +t+, showing only the first 10 rows and using # light blue to show empty rows: # >> view_table(t, :cellColor => :lightblue, :maxRows => 10) # => true def view_table(t, userOptions = {} ) options = @@tableOptions.merge(userOptions) height = 2 * options[:tableX] + options[:maxRows] * (options[:cellHeight] + options[:cellYSpace] ) Canvas.init(400, height, "HashLab") Canvas::Font.new('bucketfont', :family => 'Helvetica', :size => 11) tbl = t.table x0 = options[:tableX] x1 = x0 + options[:cellWidth] cells = [] buckets = [] nrows = min(tbl.size, options[:maxRows]) nrows.times do |i| y0 = options[:tableY] + i * (options[:cellHeight] + options[:cellYSpace]) y1 = y0 + options[:cellHeight] cells << Canvas::Rectangle.new(x0, y0, x1, y1) if tbl[i] buckets << Canvas::Text.new(tbl[i].join(", "), options[:textX], y0, {:font => :bucketfont}) cells[i].fill = 'white' else buckets << nil cells[i].fill = options[:cellColor] end end t.drawing = TableView.new(cells, buckets, nrows, options) # sprintf "viewing table with %d cells, %d buckets", cells.length, buckets.length return true end =begin rdoc == HashTable A HashTable object is an array of strings. When an object is created, it is given a specified number of rows, and each row is initially empty. The class has methods to add strings to the table, look to see if a string is in the table, and various methods for displaying information about the table. Example: >> t = HashTable.new(10) => # >> TestArray.new(5, :cars).each { |x| t.insert(x) } => ["oldsmobile", "maserati", "porsche", "lotus", "saturn"] >> puts t 2: ["porsche", "lotus"] 6: ["oldsmobile"] 7: ["saturn"] 8: ["maserati"] => nil >> t.lookup("lotus") => 2 >> t.lookup("lexus") => nil =end class HashTable @@hash_functions = [:h0, :h1, :hr] attr_reader :table attr_accessor :drawing # Make a hash table with +n+ rows. Each row is a bucket that will expand to # hold new items that hash to that row. # By default the hash function is the one implemented by the method # +hr+. The optional parameter is a symbol specifying # an alternative hash function, either :h0 or :h1. def initialize(n, f = :hr) raise "HashTable: hash function must be one of #{@@hash_functions.inspect}" unless @@hash_functions.include?(f) @table = Array.new(n) @hash = f end # Look up string +s+ in the table. Return the row number where +s+ is found, or # +nil+ if +s+ is not in the table. def lookup(s) i = send(@hash, s, @table.length) return ( @table[i] && @table[i].include?(s) ) ? i : nil end # Add string +s+ to the table. Collisions are resolved by appending +s+ to the # end of the bucket in the row for +s+. def insert(s) i = send(@hash, s, @table.length) @table[i] = Array.new if @table[i].nil? @table[i] << s if @drawing options = @drawing.options if @drawing.buckets[i] # sprintf "adding '%s' to bucket %d", s, i @drawing.buckets[i].update( @table[i].join(", ") ) else # sprintf "new bucket for '%s' in row %d", s, i y0 = options[:tableY] + i * (options[:cellHeight] + options[:cellYSpace]) @drawing.buckets[i] = Canvas::Text.new(@table[i].join(", "), options[:textX], y0, {:font => :bucketfont}) @drawing.cells[i].fill = 'white' end end return i end # Call HashLab#print_table to display the contents of the table. def to_s print_table(self) end # Return a string that contains essential information about a table # (number of rows and the hash function used to insert or look up strings). def inspect sprintf '#', @table.size, @hash.to_s end # Print usage statistics for the table: the lengths of longest and shortest # buckets, number of empty buckets, and mean bucket length. def print_stats max = 0 min = Float::MAX nzero = 0 sum = 0 @table.each do |bucket| n = bucket.nil? ? 0 : bucket.length min = n if n < min max = n if n > max sum += n nzero += 1 if n == 0 end printf "shortest bucket: %d\n", min printf "longest bucket: %d\n", max printf "empty buckets: %d\n", nzero if max > 0 printf "mean bucket length: %.2f\n", sum.to_f / (@table.length - nzero) end return nil end # Return a list of indices for buckets that have more than +cutoff+ entries. def long_rows(cutoff = 5) rows = Array.new @table.each_with_index do |row, i| rows << i if !row.nil? && row.length > cutoff end return rows end end # HashTable end # HashLab end # RubyLabs