=begin rdoc == BitLab Classes used in experiments on binary representations, including Huffman trees. =end module RubyLabs module BitLab QueueView = Struct.new(:queue) =begin rdoc Make a unique binary code for each item in array +a+, returning a Hash that associates each item with its code. =end def make_codes(a) n = log2(a.length).ceil res = Hash.new a.each_with_index do |x,i| res[x] = i.code(n) end return res end =begin rdoc Print the codes in +a+ (an associative array made by +make_codes+ or the Huffman tree +assign_codes+ method). An option specifies how to order the output, either +:by_code+ or +:by_name+. =end def print_codes(a, mode = :by_code) if mode == :by_code a.sort { |x,y| x[1] <=> y[1] }.each do |sym, code| printf "%s %s\n", code, sym end elsif mode == :by_name width = a.keys.map{ |x| x.length }.max a.keys.sort.each do |x| printf "%-#{width}s %s\n", x, a[x] end else raise "print_codes: mode must be :by_code or :by_name" end return true end =begin rdoc Make a Message object for the characters in string s. The second parameter determines the code to use. It can be :ascii, :parity, a hash object that has code mappings for letters, or a Huffman tree. If a hash or tree is passed, the message type is set to :packed, otherwise it's :unpacked. The encoding type is saved so it can be used later in decoding (but see note in documentation of +decode+). =end def encode(s, type, opt = nil) if (type.class == Hash || type.class == Node) code = (type.class == Node) ? assign_codes(type) : type msg = Message.new(:packed) s.each_byte do |ch| msg << code[ch.chr] printf("%s: %s\n", ch.chr, code[ch.chr]) if opt == :trace end else msg = Message.new(:unpacked) s.each_byte do |ch| code = ch.code(8) code.add_parity_bit if type == :parity msg << code printf("%s: %s\n", ch.chr, code) if opt == :trace end end msg.encoding = type return msg end =begin rdoc Decode a message using the specified decoding scheme. Note: see also the +decode+ method in the Message class, which assembles a string using the coding scheme specified when the message was created. +Message#decode+ always gives the right result -- the point of the method here is to show what happens if the decoding scheme does not match the encoding scheme. =end def decode(m, type) raise "not a message" unless m.class == Message res = "" if type.class == Node # weird -- it appears case labels can't be class names... res = huffman_decode(m, type) elsif type.class == Code raise "packed decode not implemented" elsif type == :ascii m.array.each do |x| res << x.value.chr end elsif type == :parity m.array.each do |x| if x.even_parity? res << (x.value >> 1).chr elsif $KCODE[0] == ?U res << "\xe2\x80\xa2" else res << "?" end end else raise "unknown option: #{type}" end return res end =begin rdoc Simulate transmission of message +m+, adding +n+ random errors. Returns a copy of the Message object after making +n+ calls to the +flip+ method. =end def garbled(m, n) res = m.copy n.times do c = res.array[ rand(res.array.length) ] c.flip( rand(c.length) ) end return res end =begin rdoc Huffman tree interface: Read letters and frequencies from file +fn+, save them in a hash indexed by letter name =end def read_frequencies(fn) a = Hash.new if fn.class == Symbol fn = File.join(@@bitsDirectory, fn.to_s + ".txt") end File.open(fn).each do |line| line.chomp! next if line.length == 0 next if line[0] == ?# x = line[/^./] f = line[/\d+\.\d+/].to_f a[x] = f end return a end =begin rdoc Huffman tree interface: Build a tree using frequencies in Hash +f+. =end # :begin :build_tree def build_tree(f) pq = init_queue(f) while pq.length > 1 n1 = pq.shift n2 = pq.shift pq << Node.combine( n1, n2) end return pq[0] end # :end :build_tree =begin rdoc Huffman tree helper procedure: Traverse +tree+, making a +Code+ object for each leaf node, returning the codes in a Hash object. On recursive calls the +prefix+ parameter is the set of codes on the path so far. Students should pass a tree to +encode+, which will call this method to make the code. The same tree should also be passed to +decode+ when they want to decode a message. =end # :begin :assign_codes def assign_codes(tree, code = {}, prefix = Code.new(0,0)) if tree.char != nil code[tree.char] = prefix else assign_codes(tree.left, code, prefix + 0) assign_codes(tree.right, code, prefix + 1) end return code end # :end :assign_codes =begin rdoc Huffman tree helper procedure: initialize a priority queue with Node objects for each letter in Hash +a+. =end # :begin :init_queue def init_queue(a) q = PriorityQueue.new a.each do |x,f| q << Node.new(x,f) end return q end # :end :init_queue =begin rdoc Huffman tree helper procedure: decode the binary codes in message +m+, using +tree+ as a guide. Not intended to be called by students; they will use the top level +decode+ method or the +decode+ method defined in the Message class. =end def huffman_decode(m, tree) res = "" path = tree m.each do |bit| if path.leaf? res << path.char path = tree end path = (bit == 0) ? path.left : path.right end res << path.char if path.leaf? return res end =begin rdoc Return an array of binary codes (so students can text decoding skills) =end def read_codes codes = Array.new fn = File.join(@@bitsDirectory, "testcodes.txt") File.open(fn).each do |line| line.chomp! code = Code.new(0,0) line.each_byte do |byte| code << byte[0] # add least significant digit of ASCII "0" or "1" end msg = Message.new(:packed) msg << code codes << msg end return codes end =begin rdoc Return an array of binary codes (so students can text decoding skills) =end def read_words fn = File.join(@@bitsDirectory, "testwords.txt") words = File.open(fn).readlines return words.map { |x| x.chomp } end =begin rdoc Huffman tree utility: generate a random string of length +n+ using the letter frequencies +f+. =end def generate_string(n, f) s = "" n.times do r = rand sum = 0 f.each do |ch,x| sum += x if r < sum s += ch break end end end return s end =begin rdoc Class for nodes of a Huffman tree. All nodes have a frequency (+freq+) used to determine its place in a priority queue. Leaf nodes have a character (+char+). Interior nodes have a +nil+ character value, in which case +left+ and +right+ are Nodes for the roots of subtrees. Use +new+ to create a new leaf node. Call the class method +combine+ (an alternative constructor) to make a new interior node from two existing nodes. The +<+ method allows Nodes to be compared so they can be ordered in a priority queue. =end class Node attr_accessor :freq, :char, :left, :right def initialize(char,freq) @char = char @freq = freq @left = @right = nil end def Node.combine(n1,n2) node = Node.new(nil, n1.freq + n2.freq) node.left = n1 node.right = n2 return node end def <(x) x.class == Node && @freq < x.freq end def leaf? return @char != nil end def inspect if leaf? sprintf "( %s: %.3f )", @char, @freq else sprintf "( %.3f %s %s )", @freq, @left.to_s, @right.to_s end end alias to_s inspect end # Node =begin rdoc Code objects are variable-length binary numbers representing individual letters or members of a set. The main reason to create a Code object is so to have the value of the integer displayed in binary or hex. To make it easier for advanced students to understand the Huffman tree "assign_codes" method this class defines a method that attaches a bit to the end of a code and returns a new Code object. Students will not create Code objects directly -- instead they are created by methods in other classes, e.g. the +code+ method added to Fixnum or the top level encode method. There are two methods for attaching bits: << appends a bit to a code (used by the method that makes a parity-encoded message) and +, which returns a copy of a code with the bit added (used by the recursive method that assigns Huffman codes). << can also be used to extend a code by appending a second code. The index operator for Fixnums orders bits from right to left, consistent with standard usage but the opposite of Strings and Arrays. In this module bits will be ordered from left to right to be consistent with Strings and Arrays. The modified ordering is used in the index operator and the +flip+ method. =end # TBD allow + and << on hex codes; they should have same internal rep, as binary codes, just show as hex when displaying # Way cool -- this class used to have a maxcodesize (set to 60) to make sure codes fit within # a single 64-bit word. But a typo during one experiment created an 80-bit code. Turns out # indexing etc work just fine on Bignums: # # >> c1 = s[1].code(80) # => 00000000000000000000000000000000000000000000000000000000000000000000000001101000 # >> c1.flip(0) # => 10000000000000000000000000000000000000000000000000000000000000000000000001101000 class Code attr_accessor :value, :length, :base def initialize(value, length = 0, base = :binary) raise "Code: unknown base: #{base}" unless [:binary, :hex].include?(base) @value = value @length = length @base = base @has_parity_bit = false end def +(bit) raise "operation not defined for hex codes" unless @base == :binary val = (@value << 1) | bit[0] return Code.new(val, @length+1) end def <<(x) raise "operation not defined for hex codes" unless @base == :binary if x.class == Code val = x.value # val known to fit in x.length bits n = x.length else val = x[0] # val is a single bit n = 1 end @value <<= n @value |= val # val can't have any higher order bits set -- see above @length += n return self end def [](i) if i.class == Range res = 0 n = 0 i.each { |j| res <<= 1; res |= @value[@length-j-1]; n += 1 } return Code.new(res, n) else return @value[@length-i-1] end end def parity_bit bit = 0 for i in 0...@length bit ^= @value[i] end return bit end def add_parity_bit @has_parity_bit = true return self << parity_bit end def even_parity? return parity_bit() == 0 end def flip(i) raise "bit index out of range" unless i < @length @value ^= (1 << (@length - i - 1)) return self end def chr if @has_parity_bit if even_parity? return (@value >> 1).chr else return "?" end else return @value.chr end end def <=>(x) return @value <=> x.value end def inspect return "" if @length == 0 case @base when :binary return sprintf "%0#{@length}b", @value when :hex return sprintf "%0#{@length/4}X", @value end end alias to_s inspect end # Code =begin rdoc Message objects are arrays of Codes. New codes are added by a call to <<. If the message is unpacked, each code is stored in its own array element (use this for :ascii or :parity encodings). If the message is packed, codes are repackaged into 8-bit codes, and individual codes may cross array item boundaries. The +each+ method iterates over each bit in a message. +copy+ makes a deep copy, copying each code object (but sharing a reference to +encoding+). =end # TODO: allow for the possibility that a code word being added to a packed code can be longer than 8 bits class Message attr_accessor :packed, :array, :encoding @@packsize = 8 # number of bits to pack into single code def initialize(type) raise "Message: unknown type" unless [:packed, :unpacked].include? type if type == :packed @packed = true @array = [ Code.new(0,0) ] else @packed = false @array = [ ] end end def copy dup = self.clone # copies @packed, @encoding dup.array = Array.new @array.each { |x| dup.array << x.clone } # deep copy of @array return dup end def each @array.each do |byte| for i in 0...byte.length yield(byte[i]) end end return self end def <<(x) raise "Message#<<: not a code" unless x.class == Code if @packed if @array[-1].length + x.length <= @@packsize @array[-1] << x else n = @@packsize - @array[-1].length m = x.length - n @array[-1] << x[0...n] @array << x[n...x.length] end else @array << x end return self end def decode res = "" if @encoding.class == Symbol @array.each do |code| res << code.chr end elsif @encoding.class == Node res = huffman_decode(self, @encoding) else res = unpack end return res end def unpack "unpack: not implemented" end def length @array.inject(0) { |sum, code| sum += code.length } end def print(option) "a message" end def inspect if @packed return @array.join("") else return @array.join(" ") end end alias to_s inspect end # Message =begin rdoc Initialize the canvas with a drawing of a priority queue. =end =begin TODO fill in this stub... =end def view_queue(pq, userOptions = {} ) options = @@queueOptions.merge(userOptions) Canvas.init(300, 500, "BitLab") @@drawing = QueueView.new(pq) pq.on_canvas = true Canvas.sync return true end def redraw_queue(pq) puts "redraw: #{pq.inspect}" end @@queueOptions = { } @@bitsDirectory = File.join(File.dirname(__FILE__), '..', 'data', 'huffman') end # BitLab end # RubyLabs =begin rdoc Add hooks to the PriorityQueue's shift and << methods so they check to see if the queue is on the canvas, and if so, update the drawing when an item is removed or added. =end class PriorityQueue attr_accessor :on_canvas alias_method :pqappend, :<< def <<(obj) res = pqappend(obj) redraw_queue(self) if on_canvas return res end alias_method :pqshift, :shift def shift res = pqshift redraw_queue(self) if on_canvas return res end end class Fixnum =begin rdoc Add a method to +Fixnum+ to make a +Code+ object showing the binary or hexadecimal representation of a number. Intended mainly to show codes for characters in +String+ objects. Usage: x.code binary, size = log2 bits x.code(n) binary, size = n bits x.code(:hex) hex, size = log2 bits x.code(:hex,n) hex, size = 4*n bits =end def code(*args) if args.first == :hex base = args.shift else base = :binary end if args.first.kind_of? Integer digits = args.shift elsif args.empty? b = (self == 0) ? 1 : log2(self+1).ceil digits = (base == :hex) ? (b/4.0).ceil : b else raise "code: can't understand #{args}" end bits = (base == :hex) ? digits * 4 : digits return Code.new(self, bits, base) end end