#! /usr/bin/env ruby

=begin rdoc

== ELIZA

Joseph Weizenbaum's ELIZA program in Ruby.  Students can play with the
Doctor script, which mimics a Rogerian psychiatrist, and experiment by adding
new rules to Doctor or writing their own scripts.

=end

=begin
  TODO don't add rule to queue more than once (e.g. word repeated in input sentence)
=end

module RubyLabs

module ElizaLab

  require 'readline'
  include Readline

=begin rdoc
  A transformation rule is associated with a key word, and is triggered when that
  word is found in an input sentence.  Rules have integer priorities, and if more
  than one rule is enabled ELIZA applies the one with the highest priority.

  Each rule has an ordered list of patterns, and each pattern has a list of
  reassembly rules.  To apply a rule, scan the patterns, and for the first pattern
  that matches a sentence, build the output using the current reassembly rule.
=end

  class Rule
    attr_accessor :key, :priority, :patterns

=begin rdoc
  Specify the key word for a rule when the rule is created.  The priority
  defaults to 1 and the rule starts with an empty pattern list.
=end

    def initialize(key, priority = 1)
      @key = key
      @priority = priority
      @patterns = Array.new
    end

=begin rdoc
  Compare rule priorities.  r1 precedes r2 in the queue if r1 has a higher priority
  than r2.  The >= is important, in order to make sure the default rule stays at the
  end of the queue (i.e. new rules will be inserted at the front).
=end

    def <(x)
      @priority >= x.priority
    end

    # Append a pattern to this rule.  +expr+ may be a Pattern (stored as is),
    # a String of the form "/.../" as read from a script (the first and last
    # characters are stripped and the rest compiled to a Regexp), or a Regexp.
    def addPattern(expr)
      if expr.class == Pattern
        @patterns << expr
      else
        expr = Regexp.new(expr.slice(1..-2)) if expr.class == String
        @patterns << Pattern.new(expr)
      end
    end

    # Return the n-th pattern of this rule.
    def [](n)
      @patterns[n]
    end

    # Add the reassembly string +line+ to pattern number +n+ (by default the
    # most recently added pattern).
    def addReassembly(line, n = -1)
      @patterns[n].add_response(line)
    end

=begin rdoc
  Rule application -- try the patterns in order.  When the line matches a pattern,
  return the next reassembly for that pattern.  Apply variable substitutions to both
  the patterns and the reassemblies if they contain variables.

  Returns nil if no pattern produces a response.
=end

    def apply(s, opt)
      @patterns.each do |p|
        if @@verbose
          print "trying pattern "
          p p.regexp
        end
        res = p.apply(s, opt)
        return res unless res.nil?
      end
      return nil
    end

    # Multi-line description: "key / priority" followed by each pattern.
    # Uses interpolation because the key of the built-in default rule is a
    # Symbol, and Symbol has no + method.
    def to_s
      s = "#{@key} / #{@priority}\n"
      @patterns.each { |r| s += " " + r.to_s + "\n" }
      return s
    end

    # Compact description used by +p+: the priority (only when above 1)
    # followed by the pattern list.
    def inspect
      # s = @key.inspect
      s = ""
      s += " [#{@priority}]" if @priority > 1
      s += " --> [\n" + @patterns.join("\n") + "]"
      return s
    end

  end # class Rule

=begin rdoc
  A Pattern represents one way to transform an input sentence into a response.
  A Pattern instance has a regular expression and a list of one or more reassembly
  strings that can refer to groups in the expression.  There is also an index to
  record the last reassembly string used, so the application can cycle through
  the strings.

  For convenience the constructor inserts word break anchors and attaches a /i to
  the expression as needed.  NOTE: the inspect method removes these automatic items
  so the printed string is cleaner; to see the real Regexp call the regexp accessor.
  Example:
     >> p = Pattern.new(/hi/,"hello")
     => /hi/ -> ["hello"] [0]
     >> p.regexp
     => /\bhi\b/i

  Another convenience: add group delimiters around wildcards (.*), groups of
  words (a|b|c), and variable names ($x) if they aren't already there.
=end

  # Pattern.new called internally only from Rule#addPattern, which is called
  # to add /.*/ for default rule, or when reading /.../ line from script.
  # In interactive experiments, users can call Pattern.new(s) or Pattern.new(s,a)
  # where s is a string or regexp, and a is an array of response strings.

  class Pattern
    attr_accessor :regexp, :list, :index, :md

    # Build a pattern from a String or Regexp and an optional list of
    # reassembly strings.  Raises a RuntimeError for any other +expr+ type.
    def initialize(expr, list = [])
      raise "Pattern#initialize: expr must be String or Regexp" unless expr.is_a?(String) || expr.is_a?(Regexp)
      # Work on a private copy: the helpers below edit the text in place, and
      # the caller's string (possibly a frozen literal) must not be touched.
      re = (expr.is_a?(String) ? expr : expr.source).dup
      add_parens(re, /\(?\.\*\)?/ )
      add_parens(re, /\(?[\w' ]+(\|[\w' ]+)+\)?/ )
      add_parens(re, /\(?\$\w+\)?/ )
      re.insert(0,'\b') if re =~ /^\w/
      re.insert(-1,'\b') if re =~ /\w$/
      @regexp = Regexp.new(re, Regexp::IGNORECASE)
      @list = list.nil? ? Array.new : list
      @index = 0
    end

    # Restart the cycle through the reassembly strings.
    def reset
      @index = 0
    end

    # s is a source string, r is a pattern with optional parens -- add parens
    # if they're not there.  Modifies s in place.
    def add_parens(s, r)
      s.gsub!(r) { |m| m.start_with?("(") ? m : "(" + m + ")" }
    end

    # Append a reassembly string.
    def add_response(sentence)
      @list << sentence
    end

    # Match +s+ against the pattern and build a response from the next
    # reassembly string.  With opt == :preprocess the input line is first
    # preprocessed (in place) by Eliza.preprocess.  Returns nil when there are
    # no reassembly strings or the line does not match.  A reassembly that
    # starts with "@" is returned unchanged.  Otherwise each $n is replaced by
    # group n of the match, with post-processing substitutions applied to the
    # words in that group.
    def apply(s, opt = :preprocess)
      Eliza.preprocess(s) if opt == :preprocess
      @md = s.match(@regexp)
      return nil if @list.empty? || @md.nil?
      # dup (not clone) so that a frozen reassembly string can be edited below
      res = @list[inc()].dup
      return res if res.start_with?("@")
      puts "reassembling '#{res}'" if @@verbose
      res.gsub!(/\$\d+/) do |ns|
        n = ns.slice(1..-1).to_i      # strip leading $, convert to int
        if @md[n]
          puts "postprocess #{@md[n]}" if @@verbose
          @md[n].gsub(/[a-z\-$']+/i) do |w|
            (@@post.has_key?(w) && !@@post[w].start_with?("$")) ? @@post[w] : w
          end
        else
          warn "Pattern.apply: no match for #{ns} in '#{res}'"
          ""
        end
      end
      return res
    end

    # Test +s+ against the pattern, remembering the match data; returns
    # true or false.
    def match(s)
      @md = s.match(@regexp)
      return !@md.nil?
    end

    # Groups captured by the most recent match, or nil if it failed.
    def parts
      return @md.nil? ? nil : @md.captures
    end

    # Multi-line description: the cleaned-up expression followed by the
    # quoted reassembly strings.
    def to_s
      s = " /" + cleanRegexp + "/\n"
      @list.each { |x| s += " \"" + x + "\"\n" }
      return s
    end

    def inspect
      return cleanRegexp + ": " + @list.inspect
    end

    # Source text of the expression with the \b anchors removed.
    def cleanRegexp
      return @regexp.source.gsub(/\\b/, "")
    end

    private

    # Return the current reassembly index, then advance it cyclically.
    def inc
      n = @index
      @index = (@index + 1) % @list.length
      return n
    end

  end # class Pattern

=begin rdoc
  A Dictionary is basically a Hash, but it overrides [] and []= to be case-insensitive
=end

  class Dictionary < Hash

    def initialize
      super
      @lc_keys = Hash.new
    end

    # Look up the value stored under +x+, ignoring case.
    def [](x)
      @lc_keys[x.downcase]
    end

    # Store +y+ under the key as given (in the Hash itself) and under the
    # lower-case key (used for lookups).
    def []=(x,y)
      super
      @lc_keys[x.downcase] = y
    end

    def has_key?(x)
      return @lc_keys.has_key?(x.downcase)
    end

    # Remove all entries, including the lower-case index; without this a
    # cleared dictionary would keep answering lookups with stale values.
    def clear
      @lc_keys.clear
      super
    end

  end # class Dictionary


  class Eliza

    # These class variables define the "application" processed by ELIZA -- the rule
    # sets used to transform inputs to outputs.  When ELIZA is initialized or reset it
    # gets a default rule that just echoes the user input.

    # Note: I haven't figured out how to have this method called when the module is first
    # loaded.  As a workaround, any method that refers to a class variable (run, info, etc)
    # checks to see if they have been defined yet, and if not, call the reset method.

    # NOTE(review): PriorityQueue is not defined in this file; presumably it is
    # provided by another part of RubyLabs -- confirm.
    def Eliza.clear
      @@script = nil
      @@aliases = Hash.new
      @@vars = Hash.new
      @@starts = Array.new
      @@stops = Array.new
      @@queue = PriorityQueue.new
      @@verbose = false
      @@pre.clear
      @@post.clear
      @@rules.clear
      @@default = Rule.new(:default)
      @@default.addPattern(/(.*)/)
      @@default.addReassembly("$1")
      return true
    end

    #
    # def Eliza.queue
    #   return @@queue
    # end
    #
    # def Eliza.aliases
    #   return @@aliases
    # end
    #
    # def Eliza.vars
    #   return @@vars
    # end
    #

    def Eliza.pre
      return @@pre
    end

    def Eliza.post
      return @@post
    end

    def Eliza.rules
      return @@rules
    end

    # Turn tracing messages on.
    def Eliza.verbose
      @@verbose = true
    end

    # Turn tracing messages off.
    def Eliza.quiet
      @@verbose = false
    end

=begin rdoc
  Save a copy of a script that is distributed with RubyLabs; if no output file name
  specified make a file name from the program name.
=end

    def Eliza.checkout(script, filename = nil)
      scriptfilename = File.join(@@elizaDirectory, script.to_s + ".txt")
      unless File.exist?(scriptfilename)
        puts "Script not found: #{scriptfilename}"
        return nil
      end
      outfilename = filename.nil? ? (script.to_s + ".txt") : filename
      # Block forms close both files even if the copy fails part way through.
      File.open(outfilename, "w") do |dest|
        File.foreach(scriptfilename) { |line| dest.puts line.chomp }
      end
      puts "Copy of #{script} saved in #{outfilename}"
    end

    # Utility procedure to get the rule for a word -- can be called interactively or
    # when processing a script.  Looks for a rule named after the word itself, then
    # for a rule named after the variable the word is an alias of.

    def Eliza.rule_for(w)
      @@rules[w] || ((x = @@aliases[w]) && @@rules[x])
    end

    # Preprocessing -- turn string into single line, words separated by single space,
    # apply pre-processing substitutions.  Modifies s in place.

    def Eliza.preprocess(s)
      s.gsub!( /\s+/, " " )
      s.gsub!(@@word) { |w| @@pre.has_key?(w) ? @@pre[w] : w }
      puts "preprocess: line = '#{s}'" if @@verbose
    end

    # First pass over the input -- scan each word, apply preprocessing substitutions,
    # add rule names to the priority queue.  NOTE:  this method does a destructive
    # update to the input line....

    def Eliza.scan(line, queue)
      Eliza.preprocess(line)
      line.scan(@@word) do |w|
        w.downcase!
        if r = Eliza.rule_for(w)
          queue << r
          puts "add rule for '#{w}' to queue" if @@verbose
        end
      end
    end

    # Apply one rule to a line.  If the rule's response has the form "@name",
    # apply the rule called name instead.  Returns the response string, or nil
    # if the rule does not match or the redirect names an unknown rule.

    def Eliza.apply(line, rule)
      puts "applying rule: key = '#{rule.key}'" if @@verbose
      res = rule.apply(line, :no_preprocess)
      return nil unless res
      return res unless res.start_with?("@")
      rulename = res.slice(1..-1)
      if @@rules[rulename]
        return Eliza.apply( line, @@rules[rulename] )
      else
        warn "Eliza.apply: no rule for #{rulename}"
        return nil
      end
    end

    # The heart of the program -- apply transformation rules to an input sentence.
    # Returns the first response produced by a queued rule, or nil (after a
    # warning) if no rule applies.  The input string is modified in place.

    def Eliza.transform(s)
      s.sub!(/[\n\.\?!\-]*$/,"")        # strip trailing punctuation
      # s.downcase!
      @@queue = PriorityQueue.new
      @@queue << @@default              # initialize queue with default rule
      Eliza.scan(s, @@queue)            # add rules for recognized key words
      while @@queue.length > 0          # apply rules in order of priority
        if @@verbose
          print "queue: "
          p @@queue.collect { |r| r.key }
        end
        rule = @@queue.shift
        if result = Eliza.apply(s, rule)
          return result
        end
      end
      warn "No rules applied" if @@queue.empty?
      return nil
    end

    # The parser calls this method to deal with directives (lines where the first
    # word begins with a colon).  The line is consumed (modified in place).

    def Eliza.parseDirective(line)
      word = Eliza.detachWord(line)
      case word
      when "alias"
        unless line.start_with?("$")
          warn "symbol after :alias must be a variable name; ignoring '#{word} #{line}'"
          return
        end
        sym = Eliza.detachWord(line)
        @@vars[sym] = Array.new
        line.split.each do |s|
          @@aliases[s] = sym
          @@vars[sym] << s
        end
      when "start"
        @@starts << line.unquote
      when "stop"
        @@stops << line.unquote
      when "pre"
        sym = Eliza.detachWord(line)
        @@pre[sym] = line.unquote
      when "post"
        sym = Eliza.detachWord(line)
        @@post[sym] = line.unquote
      when "default"
        # only the name is stored here; compileRules replaces it with the rule
        @@default = line[@@word]
      else
        warn "unknown directive: :#{word} (ignored)"
      end
    end

    # Remove a word from the front of a line (in place) and return it.

    def Eliza.detachWord(line)
      word = line[@@word]                # pattern matches the first word
      if line.index(" ")
        line.slice!(0..line.index(" "))  # delete up to end of the word
        line.lstrip!                     # in case there are extra spaces after word
      else
        line.slice!(0..-1)               # line just had the one word
      end
      return word
    end

    # Check each pattern's regular expression and replace var names by alternation
    # constructs.  If the script specified a default rule name look up that
    # rule and save it as the default.
    #
    # The expression is rebuilt from its source text and options rather than
    # by eval, so text from a script file is never executed as Ruby code.
    # Raises a RuntimeError if a pattern uses a variable with no :alias.

    def Eliza.compileRules
      @@rules.each do |key,val|
        val.patterns.each do |p|
          expr = p.regexp.source.gsub(/\$\w+/) do |x|
            words = @@vars[x]
            raise "undefined variable #{x} in pattern for rule '#{key}'" if words.nil?
            words.join("|")
          end
          p.regexp = Regexp.new(expr, p.regexp.options)
        end
      end
      if @@default.class == String
        @@default = @@rules[@@default]
      end
    end

    # Parse rules in file f, store them in global arrays.  Strategy:  use a local
    # var named 'rule', initially set to nil.  New rules start with a single word
    # at the start of a line.  When such a line is found in the input file, create a
    # new Rule object and store it in 'rule'.  Subsequent lines that are part of the
    # current rule (lines that contain regular expressions or strings) are added to
    # current Rule object.  Directives indicate the end of a rule, so 'rule' is reset
    # to nil when a directive is seen.
    #
    # A Symbol is taken as the name of a script in the ELIZA data directory.
    # Returns true on success; on any error prints a message and returns false.

    def Eliza.load(filename)
      Eliza.clear
      rule = nil
      if filename.class == Symbol
        filename = File.join(@@elizaDirectory, filename.to_s + ".txt")
      end
      # File.foreach closes the file when the iteration ends
      File.foreach(filename) do |line|
        line.strip!
        next if line.empty? || line.start_with?("#")
        if line.start_with?(":")
          Eliza.parseDirective(line)
          rule = nil
        elsif line =~ @@iword
          rulename, priority = line.split
          rule = priority ? Rule.new(rulename, priority.to_i) : Rule.new(rulename)
          @@rules[rule.key] = rule
        elsif rule.nil?
          warn "missing rule name? unexpected input '#{line}'"
        elsif line.start_with?("/")
          if line.end_with?("/")
            rule.addPattern(line)
          else
            warn "badly formed expression (missing /): '#{line}'"
          end
        elsif line.start_with?("\"")
          if line.end_with?("\"")
            rule.addReassembly(line.unquote)
          else
            warn "badly formed string (missing \"): '#{line}'"
          end
        elsif line.start_with?("@")
          rule.addReassembly(line)
        else
          # use the current rule's key: variables first assigned inside the
          # block do not keep their value from one line to the next
          warn "unexpected line in rule for #{rule.key}: '#{line}'"
        end
      end
      Eliza.compileRules
      @@script = filename
      return true
    rescue
      puts "Eliza: Error processing #{filename}: #{$!}"
      return false
    end

    # Print the complete state of the current script.

    def Eliza.dump
      Eliza.clear unless defined? @@default
      puts "Script: #{@@script}"
      print "Starts:\n "; p @@starts
      print "Stops:\n "; p @@stops
      print "Vars:\n "; p @@vars
      print "Aliases:\n "; p @@aliases
      print "Pre:\n "; p @@pre
      print "Post:\n "; p @@post
      print "Default:\n "; p @@default
      print "Queue:\n "; p @@queue.collect { |r| r.key }
      puts
      @@rules.each { |key,val| puts val }
      return nil
    end

    # Print a summary of the current script: the number of rules and
    # sentence patterns, and the key words it reacts to.

    def Eliza.info
      Eliza.clear unless defined? @@default
      words = Hash.new
      npatterns = 0
      @@rules.each do |k,r|
        words[k] = 1 unless k.start_with?("$")
        r.patterns.each do |p|
          npatterns += 1
          p.cleanRegexp.split.each do |w|
            Eliza.saveWords(w, words)
          end
        end
      end
      @@aliases.keys.each do |k|
        Eliza.saveWords(k, words)
      end
      puts "Script: #{@@script}"
      puts " #{@@rules.size} rules with #{npatterns} sentence patterns"
      puts " #{words.length} key words: #{words.keys.sort.join(', ')}"
    end

    # Make every pattern start again from its first reassembly string.

    def Eliza.reset
      @@rules.each do |k, r|
        r.patterns.each { |p| p.reset }
      end
      return true
    end

    # Record the words found in +s+ (a piece of a pattern, or an alias) as
    # keys of +hash+, in lower case.  A few common short words are skipped;
    # parentheses, wildcards and question marks are removed first.  +s+ itself
    # is not modified, because Hash keys passed in by Eliza.info are frozen.

    def Eliza.saveWords(s, hash)
      return if ["a","an","in","of","the"].include?(s)
      text = s.delete("()").gsub(".*", "").delete("?")
      return if text.length == 0
      text.split(/\|/).each { |w| hash[w.downcase] = 1 }
    end

    # Interactive loop: print a start message, then read sentences and print
    # ELIZA's responses until end of input or the user types "bye" or "quit".

    def Eliza.run
      Eliza.clear unless defined? @@default
      puts @@starts[rand(@@starts.length)] if ! @@starts.empty?
      loop do
        # explicit receiver, so this does not depend on Readline being mixed
        # into the caller's scope
        s = Readline.readline(" H: ", true)
        return if s.nil?
        s.chomp!
        next if s.empty?
        if s == "bye" || s == "quit"
          puts @@stops[rand(@@stops.length)] if ! @@stops.empty?
          return
        end
        # interpolation: transform returns nil when no rule applies
        puts " C: #{Eliza.transform(s)}"
      end
    end

  end # class Eliza

  # These state variables are accessible by any methods in a class defined inside
  # the ElizaLab module
  #
  # NOTE(review): they are set on the ElizaLab module, while most methods that
  # read them live in the nested classes.  Presumably the lab is meant to be
  # used after "include ElizaLab" at the top level, which makes this module an
  # ancestor of every class -- confirm.

  @@verbose = false
  @@elizaDirectory = File.join(File.dirname(__FILE__), '..', 'data', 'eliza')
  @@pre = Dictionary.new
  @@post = Dictionary.new
  @@rules = Dictionary.new
  @@word = /[a-z\-$']+/i     # pattern for a "word" in the input language
  @@iword = /^[a-z\-$']+/i   # same, but must be the first item on the line
  @@var = /\$\d+/            # variable name in reassembly string

end # module ElizaLab

end # module RubyLabs

class String

=begin rdoc
  A useful operation on strings -- call +s.unquote+ to remove double quotes from the
  beginning and end of string +s+.  A string that is not quoted at both ends is
  returned as is.
=end

  def unquote
    if self[0] == ?" && self[-1] == ?"
      return self.slice(1..-2)
    else
      return self
    end
  end

end