#! /usr/bin/env ruby

=begin rdoc

== ELIZA
  
Joseph Weizenbaum's ELIZA program in Ruby.  Students can play with the Doctor script,
which mimics a Rogerian psychiatrist, and experiment by adding new rules to Doctor or
writing their own scripts.

=end

=begin
  TODO don't add rule to queue more than once (e.g. word repeated in input sentence)
=end

module RubyLabs
  
module ElizaLab

  require 'readline'
  include Readline

=begin rdoc

A transformation rule is associated with a key word, and is triggered
when that word is found in an input sentence.  Rules have integer
priorities, and if more than one rule is enabled ELIZA applies the one
with the highest priority.  Each rule has an ordered list of patterns, 
and each pattern has a list of reassembly rules.  

To apply a rule, scan the patterns, and for the first pattern that matches 
a sentence, build the output using the current reassembly rule.

=end

  class Rule

    # key::      the key word that triggers the rule (a String for rules read
    #            from a script; Eliza.clear uses the Symbol :default)
    # priority:: integer priority used to order rules in the queue
    # patterns:: ordered list of Pattern objects tried by #apply
    attr_accessor :key, :priority, :patterns

    # Specify the key word for a rule when the rule is created.  The
    # priority defaults to 1 and the rule starts out with no patterns.
    def initialize(key, priority = 1)
      @key = key
      @priority = priority
      @patterns = []
    end

    # Compare rule priorities.  r1 precedes r2 in the queue if r1 has a higher
    # priority than r2.  The >= is important, in order to make sure the default
    # rule stays at the end of the queue (i.e. new rules will be inserted at the
    # front).
    def <(x)
      @priority >= x.priority
    end

    # Append a pattern to the rule.  +expr+ may be a Pattern (stored as is),
    # a String of the form "/.../" as read from a script (the first and last
    # characters are dropped before it is compiled), or a Regexp.
    # Returns the updated pattern list.
    def addPattern(expr)
      return @patterns << expr if expr.is_a?(Pattern)
      expr = Regexp.new(expr.slice(1..-2)) if expr.is_a?(String)
      @patterns << Pattern.new(expr)
    end

    # Return the n-th pattern of the rule (nil if there is none).
    def [](n)
      @patterns[n]
    end

    # Add the reassembly string +line+ to pattern number +n+ (by default the
    # most recently added pattern).
    # Raises IndexError if the rule has no pattern at that position, instead
    # of failing with an unexplained NoMethodError on nil.
    def addReassembly(line, n = -1)
      pattern = @patterns[n]
      if pattern.nil?
        raise IndexError, "Rule#addReassembly: rule '#{@key}' has no pattern at index #{n}"
      end
      pattern.add_response(line)
    end

    # Rule application -- try the patterns in order.  When the line matches a
    # pattern, return the next reassembly for that pattern; return nil when no
    # pattern produces a response.  +opt+ is handed through to Pattern#apply.
    def apply(s, opt)
      @patterns.each do |pattern|
        if @@verbose
          print "trying pattern "
          p pattern.regexp
        end
        res = pattern.apply(s, opt)
        return res unless res.nil?
      end
      nil
    end

    # Multi-line description: "key / priority" followed by each pattern.
    # Interpolation is used so that a Symbol key (the default rule) works too.
    def to_s
      text = "#{@key} / #{@priority}\n"
      @patterns.each { |pattern| text << "  #{pattern}\n" }
      text
    end

    # Compact description: the priority is shown only when it is above 1.
    def inspect
      prefix = @priority > 1 ? " [#{@priority}]" : ""
      prefix + " --> [\n" + @patterns.join("\n") + "]"
    end

  end # class Rule

=begin rdoc

A Pattern represents one way to transform an input sentence into a
response.  A Pattern instance has a regular expression and a list of
one or more reassembly strings that can refer to groups in the expression.
There is also an index to record the last reassembly string used, so
the application can cycle through the strings.

For convenience the constructor inserts word break anchors and attaches
a /i to the expression as needed.  NOTE: the inspect method removes these
automatic items so the printed string is cleaner; to see the real Regexp
call the regexp accessor.  Example:

  >> p = Pattern.new(/hi/, ["hello"])
  => hi: ["hello"]
  >> p.regexp
  => /\bhi\b/i
  
Another convenience: add group delimiters around wildcards (.*), groups of 
words (a|b|c), and variable names ($x) if they aren't already there.

=end

# Pattern.new called internally only from Rule#addPattern, which is called
# to add /.*/ for default rule, or when reading /.../ line from script.

# In interactive experiments, users can call Pattern.new(s) or Pattern.new(s,a)
# where s is a string or regexp, and a is an array of response strings.

  class Pattern
    # regexp:: the compiled (case-insensitive) expression
    # list::   the reassembly strings
    # index::  position of the next reassembly string to use
    # md::     MatchData (or nil) from the most recent #apply or #match
    attr_accessor :regexp, :list, :index, :md

    # Build a pattern from +expr+ (a String or a Regexp) and an optional list
    # of reassembly strings.  Group delimiters are added around wildcards,
    # alternations and variable names, word-break anchors are added at either
    # end when the expression starts/ends with a word character, and the
    # result is compiled with the IGNORECASE option.
    #
    # The expression text is copied first, so the caller's string is never
    # modified (and may be frozen).  A single String passed as +list+ is
    # treated as a one-element list; nil means an empty list.
    #
    # Raises RuntimeError if +expr+ is neither a String nor a Regexp.
    def initialize(expr, list = [])
      unless expr.is_a?(String) || expr.is_a?(Regexp)
        raise "Pattern#initialize: expr must be String or Regexp"
      end
      re = (expr.is_a?(String) ? expr : expr.source).dup
      add_parens(re, /\(?\.\*\)?/ )
      add_parens(re, /\(?[\w' ]+(\|[\w' ]+)+\)?/ )
      add_parens(re, /\(?\$\w+\)?/ )
      re.insert(0,'\b') if re =~ /^\w/
      re.insert(-1,'\b') if re =~ /\w$/
      @regexp = Regexp.new(re, Regexp::IGNORECASE)
      @list = case list
              when nil    then []
              when String then [list]
              else list
              end
      @index = 0
    end

    # Start cycling through the reassembly strings from the beginning again.
    def reset
      @index = 0
    end

    # s is a source string, r is a pattern with optional parens -- add parens
    # around each match that does not already start with one.  Modifies s in
    # place.
    def add_parens(s, r)
      s.gsub!(r) { |m| (m[0] == ?() ? m : "(" + m + ")" }
    end

    # Append a reassembly string to the list.
    def add_response(sentence)
      @list << sentence
    end

    # Match +s+ against the expression and build a response.
    #
    # Unless +opt+ is something other than :preprocess, the sentence is first
    # run through Eliza.preprocess (which modifies it in place).  Returns nil
    # when there are no reassembly strings or the sentence does not match.
    # Otherwise the next reassembly string is taken; one that starts with "@"
    # is returned untouched, and in any other string each $n is replaced by
    # the text of group n, with the post-processing substitutions applied to
    # its words.  A $n with no matching group produces a warning and is
    # replaced by the empty string.
    def apply(s, opt = :preprocess)
      Eliza.preprocess(s) if opt == :preprocess
      @md = s.match(@regexp)
      return nil if @list.empty? || @md.nil?
      res = @list[inc].dup                    # dup (not clone) so a frozen response can be edited
      return res if res[0] == ?@
      puts "reassembling '#{res}'" if @@verbose
      res.gsub!(/\$\d+/) do |ns|
        n = ns.slice(1..-1).to_i              # strip leading $, convert to int
        if @md[n]
          puts "postprocess #{@md[n]}" if @@verbose
          @md[n].gsub(/[a-z\-$']+/i) do |w|
            (@@post.has_key?(w) && @@post[w][0] != ?$) ? @@post[w] : w
          end
        else
          warn "Pattern.apply: no match for #{ns} in '#{res}'"
          ""
        end
      end
      res
    end

    # Match +s+ against the expression, remember the match data, and return
    # true or false.
    def match(s)
      @md = s.match(@regexp)
      !@md.nil?
    end

    # Groups captured by the most recent match, or nil if there was no match.
    def parts
      @md.nil? ? nil : @md.captures
    end

    # Multi-line description: the cleaned expression followed by one quoted
    # reassembly string per line.
    def to_s
      text = "  /" + cleanRegexp + "/\n"
      @list.each { |x| text << "    \"" + x + "\"\n" }
      text
    end

    def inspect
      cleanRegexp + ": " + @list.inspect
    end

    # Source text of the expression with the \b anchors removed.
    def cleanRegexp
      @regexp.source.gsub(/\\b/, "")
    end

    private

    # Return the current reassembly index and advance it, wrapping around at
    # the end of the list.
    def inc
      n = @index
      @index = (@index + 1) % @list.length
      n
    end

  end # class Pattern
  
  
=begin rdoc
  A Dictionary is basically a Hash, but it overrides [] and []= to be case-insensitive 
=end
  
  class Dictionary < Hash

    # Entries are stored twice: in the Hash itself under the key exactly as
    # given (this is what iteration sees), and in @lc_keys under the
    # lower-case key (this is what lookups use).
    def initialize
      super
      @lc_keys = {}
    end

    # Case-insensitive lookup; returns nil when there is no entry for x.
    def [](x)
      @lc_keys[x.downcase]
    end

    # Store y under x, and also under the lower-case form of x.
    def []=(x, y)
      super
      @lc_keys[x.downcase] = y
    end

    # Case-insensitive membership test.
    def has_key?(x)
      @lc_keys.has_key?(x.downcase)
    end

    # Remove every entry.  The lower-case index has to be emptied as well;
    # otherwise [] and has_key? keep answering from entries that were
    # supposedly cleared.  Returns the (now empty) dictionary, like Hash#clear.
    def clear
      @lc_keys.clear
      super
    end

  end # class Dictionary

  class Eliza

    # The class variables used below define the "application" processed by
    # ELIZA -- the rule sets used to transform inputs to outputs.
    #
    # Eliza.clear puts them in their initial state; afterwards the only rule
    # is a default rule that just echoes the user input.  Nothing calls
    # Eliza.clear when the module is first loaded, so the methods that need
    # this state (run, info, dump) check whether @@default is defined and
    # call Eliza.clear themselves if it is not.

    def Eliza.clear
      @@script = nil
      @@aliases = Hash.new
      @@vars = Hash.new
      @@starts = Array.new
      @@stops = Array.new
      @@queue = PriorityQueue.new

      @@verbose = false
      @@pre.clear
      @@post.clear
      @@rules.clear

      @@default = Rule.new(:default)
      @@default.addPattern(/(.*)/)
      @@default.addReassembly("$1")

      return true
    end

    #
    # def Eliza.queue
    #   return @@queue
    # end
    #
    # def Eliza.aliases
    #   return @@aliases
    # end
    #
    # def Eliza.vars
    #   return @@vars
    # end
    #

    # Pre-processing substitutions (word => replacement).
    def Eliza.pre
      return @@pre
    end

    # Post-processing substitutions (word => replacement).
    def Eliza.post
      return @@post
    end

    # The rules of the current script, indexed by key word.
    def Eliza.rules
      return @@rules
    end

    # Turn on tracing output.
    def Eliza.verbose
      @@verbose = true
    end

    # Turn off tracing output.
    def Eliza.quiet
      @@verbose = false
    end

    # Save a copy of a script that is distributed with RubyLabs; if no output
    # file name is specified, make a file name from the script name.
    # Prints a message and returns nil when the script does not exist.
    #
    # File.exist? is used because File.exists? was removed in Ruby 3.2, and
    # both files are handled with blocks so they are closed even on errors.
    def Eliza.checkout(script, filename = nil)
      scriptfilename = File.join(@@elizaDirectory, script.to_s + ".txt")
      unless File.exist?(scriptfilename)
        puts "Script not found: #{scriptfilename}"
        return nil
      end
      outfilename = filename.nil? ? (script.to_s + ".txt") : filename
      File.open(outfilename, "w") do |dest|
        File.foreach(scriptfilename) { |line| dest.puts line.chomp }
      end
      puts "Copy of #{script} saved in #{outfilename}"
    end

    # Utility procedure to get the rule for a word -- can be called
    # interactively or when processing a script.  A word that has no rule of
    # its own may be an alias for a variable that does.
    def Eliza.rule_for(w)
      @@rules[w] || ((x = @@aliases[w]) && @@rules[x])
    end

    # Preprocessing -- turn string into single line, words separated by single
    # space, apply pre-processing substitutions.  Modifies s in place.
    def Eliza.preprocess(s)
      s.gsub!(/\s+/, " ")
      s.gsub!(@@word) { |w| @@pre.has_key?(w) ? @@pre[w] : w }
      puts "preprocess: line = '#{s}'" if @@verbose
    end

    # First pass over the input -- scan each word, apply preprocessing
    # substitutions, add rule names to the priority queue.  NOTE: this method
    # does a destructive update to the input line....
    def Eliza.scan(line, queue)
      Eliza.preprocess(line)
      line.scan(@@word) do |w|
        w.downcase!
        if r = Eliza.rule_for(w)
          queue << r
          puts "add rule for '#{w}' to queue" if @@verbose
        end
      end
    end

    # Apply one rule to the line.  A response of the form "@name" redirects
    # to the rule called name.  Returns the response string, or nil when the
    # rule does not apply or the redirection names an unknown rule.
    def Eliza.apply(line, rule)
      puts "applying rule: key = '#{rule.key}'" if @@verbose
      res = rule.apply(line, :no_preprocess)
      return nil unless res
      return res unless res[0] == ?@
      rulename = res.slice(1..-1)
      target = @@rules[rulename]
      return Eliza.apply(line, target) if target
      warn "Eliza.apply: no rule for #{rulename}"
      nil
    end

    # The heart of the program -- apply transformation rules to an input
    # sentence.  Returns the first response produced, or nil (after a
    # warning) when no rule applies.  Modifies s in place.
    def Eliza.transform(s)
      s.sub!(/[\n\.\?!\-]*$/,"")        # strip trailing punctuation
      # s.downcase!

      @@queue = PriorityQueue.new
      @@queue << @@default              # initialize queue with default rule

      Eliza.scan(s, @@queue)            # add rules for recognized key words

      while @@queue.length > 0          # apply rules in order of priority
        if @@verbose
          print "queue: "
          p @@queue.collect { |r| r.key }
        end
        rule = @@queue.shift
        result = Eliza.apply(s, rule)
        return result if result
      end

      warn "No rules applied" if @@queue.empty?
      return nil
    end

    # The parser calls this method to deal with directives (lines where the
    # first word begins with a colon).  The directive word is removed from
    # the line; what is left is the directive's argument.
    def Eliza.parseDirective(line)
      word = Eliza.detachWord(line)
      case word
      when "alias"
        if line.empty? || line[0] != ?$
          warn "symbol after :alias must be a variable name; ignoring '#{word} #{line}'"
          return
        end
        sym = Eliza.detachWord(line)
        @@vars[sym] = Array.new
        line.split.each do |s|
          @@aliases[s] = sym
          @@vars[sym] << s
        end
      when "start"
        @@starts << line.unquote
      when "stop"
        @@stops << line.unquote
      when "pre"
        sym = Eliza.detachWord(line)
        @@pre[sym] = line.unquote
      when "post"
        sym = Eliza.detachWord(line)
        @@post[sym] = line.unquote
      when "default"
        @@default = line[@@word]        # a rule name; resolved in compileRules
      else
        warn "unknown directive: :#{word} (ignored)"
      end
    end

    # Remove a word from the front of a line (in place) and return it.
    def Eliza.detachWord(line)
      word = line[@@word]               # pattern matches the first word
      cut = line.index(" ")
      if cut
        line.slice!(0..cut)             # delete up to end of the word
        line.lstrip!                    # in case there are extra spaces after word
      else
        line.slice!(0..-1)              # line just had the one word
      end
      return word
    end

    # Check each pattern's regular expression and replace var names by
    # alternation constructs.  If the script specified a default rule name
    # look up that rule and save it as the default.
    #
    # The new expression is built with Regexp.new from the old source text
    # and option flags; script text is never passed to eval.  Raises
    # RuntimeError when an expression uses a variable that no :alias
    # directive defined.
    def Eliza.compileRules
      flag_mask = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
      @@rules.each do |key, val|
        val.patterns.each do |pat|
          source = pat.regexp.source.gsub(/\$\w+/) do |name|
            words = @@vars[name]
            if words.nil?
              raise "Eliza.compileRules: undefined variable #{name} in rule '#{key}'"
            end
            words.join("|")
          end
          pat.regexp = Regexp.new(source, pat.regexp.options & flag_mask)
        end
      end
      if @@default.class == String
        @@default = @@rules[@@default]
      end
    end

    # Parse rules in file f, store them in global arrays.  Strategy: use a
    # local var named 'rule', initially set to nil.  New rules start with a
    # single word at the start of a line.  When such a line is found in the
    # input file, create a new Rule object and store it in 'rule'.
    # Subsequent lines that are part of the current rule (lines that contain
    # regular expressions or strings) are added to current Rule object.
    # Directives indicate the end of a rule, so 'rule' is reset to nil when a
    # directive is seen.
    #
    # A Symbol names a script in the RubyLabs data directory.  Returns true
    # on success; on any error a message is printed and false is returned.
    def Eliza.load(filename)
      begin
        Eliza.clear
        rule = nil
        if filename.class == Symbol
          filename = File.join(@@elizaDirectory, filename.to_s + ".txt")
        end
        File.foreach(filename) do |line|        # foreach closes the file when done
          line.strip!
          next if line.empty? || line[0] == ?#
          if line[0] == ?:
            Eliza.parseDirective(line)
            rule = nil
          elsif line =~ @@iword
            rulename, priority = line.split
            rule = priority ? Rule.new(rulename, priority.to_i) : Rule.new(rulename)
            @@rules[rule.key] = rule
          elsif rule.nil?
            warn "missing rule name? unexpected input '#{line}'"
          elsif line[0] == ?/
            if line[-1] == ?/
              rule.addPattern(line)
            else
              warn "badly formed expression (missing /): '#{line}'"
            end
          elsif line[0] == ?"
            if line[-1] == ?"
              rule.addReassembly(line.unquote)
            else
              warn "badly formed string (missing \"): '#{line}'"
            end
          elsif line[0] == ?@
            rule.addReassembly(line)
          else
            # use the current rule's key: the name parsed from the header
            # line is a block-local and is not set on this iteration
            warn "unexpected line in rule for #{rule.key}: '#{line}'"
          end
        end
        Eliza.compileRules
        @@script = filename
      rescue => e
        puts "Eliza: Error processing #{filename}: #{e}"
        return false
      end
      return true
    end

    # Print the complete state of the current script.
    def Eliza.dump
      Eliza.clear unless defined? @@default
      puts "Script: #{@@script}"
      print "Starts:\n  "; p @@starts
      print "Stops:\n  "; p @@stops
      print "Vars:\n  "; p @@vars
      print "Aliases:\n  "; p @@aliases
      print "Pre:\n  "; p @@pre
      print "Post:\n  "; p @@post
      print "Default:\n  "; p @@default
      print "Queue:\n  "; p @@queue.collect { |r| r.key }
      puts
      @@rules.each { |key,val| puts val }
      return nil
    end

    # Print a summary of the current script: the number of rules and
    # sentence patterns, and the key words found in rule names, patterns and
    # aliases.
    def Eliza.info
      Eliza.clear unless defined? @@default

      words = Hash.new
      npatterns = 0

      @@rules.each do |k, r|
        words[k] = 1 unless k[0] == ?$
        r.patterns.each do |pat|
          npatterns += 1
          pat.cleanRegexp.split.each { |w| Eliza.saveWords(w, words) }
        end
      end

      @@aliases.keys.each { |k| Eliza.saveWords(k, words) }

      puts "Script: #{@@script}"
      puts "  #{@@rules.size} rules with #{npatterns} sentence patterns"
      puts "  #{words.length} key words: #{words.keys.sort.join(', ')}"
    end

    # Make every pattern start again with its first reassembly string.
    def Eliza.reset
      @@rules.each do |k, r|
        r.patterns.each { |pat| pat.reset }
      end
      return true
    end

    # Record the words found in s (a piece of a pattern, or an alias) as keys
    # of hash.  Articles and a few prepositions are skipped; parentheses,
    # ".*" and "?" are removed and alternatives are split at "|".
    #
    # s itself is left untouched: it may be a frozen hash key (Eliza.info
    # passes the keys of the alias table), which in-place editing would
    # reject.
    def Eliza.saveWords(s, hash)
      return if ["a","an","in","of","the"].include?(s)
      cleaned = s.gsub("(", "").gsub(")", "").gsub(".*", "").gsub("?", "")
      return if cleaned.length == 0
      cleaned.split(/\|/).each { |w| hash[w.downcase] = 1 }
    end

    # Interactive loop: read a sentence, print the response.  The loop ends
    # at end of input or when the user types "bye" or "quit".
    def Eliza.run
      Eliza.clear unless defined? @@default
      puts @@starts[rand(@@starts.length)] if ! @@starts.empty?
      loop do
        s = readline("  H: ", true)
        return if s.nil?
        s.chomp!
        next if s.empty?
        if s == "bye" || s == "quit"
          puts @@stops[rand(@@stops.length)] if ! @@stops.empty?
          return
        end
        # transform returns nil when no rule applies; interpolation prints an
        # empty response in that case instead of raising a TypeError
        puts "  C: #{Eliza.transform(s)}"
      end
    end

  end # class Eliza

  # These state variables are accessible by any methods in a class defined inside
  # the ElizaLab module
  
  # Tracing flag: when true, Rule#apply, Pattern#apply and several Eliza
  # methods print what they are doing.  Set by Eliza.verbose / Eliza.quiet.
  @@verbose = false
	# Directory searched for scripts named by a Symbol (Eliza.load) and for
	# scripts copied by Eliza.checkout.
	@@elizaDirectory = File.join(File.dirname(__FILE__), '..', 'data', 'eliza')
	# Substitutions applied to input words by Eliza.preprocess (:pre directive).
	@@pre = Dictionary.new
	# Substitutions applied to matched text by Pattern#apply (:post directive).
	@@post = Dictionary.new
	# Rules of the current script, indexed by key word (filled by Eliza.load).
	@@rules = Dictionary.new
	@@word = /[a-z\-$']+/i				# pattern for a "word" in the input language
	@@iword = /^[a-z\-$']+/i			# same, but must be the first item on the line
	# NOTE(review): @@var is not referenced anywhere else in this file;
	# Pattern#apply spells out the same expression as a literal.
	@@var = /\$\d+/								# variable name in reassembly string
	  
end # module ElizaLab

end # module RubyLabs

class String

  # A useful operation on strings -- call +s.unquote+ to remove double quotes
  # from the beginning and end of string +s+.  A string that is not enclosed
  # in double quotes is returned as is (the receiver itself, not a copy).
  def unquote
    quote = ?"
    return self unless self[0] == quote && self[-1] == quote
    slice(1..-2)
  end

end