#!/usr/bin/ruby
#
#	dictionary.rb - this file contains the Dictionary class of the
#	Ruby-LinkParser system.  This governs the parsing of the dictionary files
#	into their respective data structures.
#
# == Synopsis
#
#   require "linkparser/dictionary"
#   
#   dict_opts = Hash::new('')
#   dict_opts['dict'] = "tiny.dict"
#   dict_opts['affix'] = "4.0.affix"
#   dict_opts['datadir'] = "/usr/local/share/linkparser"
#   dict = LinkParser::Dictionary::new(dict_opts)
#	puts dict['word'].to_connectors
#
# == Rcsid
#
# $Id: dictionary.rb,v 1.16 2003/08/28 04:49:34 stillflame Exp $
#
# == Authors
#
#	Martin Chase <stillflame@FaerieMUD.org>
#
#:include: COPYRIGHT
#
#---
#
# Please see the file COPYRIGHT for licensing details.
#

require "forwardable"
require "rbconfig"
#require "linkparser/log"
require "linkparser/utils"
require "linkparser/definition"

class LinkParser

	# Reads the link-grammar dictionary files (main dictionary, affix,
	# post-processing knowledge and constituent knowledge) into hashes keyed by
	# word, whose values are LinkParser::Definition objects.  Hash methods
	# called on a Dictionary instance are forwarded to the main dictionary
	# hash.
	class Dictionary

		extend Forwardable

		# Directory searched for dictionary files when the caller gives none.
		# RbConfig is used because the older top-level Config constant has
		# been removed from Ruby.
		DefaultDataDir = RbConfig::CONFIG['datadir']

		# Identifiers for the different kinds of dictionary file.
		Dict = 1
		Affix = 2
		Post = 3
		Constituent = 4

		# File names used when the caller passes nil for a dictionary name.
		DefaultDict = "4.0.dict"
		DefaultAffix = "4.0.affix"
		DefaultPost = "4.0.knowledge"
		DefaultConstituent = "4.0.constituent-knowledge"

		# Maps each dictionary kind to its default file name.
		DefaultFiles = {
			Dict => DefaultDict,
			Affix => DefaultAffix,
			Post => DefaultPost,
			Constituent => DefaultConstituent
		}.freeze

		class << self

			# Parses the text of a dictionary for its words and their
			# definitions.
			#
			# dict::    the dictionary contents (a String, not a file name)
			# datadir:: directory that word-list files named inside the
			#           dictionary are resolved against
			#
			# Returns a hash keyed by word with LinkParser::Definition values
			# (words sharing a statement share one Definition object), or nil
			# when the text contains no statements.  Malformed statements are
			# reported on $stderr and skipped.
			def read_dict( dict, datadir = DefaultDataDir )
				wordHash = {} # Hash#[] is faster than BinarySearchTree#[]
				macros = []   # [name, expansion] pairs, in definition order

				# Flatten the text into "words:definition" statements.  Newlines
				# and %-comments become spaces; a comment ends at the newline
				# or at the end of the text.  A % or ; directly followed by a
				# double-quote is a quoted punctuation word, not syntax.
				statements = dict.
					gsub(/\n+|\s*%(?!\").*?(?:\n|\z)/, " ").
					split(/\s*;(?!\")\s*/)
				return nil if statements.empty?

				statements.each {|statement|
					words, definition = statement.split(/\s*:(?!\")\s*/)
					words = words.strip if words

					# Validate before touching the definition: a statement
					# without a ':' has no definition to expand.
					if words.nil? || words.empty? || definition.nil?
						$stderr.print "dict error #{statement}\n"
						#	raise ParseError, "Dictionary outta whack: '#{statement}'"
						next
					end

					# Expand previously defined macros.  Names are matched
					# literally and the block form keeps backslashes in the
					# expansion from being read as back-references.
					macros.each {|name, expansion|
						definition = definition.gsub(name) { expansion }
					}

					if words =~ /<.*>/
						# A macro definition: remember it for later statements.
						macros << [words, definition]
						next
					end

					if words =~ /^\// #/
						# then it's a filename, not a word, and the file will
						# contain a list of words.
						#	Log.info("Reading in words from %s." % datadir + words)
						$stderr.print "Reading in words \n"
						words = read_file( File.join(datadir, words) )
					end

					# so now we have a bunch of words and their shared
					# definition.  put each word into the hash with a value of
					# the definition data structure.
					# punctuation marks are in double-quotes
					unquoted = words.gsub(/"([^ ]+?)"/, '\1')
					shared = Definition::new(definition)
					unquoted.split(/\s+/).each {|word|
						wordHash[word] = shared unless word.empty?
					}
				}
				return wordHash
			end

			# Reads a dictionary file from datadir into a word-keyed hash.
			#
			# dicttype:: one of Dict, Affix, Post or Constituent; selects the
			#            default file name when dictname is nil
			# dictname:: file name relative to datadir; nil means "use the
			#            default", an empty string means "do not load"
			# datadir::  directory containing the dictionary files
			#
			# Returns the hash built by read_dict, or nil when dictname is
			# empty.  Raises ArgumentError when dictname is nil and dicttype
			# has no default file.
			def open_read_dict( dicttype, dictname, datadir = DefaultDataDir )
				return nil if dictname && dictname.empty?

				filename = dictname || DefaultFiles[dicttype]
				unless filename
					raise ArgumentError,
						"unknown dictionary type: #{dicttype.inspect}"
				end

				read_dict( read_file(File.join(datadir, filename)), datadir )
			end

			private

			# Returns the full contents of the file at path; the block form
			# closes the handle even if reading fails.
			def read_file( path )
				File.open(path) {|f| f.read(f.stat.size) }
			end

		end # class << self

		# Initializes a new Dictionary object.  takes a hash as its argument.
		# entries:
		#	datadir - the directory where the dictionary files are located
		#	dict - the main dictionary file
		#	affix - the affix dictionary file
		#	knowledge - the post-processing dictionary file
		#	constituent-knowledge - the constituent knowledge dictionary file
		#
		# setting a value to an empty string prevents it from being used, which
		# will work out fine for all but the datadir and the main dict.  a value
		# set to nil will mean to use the default setting.
		def initialize( dict_opts )
			@datadir = dict_opts['datadir'] || DefaultDataDir

			@dict = Dictionary::open_read_dict(
				Dict, dict_opts['dict'], @datadir )
			@affix = Dictionary::open_read_dict(
				Affix, dict_opts['affix'], @datadir )
			@post = Dictionary::open_read_dict(
				Post, dict_opts['knowledge'], @datadir )
			@constituent = Dictionary::open_read_dict(
				Constituent, dict_opts['constituent-knowledge'], @datadir )
		end

		# the directory that the dictionary files are located
		attr_reader :datadir

		# the main dictionary hash
		attr_reader :dict

		# the post-processing dictionary hash
		attr_reader :post

		# the constituent-knowledge dictionary hash
		attr_reader :constituent

		# Forward Hash's own instance methods (#[], #each, #key?, ...) to the
		# main dictionary hash.
		def_delegators :@dict, *(Hash.instance_methods(false))

		# Does affix processing on an array of words: each affix entry whose
		# definition mentions RPUNC is split off the right of a word, and each
		# one whose definition mentions LPUNC is split off the left.
		#
		# Returns words itself when no affix dictionary is loaded; otherwise
		# an array in which every affected word is replaced by its two pieces.
		def affix( words )
			return words unless @affix
			@affix.each {|punct, move|
				# Neither the kind of affix nor its pattern depends on the
				# word being examined, so work them out once per affix.
				kind = move.inspect
				escaped = Regexp.escape(punct)
				right = /RPUNC/.match(kind) ? /(.*)(#{escaped}.*)$/ : nil
				left = /LPUNC/.match(kind) ? /^(#{escaped})(.*)/ : nil

				words = words.inject([]) {|arr, ele|
					if right && (found = right.match(ele))
						arr << found[1] << found[2]
					elsif left && (found = left.match(ele))
						arr << found[1] << found[2]
					else
						arr << ele
					end
				}
			}
			words
		end

	end # class Dictionary
end # class LinkParser

if __FILE__ == $0
    # Self-test: run only when this file is executed directly.  Parses a small
    # inline dictionary and prints every word with the connectors of its
    # definition.
    #
    #	require 'profile'

    sample = <<-DICT
    <macro>:(A- and {Bb+ or @C+});
    <meow>:(Aa- & E+);
	bar:(A- or A-) and A-;
	foo:( ((A- and E-) and {Bb+ or (@C+ and De-)});% and (E+) );
	word: %comment - miaow
  	(<meow>) | (<macro> & [@Dd- or De-]);
	meow:(A+ and {B- or C-});
	boo:[[[()]]];
	am:((Sp- or (RS- & Bp-) or ({Q-} & SIp+)) & (((O+ or B-) & {@MV+}) or P+ or AF-));
	DICT

    entries = LinkParser::Dictionary::read_dict(sample)
    entries.each do |word, definition|
        puts "#{word} = #{definition.to_connectors}"
    end
end