# encoding: utf-8

require 'jldrill/model/items/DictionaryEntry'
require 'jldrill/model/DataFile'
require "jldrill/model/items/Vocabulary"
require "jldrill/model/items/edict/Meaning"
require 'Context/Log'
require 'kconv'

module JLDrill

    # A Dictionary.
    # It is composed of an array of entries from an EDict
    # dictionary. These entries are parsed to create DictionaryEntry.
    # The DictionaryEntry can then further parse the entries to
    # create Meanings.
	class Dictionary < DataFile
        attr_reader :dictEntries

        def initialize
            super
            @stepSize = 1000
        end

        # Ruby 1.8 and 1.9 use different counting mechanisms for the size
        # of strings.  hashSize must return the size of the character that
        # you want to hash on.  This implementation is an example.  You
        # should override it in the concrete Dictionary class.
        def hashSize
            return "雨".size
        end

        # Reset the dictionary back to empty
        def reset
            @dictEntries = []
            @readingHash = {}
            @kanjiHash = {}
            @simplifiedHash = {}
            super
        end

        # The number of items we have indexed in the dictionary.
        def dataSize
            return @dictEntries.size
        end

        def length
            return dataSize
        end

        # Return the meaning for the word at the position in the file.
        # The concrete implementation should override this method.
        def getMeaning(position)
            return ""
        end

        # Read all the lines into the buffer.
        def readLines
            super
        end

        # Hash the word in both the reading and kanji hashes so that
        # we can find them quickly.
        def hashWord(word)
            # We will hash on the first character.
            if !word.reading.empty?
                (@readingHash[word.reading[0..hashSize - 1]] ||= []).push(word)
            end
            if !word.kanji.empty?
                (@kanjiHash[word.kanji[0..hashSize - 1]] ||= []).push(word)
            end
            if !word.simplified.empty? && !word.kanji.eql?(word.simplified)
                (@simplifiedHash[word.simplified[0..hashSize - 1]] ||= []).push(word)
            end
        end

        def parseLine(index)
            word = getDictionaryEntry(index)
            if !word.nil?
                @dictEntries[@dictEntries.size] = word
                hashWord(word)
            end
            return word
        end

        def vocab(index)
            word = @dictEntries[index]
            if !word.nil?
                return word.toVocab
            else
                return nil
            end
        end

        def eachVocab(&block)
            @dictEntries.each do |word|
                block.call(word.toVocab)
            end
        end

        # Create the indeces for the item at the current line.
        def parseEntry
            parseLine(@parsed)
            @parsed += 1
        end

        # This is what to do when we are finished parsing.
        def finishParsing
            # Don't reset the lines because we need them later
            setLoaded(true)
        end

        # Find the items that may have been hashed with this reading.
        def findBinWithReading(reading)
            if reading.size >= hashSize
                bin = (@readingHash[reading[0..hashSize - 1]] ||= [])
            else
                keys = @readingHash.keys.find_all do |key|
                    key.start_with?(reading)
                end
                bin = []
                keys.each do |key|
                    bin += @readingHash[key]
                end
            end
            return bin
        end

        # Find the items that may have been hashed with this simplified kanji.
        def findBinWithSimplified(kanji)
            if kanji.size >= hashSize
                bin = (@simplifiedHash[kanji[0..hashSize - 1]] ||= [])
            else
                keys = @simplifiedHash.keys.find_all do |key|
                    key.start_with?(kanji)
                end
                bin = []
                keys.each do |key|
                    bin += @simplifiedHash[key]
                end
            end
            return bin
        end

        # Find the items that may have been hashed with this kanji.
        def findBinWithKanji(kanji)
            if kanji.size >= hashSize
                bin = (@kanjiHash[kanji[0..hashSize - 1]] ||= [])
            else
                keys = @kanjiHash.keys.find_all do |key|
                    key.start_with?(kanji)
                end
                bin = []
                keys.each do |key|
                    bin += @kanjiHash[key]
                end
            end
            if bin.empty?
                bin = findBinWithSimplified(kanji)
            end
            return bin
        end

        # Return all the DictionaryEntry that have a reading 
        # starting with reading.
        def findReadingsStartingWith(reading)
            bin = findBinWithReading(reading)
            if reading.size > hashSize 
                return bin.find_all do |word|
                    word.readingStartsWith?(reading)
                end
            else
                return bin
            end
        end

        # Return all the DictionaryEntry that have kanji starting with kanji.
        def findKanjiStartingWith(kanji)
            bin = findBinWithKanji(kanji)
            if kanji.size > hashSize 
                return bin.find_all do |word|
                    word.kanjiStartsWith?(kanji)
                end
            else
                return bin
            end
        end

        # Return all the DictionaryEntry that have the reading, reading.
        def findReading(reading)
            relevance = reading.size
            return findBinWithReading(reading).find_all do |word|
                if word.readingEql?(reading)
                    word.relevance = relevance
                    true
                else
                    false
                end
            end
        end

        # Return all the DictionaryEntry that have the kanji, kanji.
        def findKanji(kanji)
            relevance = kanji.size
            return findBinWithKanji(kanji).find_all do |word|
                if word.kanjiEql?(kanji)
                    word.relevance = relevance
                    true
                else
                    false
                end
            end
        end

        def findWord(string)
            kanji = findKanji(string)
            reading = findReading(string)
            return kanji + reading
        end

        # Return true if the dictionary contains this vocabulary.
        def include?(vocabulary)
            if vocabulary.reading.nil?
                return false
            end
            return findReading(vocabulary.reading).any? do |word|
                word.toVocab.eql?(vocabulary)
            end
        end

        # Return all the words that occur at the begining of reading
        def findReadingsThatStart(reading)
            findBinWithReading(reading[0..hashSize - 1]).find_all do |word|
                relevance = word.reading.size
                if word.keyStartsWithReading?(reading)
                    word.relevance = relevance
                    true
                else
                    false
                end
            end
        end

        # Return all the words that occur at the begining of kanji
        def findKanjiThatStart(kanji)
            findBinWithKanji(kanji[0..hashSize - 1]).find_all do |word|
                relevance = word.kanji.size
                if word.keyStartsWithKanji?(kanji)
                    word.relevance = relevance
                    true
                else
                    false
                end
            end
        end

        # Return all the words that occur at the begining of the string
        # These are sorted by size with the largest finds given first
        def findWordsThatStart(string)
            kanji = findKanjiThatStart(string)
            reading = findReadingsThatStart(string)
            return kanji + reading
        end
    end
end