# encoding: utf-8
require 'Context/Publisher'
require 'kconv'

module JLDrill
    # Abstract base class for a data file in JLDrill.  It defines the
    # interface for a file that is first read into memory and then parsed
    # in chunks, so that parsing can be spread out (e.g. done in the
    # background) instead of happening all at once.
    #
    # Typical life cycle:
    #   1. load(filename)    - read the whole file and split it into lines
    #   2. parseChunk(n)     - parse up to n entries; repeat until it
    #                          returns true (or call parse to do it all)
    #   3. finishParsing     - called automatically once every line has
    #                          been parsed; publishes "loaded"
    #
    # Concrete classes must implement dataSize and parser (or override
    # parseEntry so that parser is not needed).
    class DataFile
        attr_reader :file, :lines, :parsed, :publisher, :stepSize, :encoding
        attr_writer :lines, :stepSize

        # Creates an empty data file with its own publisher.
        def initialize
            @publisher = Context::Publisher.new(self)
            # Default to reporting every 100 lines.  Note that stepSize is
            # only stored here; nothing in this class reads it.
            @stepSize = 100
            @encoding = nil
            reset
        end

        # Returns the number of items you have created.
        # loaded? compares the result with 0, so concrete classes must
        # return a number.
        def dataSize
            # Please implement this in the concrete class
        end

        # Returns a reference to the object that can parse a line.
        # The default parseEntry calls parse(line) on it.
        def parser
            # Please implement this in the concrete class unless
            # you modify the parseEntry() method to directly access
            # the parser.
        end

        # Resets the file: forgets the filename, drops any lines that
        # have been read and rewinds the parse position.
        def reset
            @file = ""
            @lines = []
            @parsed = 0
            setLoaded(false)
            # Please define the rest of the method and call super()
            # at the end.
        end

        # Sets the filename of the file.  Only the name is stored; the
        # lines already held are left untouched and nothing is read from
        # disk.  Use load to read a file.
        def file=(filename)
            @file = filename unless @file == filename
        end

        # Indicate to the outside world that the file is loaded.
        # Publishes "loaded" when bool is true; does nothing otherwise.
        def setLoaded(bool)
            @publisher.update("loaded") if bool
        end

        # Returns true if there is no more data to parse
        def eof?
            @parsed >= @lines.size
        end

        # Returns true if we have completed parsing a file, i.e. there is
        # nothing left to parse and at least one item was created.
        def loaded?
            eof? && (dataSize > 0)
        end

        # Returns a float between 0.0 and 1.0 showing the fraction of the
        # lines that have been parsed so far.  Returns 0.0 when there are
        # no lines.
        def fraction
            total = @lines.size
            return 0.0 if total == 0
            @parsed.to_f / total.to_f
        end

        # Try to determine the encoding from the first 999 characters
        # of the string.  By keeping it to a multiple of 3 we avoid
        # splitting the encodings for UTF8 strings.  I can't help but
        # think that this function is prone to failure since UTF8 characters
        # are variable length, but I can't think of a better idea.
        # Note this problem will only manifest itself on ruby 1.8
        #
        # Returns whatever Kconv.guess reports for that prefix.
        def findEncoding(buffer)
            Kconv.guess(buffer[0..998])
        end

        # Make sure the encoding is correct and split the lines.
        # The guessed encoding is remembered in @encoding; anything that
        # is not guessed to be UTF8 is converted to UTF8 before splitting.
        # Returns the array of lines.
        def createLines(buffer)
            @encoding = findEncoding(buffer)
            if @encoding != Kconv::UTF8
                buffer = Kconv.kconv(buffer, Kconv::UTF8, @encoding)
            end
            @lines = buffer.split("\n")
        end

        # Read the file into memory.  This is done before parsing.
        # If the file cannot be read a warning is logged and the file is
        # treated as empty.
        def readLines
            begin
                # File.read rather than IO.read: IO.read treats a name
                # that starts with "|" as a command and runs it in a
                # subprocess.  File.read only ever opens a file.
                buffer = File.read(@file)
            rescue StandardError
                Context::Log::warning("JLDrill::DataFile",
                                      "Could not load #{@file}.")
                buffer = ""
            end
            createLines(buffer)
            @parsed = 0
        end

        # Load in the file data, but don't parse it yet.
        # Any previously held data is discarded first.
        def load(file)
            reset
            @file = file
            readLines
        end

        # Parse the entire file all at once.
        # Returns the result of parseChunk.
        def parse
            parseChunk(@lines.size)
        end

        # Parses one entry from the lines.
        # The default parses a single line from the lines.
        # You can override this for files whose entries span more than one line.
        def parseEntry
            parser.parse(@lines[@parsed])
            @parsed += 1
        end

        # Parse a chunk of the file.  Size shows how many entries
        # to parse; it is capped at the number of lines available.
        # Returns true if the whole file has now been parsed.
        def parseChunk(size)
            # Work out where to stop before touching the publisher so a
            # bad size cannot leave it blocked.
            last = [@parsed + size, @lines.size].min

            # We don't want to get updated when we parse a large block of data
            @publisher.block
            begin
                parseEntry while @parsed < last
            ensure
                # Always unblock, even if the parser raised; otherwise the
                # publisher would stay blocked for good.
                @publisher.unblock
            end

            # If the parsing is finished dispose of the unparsed lines
            finished = eof?
            finishParsing if finished
            finished
        end

        # Usually we want to delete the original source lines when
        # we are finished parsing.  But some files are only
        # partially parsed on reading (like Edict).
        # Please redefine this if you want to keep the source
        # lines around for some reason.
        def finishParsing
            @lines = []
            @parsed = 0
            setLoaded(true)
        end

        # Returns the filename without the path, or "No name" if no
        # filename has been set.
        def shortFilename
            return "No name" if @file.nil? || @file.empty?
            File.basename(file)
        end

    end
end