lib/jldrill/model/DataFile.rb in jldrill-0.5.1.7 vs lib/jldrill/model/DataFile.rb in jldrill-0.6.0.1
- old
+ new
@@ -1,20 +1,23 @@
+# encoding: utf-8
require 'Context/Publisher'
+require 'kconv'
module JLDrill
# This class represents a data file in JLDrill. This
# is an abstract class meant to define the interface
# for having a file which can be read in the background
# in JLDrill.
class DataFile
- attr_reader :file, :lines, :parsed, :publisher, :stepSize
+ attr_reader :file, :lines, :parsed, :publisher, :stepSize, :encoding
attr_writer :lines, :stepSize
# Set up a new DataFile: create the publisher for this object, pick the
# default reporting step, clear the recorded encoding, then call reset.
def initialize
@publisher = Context::Publisher.new(self)
# Default to reporting every 100 lines
@stepSize = 100
+ @encoding = nil # nothing detected yet; createLines assigns the guessed encoding
# reset is defined outside this excerpt; presumably it initializes the
# remaining state (file, lines, parsed) -- TODO confirm
self.reset
end
# Returns the number of items you have created
def dataSize
@@ -70,18 +73,39 @@
retVal = @parsed.to_f / @lines.size.to_f
end
return retVal
end
+ # Guess the character encoding of buffer by handing a leading sample
+ # (buffer[0..998]) to Kconv.guess, and return whatever Kconv reports.
+ # The original author picked 999 as a multiple of 3, hoping not to cut a
+ # UTF8 sequence in half, but notes that UTF8 characters are variable
+ # length, so the sample boundary may still split one. The author also
+ # notes that this problem will only manifest itself on ruby 1.8.
+ def findEncoding(buffer)
+ encoding = Kconv.guess(buffer[0..998]) # only indices 0 through 998 are sampled
+ return encoding
+ end
+
+ # Guess the buffer's encoding, convert the buffer to UTF8 when the guess is anything else, and store the split lines in @lines
+ def createLines(buffer)
+ @encoding = findEncoding(buffer) # kept so it can be read back through the :encoding reader
+ if (@encoding != Kconv::UTF8) # a buffer already guessed as UTF8 is used as-is
+ buffer = Kconv.kconv(buffer, Kconv::UTF8, @encoding) # converted text replaces the local buffer
+ end
+ @lines = buffer.split("\n") # the "\n" separators are not kept in the stored lines
+ end
+
# Read the file into memory. This is done before parsing.
# The whole of @file is read as a single string and passed to
# createLines. If the read raises, a warning is logged and an empty
# string is used instead. @parsed is set back to 0 at the end.
# (The lines marked "-" are the older form, which filled @lines
# directly from IO.readlines and fell back to an empty array.)
def readLines
begin
- @lines = IO.readlines(@file)
+ buffer = IO.read(@file)
rescue
# Bare rescue: any StandardError from the read ends up here
Context::Log::warning("JLDrill::DataFile",
"Could not load #{@file}.")
- @lines = []
+ buffer = ""
end
+ createLines(buffer)
# Nothing has been parsed yet for the freshly read lines
@parsed = 0
end
# Load in the file data, but don't parse it yet
def load(file)