lib/dbf/reader.rb in dbf-0.4.5 vs lib/dbf/reader.rb in dbf-0.4.6
- old
+ new
@@ -1,39 +1,16 @@
module DBF
-
- DBF_HEADER_SIZE = 32
- FPT_HEADER_SIZE = 512
- FPT_BLOCK_HEADER_SIZE = 8
- DATE_REGEXP = /([\d]{4})([\d]{2})([\d]{2})/
- VERSION_DESCRIPTIONS = {
- "02" => "FoxBase",
- "03" => "dBase III without memo file",
- "04" => "dBase IV without memo file",
- "05" => "dBase V without memo file",
- "30" => "Visual FoxPro",
- "31" => "Visual FoxPro with AutoIncrement field",
- "7b" => "dBase IV with memo file",
- "83" => "dBase III with memo file",
- "8b" => "dBase IV with memo file",
- "8e" => "dBase IV with SQL table",
- "f5" => "FoxPro with memo file",
- "fb" => "FoxPro without memo file"
- }
-
- class DBFError < StandardError; end
- class UnpackError < DBFError; end
-
class Reader
attr_reader :field_count
attr_reader :fields
attr_reader :record_count
attr_reader :version
attr_reader :last_updated
attr_reader :memo_file_format
+ attr_reader :memo_block_size
def initialize(file)
-
@data_file = File.open(file, 'rb')
@memo_file = open_memo(file)
reload!
end
@@ -47,11 +24,11 @@
@memo_file ? true : false
end
def open_memo(file)
%w(fpt FPT dbt DBT).each do |extension|
- filename = file.sub(/dbf$/i, extension)
+ filename = file.sub(/#{File.extname(file)[1..-1]}$/, extension)
if File.exists?(filename)
@memo_file_format = extension.downcase.to_sym
return File.open(filename, 'rb')
end
end
@@ -60,95 +37,49 @@
def field(field_name)
@fields.detect {|f| f.name == field_name.to_s}
end
- def memo(start_block)
- @memo_file.rewind
- @memo_file.seek(start_block * @memo_block_size)
- if @memo_file_format == :fpt
- memo_type, memo_size, memo_string = @memo_file.read(@memo_block_size).unpack("NNa56")
- if memo_size > @memo_block_size - FPT_BLOCK_HEADER_SIZE
- memo_string << @memo_file.read(memo_size - @memo_block_size + FPT_BLOCK_HEADER_SIZE)
- end
- else
- if version == "83" # dbase iii
- memo_string = ""
- loop do
- memo_string << block = @memo_file.read(512)
- break if block.strip.size < 512
- end
- elsif version == "8b" # dbase iv
- memo_type, memo_size = @memo_file.read(8).unpack("LL")
- memo_string = @memo_file.read(memo_size)
- end
- end
- memo_string
- end
-
# An array of all the records contained in the database file
def records
seek_to_record(0)
@records ||= Array.new(@record_count) do |i|
if active_record?
- build_record
+ Record.new(self, @data_file, @memo_file)
else
seek_to_record(i + 1)
nil
end
end
end
alias_method :rows, :records
- # Jump to record
+ # Returns the record at <a>index</i> by seeking to the record in the
+ # physical database file. See the documentation for the records method for
+ # information on how these two methods differ.
def record(index)
seek_to_record(index)
- active_record? ? build_record : nil
+ active_record? ? Record.new(self, @data_file, @memo_file) : nil
end
alias_method :row, :record
def version_description
VERSION_DESCRIPTIONS[version]
end
private
+ # Returns false if the record has been marked as deleted, otherwise it returns true. When dBase records are deleted a
+ # flag is set, marking the record as deleted. The record will not be fully removed until the database has been compacted.
def active_record?
@data_file.read(1).unpack('H2').to_s == '20'
rescue
false
end
- def build_record
- record = Record.new
- @fields.each do |field|
- case field.type
- when 'N' # number
- record[field.name] = field.decimal == 0 ? unpack_integer(field) : unpack_float(field) rescue nil
- when 'D' # date
- raw = unpack_string(field).to_s.strip
- unless raw.empty?
- begin
- record[field.name] = Time.gm(*raw.match(DATE_REGEXP).to_a.slice(1,3).map {|n| n.to_i})
- rescue
- record[field.name] = Date.new(*raw.match(DATE_REGEXP).to_a.slice(1,3).map {|n| n.to_i}) rescue nil
- end
- end
- when 'M' # memo
- starting_block = unpack_integer(field)
- record[field.name] = starting_block == 0 ? nil : memo(starting_block) rescue nil
- when 'L' # logical
- record[field.name] = unpack_string(field) =~ /^(y|t)$/i ? true : false rescue false
- else
- record[field.name] = unpack_string(field)
- end
- end
- record
- end
-
def get_header_info
@data_file.rewind
@version, @record_count, @header_length, @record_length = @data_file.read(DBF_HEADER_SIZE).unpack('H2xxxVvv')
@field_count = (@header_length - DBF_HEADER_SIZE + 1) / DBF_HEADER_SIZE
end
@@ -156,11 +87,11 @@
def get_field_descriptors
@fields = []
@field_count.times do
name, type, length, decimal = @data_file.read(32).unpack('a10xax4CC')
if length > 0 && !name.strip.empty?
- @fields << Field.new(name.strip, type, length, decimal)
+ @fields << Field.new(name, type, length, decimal)
end
end
# adjust field count
@field_count = @fields.size
@fields
@@ -179,49 +110,93 @@
def seek(offset)
@data_file.seek(@header_length + offset)
end
def seek_to_record(index)
- seek(@record_length * index)
+ seek(index * @record_length)
end
- def unpack_field(field)
- @data_file.read(field.length).unpack("a#{field.length}")
- end
-
- def unpack_string(field)
- unpack_field(field).to_s
- end
-
- def unpack_integer(field)
- unpack_string(field).to_i
- end
-
- def unpack_float(field)
- unpack_string(field).to_f
- end
-
end
class FieldError < StandardError; end
class Field
- attr_accessor :type, :length, :decimal
+ attr_accessor :name, :type, :length, :decimal
def initialize(name, type, length, decimal)
raise FieldError, "field length must be greater than 0" unless length > 0
- self.name, self.type, self.length, self.decimal = name, type, length, decimal
+ self.name, self.type, self.length, self.decimal = name.strip, type, length, decimal
end
def name=(name)
@name = name.gsub(/\0/, '')
end
-
- def name
- @name
- end
+
end
class Record < Hash
+
+ def initialize(reader, data_file, memo_file)
+ @reader, @data_file, @memo_file = reader, data_file, memo_file
+ reader.fields.each do |field|
+ case field.type
+ when 'N' # number
+ self[field.name] = field.decimal == 0 ? unpack_string(field).to_i : unpack_string(field).to_f
+ when 'D' # date
+ raw = unpack_string(field).strip
+ unless raw.empty?
+ begin
+ self[field.name] = Time.gm(*raw.match(DATE_REGEXP).to_a.slice(1,3).map {|n| n.to_i})
+ rescue
+ self[field.name] = Date.new(*raw.match(DATE_REGEXP).to_a.slice(1,3).map {|n| n.to_i})
+ end
+ end
+ when 'M' # memo
+ starting_block = unpack_string(field).to_i
+ self[field.name] = read_memo(starting_block)
+ when 'L' # logical
+ self[field.name] = unpack_string(field) =~ /^(y|t)$/i ? true : false
+ else
+ self[field.name] = unpack_string(field)
+ end
+ end
+ self
+ end
+
+ def unpack_field(field)
+ @data_file.read(field.length).unpack("a#{field.length}")
+ end
+
+ def unpack_string(field)
+ unpack_field(field).to_s
+ end
+
+ def read_memo(start_block)
+ return nil if start_block == 0
+ @memo_file.seek(start_block * @reader.memo_block_size)
+ if @reader.memo_file_format == :fpt
+ memo_type, memo_size, memo_string = @memo_file.read(@reader.memo_block_size).unpack("NNa56")
+
+ memo_block_content_size = @reader.memo_block_size - FPT_BLOCK_HEADER_SIZE
+ if memo_size > memo_block_content_size
+ memo_string << @memo_file.read(memo_size - @reader.memo_block_size + FPT_BLOCK_HEADER_SIZE)
+ elsif memo_size > 0 and memo_size < memo_block_content_size
+ memo_string = memo_string[0, memo_size]
+ end
+ else
+ case @reader.version
+ when "83" # dbase iii
+ memo_string = ""
+ loop do
+ memo_string << block = @memo_file.read(512)
+ break if block.strip.size < 512
+ end
+ when "8b" # dbase iv
+ memo_type, memo_size = @memo_file.read(8).unpack("LL")
+ memo_string = @memo_file.read(memo_size)
+ end
+ end
+ memo_string
+ end
end
end