lib/perobs/FlatFile.rb in perobs-4.2.0 vs lib/perobs/FlatFile.rb in perobs-4.3.0
- old
+ new
@@ -291,34 +291,36 @@
# @return [String] Raw object data
def read_obj_by_address(addr, id)
header = FlatFileBlobHeader.read(@f, addr, id)
if header.id != id
PEROBS.log.fatal "Database index corrupted: Index for object " +
- "#{id} points to object with ID #{header.id}"
+ "#{id} points to object with ID #{header.id} at address #{addr}"
end
buf = nil
begin
@f.seek(addr + FlatFileBlobHeader::LENGTH)
buf = @f.read(header.length)
rescue IOError => e
- PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
+ PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
+ e.message
end
# Uncompress the data if the compression bit is set in the flags byte.
if header.is_compressed?
begin
buf = Zlib.inflate(buf)
rescue Zlib::BufError, Zlib::DataError
PEROBS.log.fatal "Corrupted compressed block with ID " +
- "#{header.id} found."
+ "#{id} found at address #{addr}."
end
end
if checksum(buf) != header.crc
- PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
+ PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
+ "at address #{addr}"
end
buf
end
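
The read path above is a fixed pattern: seek past the fixed-size blob header, read exactly header.length bytes, inflate if the compression flag is set, and verify the checksum against the CRC stored in the header. A minimal standalone sketch of that pattern, assuming the header fields used in the diff and treating Zlib.crc32 and HEADER_LENGTH as stand-ins for FlatFile#checksum and FlatFileBlobHeader::LENGTH:

    require 'zlib'

    HEADER_LENGTH = 25  # hypothetical fixed header size, for illustration

    # Read and verify one blob body from an open file, given a header
    # object with addr, length, crc and is_compressed? (as in the diff).
    def read_verified_blob(file, header)
      file.seek(header.addr + HEADER_LENGTH)
      buf = file.read(header.length)
      raise 'Premature end of file' if buf.nil? || buf.bytesize != header.length
      # Inflate only blobs that were stored compressed.
      buf = Zlib.inflate(buf) if header.is_compressed?
      # Reject the data if it does not match the recorded checksum.
      raise 'Checksum failure' unless Zlib.crc32(buf) == header.crc
      buf
    end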
@@ -337,11 +339,11 @@
# Clear all marks.
def clear_all_marks
if @marks
@marks.clear
else
- @marks = IDList.new(@db_dir, 'marks', 8)
+ @marks = IDList.new(@db_dir, 'marks', item_counter)
end
end
# Eliminate all the holes in the file. This is an in-place
# implementation. No additional space will be needed on the file system.
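
Such an in-place compaction can be pictured as a single front-to-back pass: copy each live blob down to the current write offset, skipping the holes, then truncate the file at the new end. A rough sketch under assumed helpers (valid? and total_length are hypothetical stand-ins for the real FlatFileBlobHeader accessors):

    # Slide live blobs over the holes left by deleted ones. Working from
    # the front of the file means moved data never overwrites a blob that
    # has not been copied yet. The file must be opened read-write.
    def compact(file, headers_in_file_order)
      write_pos = 0
      headers_in_file_order.each do |h|
        next unless h.valid?            # skip holes/deleted blobs
        if h.addr != write_pos
          file.seek(h.addr)
          blob = file.read(h.total_length)
          file.seek(write_pos)
          file.write(blob)
        end
        write_pos += h.total_length
      end
      file.truncate(write_pos)
    end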
@@ -450,20 +452,18 @@
# Recreate the index file and create an empty space list.
regenerate_index_and_spaces
end
- # Check (and repair) the FlatFile.
- # @param repair [Boolean] True if errors should be fixed.
+ # Check the FlatFile.
# @return [Integer] Number of errors found
- def check(repair = false)
+ def check()
errors = 0
return errors unless @f
t = Time.now
- PEROBS.log.info "Checking FlatFile database" +
- "#{repair ? ' in repair mode' : ''}..."
+ PEROBS.log.info "Checking FlatFile database..."
# First check the database blob file. Each entry should be readable and
# correct and all IDs must be unique. We use a shadow index to keep
# track of the already found IDs.
new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
@@ -481,11 +481,10 @@
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
buf = @f.read(header.length)
if buf.bytesize != header.length
PEROBS.log.error "Premature end of file in blob with ID " +
"#{header.id}."
- discard_damaged_blob(header) if repair
errors += 1
next
end
# Uncompress the data if the compression bit is set in the mark
@@ -494,20 +493,18 @@
begin
buf = Zlib.inflate(buf)
rescue Zlib::BufError, Zlib::DataError
PEROBS.log.error "Corrupted compressed block with ID " +
"#{header.id} found."
- discard_damaged_blob(header) if repair
errors += 1
next
end
end
if header.crc && checksum(buf) != header.crc
PEROBS.log.error "Checksum failure while checking blob " +
"with ID #{header.id}"
- discard_damaged_blob(header) if repair
errors += 1
next
end
rescue IOError => e
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
@@ -519,26 +516,10 @@
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
"Addresses: #{previous_address}, #{header.addr}"
errors += 1
previous_header = FlatFileBlobHeader.read(@f, previous_address,
header.id)
- if repair
- # We have two blobs with the same ID and we must discard one of
- # them.
- if header.is_outdated?
- discard_damaged_blob(header)
- elsif previous_header.is_outdated?
- discard_damaged_blob(previous_header)
- else
- PEROBS.log.error "None of the blobs with same ID have " +
- "the outdated flag set. Deleting the smaller one."
- errors += 1
- discard_damaged_blob(header.length < previous_header.length ?
- header : previous_header)
- end
- next
- end
else
# ID is unique so far. Add it to the shadow index.
new_index.insert(header.id, header.addr)
end
end
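
The duplicate detection in this loop rests on a shadow index: every ID seen so far is recorded with its address, so a second blob carrying the same ID is caught the moment it is read. The same idea in a stripped-down form, with a plain Hash standing in for the on-disk BTree used above:

    # blobs is assumed to be an enumerable of [id, addr] pairs.
    seen = {}          # shadow index: id => address of first occurrence
    duplicates = []
    blobs.each do |id, addr|
      if (first_addr = seen[id])
        duplicates << [ id, first_addr, addr ]
      else
        seen[id] = addr
      end
    end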
@@ -551,53 +532,46 @@
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
# The blob file ends with a corrupted blob header.
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
'bytes found at the end of FlatFile.'
corrupted_blobs += 1
- if repair
- PEROBS.log.error "Truncating FlatFile to " +
- "#{end_of_last_healthy_blob} bytes by discarding " +
- "#{@f.size - end_of_last_healthy_blob} bytes"
- @f.truncate(end_of_last_healthy_blob)
- end
end
errors += corrupted_blobs
end
# We no longer need the new index.
new_index.close
new_index.erase
- if repair && corrupted_blobs > 0
- erase_index_files
- defragmentize
- regenerate_index_and_spaces
- elsif corrupted_blobs == 0
+ if corrupted_blobs == 0
# Now we check the index data. It must be correct and the entries must
# match the blob file. All entries in the index must be in the blob file
# and vice versa.
begin
index_ok = @index.check do |id, address|
- has_id_at?(id, address)
+ unless has_id_at?(id, address)
+ PEROBS.log.error "Index contains an entry for " +
+ "ID #{id} at address #{address} that is not in FlatFile"
+ false
+ else
+ true
+ end
end
x_check_errs = 0
space_check_ok = true
unless index_ok && (space_check_ok = @space_list.check(self)) &&
(x_check_errs = cross_check_entries) == 0
errors += 1 unless index_ok && space_check_ok
errors += x_check_errs
- regenerate_index_and_spaces if repair
end
rescue PEROBS::FatalError
errors += 1
- regenerate_index_and_spaces if repair
end
end
- sync if repair
- PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
+ PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
"#{errors} errors found."
errors
end
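
With the repair parameter gone, check() in 4.3.0 is purely diagnostic: it reports and counts problems but never modifies the database, and the caller decides how to react to the returned error count. A hedged usage sketch (the repair entry point mentioned in the comment is an assumption, not a confirmed 4.3.0 API):

    errors = flat_file.check
    if errors > 0
      PEROBS.log.error "FlatFile check found #{errors} error(s); " +
        'a separate repair/regeneration pass is required'
      # Repair would now be triggered elsewhere, e.g. something like
      # store.check(repair: true) at the Store level (hypothetical call).
    end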
@@ -685,21 +659,11 @@
errors += 1
previous_header = FlatFileBlobHeader.read(@f, previous_address,
header.id)
# We have two blobs with the same ID and we must discard one of
# them.
- if header.is_outdated?
- discard_damaged_blob(header)
- elsif previous_header.is_outdated?
- discard_damaged_blob(previous_header)
- else
- PEROBS.log.error "None of the blobs with same ID have " +
- "the outdated flag set. Deleting the smaller one."
- errors += 1
- discard_damaged_blob(header.length < previous_header.length ?
- header : previous_header)
- end
+ discard_duplicate_blobs(header, previous_header)
else
# ID is unique so far. Add it to the shadow index.
@index.insert(header.id, header.addr)
end
@@ -923,9 +887,26 @@
def discard_damaged_blob(header)
PEROBS.log.error "Discarding corrupted data blob for ID #{header.id} " +
"at offset #{header.addr}"
header.clear_flags
+ end
+
+ def discard_duplicate_blobs(header, previous_header)
+ if header.is_outdated?
+ discard_damaged_blob(header)
+ elsif previous_header.is_outdated?
+ discard_damaged_blob(previous_header)
+ else
+ smaller, larger = header.length < previous_header.length ?
+ [ header, previous_header ] : [ previous_header, header ]
+ PEROBS.log.error "None of the blobs with same ID have " +
+ "the outdated flag set. Deleting the smaller one " +
+ "at address #{smaller.addr}"
+ discard_damaged_blob(smaller)
+ @space_list.add_space(smaller.addr, smaller.length)
+ @index.insert(larger.id, larger.addr)
+ end
end
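
The extracted helper encodes a three-way policy: a blob flagged as outdated always loses; when neither copy is outdated, the smaller blob is discarded, its space is returned to the space list, and the larger survivor is (re)inserted into the index. A tiny self-contained illustration of the selection rule (BlobStub and pick_loser are hypothetical, not PEROBS classes):

    # Stub carrying just the fields the selection rule needs.
    BlobStub = Struct.new(:id, :addr, :length, :outdated) do
      def is_outdated?
        outdated
      end
    end

    # Re-statement of the rule from discard_duplicate_blobs.
    def pick_loser(a, b)
      return a if a.is_outdated?
      return b if b.is_outdated?
      a.length < b.length ? a : b     # neither outdated: drop the smaller
    end

    old_blob = BlobStub.new(42, 0, 100, false)
    new_blob = BlobStub.new(42, 4096, 300, false)
    pick_loser(old_blob, new_blob)    # => old_blob (smaller, gets discarded)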
def open_index_files(abort_on_missing_files = false)
begin
@index.open(abort_on_missing_files)