lib/perobs/FlatFile.rb in perobs-4.2.0 vs lib/perobs/FlatFile.rb in perobs-4.3.0

- old
+ new

@@ -291,34 +291,36 @@
     # @return [String] Raw object data
     def read_obj_by_address(addr, id)
       header = FlatFileBlobHeader.read(@f, addr, id)
       if header.id != id
         PEROBS.log.fatal "Database index corrupted: Index for object " +
-          "#{id} points to object with ID #{header.id}"
+          "#{id} points to object with ID #{header.id} at address #{addr}"
       end

       buf = nil
       begin
         @f.seek(addr + FlatFileBlobHeader::LENGTH)
         buf = @f.read(header.length)
       rescue IOError => e
-        PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
+        PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
+          e.message
       end

       # Uncompress the data if the compression bit is set in the flags byte.
       if header.is_compressed?
         begin
           buf = Zlib.inflate(buf)
         rescue Zlib::BufError, Zlib::DataError
           PEROBS.log.fatal "Corrupted compressed block with ID " +
-            "#{header.id} found."
+            "#{id} found at address #{addr}."
         end
       end

       if checksum(buf) != header.crc
-        PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
+        PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
+          "at address #{addr}"
       end

       buf
     end

@@ -337,11 +339,11 @@
     # Clear alls marks.
     def clear_all_marks
       if @marks
         @marks.clear
       else
-        @marks = IDList.new(@db_dir, 'marks', 8)
+        @marks = IDList.new(@db_dir, 'marks', item_counter)
       end
     end

     # Eliminate all the holes in the file. This is an in-place
     # implementation. No additional space will be needed on the file system.

@@ -450,20 +452,18 @@
       # Recreate the index file and create an empty space list.
       regenerate_index_and_spaces
     end

-    # Check (and repair) the FlatFile.
-    # @param repair [Boolean] True if errors should be fixed.
+    # Check the FlatFile.
     # @return [Integer] Number of errors found
-    def check(repair = false)
+    def check()
       errors = 0
       return errors unless @f

       t = Time.now
-      PEROBS.log.info "Checking FlatFile database" +
-        "#{repair ? ' in repair mode' : ''}..."
+      PEROBS.log.info "Checking FlatFile database..."

       # First check the database blob file. Each entry should be readable and
       # correct and all IDs must be unique. We use a shadow index to keep
       # track of the already found IDs.
       new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,

@@ -481,11 +481,10 @@
           @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
           buf = @f.read(header.length)
           if buf.bytesize != header.length
             PEROBS.log.error "Premature end of file in blob with ID " +
               "#{header.id}."
-            discard_damaged_blob(header) if repair
             errors += 1
             next
           end

           # Uncompress the data if the compression bit is set in the mark

@@ -494,20 +493,18 @@
             begin
               buf = Zlib.inflate(buf)
             rescue Zlib::BufError, Zlib::DataError
               PEROBS.log.error "Corrupted compressed block with ID " +
                 "#{header.id} found."
-              discard_damaged_blob(header) if repair
               errors += 1
               next
             end
           end

           if header.crc && checksum(buf) != header.crc
             PEROBS.log.error "Checksum failure while checking blob " +
               "with ID #{header.id}"
-            discard_damaged_blob(header) if repair
             errors += 1
             next
           end
         rescue IOError => e
           PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +

@@ -519,26 +516,10 @@
           PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
             "Addresses: #{previous_address}, #{header.addr}"
           errors += 1
           previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                     header.id)
-          if repair
-            # We have two blobs with the same ID and we must discard one of
-            # them.
-            if header.is_outdated?
-              discard_damaged_blob(header)
-            elsif previous_header.is_outdated?
-              discard_damaged_blob(previous_header)
-            else
-              PEROBS.log.error "None of the blobs with same ID have " +
-                "the outdated flag set. Deleting the smaller one."
-              errors += 1
-              discard_damaged_blob(header.length < previous_header.length ?
-                                   header : previous_header)
-            end
-            next
-          end
         else
           # ID is unique so far. Add it to the shadow index.
           new_index.insert(header.id, header.addr)
         end
       end

@@ -551,53 +532,46 @@
       if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
         # The blob file ends with a corrupted blob header.
         PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
           'bytes found at the end of FlatFile.'
         corrupted_blobs += 1
-        if repair
-          PEROBS.log.error "Truncating FlatFile to " +
-            "#{end_of_last_healthy_blob} bytes by discarding " +
-            "#{@f.size - end_of_last_healthy_blob} bytes"
-          @f.truncate(end_of_last_healthy_blob)
-        end
       end
       errors += corrupted_blobs

       # We no longer need the new index.
       new_index.close
       new_index.erase

-      if repair && corrupted_blobs > 0
-        erase_index_files
-        defragmentize
-        regenerate_index_and_spaces
-      elsif corrupted_blobs == 0
+      if corrupted_blobs == 0
         # Now we check the index data. It must be correct and the entries must
         # match the blob file. All entries in the index must be in the blob file
         # and vise versa.
         begin
           index_ok = @index.check do |id, address|
-            has_id_at?(id, address)
+            unless has_id_at?(id, address)
+              PEROBS.log.error "Index contains an entry for " +
+                "ID #{id} at address #{address} that is not in FlatFile"
+              false
+            else
+              true
+            end
           end
           x_check_errs = 0
           space_check_ok = true
           unless index_ok && (space_check_ok = @space_list.check(self)) &&
                  (x_check_errs = cross_check_entries) == 0
             errors += 1 unless index_ok && space_check_ok
             errors += x_check_errs
-            regenerate_index_and_spaces if repair
           end
         rescue PEROBS::FatalError
           errors += 1
-          regenerate_index_and_spaces if repair
         end
       end

-      sync if repair
-      PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
+      PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
         "#{errors} errors found."

       errors
     end

@@ -685,21 +659,11 @@
             errors += 1
             previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                       header.id)
             # We have two blobs with the same ID and we must discard one of
             # them.
-            if header.is_outdated?
-              discard_damaged_blob(header)
-            elsif previous_header.is_outdated?
-              discard_damaged_blob(previous_header)
-            else
-              PEROBS.log.error "None of the blobs with same ID have " +
-                "the outdated flag set. Deleting the smaller one."
-              errors += 1
-              discard_damaged_blob(header.length < previous_header.length ?
-                                   header : previous_header)
-            end
+            discard_duplicate_blobs(header, previous_header)
           else
             # ID is unique so far. Add it to the shadow index.
             @index.insert(header.id, header.addr)
           end

@@ -923,9 +887,26 @@
     def discard_damaged_blob(header)
       PEROBS.log.error "Discarding corrupted data blob for ID #{header.id} " +
         "at offset #{header.addr}"
       header.clear_flags
+    end
+
+    def discard_duplicate_blobs(header, previous_header)
+      if header.is_outdated?
+        discard_damaged_blob(header)
+      elsif previous_header.is_outdated?
+        discard_damaged_blob(previous_header)
+      else
+        smaller, larger = header.length < previous_header.length ?
+          [ header, previous_header ] : [ previous_header, header ]
+        PEROBS.log.error "None of the blobs with same ID have " +
+          "the outdated flag set. Deleting the smaller one " +
+          "at address #{smaller.addr}"
+        discard_damaged_blob(smaller)
+        @space_list.add_space(smaller.addr, smaller.length)
+        @index.insert(larger.id, larger.addr)
+      end
     end

     def open_index_files(abort_on_missing_files = false)
       begin
         @index.open(abort_on_missing_files)
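The most visible change in this diff is the signature of FlatFile#check: the repair parameter and every "... if repair" branch are removed, so in 4.3.0 check() is a read-only consistency scan that reports errors without mutating the file. A minimal caller-side sketch of the difference, assuming a FlatFile instance named flat_file (the 4.2.0 call reflects the removed signature above; the diff does not show how 4.3.0 expects repairs to be triggered instead):

    # perobs 4.2.0: a single call could both check and, optionally, repair.
    #   errors = flat_file.check(true)

    # perobs 4.3.0: check takes no arguments and never modifies the file.
    errors = flat_file.check
    warn "FlatFile has #{errors} error(s)" if errors > 0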
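The duplicate-ID handling that check/repair previously inlined is factored into the new discard_duplicate_blobs helper, which additionally returns the discarded blob's bytes to the space list and re-inserts the surviving blob into the index. The selection policy itself is unchanged: discard whichever blob carries the outdated flag; if neither does, discard the smaller one. A self-contained sketch of that policy, using a stand-in Header struct and a hypothetical pick_victim helper rather than the real FlatFileBlobHeader API:

    # Stand-in for FlatFileBlobHeader; not the real class.
    Header = Struct.new(:id, :addr, :length, :outdated) do
      def is_outdated?
        outdated
      end
    end

    # Return the blob to discard, mirroring discard_duplicate_blobs above.
    def pick_victim(a, b)
      return a if a.is_outdated?
      return b if b.is_outdated?
      # Neither copy is flagged outdated: drop the smaller one.
      a.length < b.length ? a : b
    end

    old_copy = Header.new(42, 1024, 100, true)
    new_copy = Header.new(42, 4096, 120, false)
    pick_victim(old_copy, new_copy).addr   # => 1024, the outdated copy goes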