lib/perobs/FlatFile.rb in perobs-4.2.0 vs lib/perobs/FlatFile.rb in perobs-4.3.0

- old
+ new

@@ -291,34 +291,36 @@
     # @return [String] Raw object data
     def read_obj_by_address(addr, id)
       header = FlatFileBlobHeader.read(@f, addr, id)
       if header.id != id
         PEROBS.log.fatal "Database index corrupted: Index for object " +
-          "#{id} points to object with ID #{header.id}"
+          "#{id} points to object with ID #{header.id} at address #{addr}"
       end

       buf = nil
       begin
         @f.seek(addr + FlatFileBlobHeader::LENGTH)
         buf = @f.read(header.length)
       rescue IOError => e
-        PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
+        PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
+          e.message
       end

       # Uncompress the data if the compression bit is set in the flags byte.
       if header.is_compressed?
         begin
           buf = Zlib.inflate(buf)
         rescue Zlib::BufError, Zlib::DataError
           PEROBS.log.fatal "Corrupted compressed block with ID " +
-            "#{header.id} found."
+            "#{id} found at address #{addr}."
         end
       end

       if checksum(buf) != header.crc
-        PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
+        PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
+          "at address #{addr}"
       end

       buf
     end

@@ -337,11 +339,11 @@
     # Clear alls marks.
     def clear_all_marks
       if @marks
         @marks.clear
       else
-        @marks = IDList.new(@db_dir, 'marks', 8)
+        @marks = IDList.new(@db_dir, 'marks', item_counter)
       end
     end

     # Eliminate all the holes in the file. This is an in-place
     # implementation. No additional space will be needed on the file system.

@@ -450,20 +452,18 @@
       # Recreate the index file and create an empty space list.
       regenerate_index_and_spaces
     end

-    # Check (and repair) the FlatFile.
-    # @param repair [Boolean] True if errors should be fixed.
+    # Check the FlatFile.
     # @return [Integer] Number of errors found
-    def check(repair = false)
+    def check()
       errors = 0
       return errors unless @f

       t = Time.now
-      PEROBS.log.info "Checking FlatFile database" +
-        "#{repair ? ' in repair mode' : ''}..."
+      PEROBS.log.info "Checking FlatFile database..."

       # First check the database blob file. Each entry should be readable and
       # correct and all IDs must be unique. We use a shadow index to keep
       # track of the already found IDs.
       new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,

@@ -481,11 +481,10 @@
           @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
           buf = @f.read(header.length)
           if buf.bytesize != header.length
             PEROBS.log.error "Premature end of file in blob with ID " +
               "#{header.id}."
-            discard_damaged_blob(header) if repair
             errors += 1
             next
           end

           # Uncompress the data if the compression bit is set in the mark

@@ -494,20 +493,18 @@
             begin
               buf = Zlib.inflate(buf)
             rescue Zlib::BufError, Zlib::DataError
               PEROBS.log.error "Corrupted compressed block with ID " +
                 "#{header.id} found."
-              discard_damaged_blob(header) if repair
               errors += 1
               next
             end
           end

           if header.crc && checksum(buf) != header.crc
             PEROBS.log.error "Checksum failure while checking blob " +
               "with ID #{header.id}"
-            discard_damaged_blob(header) if repair
             errors += 1
             next
           end
         rescue IOError => e
           PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +

@@ -519,26 +516,10 @@
           PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
             "Addresses: #{previous_address}, #{header.addr}"
           errors += 1
           previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                     header.id)
-          if repair
-            # We have two blobs with the same ID and we must discard one of
-            # them.
-            if header.is_outdated?
-              discard_damaged_blob(header)
-            elsif previous_header.is_outdated?
-              discard_damaged_blob(previous_header)
-            else
-              PEROBS.log.error "None of the blobs with same ID have " +
-                "the outdated flag set. Deleting the smaller one."
-              errors += 1
-              discard_damaged_blob(header.length < previous_header.length ?
-                                   header : previous_header)
-            end
-            next
-          end
         else
           # ID is unique so far. Add it to the shadow index.
           new_index.insert(header.id, header.addr)
         end
       end

@@ -551,53 +532,46 @@
       if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
         # The blob file ends with a corrupted blob header.
         PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
           'bytes found at the end of FlatFile.'
         corrupted_blobs += 1
-        if repair
-          PEROBS.log.error "Truncating FlatFile to " +
-            "#{end_of_last_healthy_blob} bytes by discarding " +
-            "#{@f.size - end_of_last_healthy_blob} bytes"
-          @f.truncate(end_of_last_healthy_blob)
-        end
       end
       errors += corrupted_blobs

       # We no longer need the new index.
       new_index.close
       new_index.erase

-      if repair && corrupted_blobs > 0
-        erase_index_files
-        defragmentize
-        regenerate_index_and_spaces
-      elsif corrupted_blobs == 0
+      if corrupted_blobs == 0
         # Now we check the index data. It must be correct and the entries must
         # match the blob file. All entries in the index must be in the blob file
         # and vise versa.
         begin
           index_ok = @index.check do |id, address|
-            has_id_at?(id, address)
+            unless has_id_at?(id, address)
+              PEROBS.log.error "Index contains an entry for " +
+                "ID #{id} at address #{address} that is not in FlatFile"
+              false
+            else
+              true
+            end
           end
           x_check_errs = 0
           space_check_ok = true
           unless index_ok && (space_check_ok = @space_list.check(self)) &&
                  (x_check_errs = cross_check_entries) == 0
             errors += 1 unless index_ok && space_check_ok
             errors += x_check_errs
-            regenerate_index_and_spaces if repair
           end
         rescue PEROBS::FatalError
           errors += 1
-          regenerate_index_and_spaces if repair
         end
       end

-      sync if repair
-      PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
+      PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
         "#{errors} errors found."

       errors
     end

@@ -685,21 +659,11 @@
             errors += 1
             previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                       header.id)
             # We have two blobs with the same ID and we must discard one of
             # them.
-            if header.is_outdated?
-              discard_damaged_blob(header)
-            elsif previous_header.is_outdated?
-              discard_damaged_blob(previous_header)
-            else
-              PEROBS.log.error "None of the blobs with same ID have " +
-                "the outdated flag set. Deleting the smaller one."
-              errors += 1
-              discard_damaged_blob(header.length < previous_header.length ?
-                                   header : previous_header)
-            end
+            discard_duplicate_blobs(header, previous_header)
           else
             # ID is unique so far. Add it to the shadow index.
             @index.insert(header.id, header.addr)
           end

@@ -923,9 +887,26 @@
     def discard_damaged_blob(header)
       PEROBS.log.error "Discarding corrupted data blob for ID #{header.id} " +
         "at offset #{header.addr}"
       header.clear_flags
+    end
+
+    def discard_duplicate_blobs(header, previous_header)
+      if header.is_outdated?
+        discard_damaged_blob(header)
+      elsif previous_header.is_outdated?
+        discard_damaged_blob(previous_header)
+      else
+        smaller, larger = header.length < previous_header.length ?
+          [ header, previous_header ] : [ previous_header, header ]
+        PEROBS.log.error "None of the blobs with same ID have " +
+          "the outdated flag set. Deleting the smaller one " +
+          "at address #{smaller.addr}"
+        discard_damaged_blob(smaller)
+        @space_list.add_space(smaller.addr, smaller.length)
+        @index.insert(larger.id, larger.addr)
+      end
     end

     def open_index_files(abort_on_missing_files = false)
       begin
         @index.open(abort_on_missing_files)
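The most visible change in this diff is the signature of FlatFile#check: the repair parameter and every "... if repair" branch are removed, so in 4.3.0 check() is a read-only consistency scan that reports errors without mutating the file. A minimal caller-side sketch of the difference, assuming a FlatFile instance named flat_file (the 4.2.0 call reflects the removed signature above; the diff does not show how 4.3.0 expects repairs to be triggered instead):

    # perobs 4.2.0: a single call could both check and, optionally, repair.
    #   errors = flat_file.check(true)

    # perobs 4.3.0: check takes no arguments and never modifies the file.
    errors = flat_file.check
    warn "FlatFile has #{errors} error(s)" if errors > 0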
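The duplicate-ID handling that check/repair previously inlined is factored into the new discard_duplicate_blobs helper, which additionally returns the discarded blob's bytes to the space list and re-inserts the surviving blob into the index. The selection policy itself is unchanged: discard whichever blob carries the outdated flag; if neither does, discard the smaller one. A self-contained sketch of that policy, using a stand-in Header struct and a hypothetical pick_victim helper rather than the real FlatFileBlobHeader API:

    # Stand-in for FlatFileBlobHeader; not the real class.
    Header = Struct.new(:id, :addr, :length, :outdated) do
      def is_outdated?
        outdated
      end
    end

    # Return the blob to discard, mirroring discard_duplicate_blobs above.
    def pick_victim(a, b)
      return a if a.is_outdated?
      return b if b.is_outdated?
      # Neither copy is flagged outdated: drop the smaller one.
      a.length < b.length ? a : b
    end

    old_copy = Header.new(42, 1024, 100, true)
    new_copy = Header.new(42, 4096, 120, false)
    pick_victim(old_copy, new_copy).addr   # => 1024, the outdated copy goes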