lib/perobs/FlatFileBlobHeader.rb in perobs-4.0.0 vs lib/perobs/FlatFileBlobHeader.rb in perobs-4.1.0

- old
+ new

@@ -46,16 +46,17 @@ class FlatFileBlobHeader # The 'pack()' format of the header. FORMAT = 'CQQL' # The length of the header in bytes. - LENGTH = 21 + LENGTH = 25 VALID_FLAG_BIT = 0 COMPRESSED_FLAG_BIT = 2 OUTDATED_FLAG_BIT = 3 attr_reader :addr, :flags, :length, :id, :crc + attr_accessor :corruption_start # Create a new FlatFileBlobHeader with the given flags, length, id and crc. # @param file [File] the FlatFile that contains the header # @param addr [Integer] the offset address of the header in the file # @param flags [Integer] 8 bit number, see above @@ -67,81 +68,135 @@ @addr = addr @flags = flags @length = length @id = id @crc = crc + # This is only set if the header is preceded by a corrupted blob. + @corruption_start = nil end # Read the header from the given File. # @param file [File] - # @return FlatFileBlobHeader - def FlatFileBlobHeader::read(file) - begin - addr = file.pos - buf = file.read(LENGTH) - rescue IOError => e - PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}" - return nil - end + # @param addr [Integer] address in the file to start reading. If no + # address is specified use the current position in the file. + # @param id [Integer] Optional ID that the header should have. If no id is + # specified there is no check against the actual ID done. + # @return FlatFileBlobHeader or nil if there are no more blobs to read in + # the file. + def FlatFileBlobHeader::read(file, addr = nil, id = nil) + # If an address was specified we expect the read to always succeed. If + # no address is specified and we can't read the header we generate an + # error message but it is not fatal. + errors_are_fatal = !addr.nil? 
- return nil unless buf + mode = :searching_next_header + addr = file.pos unless addr + buf = nil + corruption_start = nil - if buf.length != LENGTH - PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " + - "bytes of #{LENGTH} could be read" - return nil - end + loop do + buf_with_crc = nil + begin + file.seek(addr) + buf_with_crc = file.read(LENGTH) + rescue IOError => e + if errors_are_fatal + PEROBS.log.fatal "Cannot read blob header in flat file DB at " + + "address #{addr}: #{e.message}" + else + PEROBS.log.error "Cannot read blob header in flat file DB: " + + e.message + return nil + end + end - FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT)) - end + # Did we read anything? + if buf_with_crc.nil? + if errors_are_fatal + PEROBS.log.fatal "Cannot read blob header " + + "#{id ? "for ID #{id} " : ''}at address #{addr}" + else + # We have reached the end of the file. + return nil + end + end - # Read the header from the given File. - # @param file [File] - # @param addr [Integer] address in the file to start reading - # @param id [Integer] Optional ID that the header should have - # @return FlatFileBlobHeader - def FlatFileBlobHeader::read_at(file, addr, id = nil) - buf = nil - begin - file.seek(addr) - buf = file.read(LENGTH) - rescue IOError => e - PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}" + # Did we get the full header? + if buf_with_crc.length != LENGTH + PEROBS.log.error "Incomplete FlatFileBlobHeader: Only " + + "#{buf_with_crc.length} " + + "bytes of #{LENGTH} could be read " + "#{id ? "for ID #{id} " : ''}at address #{addr}" + return nil + end + + # Check the CRC of the header + buf = buf_with_crc[0..-5] + crc = buf_with_crc[-4..-1].unpack('L')[0] + + if (read_crc = Zlib.crc32(buf, 0)) == crc + # We have found a valid header. + if corruption_start + PEROBS.log.error "FlatFile corruption ends at #{addr}. " + + "#{addr - corruption_start} bytes skipped. Some data may " + + "not be recoverable." 
+ end + break + else + if errors_are_fatal + PEROBS.log.fatal "FlatFile Header CRC mismatch at address " + + "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " + + "#{'%08x' % crc}." + else + if corruption_start.nil? + PEROBS.log.error "FlatFile corruption found. The FlatFile " + + "Header CRC mismatch at address #{addr}. Header CRC is " + + "#{'%08x' % read_crc} but should be #{'%08x' % crc}. Trying " + + "to find the next header." + corruption_start = addr + end + # The blob file is corrupted. There is no valid header at the + # current position in the file. We now try to find the next valid + # header by iterating over the remainder of the file advancing one + # byte with each step until we hit the end of the file or find the + # next valid header. + addr += 1 + end + end end - if buf.nil? || buf.length != LENGTH - PEROBS.log.fatal "Cannot read blob header " + - "#{id ? "for ID #{id} " : ''}at address " + - "#{addr}" - end + header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT)) + if corruption_start + header.corruption_start = corruption_start + end + if id && header.id != id PEROBS.log.fatal "Mismatch between FlatFile index and blob file " + "found. FlatFile has entry with ID #{header.id} at address " + "#{addr}. Index has ID #{id} for this address." end return header end # Write the header to a given File. - # @param file [File] def write begin + buf = [ @flags, @length, @id, @crc].pack(FORMAT) + crc = Zlib.crc32(buf, 0) @file.seek(@addr) - @file.write([ @flags, @length, @id, @crc].pack(FORMAT)) + @file.write(buf + [ crc ].pack('L')) rescue IOError => e PEROBS.log.fatal "Cannot write blob header into flat file DB: " + e.message end end # Reset all the flag bits to 0. This marks the blob as invalid. - # @param file [File] The file handle of the blob file. - # @param addr [Integer] The address of the header def clear_flags @flags = 0 - write_flags + write end # Return true if the header is for a non-empty blob. def is_valid? 
bit_set?(VALID_FLAG_BIT) @@ -154,29 +209,18 @@ # Set the outdated bit. The entry will be invalid as soon as the current # transaction has been completed. def set_outdated_flag set_flag(OUTDATED_FLAG_BIT) - write_flags + write end # Return true if the blob contains outdated data. def is_outdated? bit_set?(OUTDATED_FLAG_BIT) end private - - def write_flags - begin - @file.seek(@addr) - @file.write([ @flags ].pack('C')) - @file.flush - rescue IOError => e - PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " + - "failed: #{e.message}" - end - end def bit_set?(n) mask = 1 << n @flags & mask == mask end