lib/perobs/FlatFileBlobHeader.rb in perobs-4.0.0 vs lib/perobs/FlatFileBlobHeader.rb in perobs-4.1.0
- old
+ new
@@ -46,16 +46,17 @@
class FlatFileBlobHeader
# The 'pack()' format of the header fields flags, length, id and crc (in
# that order): one 8 bit unsigned integer, two 64 bit unsigned integers and
# one 32 bit unsigned integer, i. e. 1 + 8 + 8 + 4 = 21 bytes.
FORMAT = 'CQQL'
# The length of the header in bytes as it is stored in the file. The new
# value covers the 21 packed bytes plus the 4 byte CRC32 checksum that
# write() appends to the packed fields.
- LENGTH = 21
+ LENGTH = 25
# Bit positions within the flags byte.
# Tested by is_valid?.
VALID_FLAG_BIT = 0
COMPRESSED_FLAG_BIT = 2
# Set by set_outdated_flag and tested by is_outdated?.
OUTDATED_FLAG_BIT = 3
attr_reader :addr, :flags, :length, :id, :crc
# Address at which read() found the first header CRC mismatch before it
# located this header; nil if no corruption preceded the header.
+ attr_accessor :corruption_start
# Create a new FlatFileBlobHeader with the given flags, length, id and crc.
# @param file [File] the FlatFile that contains the header
# @param addr [Integer] the offset address of the header in the file
# @param flags [Integer] 8 bit number, see above
@@ -67,81 +68,135 @@
@addr = addr
@flags = flags
@length = length
@id = id
@crc = crc
+ # This is only set if the header is preceded by a corrupted blob.
+ @corruption_start = nil
end
# Read the header from the given File.
# @param file [File]
- # @return FlatFileBlobHeader
- def FlatFileBlobHeader::read(file)
- begin
- addr = file.pos
- buf = file.read(LENGTH)
- rescue IOError => e
- PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}"
- return nil
- end
+ # @param addr [Integer] address in the file to start reading. If no
+ # address is specified use the current position in the file.
+ # @param id [Integer] Optional ID that the header should have. If no id is
+ # specified there is no check against the actual ID done.
+ # @return FlatFileBlobHeader or nil if there are no more blobs to read in
+ # the file or only an incomplete header could be read.
+ #
+ # The stored header consists of the fields packed according to FORMAT
+ # followed by a 4 byte CRC32 of those packed bytes. If no address was
+ # given and the CRC does not match, the file is scanned byte by byte for
+ # the next header with a matching CRC. The returned header then has
+ # corruption_start set to the address of the first mismatch.
+ def FlatFileBlobHeader::read(file, addr = nil, id = nil)
+ # If an address was specified we expect the read to always succeed. If
+ # no address is specified and we can't read the header we generate an
+ # error message but it is not fatal.
+ errors_are_fatal = !addr.nil?
- return nil unless buf
+ addr = file.pos unless addr
+ buf = nil
+ corruption_start = nil
- if buf.length != LENGTH
- PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " +
- "bytes of #{LENGTH} could be read"
- return nil
- end
+ loop do
+ buf_with_crc = nil
+ begin
+ file.seek(addr)
+ buf_with_crc = file.read(LENGTH)
+ rescue IOError => e
+ if errors_are_fatal
+ PEROBS.log.fatal "Cannot read blob header in flat file DB at " +
+ "address #{addr}: #{e.message}"
+ else
+ PEROBS.log.error "Cannot read blob header in flat file DB: " +
+ e.message
+ return nil
+ end
+ end
- FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
- end
+ # Did we read anything?
+ # NOTE(review): the fatal branch does not return explicitly; the code
+ # below presumably relies on PEROBS.log.fatal not returning - confirm.
+ if buf_with_crc.nil?
+ if errors_are_fatal
+ PEROBS.log.fatal "Cannot read blob header " +
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
+ else
+ # We have reached the end of the file.
+ return nil
+ end
+ end
- # Read the header from the given File.
- # @param file [File]
- # @param addr [Integer] address in the file to start reading
- # @param id [Integer] Optional ID that the header should have
- # @return FlatFileBlobHeader
- def FlatFileBlobHeader::read_at(file, addr, id = nil)
- buf = nil
- begin
- file.seek(addr)
- buf = file.read(LENGTH)
- rescue IOError => e
- PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
+ # Did we get the full header?
+ if buf_with_crc.length != LENGTH
+ # All four string fragments must be joined with '+'. Without the
+ # operator after the third fragment the ID and address part would be
+ # a separate, discarded expression and never reach the log.
+ PEROBS.log.error "Incomplete FlatFileBlobHeader: Only " +
+ "#{buf_with_crc.length} " +
+ "bytes of #{LENGTH} could be read " +
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
+ return nil
+ end
+
+ # Check the CRC of the header. The last 4 bytes hold the stored CRC, the
+ # bytes before them are the packed header fields.
+ buf = buf_with_crc[0..-5]
+ crc = buf_with_crc[-4..-1].unpack('L')[0]
+
+ if (read_crc = Zlib.crc32(buf, 0)) == crc
+ # We have found a valid header.
+ if corruption_start
+ PEROBS.log.error "FlatFile corruption ends at #{addr}. " +
+ "#{addr - corruption_start} bytes skipped. Some data may " +
+ "not be recoverable."
+ end
+ break
+ else
+ if errors_are_fatal
+ PEROBS.log.fatal "FlatFile Header CRC mismatch at address " +
+ "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " +
+ "#{'%08x' % crc}."
+ else
+ if corruption_start.nil?
+ PEROBS.log.error "FlatFile corruption found. The FlatFile " +
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}. Trying " +
+ "to find the next header."
+ corruption_start = addr
+ end
+ # The blob file is corrupted. There is no valid header at the
+ # current position in the file. We now try to find the next valid
+ # header by iterating over the remainder of the file advancing one
+ # byte with each step until we hit the end of the file or find the
+ # next valid header.
+ addr += 1
+ end
+ end
end
- if buf.nil? || buf.length != LENGTH
- PEROBS.log.fatal "Cannot read blob header " +
- "#{id ? "for ID #{id} " : ''}at address " +
- "#{addr}"
- end
+
header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
+ if corruption_start
+ header.corruption_start = corruption_start
+ end
+
if id && header.id != id
PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
"found. FlatFile has entry with ID #{header.id} at address " +
"#{addr}. Index has ID #{id} for this address."
end
return header
end
# Write the header to the file it was created for. The flags, length, id
# and crc fields are packed according to FORMAT, a CRC32 checksum of those
# packed bytes is appended, and the result is written at the header address
# (@addr).
# NOTE(review): the removed write_flags helper called @file.flush after
# writing; this method does not flush. Confirm that callers of
# clear_flags and set_outdated_flag do not depend on the flush.
- # @param file [File]
def write
begin
+ buf = [ @flags, @length, @id, @crc].pack(FORMAT)
# Checksum over the packed header fields only (seed 0).
+ crc = Zlib.crc32(buf, 0)
@file.seek(@addr)
- @file.write([ @flags, @length, @id, @crc].pack(FORMAT))
# The checksum is stored as a 32 bit unsigned integer after the fields.
+ @file.write(buf + [ crc ].pack('L'))
rescue IOError => e
PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
e.message
end
end
# Reset all flag bits to 0 and store the header in the file. This marks the
# blob as invalid.
- # @param file [File] The file handle of the blob file.
- # @param addr [Integer] The address of the header
def clear_flags
@flags = 0
# The new version rewrites the whole header instead of only the flags
# byte; write() recomputes the header checksum, which covers the flags.
- write_flags
+ write
end
# Return true if the header is for a non-empty blob.
def is_valid?
bit_set?(VALID_FLAG_BIT)
@@ -154,29 +209,18 @@
# Set the outdated bit and store the header in the file. The entry will be
# invalid as soon as the current transaction has been completed.
def set_outdated_flag
# set_flag is defined outside of this excerpt.
set_flag(OUTDATED_FLAG_BIT)
# The new version rewrites the whole header instead of only the flags
# byte; write() recomputes the header checksum, which covers the flags.
- write_flags
+ write
end
# Report whether the outdated bit is set in the flags of this header.
# @return [Boolean] true if the blob contains outdated data
def is_outdated?
  flag_bit = OUTDATED_FLAG_BIT
  bit_set?(flag_bit)
end
private
-
# Helper of the old version, removed in the new one. It wrote only the one
# byte flags field at the header address and flushed the file afterwards.
# Its former callers clear_flags and set_outdated_flag call write instead.
- def write_flags
- begin
- @file.seek(@addr)
- @file.write([ @flags ].pack('C'))
- @file.flush
- rescue IOError => e
- PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " +
- "failed: #{e.message}"
- end
- end
# Check whether bit number n is set in the flags of this header.
# @param n [Integer] position of the bit, 0 being the least significant
# @return [Boolean] true if the bit is set
def bit_set?(n)
  bit = 1 << n
  (@flags & bit) == bit
end