module Amp module Repositories ## # This module adds verification to Mercurial repositories. # # The main public method provided is #verify. The rest are support methods that # will keep to themselves. # # This is directly ported from verify.py from the Mercurial source. This is for # the simple reason that, because we are re-implementing Mercurial, we should # rely on their verification over our own. If we discover bugs in their # verification, we'll patch them and send in the patches to selenic, but for now, we'll # trust that theirs is on the money. module Verification ## # Runs a verification sweep on the repository. # # @return [VerificationResult] the results of the verification, which # includes error messages, warning counts, and so on. def verify result = Verifier.new(self).verify end ## # Handles all logic for verifying a single repository and collecting the results. # # Public interface: initialize with a repository and run #verify. class Verifier attr_accessor :repository alias_method :repo, :repository attr_reader :changelog, :manifest ## # Creates a new Verifier. The Verifier can verify a Mercurial repository. # # @param [Repository] repo the repository this verifier will examine def initialize(repo) @repository = repo @result = VerificationResult.new(0, 0, 0, 0, 0) @bad_revisions = {} @changelog = repo.changelog @manifest = repo.manifest end ## # Runs a verification sweep on the repository this verifier is handling. # # @return [VerificationResult] the results of the verification, which # includes error messages, warning counts, and so on. def verify # Maps manifest node IDs to the link revision to which they belong manifest_linkrevs = Hash.new {|h,k| h[k] = []} # Maps filenames to a list of link revisions (global revision #s) in which # that file was changed file_linkrevs = Hash.new {|h, k| h[k] = []} # file_node_ids stores a hash for each file. The hash stored maps that file's node IDs # (the node stored in the file log itself) to the global "link revision index" - the # revision index in the changelog (and the one the user always sees) file_node_ids = Hash.new {|h, k| h[k] = {}} verify_changelog(manifest_linkrevs, file_linkrevs) verify_manifest(manifest_linkrevs, file_node_ids) verify_crosscheck(manifest_linkrevs, file_linkrevs, file_node_ids) UI.status("checking files") store_files = verify_store verify_files(file_linkrevs, file_node_ids, store_files) @result end ## # Verifies the changelog. Updates acceptable file_linkrevs and manifest_linkrevs # along the way, since the changelog knows which files have been changed when, # and which manifest entries go with which changelog entries. # # @param [Hash] manifest_linkrevs the mapping between manifest node IDs and changelog # revision numbers # @param [Hash] file_linkrevs a mapping between filenames and a list of changelog # revision numbers where the file was modified, added, or deleted. def verify_changelog(manifest_linkrevs, file_linkrevs) Amp::UI.status("checking changelog...") check_revlog(@changelog, "changelog") seen = {} # can't use the nice #each because it assumes functioning changelog and whatnot @changelog.size.times do |idx| node = @changelog.node_id_for_index idx check_entry(@changelog, idx, node, seen, [idx], "changelog") begin changelog_entry = @changelog.read(node) manifest_linkrevs[changelog_entry.first] << idx changelog_entry[3].each {|f| file_linkrevs[f] << idx} rescue Exception => err exception(idx, "unpacking changeset #{node.short_hex}:", err, "changelog") end end @result.changesets = @changelog.size end ## # Verifies the manifest and its nodes. Also updates file_node_ids to store the # node ID of files at given points in the manifest's history. # # @param [Hash] manifest_linkrevs the mapping between manifest node IDs and changelog # revision numbers # @param [Hash] file_node_ids maps filenames to a mapping from file node IDs to global # link revisions. def verify_manifest(manifest_linkrevs, file_node_ids) Amp::UI.status("checking manifests...") check_revlog(@manifest, "manifest") seen = {} @manifest.size.times do |idx| node = @manifest.node_id_for_index idx link_rev = check_entry(@manifest, idx, node, seen, manifest_linkrevs[node], "manifest") manifest_linkrevs.delete node begin @manifest.read_delta(node).each do |filename, file_node| if filename.empty? error(link_rev, "file without name in manifest") elsif filename != "/dev/null" file_node_map = file_node_ids[filename] file_node_map[file_node] ||= idx end end rescue Exception => err exception(idx, "reading manfiest delta #{node.short_hex}", err, "manifest") end end end ## # Crosschecks the changelog agains the manifest and vice-versa. There should be no # remaining unmatched manifest node IDs, nor any files not in file_node_map. # A few other checks, too. # # @param [Hash] manifest_linkrevs the mapping between manifest node IDs and changelog # revision numbers # @param [Hash] file_linkrevs a mapping between filenames and a list of changelog # revision numbers where the file was modified, added, or deleted. # @param [Hash] file_node_ids maps filenames to a mapping from file node IDs to global # link revisions. def verify_crosscheck(manifest_linkrevs, file_linkrevs, file_node_ids) Amp::UI.status("crosschecking files in changesets and manifests") # Check for node IDs found in the changelog, but not the manifest if @manifest.any? # check for any manifest node IDs we found in changesets, but not in the manifest manifest_linkrevs.map {|node, idx| [idx, node]}.sort.each do |idx, node| error(idx, "changeset refers to unknown manifest #{node.short_hex}") end # check for any file node IDs we found in the changeset, but not in the manifest file_linkrevs.sort.each do |file, _| if file_node_ids[file].empty? error(file_linkrevs[file].first, "in changeset but not in manifest", file) end end end # Check for node IDs found in the manifest, but not the changelog. if @changelog.any? file_node_ids.map {|file,_| file}.sort.each do |file| unless file_linkrevs[file] begin filelog = @repository.file(file) link_rev = file_node_ids[file].map {|node| filelog.link_revision_for_index(filelog.revision_index_for_node(node))}.min rescue link_rev = nil end error(link_rev, "in manifest but not in changeset", file) end end end end ## # Verifies the store, and returns a hash with names of files that are OK # # @return [Hash<String => Boolean>] a hash with filenames as keys and "true" or "false" # as values, indicating whether the file exists and is accessible def verify_store store_files = {} @repository.store.datafiles.each do |file, encoded_filename, size| if file.nil? || file.empty? error(nil, "can't decode filename from store: #{encoded_filename}") elsif size > 0 store_files[file] = true end end store_files end ## # Verifies the individual file logs one by one. # # @param [Hash] file_linkrevs a mapping between filenames and a list of changelog # revision numbers where the file was modified, added, or deleted. # @param [Hash] file_node_ids maps filenames to a mapping from file node IDs to global # link revisions. # @param [Hash] store_files a mapping keeping track of which file logs are in the store def verify_files(file_linkrevs, file_node_ids, store_files) files = (file_node_ids.keys + file_linkrevs.keys).uniq.sort @result.files = files.size files.each do |file| link_rev = file_linkrevs[file].first begin file_log = @repository.file(file) rescue Exception => err error(link_rev, "broken revlog! (#{err})", file) next end file_log.files.each do |ff| unless store_files.delete(ff) error(link_rev, "missing revlog!", ff) end end verify_filelog(file, file_log, file_linkrevs, file_node_ids) end end ## # Verifies a single file log. This is a complicated process - we need to cross- # check a lot of data, which is why this has been extracted into its own method. # # @param [String] filename the name of the file we're verifying # @param [FileLog] file_log the file log we're verifying # @param [Hash] file_linkrevs a mapping between filenames and a list of changelog # revision numbers where the file was modified, added, or deleted. # @param [Hash] file_node_ids maps filenames to a mapping from file node IDs to global # link revisions. def verify_filelog(file, file_log, file_linkrevs, file_node_ids) check_revlog(file_log, file) seen = {} file_log.index_size.times do |idx| @result.revisions += 1 node = file_log.node_id_for_index(idx) link_rev = check_entry(file_log, idx, node, seen, file_linkrevs[file], file) # Make sure that one of the manifests referenced the node ID. If not, one of our # manifests is wrong! if file_node_ids[file] if @manifest.any? && !file_node_ids[file][node] error(link_rev, "#{node.short_hex} not found in manifests", file) else file_node_ids[file].delete node end end # Make sure the size of the uncompressed file is correct. begin text = file_log.read node rename_info = file_log.renamed? node if text.size != file_log.uncompressed_size_for_index(idx) if file_log.decompress_revision(node).size != file_log.uncompressed_size_for_index(idx) error(link_rev, "unpacked size is #{text.size}, #{file_log.size(idx)} expected", file) end end rescue Exception => err exception(link_rev, "unpacking #{node.short_hex}", err, file) end # Check if we screwed up renaming a file (like lost the source revlog or something) begin if rename_info && rename_info.any? filelog_src = @repository.file(rename_info.first) if filelog_src.index_size == 0 error(link_rev, "empty or missing copy source revlog "+ "#{rename_info[0]}, #{rename_info[1].short_hex}", file) elsif rename_info[1] == RevlogSupport::Node::NULL_ID warn("#{file}@#{link_rev}: copy source revision is NULL_ID "+ "#{rename_info[0]}:#{rename_info[1].short_hex}", file) else rev = filelog_src.revision_index_for_node(rename_info[1]) end end rescue Exception => err exception(link_rev, "checking rename of #{node.short_hex}", err, file) end end # Final cross-check if file_node_ids[file] && file_node_ids[file].any? file_node_ids[file].map { |node, link_rev| [@manifest.link_revision_for_index(link_rev), node] }.sort.each do |link_rev, node| error(link_rev, "#{node.short_hex} in manifests not found", file) end end end private ## # Checks a revlog for inconsistencies with the main format, such as # having trailing bytes or incorrect formats # # @param [Revlog] log the log we will be verifying # @param [String] name the name of the file this log is stored in def check_revlog(log, name) #p name if log.empty? && (@changelog.any? || @revlog.any?) return error(0, "#{name} is empty or missing") end size_diffs = log.checksize # checksize returns a hash with these keys: index_diff, data_diff if size_diffs[:data_diff] != 0 error(nil, "data size off by #{size_diffs[:data_diff]} bytes", name) end if size_diffs[:index_diff] != 0 error(nil, "index off by #{size_diffs[:index_diff]} bytes", name) end v0 = RevlogSupport::Support::REVLOG_VERSION_0 if log.index.version != v0 warn("#{name} uses revlog format 1. changelog uses format 0.") if @changelog.index.version == v0 elsif log.index.version == v0 warn("#{name} uses revlog format 0. that's really old.") end end ## # Checks a single entry in a revision log for inconsistencies. # # @param [Revlog] log the revision log we're examining # @param [Fixnum] revision the index # of the revision being examined # @param [String] node the node ID of the revision being examined # @param [Hash] seen the list of node IDs we've already seen # @param [Array] ok_link_revisions the acceptable link revisions for the given entry # @param [String] filename the name of the file containing the revlog def check_entry(log, revision, node, seen, ok_link_revisions, filename) link_rev = log.link_revision_for_index log.revision_index_for_node(node) # is the link_revision invalid? if link_rev < 0 || (changelog.any? && ! ok_link_revisions.include?(link_rev)) problem = (link_rev < 0 || link_rev >= changelog.size) ? "nonexistent" : "unexpected" error(nil, "revision #{revision} points to #{problem} changeset #{link_rev}", filename) if ok_link_revisions.any? warn("(expected #{ok_link_revisions.join(" ")})") end link_rev = nil # don't use this link_revision, because it's clearly wrong. end begin log.parents_for_node(node).each do |parent| if !seen[parent] && parent != RevlogSupport::Node::NULL_ID error(link_rev, "unknown parent #{parent.short_hex} of #{node.short_hex}", filename) end end rescue StandardError => e # TODO: do real exception handling exception(link_rev, "error checking parents of #{node.short_hex}: ", e, filename) end if seen[node] error(link_rev, "duplicate revision #{revision} (#{seen[node]})", filename) end seen[node] = revision return link_rev end ## # Produce an error based on an exception. Matches mercurial's. # # @param [Fixnum] revision the link-revision the error is associated with # @param [String, #to_s] message the message to print with the error # @param [Exception] exception the exception that raised this error # @param [String, #to_s] filename (nil) the name of the file with an error. # nil for changelog/manifest def exception(revision, message, exception, filename) if exception.kind_of?(Interrupt) UI.warn("interrupted") raise end error(revision, "#{message} #{exception}\n", filename) end ## # Produce an error that looks like Mercurial's # meh compatibility makes me sad # # @param [Fixnum] revision the link-revision the error is associated with # @param [String, #to_s] message the message to print with the error # @param [String, #to_s] filename (nil) the name of the file with an error. # nil for changelog/manifest def error(revision, message, filename = nil) if revision @bad_revisions[revision] = true else revision = "?" end new_message = "#{revision}: #{message}" new_message = "#{filename}@#{new_message}" if filename UI.say new_message @result.errors += 1 end ## # Adds a warning to the results # # @param [String, #to_s] message the user's warning def warn(message) UI.say "warning: #{message}" @result.warnings += 1 end end ## # Simple struct that handles the results of a verification. class VerificationResult < Struct.new(:warnings, :errors, :revisions, :files, :changesets) def initialize(*args) super(*args) @warnings = 0 @errors = 0 @revisions = 0 @files = 0 @changesets = 0 end end end end end