# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object
require_relative 'encoding_helper'
require 'tempfile'
require "rubygems/package"

module Gitlab
  module Git
    class Repository
      include Gitlab::Git::Popen

      class NoRepository < StandardError; end

      # Default branch in the repository
      attr_accessor :root_ref

      # Full path to repo
      attr_reader :path

      # Directory name of repo
      attr_reader :name

      # Rugged repo object
      attr_reader :rugged

      # 'path' must be the path to a _bare_ git repository, e.g.
      # /path/to/my-repo.git
      def initialize(path)
        @path = path
        @name = path.split("/").last
        @root_ref = discover_default_branch
      end

      # Alias to old method for compatibility
      def raw
        rugged
      end

      def rugged
        @rugged ||= Rugged::Repository.new(path)
      rescue Rugged::RepositoryError, Rugged::OSError
        raise NoRepository.new('no repository for such path')
      end

      # Returns an Array of branch names
      # sorted by name ASC
      def branch_names
        branches.map(&:name)
      end

      # Returns an Array of Branches
      def branches
        rugged.branches.map do |rugged_ref|
          Branch.new(rugged_ref.name, rugged_ref.target)
        end.sort_by(&:name)
      end

      # Returns an Array of tag names
      def tag_names
        rugged.tags.map { |t| t.name }
      end

      # Returns an Array of Tags
      def tags
        rugged.references.each("refs/tags/*").map do |ref|
          message = nil

          if ref.target.is_a?(Rugged::Tag::Annotation) &&
              ref.target.target.is_a?(Rugged::Commit)
            unless ref.target.target.message == ref.target.message
              message = ref.target.message.chomp
            end
          end

          Tag.new(ref.name, ref.target, message)
        end.sort_by(&:name)
      end

      # Returns an Array of branch and tag names
      def ref_names
        branch_names + tag_names
      end

      # Deprecated. Will be removed in 5.2
      def heads
        rugged.references.each("refs/heads/*").map do |head|
          Gitlab::Git::Ref.new(head.name, head.target)
        end.sort_by(&:name)
      end

      def has_commits?
        !empty?
      end

      def empty?
        rugged.empty?
      end
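
      # A minimal usage sketch of the read-only helpers above. The repository
      # path and the returned values are hypothetical examples, not fixtures
      # shipped with this library:
      #
      #   repo = Gitlab::Git::Repository.new('/home/git/repositories/project.git')
      #   repo.branch_names # => ["feature", "master"]
      #   repo.tag_names    # => ["v1.0.0"]
      #   repo.empty?       # => false
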
      def repo_exists?
        !!rugged
      end

      # Discovers the default branch based on the repository's available branches
      #
      # - If no branches are present, returns nil
      # - If one branch is present, returns its name
      # - If two or more branches are present, returns the branch HEAD points
      #   to if available, then "master" if present, otherwise the first branch
      def discover_default_branch
        if branch_names.length == 0
          nil
        elsif branch_names.length == 1
          branch_names.first
        elsif rugged_head && branch_names.include?(Ref.extract_branch_name(rugged_head.name))
          Ref.extract_branch_name(rugged_head.name)
        elsif branch_names.include?("master")
          "master"
        else
          branch_names.first
        end
      end

      def rugged_head
        rugged.head
      rescue Rugged::ReferenceError
        nil
      end

      # Archive Project to .tar.gz
      #
      # Already packed repo archives are stored at
      # app_root/tmp/repositories/project_name/project_name-commit-id.tar.gz
      #
      def archive_repo(ref, storage_path, format = "tar.gz")
        ref ||= root_ref
        commit = Gitlab::Git::Commit.find(self, ref)
        return nil unless commit

        extension = nil
        git_archive_format = nil
        pipe_cmd = nil

        case format
        when "tar.bz2", "tbz", "tbz2", "tb2", "bz2"
          extension = ".tar.bz2"
          pipe_cmd = %W(bzip2)
        when "tar"
          extension = ".tar"
          pipe_cmd = %W(cat)
        when "zip"
          extension = ".zip"
          git_archive_format = "zip"
          pipe_cmd = %W(cat)
        else
          # everything else should fall back to tar.gz
          extension = ".tar.gz"
          git_archive_format = nil
          pipe_cmd = %W(gzip -n)
        end

        # Build file path
        file_name = self.name.gsub("\.git", "") + "-" + commit.id.to_s + extension
        file_path = File.join(storage_path, self.name, file_name)

        # Put files into a directory before archiving
        prefix = File.basename(self.name) + "/"

        # Create the file if it does not exist
        unless File.exists?(file_path)
          FileUtils.mkdir_p File.dirname(file_path)

          # Create the archive in a temp file, to avoid leaving a corrupt
          # archive to be downloaded by the next user if we get interrupted
          # while creating the archive. Note that we do not care about cleaning
          # up the temp file in that scenario, because GitLab cleans up the
          # directory holding the archive files periodically.
          temp_file_path = file_path + ".#{Process.pid}-#{Time.now.to_i}"
          archive_to_file(ref, prefix, temp_file_path, git_archive_format, pipe_cmd)

          # Move the temp file to its persisted location
          FileUtils.move(temp_file_path, file_path)
        end

        file_path
      end

      # Return repo size in megabytes
      def size
        size = popen(%W(du -sk), path).first.strip.to_i
        (size.to_f / 1024).round(2)
      end

      # Returns an array of BlobSnippets for files at the specified +ref+ that
      # contain the +query+ string.
      def search_files(query, ref = nil)
        greps = []
        ref ||= root_ref

        populated_index(ref).each do |entry|
          # Discard submodules
          next if submodule?(entry)

          content = rugged.lookup(entry[:oid]).content
          greps += build_greps(content, query, ref, entry[:path])
        end

        greps
      end
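
      # A usage sketch for the archive and search helpers above. The storage
      # path, query and return values are hypothetical:
      #
      #   repo.size # => 12.34 (megabytes, as reported by `du`)
      #   repo.archive_repo('master', '/tmp/repositories')
      #   # => path to the cached tarball under '/tmp/repositories'
      #   repo.search_files('TODO', 'master') # => [Gitlab::Git::BlobSnippet, ...]
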
      # Use the Rugged Walker API to build an array of commits.
      #
      # Usage.
      #   repo.log(
      #     ref: 'master',
      #     path: 'app/models',
      #     limit: 10,
      #     offset: 5,
      #   )
      #
      def log(options)
        default_options = {
          limit: 10,
          offset: 0,
          path: nil,
          ref: root_ref,
          follow: false,
          skip_merges: false
        }

        options = default_options.merge(options)
        options[:limit] ||= 0
        options[:offset] ||= 0
        actual_ref = options[:ref] || root_ref
        sha = sha_from_ref(actual_ref)

        build_log(sha, options)
      rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
        # Return an empty array if the ref wasn't found
        []
      end

      def sha_from_ref(ref)
        sha = rugged.rev_parse_oid(ref)
        object = rugged.lookup(sha)

        if object.kind_of?(Rugged::Commit)
          sha
        elsif object.respond_to?(:target)
          sha_from_ref(object.target.oid)
        end
      end

      # Return a collection of Rugged::Commits between the two SHA arguments.
      #
      def commits_between(from, to)
        walker = Rugged::Walker.new(rugged)
        walker.push(to)
        walker.hide(from)
        commits = walker.to_a
        walker.reset

        commits.reverse
      end

      # Returns the SHA of the most recent common ancestor of +from+ and +to+
      def merge_base_commit(from, to)
        rugged.merge_base(from, to)
      end

      # Return an array of Diff objects that represent the diff
      # between +from+ and +to+.
      def diff(from, to, *paths)
        rugged.diff(from, to, paths: paths).patches.map do |p|
          Gitlab::Git::Diff.new(p)
        end
      end

      # Return the diff between +from+ and +to+ in a single patch string.
      def diff_text(from, to, *paths)
        # NOTE: It would be simpler to use the Rugged::Diff#patch method, but
        # that formats the diff text differently than Rugged::Patch#to_s for
        # changes to binary files.
        rugged.diff(from, to, paths: paths).patches.map do |p|
          p.to_s
        end.join("\n")
      end
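
      # A usage sketch for the log and diff helpers above; refs, SHAs and
      # paths are hypothetical placeholders:
      #
      #   repo.log(ref: 'master', path: 'app/models', limit: 5)
      #   repo.commits_between('6d394385cf56', '570e7b2abdd8')
      #   repo.merge_base_commit('6d394385cf56', '570e7b2abdd8')
      #   repo.diff('master~2', 'master', 'README.md') # => [Gitlab::Git::Diff, ...]
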
      # Returns commits collection
      #
      # Ex.
      #   repo.find_commits(
      #     ref: 'master',
      #     max_count: 10,
      #     skip: 5,
      #     order: :date
      #   )
      #
      # +options+ is a Hash of optional arguments to git
      #   :ref is the ref from which to begin (SHA1 or name)
      #   :contains is the commit contained by the refs from which to begin (SHA1 or name)
      #   :max_count is the maximum number of commits to fetch
      #   :skip is the number of commits to skip
      #   :order is the commits order and allowed value is :date(default) or :topo
      #
      def find_commits(options = {})
        actual_options = options.dup

        allowed_options = [:ref, :max_count, :skip, :contains, :order]

        actual_options.keep_if do |key|
          allowed_options.include?(key)
        end

        default_options = { skip: 0 }
        actual_options = default_options.merge(actual_options)

        walker = Rugged::Walker.new(rugged)

        if actual_options[:ref]
          walker.push(rugged.rev_parse_oid(actual_options[:ref]))
        elsif actual_options[:contains]
          branches_contains(actual_options[:contains]).each do |branch|
            walker.push(branch.target_id)
          end
        else
          rugged.references.each("refs/heads/*") do |ref|
            walker.push(ref.target_id)
          end
        end

        if actual_options[:order] == :topo
          walker.sorting(Rugged::SORT_TOPO)
        else
          walker.sorting(Rugged::SORT_DATE)
        end

        commits = []
        offset = actual_options[:skip]
        limit = actual_options[:max_count]
        walker.each(offset: offset, limit: limit) do |commit|
          gitlab_commit = Gitlab::Git::Commit.decorate(commit)
          commits.push(gitlab_commit)
        end

        walker.reset

        commits
      rescue Rugged::OdbError
        []
      end

      # Returns a branch name collection containing the given commit (SHA1 or
      # name)
      #
      # Ex.
      #   repo.branch_names_contains('master')
      #
      def branch_names_contains(commit)
        branches_contains(commit).map { |c| c.name }
      end

      # Returns a branch collection containing the given commit (SHA1 or name)
      #
      # Ex.
      #   repo.branches_contains('master')
      #
      def branches_contains(commit)
        commit_obj = rugged.rev_parse(commit)
        parent = commit_obj.parents.first unless commit_obj.parents.empty?

        walker = Rugged::Walker.new(rugged)

        rugged.branches.select do |branch|
          walker.push(branch.target_id)
          walker.hide(parent) if parent
          result = walker.any? { |c| c.oid == commit_obj.oid }
          walker.reset

          result
        end
      end

      # Get a hash of refs keyed by commit SHA1; each value is an Array of
      # Rugged::Reference objects pointing at that commit
      def refs_hash
        # Initialize only on the first call
        if @refs_hash.nil?
          @refs_hash = Hash.new { |h, k| h[k] = [] }

          rugged.references.each do |r|
            sha = rev_parse_target(r.target.oid).oid
            @refs_hash[sha] << r
          end
        end

        @refs_hash
      end

      # Look up a Rugged object by OID or ref name
      def lookup(oid_or_ref_name)
        rugged.rev_parse(oid_or_ref_name)
      end

      # Return hash with submodules info for this repository
      #
      # Ex.
      #   {
      #     "rack"  => {
      #       "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
      #       "path" => "rack",
      #       "url" => "git://github.com/chneukirchen/rack.git"
      #     },
      #     "encoding" => {
      #       "id" => ....
      #     }
      #   }
      #
      def submodules(ref)
        commit = rugged.rev_parse(ref)

        content = blob_content(commit, ".gitmodules")
        parse_gitmodules(commit, content)
      end

      # Return total commits count accessible from passed ref
      def commit_count(ref)
        walker = Rugged::Walker.new(rugged)
        walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
        walker.push(ref)

        walker.count
      end

      # Sets HEAD to the commit specified by +ref+; +ref+ can be a branch or
      # tag name or a commit SHA. Valid +reset_type+ values are:
      #
      #  [:soft]
      #    the head will be moved to the commit.
      #  [:mixed]
      #    will trigger a +:soft+ reset, plus the index will be replaced
      #    with the content of the commit tree.
      #  [:hard]
      #    will trigger a +:mixed+ reset and the working directory will be
      #    replaced with the content of the index. (Untracked and ignored files
      #    will be left alone)
      def reset(ref, reset_type)
        rugged.reset(ref, reset_type)
      end

      # Mimic the `git clean` command and recursively delete untracked files.
      # Valid keys that can be passed in the +options+ hash are:
      #
      # :d - Remove untracked directories
      # :f - Remove untracked directories that are managed by a different
      #      repository
      # :x - Remove ignored files
      #
      # The value in +options+ must evaluate to true for an option to take
      # effect.
      #
      # Examples:
      #
      #   repo.clean(d: true, f: true) # Enable the -d and -f options
      #
      #   repo.clean(d: false, x: true) # -x is enabled, -d is not
      def clean(options = {})
        strategies = [:remove_untracked]
        strategies.push(:force) if options[:f]
        strategies.push(:remove_ignored) if options[:x]

        # TODO: implement this method
      end
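
      # A usage sketch for the two working-copy helpers above (the ref is a
      # hypothetical branch name). Note that #clean currently builds its
      # strategy list but does not yet act on it (see the TODO above):
      #
      #   repo.reset('master', :hard)
      #   repo.clean(d: true, x: true)
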
      # Check out the specified ref. Valid options are:
      #
      #  :b - Create a new branch at +start_point+ and set HEAD to the new
      #       branch.
      #
      #  * These options are passed to the Rugged::Repository#checkout method:
      #
      #  :progress ::
      #    A callback that will be executed for checkout progress notifications.
      #    Up to 3 parameters are passed on each execution:
      #
      #    - The path to the last updated file (or +nil+ on the very first
      #      invocation).
      #    - The number of completed checkout steps.
      #    - The number of total checkout steps to be performed.
      #
      #  :notify ::
      #    A callback that will be executed for each checkout notification
      #    types specified with +:notify_flags+. Up to 5 parameters are passed
      #    on each execution:
      #
      #    - An array containing the +:notify_flags+ that caused the callback
      #      execution.
      #    - The path of the current file.
      #    - A hash describing the baseline blob (or +nil+ if it does not
      #      exist).
      #    - A hash describing the target blob (or +nil+ if it does not exist).
      #    - A hash describing the workdir blob (or +nil+ if it does not
      #      exist).
      #
      #  :strategy ::
      #    A single symbol or an array of symbols representing the strategies
      #    to use when performing the checkout. Possible values are:
      #
      #    :none ::
      #      Perform a dry run (default).
      #
      #    :safe ::
      #      Allow safe updates that cannot overwrite uncommitted data.
      #
      #    :safe_create ::
      #      Allow safe updates plus creation of missing files.
      #
      #    :force ::
      #      Allow all updates to force working directory to look like index.
      #
      #    :allow_conflicts ::
      #      Allow checkout to make safe updates even if conflicts are found.
      #
      #    :remove_untracked ::
      #      Remove untracked files not in index (that are not ignored).
      #
      #    :remove_ignored ::
      #      Remove ignored files not in index.
      #
      #    :update_only ::
      #      Only update existing files, don't create new ones.
      #
      #    :dont_update_index ::
      #      Normally checkout updates index entries as it goes; this stops
      #      that.
      #
      #    :no_refresh ::
      #      Don't refresh index/config/etc before doing checkout.
      #
      #    :disable_pathspec_match ::
      #      Treat pathspec as simple list of exact match file paths.
      #
      #    :skip_locked_directories ::
      #      Ignore directories in use, they will be left empty.
      #
      #    :skip_unmerged ::
      #      Allow checkout to skip unmerged files (NOT IMPLEMENTED).
      #
      #    :use_ours ::
      #      For unmerged files, checkout stage 2 from index (NOT IMPLEMENTED).
      #
      #    :use_theirs ::
      #      For unmerged files, checkout stage 3 from index (NOT IMPLEMENTED).
      #
      #    :update_submodules ::
      #      Recursively checkout submodules with same options (NOT
      #      IMPLEMENTED).
      #
      #    :update_submodules_if_changed ::
      #      Recursively checkout submodules if HEAD moved in super repo (NOT
      #      IMPLEMENTED).
      #
      #  :disable_filters ::
      #    If +true+, filters like CRLF line conversion will be disabled.
      #
      #  :dir_mode ::
      #    Mode for newly created directories. Default: +0755+.
      #
      #  :file_mode ::
      #    Mode for newly created files. Default: +0755+ or +0644+.
      #
      #  :file_open_flags ::
      #    Mode for opening files. Default:
      #    IO::CREAT | IO::TRUNC | IO::WRONLY.
      #
      #  :notify_flags ::
      #    A single symbol or an array of symbols representing the cases in
      #    which the +:notify+ callback should be invoked. Possible values are:
      #
      #    :none ::
      #      Do not invoke the +:notify+ callback (default).
      #
      #    :conflict ::
      #      Invoke the callback for conflicting paths.
      #
      #    :dirty ::
      #      Invoke the callback for "dirty" files, i.e. those that do not need
      #      an update but no longer match the baseline.
      #
      #    :updated ::
      #      Invoke the callback for any file that was changed.
      #
      #    :untracked ::
      #      Invoke the callback for untracked files.
      #
      #    :ignored ::
      #      Invoke the callback for ignored files.
      #
      #    :all ::
      #      Invoke the callback for all these cases.
      #
      #  :paths ::
      #    A glob string or an array of glob strings specifying which paths
      #    should be taken into account for the checkout operation. +nil+ will
      #    match all files. Default: +nil+.
      #
      #  :baseline ::
      #    A Rugged::Tree that represents the current, expected contents of the
      #    workdir. Default: +HEAD+.
      #
      #  :target_directory ::
      #    A path to an alternative workdir directory in which the checkout
      #    should be performed.
      def checkout(ref, options = {}, start_point = "HEAD")
        if options[:b]
          rugged.branches.create(ref, start_point)
          options.delete(:b)
        end
        default_options = { strategy: :safe_create }
        rugged.checkout(ref, default_options.merge(options))
      end

      # Delete the specified branch from the repository
      def delete_branch(branch_name)
        rugged.branches.delete(branch_name)
      end

      # Return an array of this repository's remote names
      def remote_names
        rugged.remotes.each_name.to_a
      end
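
      # A usage sketch for the checkout, branch and remote-listing helpers
      # above (branch names are hypothetical):
      #
      #   repo.checkout('fix/encoding', { b: true }, 'master')
      #   repo.delete_branch('fix/encoding')
      #   repo.remote_names # => ["origin"]
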
      # Delete the specified remote from this repository.
      def remote_delete(remote_name)
        rugged.remotes.delete(remote_name)
      end

      # Add a new remote to this repository. Returns a Rugged::Remote object
      def remote_add(remote_name, url)
        rugged.remotes.create(remote_name, url)
      end

      # Update the specified remote using the values in the +options+ hash
      #
      # Example
      # repo.remote_update("origin", url: "path/to/repo")
      def remote_update(remote_name, options = {})
        # TODO: Implement other remote options
        remote = rugged.remotes[remote_name]
        remote.url = options[:url] if options[:url]
        remote.save
      end

      # Fetch the specified remote
      def fetch(remote_name)
        rugged.remotes[remote_name].fetch
      end

      # Push +*refspecs+ to the remote identified by +remote_name+.
      def push(remote_name, *refspecs)
        rugged.remotes[remote_name].push(refspecs)
      end

      # Return a String containing the mbox-formatted diff between +from+ and
      # +to+
      def format_patch(from, to)
        from_sha = rugged.rev_parse_oid(from)
        to_sha = rugged.rev_parse_oid(to)
        commits_between(from_sha, to_sha).map do |commit|
          commit.to_mbox
        end.join("\n")
      end

      # Merge the +source_name+ branch into the +target_name+ branch. This is
      # equivalent to `git merge --no-ff +source_name+`, since a merge commit
      # is always created.
      def merge(source_name, target_name, options = {})
        our_commit = rugged.branches[target_name].target
        their_commit = rugged.branches[source_name].target

        raise "Invalid merge target" if our_commit.nil?
        raise "Invalid merge source" if their_commit.nil?

        merge_index = rugged.merge_commits(our_commit, their_commit)
        return false if merge_index.conflicts?

        actual_options = options.merge(
          parents: [our_commit, their_commit],
          tree: merge_index.write_tree(rugged),
          update_ref: "refs/heads/#{target_name}"
        )
        Rugged::Commit.create(rugged, actual_options)
      end

      def commits_since(from_date)
        walker = Rugged::Walker.new(rugged)
        walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)

        rugged.references.each("refs/heads/*") do |ref|
          walker.push(ref.target_id)
        end

        commits = []
        walker.each do |commit|
          break if commit.author[:time].to_date < from_date
          commits.push(commit)
        end

        commits
      end

      private

      # Return the object that +revspec+ points to. If +revspec+ is an
      # annotated tag, then return the tag's target instead.
      def rev_parse_target(revspec)
        obj = rugged.rev_parse(revspec)

        if obj.is_a?(Rugged::Tag::Annotation)
          obj.target
        else
          obj
        end
      end

      # Get the content of a blob for a given commit. If the blob is a commit
      # (for submodules) then return the blob's OID.
      def blob_content(commit, blob_name)
        blob_entry = tree_entry(commit, blob_name)

        if blob_entry[:type] == :commit
          blob_entry[:oid]
        else
          rugged.lookup(blob_entry[:oid]).content
        end
      end

      # Parses the contents of a .gitmodules file and returns a hash of
      # submodule information.
      def parse_gitmodules(commit, content)
        results = {}

        current = ""
        content.split("\n").each do |txt|
          if txt.match(/^\[/)
            current = txt.match(/(?<=").*(?=")/)[0]
            results[current] = {}
          else
            match_data = txt.match(/(\w+) = (.*)/)
            results[current][match_data[1]] = match_data[2]

            if match_data[1] == "path"
              results[current]["id"] = blob_content(commit, match_data[2])
            end
          end
        end

        results
      end
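
      # For reference, parse_gitmodules above expects stanzas in the stock
      # .gitmodules format, e.g. (hypothetical submodule):
      #
      #   [submodule "rack"]
      #     path = rack
      #     url = git://github.com/chneukirchen/rack.git
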
      # Return an array of log commits, given a commit SHA and a hash of
      # options.
      def build_log(sha, options)
        # Instantiate a Walker and add the starting SHA
        walker = Rugged::Walker.new(rugged)
        walker.push(sha)

        commits = []
        skipped = 0
        current_path = options[:path]
        current_path = nil if current_path == ''

        limit = options[:limit].to_i
        offset = options[:offset].to_i
        skip_merges = options[:skip_merges]

        walker.sorting(Rugged::SORT_DATE)

        walker.each do |c|
          break if limit > 0 && commits.length >= limit

          if skip_merges
            # Skip merge commits
            next if c.parents.length > 1
          end

          if !current_path ||
              commit_touches_path?(c, current_path, options[:follow])
            # This is a commit we care about, unless we haven't skipped enough
            # yet
            skipped += 1
            commits.push(c) if skipped > offset
          end
        end

        walker.reset

        commits
      end

      # Returns true if the given commit affects the given path. If the
      # +follow+ option is true and the file specified by +path+ was renamed,
      # then the path value is set to the old path.
      def commit_touches_path?(commit, path, follow)
        if follow
          touches_path_diff?(commit, path)
        else
          touches_path_tree?(commit, path)
        end
      end

      # Returns true if +commit+ introduced changes to +path+, using commit
      # trees to make that determination.
      def touches_path_tree?(commit, path)
        parent = commit.parents[0]
        entry = tree_entry(commit, path)

        if parent.nil?
          # This is the root commit, return true if it has +path+ in its tree
          return entry != nil
        end

        parent_entry = tree_entry(parent, path)

        if entry.nil? && parent_entry.nil?
          false
        elsif entry.nil? || parent_entry.nil?
          true
        else
          entry[:oid] != parent_entry[:oid]
        end
      end

      # Find the entry for +path+ in the tree for +commit+
      def tree_entry(commit, path)
        pathname = Pathname.new(path)
        tmp_entry = nil

        pathname.each_filename do |dir|
          if tmp_entry.nil?
            tmp_entry = commit.tree[dir]
          else
            tmp_entry = rugged.lookup(tmp_entry[:oid])[dir]
          end
        end

        tmp_entry
      end

      # Returns true if +commit+ introduced changes to +path+, using
      # Rugged::Diff objects to make that determination. This is slower than
      # comparing commit trees, but lets us use Rugged::Diff#find_similar to
      # detect file renames.
      def touches_path_diff?(commit, path)
        diff = commit.diff(reverse: true, paths: [path],
                           disable_pathspec_match: true)
        return false if diff.deltas.empty?

        # If +path+ is a filename, not a directory, then we should only have
        # one delta. We don't need to follow renames for directories.
        return true if diff.deltas.length > 1

        # Detect renames
        delta = diff.deltas.first
        if delta.added?
          full_diff = commit.diff(reverse: true)
          full_diff.find_similar!

          full_diff.each_delta do |full_delta|
            if full_delta.renamed? && path == full_delta.new_file[:path]
              # Look for the old path in ancestors
              path.replace(full_delta.old_file[:path])
            end
          end
        end

        true
      end
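
      # archive_to_file below reproduces the shell pipeline
      #
      #   git --git-dir=<path> archive --prefix=<prefix> -- <treeish> | gzip -n > <filename>
      #
      # with two child processes joined by an IO.pipe, so no shell is involved
      # and the compression command can be swapped (bzip2, cat, ...).
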
      def archive_to_file(treeish = 'master', prefix = nil, filename = 'archive.tar.gz', format = nil, compress_cmd = %W(gzip))
        git_archive_cmd = %W(git --git-dir=#{path} archive)
        git_archive_cmd << "--prefix=#{prefix}" if prefix
        git_archive_cmd << "--format=#{format}" if format
        git_archive_cmd += %W(-- #{treeish})

        open(filename, 'w') do |file|
          # Create a pipe to act as the '|' in 'git archive ... | gzip'
          pipe_rd, pipe_wr = IO.pipe

          # Get the compression process ready to accept data from the read end
          # of the pipe
          compress_pid = spawn(*compress_cmd, :in => pipe_rd, :out => file)
          # The read end belongs to the compression process now; we should
          # close our file descriptor for it.
          pipe_rd.close

          # Start 'git archive' and tell it to write into the write end of the
          # pipe.
          git_archive_pid = spawn(*git_archive_cmd, :out => pipe_wr)
          # The write end belongs to 'git archive' now; close it.
          pipe_wr.close

          # When 'git archive' and the compression process are finished, we are
          # done.
          Process.waitpid(git_archive_pid)
          raise "#{git_archive_cmd.join(' ')} failed" unless $?.success?
          Process.waitpid(compress_pid)
          raise "#{compress_cmd.join(' ')} failed" unless $?.success?
        end
      end

      # Returns true if the index entry has the special file mode that denotes
      # a submodule (0160000, a gitlink entry).
      def submodule?(index_entry)
        index_entry[:mode] == 57344
      end

      # Return a Rugged::Index that has read from the tree at +ref_name+
      def populated_index(ref_name)
        tree = rugged.lookup(rugged.rev_parse_oid(ref_name)).tree
        index = rugged.index
        index.read_tree(tree)
        index
      end

      # Return an array of BlobSnippets for lines in +file_contents+ that match
      # +query+
      def build_greps(file_contents, query, ref, filename)
        greps = []

        file_contents.split("\n").each_with_index do |line, i|
          next unless line.match(/#{Regexp.escape(query)}/i)

          greps << Gitlab::Git::BlobSnippet.new(
            ref,
            file_contents.split("\n")[i - 3..i + 3],
            i - 2,
            filename
          )
        end

        greps
      end
    end
  end
end