# -*- coding: utf-8; mode: ruby; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
# vim:fenc=utf-8:filetype=ruby:et:sw=2:ts=2:sts=2

require 'diff/lcs'
require 'digest/sha1'
require 'time'
# CGI.escapeHTML and StringIO are used directly below, so load them here
# instead of relying on another file having required them first.
require 'cgi'
require 'stringio'

require 'git_commit_notifier/escape_helper'

module GitCommitNotifier
  # Translates Git diff to HTML format
  class DiffToHtml
    include EscapeHelper

    # Integration map for commit message keywords to third-party links.
    # Each entry has a :search_for (Regexp, or lambda taking the config and
    # returning a Regexp) and a :replace_with (template String containing the
    # literal text '#{url}', or lambda taking matched text, url and config).
    INTEGRATION_MAP = {
      :mediawiki => {
        :search_for => /\[\[([^\[\]]+)\]\]/,
        :replace_with => '#{url}/\1'
      },
      :redmine => {
        :search_for => lambda do |config|
          keywords = (config['redmine'] && config['redmine']['keywords']) || ["refs", "fixes"]
          /\b(?:#{keywords.join('\b|')})([\s&,]+\#\d+)+/i
        end,
        :replace_with => lambda do |m, url, config|
          # we can provide Proc that gets matched string and configuration url.
          # result should be in form of:
          # { :phrase => 'phrase started with', :links => [ { :title => 'title of url', :url => 'target url' }, ... ] }
          # When the matched text does not start with a keyword the text itself
          # (a String) is returned unchanged.
          keywords = (config['redmine'] && config['redmine']['keywords']) || ["refs", "fixes"]
          match = m.match(/^(#{keywords.join('\b|')})(.*)$/i)
          return m unless match
          r = { :phrase => match[1] }
          captures = match[2].split(/[\s\&\,]+/).map { |part| (part =~ /(\d+)/) ? $1 : part }.reject { |c| c.empty? }
          r[:links] = captures.map { |mn| { :title => "##{mn}", :url => "#{url}/issues/show/#{mn}" } }
          r
        end
      },
      :bugzilla => {
        :search_for => /\bBUG\s*(\d+)/i,
        :replace_with => '#{url}/show_bug.cgi?id=\1'
      },
      :fogbugz => {
        :search_for => /\bbugzid:\s*(\d+)/i,
        :replace_with => '#{url}\1'
      }
    }.freeze

    # Maximum email line length in characters.
    MAX_LINE_LENGTH = 512

    # Number of seconds per day.
    SECS_PER_DAY = 24 * 60 * 60

    attr_accessor :file_prefix, :current_file_name
    attr_reader :result, :oldrev, :newrev, :rev, :ref_name, :config

    # @param [Hash, NilClass] config Configuration hash; an empty hash is used when nil.
    def initialize(config = nil)
      @config = config || {}
      @lines_added = 0
      @file_added = false
      @file_removed = false
      @file_changes = []
      @binary = false
      # Rubies whose String has no encode! get an Iconv converter instead;
      # diff_for_commit uses it to drop invalid UTF-8 sequences.
      unless String.method_defined?(:encode!)
        require 'iconv'
        @ic = Iconv.new('UTF-8', 'UTF-8//IGNORE')
      end
    end

    # Extracts starting line numbers from a hunk header ("@@ -a,b +c,d ...").
    # @param [String] range Hunk header line.
    # @return [Array(Integer, Integer), NilClass] Old and new starting line
    #   numbers, or nil when the line is not in the "@@ -a +c" form.
    def range_info(range)
      matches = range.match(/^@@ \-(\S+) \+(\S+)/)
      # Previously an unmatched header raised NoMethodError on nil.
      return nil if matches.nil?
      matches[1..2].map { |m| m.split(',')[0].to_i }
    end

    # Gets HTML class for specified diff line data.
    # @param [Hash] line Diff line data
    # @return [String] Class attribute text (with leading space) or empty string.
    def line_class(line)
      case line[:op]
      when :removal then ' class="r"'
      when :addition then ' class="a"'
      else ''
      end
    end

    # Adds every line of block to the result via {#add_line_to_result}.
    # @param [Array(Hash)] block Diff line data items.
    # @param [Symbol] escape :escape to escape line content; anything else keeps it as is.
    def add_block_to_results(block, escape)
      return if block.empty?
      block.each { |line| add_line_to_result(line, escape) }
    end

    # Gets lines_per_diff setting from {#config}.
    # @return [Fixnum, NilClass] Lines per diff limit.
    def lines_per_diff
      config['lines_per_diff']
    end

    # Gets ignore_merge setting from {#config}.
    def ignore_merge?
      config['ignore_merge']
    end

    # Gets show_summary setting from {#config}.
    def show_summary?
      config['show_summary']
    end

    # Gets ignore_whitespace setting from {#config}.
    # @return [String] How whitespaces should be treated in diffs (none, all, change)
    def ignore_whitespace
      setting = config['ignore_whitespace']
      return 'all' if setting.nil?
      return 'none' unless setting # explicit false
      %w[all change none].include?(setting) ? setting : 'all'
    end

    # Adds separator between diff blocks to @diff_result.
    # @return [NilClass] nil
    def add_separator
      @diff_result << '…'
      nil
    end

    # Adds notification to @diff_result about skipping of diff tail due to its large size.
    # @return [NilClass] nil
    def add_skip_notification
      @diff_result << 'Diff too large and stripped…'
      nil
    end

    # Appends one diff line (removed line number, added line number, content)
    # to @diff_result.
    # @param [Hash] line Diff line data (:removed, :added, :op, :content).
    # @param [Symbol] escape :escape to pass content through escape_content.
    def add_line_to_result(line, escape)
      klass = line_class(line)
      content = (escape == :escape) ? escape_content(line[:content]) : line[:content]
      # Only added/removed lines (those with a class) get padding.
      padding = ' ' if klass != ''
      # NOTE(review): klass is computed but does not appear in the emitted
      # text; looks like markup may be missing here - confirm.
      @diff_result << "\n#{line[:removed]}\n#{line[:added]}\n#{padding}#{content}"
    end

    # Joins the :content of all lines of a block with newlines.
    # @param [Array(Hash)] block Diff line data items.
    # @return [String] Block text.
    def extract_block_content(block)
      block.collect { |b| b[:content] }.join("\n")
    end

    # Builds a token-level diff between a run of removed lines and the run of
    # added lines that follows it, using Diff::LCS with DiffCallback and
    # ResultProcessor.
    # @param [Array(Hash)] removals Removed lines (at least one).
    # @param [Array(Hash)] additions Added lines (at least one).
    # @return [Array(Hash)] Removal lines followed by addition lines, with
    #   content taken from the processor results.
    def lcs_diff(removals, additions)
      # arrays always have at least 1 element
      callback = DiffCallback.new

      s1 = tokenize_string(extract_block_content(removals))
      s2 = tokenize_string(extract_block_content(additions))

      Diff::LCS.traverse_balanced(s1, s2, callback)

      processor = ResultProcessor.new(callback.tags)
      diff_for_removals, diff_for_additions = processor.results

      result = []

      ln_start = removals[0][:removed]
      diff_for_removals.each_with_index do |line, i|
        result << { :removed => ln_start + i, :added => nil, :op => :removal, :content => line }
      end

      ln_start = additions[0][:added]
      diff_for_additions.each_with_index do |line, i|
        result << { :removed => nil, :added => ln_start + i, :op => :addition, :content => line }
      end

      result
    end

    # Gets array of tokens from specified str.
    # Runs of letters/digits form one token; every other character
    # (including underscore) is a token of its own.
    # @param [String] str Text to be split into tokens.
    # @return [Array(String)] Array of tokens.
    def tokenize_string(str)
      # tokenize by non-word characters
      tokens = []
      token = ''
      str.scan(/./mu) do |ch|
        if ch =~ /[^\W_]/u
          token += ch
        else
          unless token.empty?
            tokens << token
            token = ''
          end
          tokens << ch
        end
      end
      tokens << token unless token.empty?
      tokens
    end

    # Builds the heading for the current file ("Added/Deleted/Changed
    # [binary ]file NAME") and, when show_summary? is set, records it in
    # @file_changes.
    # @return [String] Heading text for the current file.
    def operation_description
      binary = @binary ? 'binary ' : ''
      op = if @file_removed
        "Deleted"
      elsif @file_added
        "Added"
      else
        "Changed"
      end

      file_name = @current_file_name

      text = "#{op} #{binary}file #{file_name}"

      # TODO: these filenames, etc, should likely be properly html escaped
      # NOTE(review): every branch below yields just the file name; looks like
      # the per-service link markup is missing - confirm.
      file_link = file_name
      if config['link_files']
        file_link = if config["link_files"] == "gitweb" && config["gitweb"]
          "#{file_name}"
        elsif config["link_files"] == "gitorious" && config["gitorious"]
          "#{file_name}"
        elsif config["link_files"] == "trac" && config["trac"]
          "#{file_name}"
        elsif config["link_files"] == "cgit" && config["cgit"]
          "#{file_name}"
        elsif config["link_files"] == "gitlabhq" && config["gitlabhq"]
          if config["gitlabhq"]["version"] && config["gitlabhq"]["version"] < 1.2
            "#{file_name}"
          elsif config["gitlabhq"]["version"] && config["gitlabhq"]["version"] >= 4.0
            "#{file_name}"
          else
            "#{file_name}"
          end
        elsif config["link_files"] == "redmine" && config["redmine"]
          "#{file_name}"
        else
          file_name
        end
      end

      if show_summary?
        @file_changes << {
          :file_name => file_name,
          :text => text,
        }
      end

      "\n\n#{op} #{binary}file #{file_link}\n\n\n"
    end

    # Determines are two lines are sequentially placed in diff (no skipped lines between).
    # @return [Boolean] true if lines are sequential; otherwise false.
    def lines_are_sequential?(first, second)
      [:added, :removed].any? do |side|
        !first[side].nil? && !second[side].nil? && first[side] == (second[side] - 1)
      end
    end

    # Flushes the lines collected for the current file into @diff_result
    # (heading first, then the diff lines limited by lines_per_diff) and
    # resets the per-file state. Does nothing when no file is current.
    def add_changes_to_result
      return if @current_file_name.nil?

      @lines_added = 0
      @diff_result << operation_description

      if !@diff_lines.empty? && !@too_many_files
        @diff_result << ''
        removals = []
        additions = []

        lines = if lines_per_diff.nil?
          line_budget = nil
          @diff_lines
        else
          line_budget = lines_per_diff - @lines_added
          @diff_lines.slice(0, line_budget)
        end

        lines.each_with_index do |line, index|
          removals << line if line[:op] == :removal
          additions << line if line[:op] == :addition
          if line[:op] == :unchanged || index == lines.size - 1 # unchanged line or end of block, add prev lines to result
            if removals.size > 0 && additions.size > 0 # block of removed and added lines - perform intelligent diff
              add_block_to_results(lcs_diff(removals, additions), :dont_escape)
            else # some lines removed or added - no need to perform intelligent diff
              add_block_to_results(removals + additions, :escape)
            end
            removals = []
            additions = []
            if index > 0 && index != lines.size - 1
              prev_line = lines[index - 1]
              add_separator unless lines_are_sequential?(prev_line, line)
            end
            add_line_to_result(line, :escape) if line[:op] == :unchanged
          end
          @lines_added += 1
        end

        add_skip_notification if !line_budget.nil? && line_budget < @diff_lines.size

        @diff_result << "\n"
        @diff_lines = []
      end

      # reset values
      @right_ln = nil
      @left_ln = nil
      @file_added = false
      @file_removed = false
      @binary = false
    end

    RE_DIFF_FILE_NAME = /^diff\s\-\-git\sa\/(.*)\sb\//
    RE_DIFF_SHA       = /^index [0-9a-fA-F]+\.\.([0-9a-fA-F]+)/

    # Converts the diff part of git show output into the result text.
    # When config['too_many_files'] is a positive number and the diff touches
    # at least that many files, only the per-file headings are produced.
    # @param [String] content Diff text.
    # @return [String] Result fragments joined with newlines.
    def diff_for_revision(content)
      @left_ln = @right_ln = nil

      @diff_result = []
      @diff_lines = []
      @removed_files = []
      @current_file_name = nil
      @current_sha = nil
      @too_many_files = false

      lines = content.split("\n")

      if config['too_many_files'] && config['too_many_files'].to_i > 0
        file_count = lines.grep(RE_DIFF_FILE_NAME).size
        @too_many_files = true if file_count >= config['too_many_files'].to_i
      end

      lines.each do |line|
        case line
        when RE_DIFF_FILE_NAME then
          # Grab the capture before add_changes_to_result can clobber $1.
          file_name = $1
          add_changes_to_result
          @current_file_name = file_name
        when RE_DIFF_SHA then
          @current_sha = $1
        else
          op = line[0, 1]
          if @left_ln.nil? || op == '@'
            process_info_line(line, op)
          else
            process_code_line(line, op)
          end
        end
      end
      add_changes_to_result
      @diff_result.join("\n")
    end

    # Records one line of hunk content in @diff_lines and advances the
    # line counters for the side(s) the line belongs to.
    # @param [String] line Raw diff line.
    # @param [String] op First character of the line.
    def process_code_line(line, op)
      if op == '-'
        @diff_lines << { :removed => @left_ln, :added => nil, :op => :removal, :content => line[1..-1] }
        @left_ln += 1
      elsif op == '+'
        @diff_lines << { :added => @right_ln, :removed => nil, :op => :addition, :content => line[1..-1] }
        @right_ln += 1
      else
        @diff_lines << { :added => @right_ln, :removed => @left_ln, :op => :unchanged, :content => line }
        @right_ln += 1
        @left_ln += 1
      end
    end

    # Interprets a diff header line: detects added/removed/binary files and
    # reads starting line numbers from hunk headers.
    # @param [String] line Raw diff line.
    # @param [String] op First character of the line.
    def process_info_line(line, op)
      if line =~ /^deleted\sfile\s/
        @file_removed = true
      elsif line =~ /^\-\-\-\s/ && line =~ /\/dev\/null/
        @file_added = true
      elsif line =~ /^\+\+\+\s/ && line =~ /\/dev\/null/
        @file_removed = true
      elsif line =~ /^Binary files \/dev\/null/ # Binary files /dev/null and ... differ (addition)
        @binary = true
        @file_added = true
      elsif line =~ /\/dev\/null differ/ # Binary files ... and /dev/null differ (removal)
        @binary = true
        @file_removed = true
      elsif op == '@'
        # range_info returns nil for a header it cannot parse; both counters
        # then become nil, so following lines are treated as info lines until
        # a parseable hunk header shows up.
        @left_ln, @right_ln = range_info(line)
      end
    end

    # Returns everything from the first "diff --git" line on.
    # @param [String] content git show output.
    # @return [String] Diff part of the output (empty when there is none).
    def extract_diff_from_git_show_output(content)
      diff = []
      diff_found = false
      content.split("\n").each do |line|
        diff_found = true if line =~ /^diff\s\-\-git/
        next unless diff_found
        diff << line
      end
      diff.join("\n")
    end

    # Parses the commit header and message that precede the diff.
    # The committer e-mail is available under both :commit_email (the key
    # read by {#diff_for_commit}) and :committer_email (the key declared in
    # the defaults, which previously was never filled).
    # @param [String] content git show output.
    # @return [Hash] Commit information; :message is an Array of lines with
    #   leading and trailing blank lines removed, :merge is set only when a
    #   "Merge:" line is present.
    def extract_commit_info_from_git_show_output(content)
      result = {
        :message => [],
        :commit => '',
        :author => '',
        :date => '',
        :email => '',
        :committer => '',
        :commit_date => '',
        :commit_email => '',
        :committer_email => ''
      }

      message = []
      content.split("\n").each do |line|
        break if line =~ /^diff/ # end of commit info

        case line
        when /^commit /
          result[:commit] = line[7..-1]
        when /^Author:/
          result[:author], result[:email] = author_name_and_email(line[12..-1])
        when /^AuthorDate:/
          result[:date] = line[12..-1]
        when /^Commit:/
          result[:committer], result[:commit_email] = author_name_and_email(line[12..-1])
          result[:committer_email] = result[:commit_email]
        when /^CommitDate:/
          result[:commit_date] = line[12..-1]
        when /^Merge:/
          result[:merge] = line[7..-1]
        else
          message << line.strip
        end
      end

      # Strip blank lines off top and bottom of message
      message.shift while !message.empty? && message.first.empty?
      message.pop while !message.empty? && message.last.empty?
      result[:message] = message

      result
    end

    # Escapes every message line, joins the lines with newlines and applies
    # {#message_map}.
    # @param [Array(String)] message Message lines.
    # @return [String] Processed message.
    def message_array_as_html(message)
      message_map(message.collect { |m| CGI.escapeHTML(m) }.join("\n"))
    end

    # Splits "name <email>" into its two parts.
    # @param [String] info Author information.
    # @return [Array(String, String)] Name and e-mail; e-mail is '' when info
    #   is not in the expected form.
    def author_name_and_email(info)
      # input string format: "author name <email>"
      return [$1, $2] if info =~ /^([^\<]+)\s+\<\s*(.*)\s*\>\s*$/ # normal operation
      # incomplete author info - return it as author name
      [info, '']
    end

    # Gets the stripped first line of the message; an empty line or one
    # starting with "Merge:" is returned as is.
    # @param [Array(String)] message_array Message lines.
    def first_sentence(message_array)
      msg = message_array.first.to_s.strip
      return message_array.first if msg.empty? || msg =~ /^Merge\:/
      msg
    end

    # Gets unique_commits_per_branch setting from {#config} as a boolean.
    def unique_commits_per_branch?
      !!config['unique_commits_per_branch']
    end

    # Gets the last path component of {#ref_name}.
    # @return [String, NilClass] Branch name; nil when ref_name is not set
    #   (callers test the result for presence).
    def branch_name
      ref_name.to_s.split('/').last
    end

    # Tells whether the commit is older than config['skip_commits_older_than'] days.
    # Always false when the setting is absent or not positive.
    # @param [Hash] commit_info Commit information with :date.
    def old_commit?(commit_info)
      return false if !config.include?('skip_commits_older_than') || (config['skip_commits_older_than'].to_i <= 0)
      commit_when = Time.parse(commit_info[:date])
      (Time.now - commit_when) > (SECS_PER_DAY * config['skip_commits_older_than'].to_i)
    end

    # Tells whether commit information carries a :merge entry.
    def merge_commit?(commit_info)
      !commit_info[:merge].nil?
    end

    # Shortens every line longer than MAX_LINE_LENGTH and marks it with "...".
    # @param [String] text Text to process.
    # @return [String] Text with long lines truncated.
    def truncate_long_lines(text)
      str = ""
      # Match encoding of output string to that of input string
      str.force_encoding(text.encoding) if str.respond_to?(:force_encoding)
      StringIO.open(str, "w") do |output|
        input = StringIO.new(text, "r")
        input.each_line("\n") do |line|
          if line.length > MAX_LINE_LENGTH && MAX_LINE_LENGTH >= 9
            # Truncate the line
            line.slice!(MAX_LINE_LENGTH - 3..-1)

            # Ruby < 1.9 doesn't know how to slice between
            # characters, so deal specially with that case
            # so that we don't truncate in the middle of a UTF8 sequence,
            # which would be invalid.
            unless line.respond_to?(:force_encoding)
              # If the last remaining character is part of a UTF8 multibyte character,
              # keep truncating until we go past the start of a UTF8 character.
              # This assumes that this is a UTF8 string, which may be a false assumption
              # unless somebody has taken care to check the encoding of the source file.
              # We truncate at most 6 additional bytes, which is the length of the longest
              # UTF8 sequence
              6.times do
                last = line[-1, 1]
                break if last.nil? || last.empty?
                # Take the byte value; String#to_i would parse a decimal
                # number and yield 0 for any non-digit.
                c = last.unpack('C').first
                break if (c & 0x80) == 0 # Last character is plain ASCII: don't truncate
                line.slice!(-1, 1)       # Truncate character
                break if (c & 0xc0) == 0xc0 # Last character was the start of a UTF8 sequence, so we can stop now
              end
            end

            # Append three dots to the end of line to indicate it's been truncated
            # (avoiding ellipsis character so as not to introduce more encoding issues)
            line << "...\n"
          end
          output << line
        end
        output.string
      end
    end

    # Commit to link mapping.
    # NOTE(review): every service lambda yields just the commit id; looks like
    # the per-service link markup is missing - confirm.
    COMMIT_LINK_MAP = {
      :gitweb    => lambda { |config, commit| "#{commit}" },
      :gitorious => lambda { |config, commit| "#{commit}" },
      :trac      => lambda { |config, commit| "#{commit}" },
      :cgit      => lambda { |config, commit| "#{commit}" },
      :gitlabhq  => lambda { |config, commit| "#{commit}" },
      :redmine   => lambda { |config, commit| "#{commit}" },
      :default   => lambda { |config, commit| commit.to_s }
    }.freeze

    # Gets HTML markup for specified commit.
    # The :default entry is used when config['link_files'] is unset, has no
    # matching section in the config, or is not in COMMIT_LINK_MAP.
    # @param [String] commit Unique identifier of commit.
    # @return [String] HTML markup for specified commit.
    # @see COMMIT_LINK_MAP
    def markup_commit_for_html(commit)
      mode = (config["link_files"] || "default").to_sym
      mode = :default unless config.has_key?(mode.to_s)
      mode = :default unless COMMIT_LINK_MAP.has_key?(mode)
      COMMIT_LINK_MAP[mode].call(config, commit)
    end

    # Builds the notification content for a single commit.
    # @param [String] commit Commit identifier passed to Git.show.
    # @return [Hash, NilClass] Hash with :commit_info, :html_content and
    #   :text_content; nil when the commit is skipped by {#old_commit?}.
    # @raise [RuntimeError] when git show produced no output.
    def diff_for_commit(commit)
      @current_commit = commit
      raw_diff = truncate_long_lines(Git.show(commit, :ignore_whitespace => ignore_whitespace))
      raise "git show output is empty" if raw_diff.empty?

      if raw_diff.respond_to?(:encode!)
        unless raw_diff.valid_encoding?
          # Round-trip through UTF-16 to replace invalid byte sequences.
          raw_diff.encode!("UTF-16", "UTF-8", :invalid => :replace, :undef => :replace)
          raw_diff.encode!("UTF-8", "UTF-16")
        end
      else
        raw_diff = @ic.iconv(raw_diff)
      end

      commit_info = extract_commit_info_from_git_show_output(raw_diff)
      return nil if old_commit?(commit_info)

      changed_files = ""
      if merge_commit?(commit_info)
        # List files changed between the first parent and every other parent.
        changed_file_list = []
        merge_revisions = commit_info[:merge].split
        merge_revisions.map! { |rev| rev.chomp("...") }
        merge_first_parent = merge_revisions.slice!(0)
        merge_revisions.each do |merge_other_parent|
          changed_file_list += Git.changed_files(merge_first_parent, merge_other_parent)
        end
        changed_files = "Changed files:\n\n#{changed_file_list.uniq.join()}\n"
      end

      title = "\n"
      title += "\nCommit\n#{markup_commit_for_html(commit_info[:commit])}\n\n"
      title += "\nBranch\n#{CGI.escapeHTML(branch_name)}\n\n" if branch_name
      title += "\nAuthor\n#{CGI.escapeHTML(commit_info[:author])} <#{commit_info[:email]}>\n\n"

      # Show separate committer name/email only if it differs from author
      if commit_info[:author] != commit_info[:committer] || commit_info[:email] != commit_info[:commit_email]
        title += "\nCommitter\n#{CGI.escapeHTML(commit_info[:committer])} <#{commit_info[:commit_email]}>\n\n"
      end

      title += "\nDate\n#{CGI.escapeHTML commit_info[:date]}\n\n"
      title += "\nMessage\n#{message_array_as_html(commit_info[:message])}\n\n"
      title += "\n"

      @file_changes = []
      text = ""

      html_diff = diff_for_revision(extract_diff_from_git_show_output(raw_diff))
      message_array = message_array_as_html(changed_files.split("\n"))

      if show_summary?
        # NOTE(review): @file_changes collected during diff_for_revision is
        # not rendered here and the title gets an empty string; looks like
        # summary output is missing - confirm.
        title += ""
        text += "\n"
      end

      text += "#{raw_diff}"
      text += "#{changed_files}\n\n\n"

      html = title
      html += html_diff
      html += message_array
      html += "\n\n"

      commit_info[:message] = first_sentence(commit_info[:message])

      {
        :commit_info => commit_info,
        :html_content => html,
        :text_content => text
      }
    end

    # Adds a result entry describing a change to a lightweight tag.
    # @param [String] tag Tag name.
    # @param [String] rev Revision the tag points to.
    # @param [Symbol] change_type :create, :update or :delete.
    def diff_for_lightweight_tag(tag, rev, change_type)
      if change_type == :delete
        message = "Remove Lightweight Tag #{tag}"

        html = "\n"
        html += "\nTag\n#{CGI.escapeHTML(tag)} (removed)\n\n"
        html += "\nType\nlightweight\n\n"
        html += "\nCommit\n#{markup_commit_for_html(rev)}\n\n"
        html += "\n"

        text = "Remove Tag: #{tag}\n"
        text += "Type: lightweight\n"
        text += "Commit: #{rev}\n"
      else
        message = "#{change_type == :create ? "Add" : "Update"} Lightweight Tag #{tag}"

        html = "\n"
        html += "\nTag\n#{CGI.escapeHTML(tag)} (#{change_type == :create ? "added" : "updated"})\n\n"
        html += "\nType\nlightweight\n\n"
        html += "\nCommit\n#{markup_commit_for_html(rev)}\n\n"
        html += "\n"

        text = "Tag: #{tag} (#{change_type == :create ? "added" : "updated"})\n"
        text += "Type: lightweight\n"
        text += "Commit: #{rev}\n"
      end

      commit_info = {
        :commit => rev,
        :message => message
      }

      @result << {
        :commit_info => commit_info,
        :html_content => html,
        :text_content => text
      }
    end

    # Adds a result entry describing a change to an annotated tag.
    # For non-delete changes the tag details come from Git.tag_info(ref_name).
    # @param [String] tag Tag name.
    # @param [String] rev Revision of the tag.
    # @param [Symbol] change_type :create, :update or :delete.
    def diff_for_annotated_tag(tag, rev, change_type)
      commit_info = {
        :commit => rev
      }

      if change_type == :delete
        message = "Remove Annotated Tag #{tag}"

        html = "\n"
        html += "\nTag\n#{CGI.escapeHTML(tag)} (removed)\n\n"
        html += "\nType\nannotated\n\n"
        html += "\n"

        text = message
        commit_info[:message] = message
      else
        tag_info = Git.tag_info(ref_name)

        message = tag_info[:subject] || "#{change_type == :create ? "Add" : "Update"} Annotated Tag #{tag}"

        html = "\n"
        html += "\nTag\n#{CGI.escapeHTML(tag)} (#{change_type == :create ? "added" : "updated"})\n\n"
        html += "\nType\nannotated\n\n"
        html += "\nCommit\n#{markup_commit_for_html(tag_info[:tagobject])}\n\n"
        html += "\nTagger\n#{CGI.escapeHTML(tag_info[:taggername])} #{CGI.escapeHTML(tag_info[:taggeremail])}\n\n"

        message_array = tag_info[:contents].split("\n")
        html += "\nMessage\n#{message_array_as_html(message_array)}\n\n"
        html += "\n"

        text = "Tag: #{tag} (#{change_type == :create ? "added" : "updated"})\n"
        text += "Type: annotated\n"
        text += "Commit: #{tag_info[:tagobject]}\n"
        text += "Tagger: #{tag_info[:taggername]} #{tag_info[:taggeremail]}\n"
        text += "Message: #{tag_info[:contents]}\n"

        commit_info[:message] = message
        commit_info[:author], commit_info[:email] = author_name_and_email("#{tag_info[:taggername]} #{tag_info[:taggeremail]}")
      end

      @result << {
        :commit_info => commit_info,
        :html_content => html,
        :text_content => text
      }
    end

    # Adds result entries for the commits of a branch change.
    # Branch deletion is reported on stdout and produces no entries.
    # @param [String] branch Branch name (not used by the method body).
    # @param [String] rev Revision (not used by the method body).
    # @param [Symbol] change_type :create, :update or :delete.
    def diff_for_branch(branch, rev, change_type)
      commits = case change_type
      when :delete
        puts "ignoring branch delete"
        []
      when :create, :update
        # Note that "unique_commits_per_branch" really means "consider commits
        # on this branch without regard to whether they occur on other branches"
        # The flag unique_to_current_branch passed to new_commits means the
        # opposite: "consider only commits that are unique to this branch"
        Git.new_commits(oldrev, newrev, ref_name, !unique_commits_per_branch?)
      end

      # Add each diff to @result
      commits.each do |commit|
        commit_result = diff_for_commit(commit)
        next if commit_result.nil?
        @result << commit_result
      end
    end

    # Empties the accumulated result.
    def clear_result
      @result = []
    end

    # Computes the result entries for a ref update from rev1 to rev2.
    # When a block is given, it is called for every entry with the total
    # number of entries and the entry itself.
    # @param [String] rev1 Old revision.
    # @param [String] rev2 New revision.
    # @param [Object] repo Not used by the method body.
    # @param [String] ref_name Full ref name (refs/heads/..., refs/tags/..., ...).
    def diff_between_revisions(rev1, rev2, repo, ref_name)
      clear_result

      # Cleanup revs
      @oldrev = Git.rev_parse(rev1)
      @newrev = Git.rev_parse(rev2)
      @ref_name = ref_name

      # Establish the type of change
      change_type = if @oldrev =~ /^0+$/
        :create
      elsif @newrev =~ /^0+$/
        :delete
      else
        :update
      end

      # Establish type of the revs
      @oldrev_type = Git.rev_type(@oldrev)
      @newrev_type = Git.rev_type(@newrev)
      if newrev =~ /^0+$/
        @rev_type = @oldrev_type
        @rev = @oldrev
      else
        @rev_type = @newrev_type
        @rev = @newrev
      end

      # Determine what to do based on the ref_name and the rev_type
      case "#{@ref_name},#{@rev_type}"
      when %r!^refs/tags/(.+),commit$!
        # Change to an unannotated tag
        diff_for_lightweight_tag($1, @rev, change_type)
      when %r!^refs/tags/(.+),tag$!
        # Change to a annotated tag
        diff_for_annotated_tag($1, @rev, change_type)
      when %r!^refs/heads/(.+),commit$!
        # Change on a branch
        diff_for_branch($1, @rev, change_type)
      when %r!^refs/remotes/(.+),commit$!
        # Remote branch
        puts "Ignoring #{change_type} on remote branch #{$1}"
      else
        # Something we don't understand
        puts "Unknown change type #{ref_name},#{@rev_type}"
      end

      # Remove merge commits if required
      if ignore_merge?
        @result.reject! { |commit| merge_commit?(commit[:commit_info]) }
      end

      # If a block was given, pass it the results, in turn
      @result.each { |commit| yield @result.size, commit } if block_given?
    end

    # Replaces every match of search_for in message, in place.
    # @param [String] message Message to modify.
    # @param [Regexp, String] search_for Pattern to look for.
    # @param [Proc, String] replace_with Proc that receives the matched text
    #   and returns a Hash with :phrase and :links, or a replacement target.
    def message_replace!(message, search_for, replace_with)
      if replace_with.kind_of?(Proc)
        message.gsub!(Regexp.new(search_for)) do |matched|
          r = replace_with.call(matched)
          # A Proc may hand back the matched text itself when it cannot build
          # links; keep that text instead of indexing into a String.
          next r unless r.kind_of?(Hash)
          # NOTE(review): only the link titles are emitted, the :url values
          # are not used; looks like link markup is missing - confirm.
          r[:phrase] + ' ' + r[:links].map { |link| "#{link[:title]}" }.join(', ')
        end
      else
        # NOTE(review): replace_with is not used in this branch and the
        # match is replaced with itself; looks like link markup is missing -
        # confirm.
        full_replace_with = "\\0"
        message.gsub!(Regexp.new(search_for), full_replace_with)
      end
    end

    # Applies the replacements of INTEGRATION_MAP that are enabled through
    # config['message_integration'] (integration name => url).
    # @param [String] message Message to modify in place.
    # @return [String] The message.
    def do_message_integration(message)
      return message unless config['message_integration'].respond_to?(:each_pair)
      config['message_integration'].each_pair do |pm, url|
        pm_def = DiffToHtml::INTEGRATION_MAP[pm.to_sym] or next
        search_for = pm_def[:search_for]
        search_for = search_for.kind_of?(Proc) ? search_for.call(@config) : search_for
        replace_with = pm_def[:replace_with]
        replace_with = if replace_with.kind_of?(Proc)
          lambda { |m| pm_def[:replace_with].call(m, url, @config) }
        else
          replace_with.gsub('#{url}', url)
        end
        message_replace!(message, search_for, replace_with)
      end
      message
    end

    # Applies the replacements given in config['message_map']
    # (pattern => replacement).
    # @param [String] message Message to modify in place.
    # @return [String] The message.
    def do_message_map(message)
      return message unless config['message_map'].respond_to?(:each_pair)
      config['message_map'].each_pair do |search_for, replace_with|
        message_replace!(message, Regexp.new(search_for), replace_with)
      end
      message
    end

    # Applies integration replacements followed by message_map replacements.
    # @param [String] message Message to modify in place.
    # @return [String] The message.
    def message_map(message)
      do_message_map(do_message_integration(message))
    end
  end
end