#!/usr/bin/env ruby -s
#
# Counts files, lines, blank lines, comment lines and code lines, grouped
# by file extension.
#
# Where the list of files comes from:
#   * STDIN is a terminal (or -tty given), inside a git work tree:
#       `git ls-files`, restricted to the paths given on the command line
#   * STDIN is a terminal (or -tty given), outside of a git work tree:
#       `find` on the given paths (or the current directory)
#   * otherwise: one file name per line, read from STDIN
#
# Output is a fixed-width table when STDOUT is a terminal (or -tty given)
# and a single JSON document otherwise.
#
# Switches (turned into global variables by ruby's -s flag):
#   -tty       behave as if STDIN and STDOUT were terminals
#   -verbose   report every skipped file on STDERR
#   -files | -lines | -blank | -comment | -code
#              column to sort by, descending (default: code)

require 'json'
require 'ostruct'

# Counters kept per extension, in the order they are printed.
METRICS = [:file_count, :line_count, :blank_count, :comment_count, :code_count].freeze

BLANKS = %r{\A\s*\Z}.freeze

COMMENTS = {
  # FIXME does not work for multi-line comments
  #       (for the languages that support them)
  '*.rb'   => %r{\A\s*(#.*)\s*\Z},
  '*.sh'   => %r{\A\s*(#.*)\s*\Z},
  # Single-line markup comments. The dashes are spelled -{2} on purpose so
  # that tools which strip markup comments from text cannot erase the pattern.
  # NOTE(review): these two entries used to have an empty capture group, which
  # made them match blank lines instead -- confirm against the upstream source.
  '*.xml'  => %r{\A\s*(<!-{2}.*-{2}>)\s*\Z},
  '*.html' => %r{\A\s*(<!-{2}.*-{2}>)\s*\Z},
  '*.css'  => %r{\A\s*(/\*.*\*/)\s*\Z},
  '*.js'   => %r{\A\s*(//.*|/\*.*\*/)\s*\Z},
}.freeze

TEMPLATE = " %-13s %12s %12s %12s %12s %12s".freeze
DIVIDER  = ('-' * 80).freeze # `loc` uses 80 columns

# Returns true when the current directory is inside a git work tree.
#
# Runs git directly (no shell) with both output streams discarded, so the
# result does not depend on which shell /bin/sh happens to be. `system`
# returns nil when git cannot be executed at all, which also counts as "no".
def inside_git_repo?
  system('git', 'rev-parse', '--show-toplevel', out: File::NULL, err: File::NULL) ? true : false
end

# Returns the candidate file names, before any filtering.
#
# paths - command line arguments that restrict the git/find listing.
#
# The external commands are started with an argument array instead of a
# shell string, so paths containing spaces or shell metacharacters are
# passed through unchanged.
def list_source_files(paths = ARGV)
  unless STDIN.tty? || $tty
    # assume we're running in a pipeline
    # and read the list of file names from STDIN
    return STDIN.read.split("\n").map(&:chomp)
  end

  command = if inside_git_repo?
              ['git', 'ls-files', '-z', *paths]
            else
              ['find', *(paths.empty? ? [Dir.pwd] : paths), '-print0']
            end
  IO.popen(command, &:read).split("\0")
end

# Returns true when the first 16 bytes of the file contain a NUL byte.
#
# note: git itself looks at the first 8,000 characters of a file
# (see buffer_is_binary() in the git sources), but looking at the
# first 16 is fine for our purposes... for now :-)
def binary_file?(file)
  File.binread(file, 16).to_s.include?("\0")
end

# Returns true when the path must be left out of the statistics; also
# reports the path on STDERR when -verbose was given.
def skip_file?(file)
  skip = File.extname(file) == '.pdf' ||        # skip PDF documents
         File.basename(file).start_with?('.') || # skip hidden ".*" files
         !File.file?(file) ||                    # skip missing paths and directories
         !File.size?(file) ||                    # skip empty files
         binary_file?(file)                      # skip binary files
  warn("SKIPPING #{file}...") if skip && $verbose
  skip
end

# Reads the file as UTF-8, falling back to ISO-8859-1 when the bytes are
# not valid UTF-8.
#
# FIXME what about file encodings other than these two???
def read_source(file)
  content = File.read(file, encoding: 'UTF-8')
  return content if content.valid_encoding?

  File.read(file, encoding: 'ISO-8859-1')
end

# Builds the per-extension statistics for the given files.
#
# Returns a Hash that maps '*' + extension (e.g. '*.rb', or '*' for files
# without an extension) to an OpenStruct holding every counter in METRICS.
# Comment lines are only counted for extensions listed in COMMENTS.
def collect_stats(files)
  stats = files.each_with_object({}) do |file, by_ext|
    ext   = '*' + File.extname(file)
    entry = (by_ext[ext] ||= OpenStruct.new(
      file_count: 0, line_count: 0, blank_count: 0, comment_count: 0
    ))

    lines = read_source(file).each_line.to_a # split once, reuse below
    entry.file_count  += 1
    entry.line_count  += lines.size
    entry.blank_count += lines.grep(BLANKS).size

    pattern = COMMENTS[ext]
    entry.comment_count += lines.grep(pattern).size if pattern
  end

  stats.each_value do |entry|
    entry.code_count = entry.line_count - (entry.blank_count + entry.comment_count)
  end
  stats
end

# Returns the column to sort by, chosen by the -files / -lines / -blank /
# -comment / -code switches (first one set wins, default :code_count).
def sort_metric
  if    $files   then :file_count
  elsif $lines   then :line_count
  elsif $blank   then :blank_count
  elsif $comment then :comment_count
  else                :code_count
  end
end

# Returns a new Hash with the entries sorted by `metric` in descending
# order, followed by a final "TOTAL" entry that sums every counter.
def with_totals(stats, metric)
  ranked  = Hash[stats.sort_by { |_, entry| entry[metric] }.reverse]
  entries = ranked.values
  sums    = METRICS.map { |name| [name, entries.map { |entry| entry[name] }.reduce(0, :+)] }
  ranked['TOTAL'] = OpenStruct.new(Hash[sums])
  ranked
end

# Serialises the ranked statistics as one compact JSON object.
#
# Entries are converted to plain hashes first, so the result does not
# depend on how (or whether) OpenStruct implements to_json.
def stats_to_json(ranked)
  JSON.generate(Hash[ranked.map { |label, entry| [label, entry.to_h] }])
end

# Formats a number with thousands separators, e.g. 1234567 -> "1,234,567".
def commify(number)
  number.to_s.gsub(/(\d)(?=(\d{3})+(\..*)?$)/, '\1,')
end

# Prints the ranked statistics as a fixed-width table on STDOUT.
def print_table(ranked)
  totals = ranked['TOTAL']
  rows   = ranked.reject { |label, _| label == 'TOTAL' }

  puts format("%s\n#{TEMPLATE}\n%s",
    DIVIDER, *%w(Language Files Lines Blank Comment Code), DIVIDER)

  rows.each do |ext, entry|
    puts format(TEMPLATE, ext, *METRICS.map { |name| commify(entry[name]) })
  end

  puts format("%s\n#{TEMPLATE}\n%s",
    DIVIDER, 'Total', *METRICS.map { |name| commify(totals[name]) }, DIVIDER)
end

# Entry point: list files, drop the ones to skip, count, then print.
def main
  source_files = list_source_files.reject { |file| skip_file?(file) }
  ranked = with_totals(collect_stats(source_files), sort_metric)

  if STDOUT.tty? || $tty
    print_table(ranked) # fancy formatting for TTY output
  else
    puts stats_to_json(ranked) # JSON formatting for non-TTY output
  end
end

main if __FILE__ == $PROGRAM_NAME

# That's all Folks!