#!/usr/bin/env ruby -s
#
# Counts files, lines, blank lines, comment lines and code lines, grouped
# by file extension, and prints the result as a table (TTY) or as JSON
# (anything else).
#
# The list of files comes from, in order of preference:
#   * `git ls-files`  when STDIN is a TTY and we are inside a git repo
#   * `find`          when STDIN is a TTY and we are not inside a git repo
#   * STDIN           otherwise (one filename per line)
#
# Switches (parsed by `ruby -s`, which turns `-name` into `$name`):
#   -tty      behave as if STDIN/STDOUT were a TTY
#   -verbose  report every skipped file on STDERR
#   -files | -lines | -blank | -comment | -code
#             metric to sort by (default: code)
#
# NOTE(review): on Linux, `env` may receive "ruby -s" as a single argument
# and fail to find it; `#!/usr/bin/env -S ruby -s` is the usual workaround
# -- confirm on the platforms this script must support.
#
# rubocop:disable Layout/AlignHash
# rubocop:disable Layout/ElseAlignment
# rubocop:disable Layout/EndAlignment
# rubocop:disable Layout/IndentationWidth
#
# rubocop:disable Style/EmptyCaseCondition
# rubocop:disable Style/GlobalVars
# rubocop:disable Style/RegexpLiteral
#

require 'English'
require 'ostruct'

# The per-file / per-extension metrics, in the order the table prints them.
METRICS = %i[file_count line_count blank_count comment_count code_count].freeze

# Returns a truthy value when the current directory is inside a git repo.
#
# Deliberately avoids the shell: `cmd &> /dev/null` is a bashism, and under
# a POSIX /bin/sh it backgrounds the command and always reports success.
def inside_git_repo?
  system('git', 'rev-parse', '--show-toplevel', out: File::NULL, err: File::NULL)
end

# Runs +command+ (an argv list, so no shell quoting issues with paths that
# contain spaces or metacharacters) and returns its NUL-separated output
# as an array of strings.
def nul_separated_output(*command)
  IO.popen(command, &:read).split("\0")
end

# Returns true when +file+ must be excluded from the stats.
#
# Binary files are detected by looking for a NUL byte in the file header.
#
# much slower alternative:
#
#   `egrep -q '\\x00' #{file}` ; $? == 0
#
# note: git itself uses the first 8,000 characters of a file, but looking
# at the first 16 is fine for our purposes... for now :-)
# see buffer_is_binary() function in the "git" source repository!
def skip_file?(file)
  File.extname(file) == '.pdf' ||            # skip bl**dy PDF documents
    File.basename(file).start_with?('.') ||  # skip hidden ".*" files
    !File.exist?(file) ||                    # skip non-existent paths
    !File.file?(file) ||                     # skip directories
    !File.size?(file) ||                     # skip empty files
    !File.readable?(file) ||                 # skip files we cannot open
    File.binread(file, 16).include?("\0")    # skip binary files
end

# Formats +value+ with thousands separators, e.g. 1234567 => "1,234,567".
def commify(value)
  value.to_s.gsub(/(\d)(?=(\d{3})+(\..*)?$)/, '\1,')
end

source_files =
  if STDIN.tty? || $tty
    if inside_git_repo?
      # we're inside a git repo so
      # get list of files from git
      nul_separated_output('git', 'ls-files', '-z', *ARGV)
    else
      # we are not inside a git repo:
      # find all files in current dir
      nul_separated_output('find', *(ARGV.empty? ? [Dir.pwd] : ARGV), '-print0')
    end
  else
    # assume we're running it in a pipeline
    # and read list of filenames from STDIN
    STDIN.read.split($RS).map(&:chomp)
  end

source_files.delete_if do |file|
  skip = skip_file?(file)
  warn("SKIPPING #{file}...") if skip && $verbose
  skip
end

# Regex matching a blank line, looked up by '*.ext'; every extension
# currently shares the same default.
BLANKS = Hash.new(%r{\A\s*\Z}.freeze) # TODO: ext-specific regex for blanks?

# Regex matching a line that consists solely of a comment, by '*.ext'.
# Extensions that are not listed have no comment syntax as far as this
# script is concerned (lookup returns nil => zero comment lines).
COMMENTS = {
  # FIXME: does not work for multi-line comments
  #        (for the languages that support them)
  '*.rb'   => %r{\A\s*(#.*)\s*\Z},
  '*.sh'   => %r{\A\s*(#.*)\s*\Z},
  # The capture group must not be empty: an empty group makes this regex
  # equal to the blank-line regex, so blank lines get counted twice.
  '*.xml'  => %r{\A\s*(<!--.*-->)\s*\Z},
  '*.html' => %r{\A\s*(<!--.*-->)\s*\Z},
  '*.css'  => %r{\A\s*(/\*.*\*/)\s*\Z},
  '*.js'   => %r{\A\s*(//.*|/\*.*\*/)\s*\Z},
}.freeze

# Memoising lookup: [file, blank_regex, comment_regex] => line statistics
# (an OpenStruct with line_count, blank_count, comment_count, code_count).
STATS_FOR_FILE = Hash.new do |stats_for_file, (file, blank_re, comment_re)|
  file_content = File.read(file, encoding: 'UTF-8')
  unless file_content.valid_encoding?
    # FIXME: what about file encodings other than these two???
    file_content = File.read(file, encoding: 'ISO-8859-1')
  end

  lines         = file_content.lines
  line_count    = lines.count
  blank_count   = lines.grep(blank_re).count
  comment_count = comment_re ? lines.grep(comment_re).count : 0

  stats_for_file[[file, blank_re, comment_re]] = OpenStruct.new(
    line_count:    line_count,
    blank_count:   blank_count,
    comment_count: comment_count,
    code_count:    (line_count - blank_count - comment_count),
  )
end

# Per-extension accumulator, created (all zeroes) on first access.
STATS_FOR = Hash.new do |stats_for_ext, ext|
  stats_for_ext[ext] = OpenStruct.new(
    file_count:    0,
    line_count:    0,
    blank_count:   0,
    comment_count: 0,
    code_count:    0,
  )
end

source_files.each do |file|
  ext = '*' + File.extname(file) # e.g. '*.rb' or '*' if no ext!

  stats_for_file = STATS_FOR_FILE[[file, BLANKS[ext], COMMENTS[ext]]]
  stats_for_ext  = STATS_FOR[ext]

  stats_for_ext.file_count    += 1
  stats_for_ext.line_count    += stats_for_file.line_count
  stats_for_ext.blank_count   += stats_for_file.blank_count
  stats_for_ext.comment_count += stats_for_file.comment_count
  stats_for_ext.code_count    += stats_for_file.code_count
end

sort_metric = case
              when $files   then :file_count
              when $lines   then :line_count
              when $blank   then :blank_count
              when $comment then :comment_count
              when $code    then :code_count
              else               :code_count
              end

# Extensions ordered from the largest to the smallest value of sort_metric.
source_stats = STATS_FOR.sort_by { |_, stats| stats[sort_metric] }.reverse.to_h

totals = OpenStruct.new(
  METRICS.map do |metric|
    [metric, source_stats.values.map(&metric).reduce(0, :+)]
  end.to_h
)
source_stats['TOTAL'] = totals

DIVIDER  = ('-' * 80) # because loc uses 80 columns
TEMPLATE = ' %-13s %12s %12s %12s %12s %12s'.freeze
# TODO: size the columns from the widest value instead of fixed widths.

# Renders one table line: +label+ followed by the commified METRICS
# of +stats+.
def table_row(label, stats)
  format(TEMPLATE, label, *METRICS.map { |metric| commify(stats[metric]) })
end

if STDOUT.tty? || $tty
  #
  # fancy formatting for TTY output
  #
  puts DIVIDER,
       format(TEMPLATE, 'Language', 'Files', 'Lines', 'Blank', 'Comment', 'Code'),
       DIVIDER

  source_stats.each do |file_ext, stats|
    # the totals get their own footer below; do not list them twice
    next if file_ext == 'TOTAL'

    puts table_row(file_ext, stats)
  end

  puts DIVIDER, table_row('Total', totals), DIVIDER
else
  #
  # JSON formatting for non-TTY output
  #
  require 'json'

  # OpenStruct has no useful to_json of its own, so serialise plain hashes.
  puts source_stats.map { |ext, stats| [ext, stats.to_h] }.to_h.to_json
end

#
# rubocop:enable Style/RegexpLiteral
# rubocop:enable Style/GlobalVars
# rubocop:enable Style/EmptyCaseCondition
#
# rubocop:enable Layout/IndentationWidth
# rubocop:enable Layout/EndAlignment
# rubocop:enable Layout/ElseAlignment
# rubocop:enable Layout/AlignHash
#

# That's all Folks!