#!/usr/bin/env ruby -s

require 'ostruct'

# Build the list of candidate paths to analyse.
#
# Interactive use (STDIN is a terminal, or the -tty switch was given):
#   * inside a git work tree: ask git for its files, restricted to any
#     paths given on the command line;
#   * otherwise: walk the given paths (or the current directory) with find.
# Pipeline use: read one path per line from STDIN.
#
# External commands are started from argument arrays rather than shell
# strings, so command-line arguments reach git/find verbatim (paths with
# spaces or shell metacharacters are safe), and the result no longer depends
# on which shell /bin/sh is: the bash-only "&>" redirection is parsed by
# POSIX shells as "run in background", which always reports success.
source_files = if STDIN.tty? || $tty
  # nil (git not installed) and false (not a repository) both mean "no repo"
  inside_git_repo = system(
    'git', 'rev-parse', '--show-toplevel',
    out: File::NULL, err: File::NULL
  )
  if inside_git_repo
    # we're inside a git repo so
    # get list of files from git
    IO.popen(['git', 'ls-files', '-z', *ARGV], &:read).split("\0")
  else
    # we are not inside a git repo:
    # find all files in current dir
    search_roots = ARGV.empty? ? [Dir.pwd] : ARGV
    IO.popen(['find', *search_roots, '-print0'], &:read).split("\0")
  end
else
  # assume we're running it in a pipeline
  # and read list of filenames from STDIN
  STDIN.read.split($/).map(&:chomp)
end

# exclude binary files from stats
# (files with NUL in file header)
#
# much slower alternative:
#
# `egrep -q '\\x00' #{file}` ; $? == 0
#
# note: git itself inspects the first
# 8,000 bytes of a file, but looking
# at the first 16 bytes is fine for
# our purposes... for now :-)
# see buffer_is_binary() function
# in the "git" source repository!
# Drop every path that should not be counted. The checks run in a fixed
# order and stop at the first failure, so the file is only opened once it
# is known to exist, be a regular file and be non-empty.
source_files.delete_if do |path|
  keep = File.extname(path) != '.pdf'      && # PDF documents are skipped
         File.basename(path) !~ /\A\..*\z/ && # hidden ".*" files are skipped
         File.exist?(path)                 && # path must exist
         File.file?(path)                  && # ...and be a regular file
         File.size?(path)                  && # ...with at least one byte
         File.read(path, 16)["\0"].nil?       # ...and no NUL in its first 16 bytes
  next false if keep

  # with -verbose, report what is being left out
  warn("SKIPPING #{path}...") if $verbose
  true
end

# A line consisting solely of whitespace (or nothing at all).
BLANKS = %r{\A\s*\Z}.freeze

# Maps an extension key (e.g. '*.rb') to the pattern that recognises a line
# made up of nothing but a comment. Extensions sharing a comment syntax are
# listed together and expanded into one entry per extension.
#
# FIXME does not work for multi-line comments
#       (for the languages that support them)
COMMENTS = {
  %w[*.rb *.sh]    => %r{\A\s*(#.*)\s*\Z},
  %w[*.xml *.html] => %r{\A\s*(<!--.*-->)\s*\Z},
  %w[*.css]        => %r{\A\s*(/\*.*\*/)\s*\Z},
  %w[*.js]         => %r{\A\s*(//.*|/\*.*\*/)\s*\Z},
}.each_with_object({}) { |(ext_keys, pattern), table|
  ext_keys.each { |ext_key| table[ext_key] = pattern }
}.freeze

# Tally files, lines, blank lines and comment-only lines per extension.
# Keys look like '*.rb'; files without an extension are grouped under '*'.
source_stats = {}

source_files.each do |path|
  ext_key = '*' + File.extname(path)

  # first file seen for this extension: start all counters at zero
  tally = (source_stats[ext_key] ||= OpenStruct.new({
    file_count: 0, line_count: 0, blank_count: 0, comment_count: 0,
  }))

  # read as UTF-8 and fall back to ISO-8859-1 when the bytes are not valid UTF-8
  # FIXME what about file encodings other than these two???
  text = File.read(path, :encoding => 'UTF-8')
  text = File.read(path, :encoding => 'ISO-8859-1') unless text.valid_encoding?

  lines = text.lines

  tally.file_count  += 1
  tally.line_count  += lines.count
  tally.blank_count += lines.grep(BLANKS).count

  # comment lines are only counted for extensions with a known pattern
  comment_pattern = COMMENTS[ext_key]
  tally.comment_count += lines.grep(comment_pattern).count if comment_pattern
end

# Whatever is neither blank nor comment-only counts as code.
source_stats.each_value do |tally|
  tally.code_count = tally.line_count - tally.blank_count - tally.comment_count
end

# Choose the column used for ranking. The first switch that is set wins, in
# the order -files, -lines, -blank, -comment; with none of them set the
# ranking is by lines of code.
sort_metric =
  if    $files   then :file_count
  elsif $lines   then :line_count
  elsif $blank   then :blank_count
  elsif $comment then :comment_count
  else                :code_count # also what an explicit -code asks for
  end

# Re-order the table so the largest value of the chosen metric comes first.
source_stats = source_stats
  .sort_by { |_ext_key, tally| tally.public_send(sort_metric) }
  .reverse
  .to_h

# Append a grand-total row: each column is the sum of that column over all
# extensions, or 0 when there are none.
source_stats["TOTAL"] = OpenStruct.new(
  %i[file_count line_count blank_count comment_count code_count]
    .each_with_object({}) { |field, sums|
      sums[field] = source_stats.values.map(&field).inject(0, :+)
    }
)

#
# JSON formatting for non-TTY output
#

unless STDOUT.tty? || $tty
  require 'json'

  # Serialise an OpenStruct as a plain JSON object built from its attributes.
  class OpenStruct
    # @param args [Array] whatever the JSON generator passes along (normally
    #   its state object); forwarded untouched
    # @return [String] JSON text for this struct's attribute hash
    def to_json(*args)
      # Splat the arguments: passing the array itself would hand Hash#to_json
      # an Array instead of the generator state, discarding the formatting
      # and nesting depth requested by the caller.
      to_h.to_json(*args)
    end
  end

  puts source_stats.to_json

  exit
end

#
# fancy formatting for TTY output
#

class String
  # Returns a copy with thousands separators inserted into a number that sits
  # at the end of a line or directly in front of a decimal point.
  #
  # Only the text before the first "." of each line is touched, so the
  # fractional part is never grouped ("1234.5678" => "1,234.5678").
  #
  # @return [String] a new string; the receiver is left unchanged
  def commify
    each_line.map { |line|
      whole, point, fraction = line.partition('.')
      whole.gsub(/(\d)(?=(\d{3})+$)/, '\1,') + point + fraction
    }.join
  end
end

class Numeric
  # Formats the number's default string form with thousands separators.
  #
  # @return [String]
  def commify
    to_s.commify
  end
end

# Swap every numeric column for its comma-grouped string form, ready for
# printing.
source_stats.each_value do |tally|
  %i[file_count line_count blank_count comment_count code_count].each do |field|
    tally[field] = tally[field].commify
  end
end

# widest_file_ext      = source_stats.keys.map(&:length).max
# widest_file_count    = source_stats.values.map(&:file_count).map(&:length).max
# widest_line_count    = source_stats.values.map(&:line_count).map(&:length).max
# widest_blank_count   = source_stats.values.map(&:blank_count).map(&:length).max
# widest_comment_count = source_stats.values.map(&:comment_count).map(&:length).max
# widest_code_count    = source_stats.values.map(&:code_count).map(&:length).max

# Take the grand-total row out of the table so it can be printed as the
# footer; its values are in column order (files, lines, blank, comment, code).
totals = source_stats.delete("TOTAL").to_h.values

TEMPLATE = " %-13s %12s %12s %12s %12s %12s".freeze
DIVIDER  = ('-' * 80).freeze # `loc` uses 80 columns

# a single table row with a divider line above and below it
framed_row = "%s\n#{TEMPLATE}\n%s"

# header
puts format(framed_row, DIVIDER, 'Language', 'Files', 'Lines', 'Blank', 'Comment', 'Code', DIVIDER)

# one row per extension, in the order the table is already sorted in
source_stats.each_pair do |ext_key, tally|
  columns = %i[file_count line_count blank_count comment_count code_count].map { |field| tally[field] }
  puts format(TEMPLATE, ext_key, *columns)
end

# footer
puts format(framed_row, DIVIDER, "Total", *totals, DIVIDER)

# That's all Folks!