#!/usr/bin/env ruby
# frozen_string_literal: true

require 'optparse'
require 'csv'

# Command-line options collected by the handlers below, keyed by symbol.
opts = {}
parser = OptionParser.new do |options|
  options.banner = "USAGE:
    #{$PROGRAM_NAME} [opts]
    EXAMPLE w/ Tenable Nessus Results:
    #{$PROGRAM_NAME} \\
      --csv-a vuln_scan1.csv \\
      --csv-b vuln_scan2.csv \\
      --csv-diff vuln_scan_diff.csv \\
      --exclude-column-names 'Synopsis,Description,Solution,See Also,Plugin Output,Asset UUID,Vulnerability State,IP Address,FQDN,NetBios,OS,MAC Address,Plugin Family,CVSS Base Score,CVSS Temporal Score,CVSS Temporal Vector,CVSS Vector,CVSS3 Base Score,CVSS3 Temporal Score,CVSS3 Temporal Vector,CVSS3 Vector,System Type,Host Start,Host End,Vulnerability Priority Rating (VPR),First Found,Last Found,Host Scan Schedule ID,Host Scan ID,Indexed At,Last Authenticated Results Date,Last Unauthenticated Results Date,Tracked,Risk Factor,Severity,Original Severity,Modification,Plugin Family ID,Plugin Type,Plugin Version,Service,Plugin Modification Date,Plugin Publication Date,Checks for Malware,Exploit Available,Exploited by Malware,Exploited by Nessus,CANVAS,D2 Elliot,Metasploit,Core Exploits,ExploitHub,Default Account,Patch Available,In The News,Unsupported By Vendor,Last Fixed'

    EXAMPLE w/ OpenVAS Results:
    #{$PROGRAM_NAME} \\
      --csv-a vuln_scan1.csv \\
      --csv-b vuln_scan2.csv \\
      --csv-diff vuln_scan_diff.csv \\
      --exclude-column-names 'Timestamp, Result ID'
  "

  options.on('-aCSV', '--csv-a=CSV', '<Required - First CSV to Compare)>') do |c1|
    opts[:c1_path] = c1
  end

  options.on('-bCSV', '--csv-b=CSV', '<Required - Second CSV to Compare)>') do |c2|
    opts[:c2_path] = c2
  end

  options.on('-dDIFF', '--csv-diff=DIFF', '<Required - Path of CSV Diff to Generate)>') do |d|
    opts[:diff_path] = d
  end

  options.on('-eNAMES', '--exclude-column-names=NAMES', '<Optional - Comma-Delimited List of Column Names to Exclude)>') do |n|
    opts[:column_names_to_exclude] = n
  end

  options.on('-n', '--no-headers', '<Optional - Redact CSV Headers in CSV Diff>') do |_negated|
    # OptionParser handles a literal `--no-...` long switch as a negated
    # boolean and yields false here, so storing the yielded value would leave
    # the flag permanently off. Record the flag's presence explicitly instead.
    opts[:no_headers] = true
  end
end
parser.parse!

# The three paths are mandatory. Show the usage text and stop when any of them
# is missing (this also covers the "no options at all" case) rather than
# failing later with an opaque error while opening a nil path.
# The help text is printed straight from the parser object, so it does not
# depend on the script being executable or reachable through a shell.
required_keys = %i[c1_path c2_path diff_path]
unless required_keys.all? { |key| opts[key] }
  puts parser
  exit 1
end

# Removes, in place, the cells at the given positions from every row.
#
# opts[:csv]               - Array of rows (each row an Array of cells).
# opts[:columns_index_arr] - Array of zero-based column positions to drop;
#                            entries that match no position are ignored.
#
# Returns the same opts[:csv] collection, whose rows have been modified.
def ommit_columns_by_index(opts = {})
  rows = opts[:csv]
  positions_to_drop = opts[:columns_index_arr]

  rows.each do |row|
    # Positions are evaluated against the row as it was before any removal,
    # then the surviving cells are written back into the very same Array.
    survivors = row.reject.with_index do |_cell, position|
      positions_to_drop.include?(position)
    end
    row.replace(survivors)
  end
end

# Reads every row of the CSV file at +path+ into an Array of Arrays.
# The block form of CSV.open is used so the file handle is closed as soon as
# the rows have been read. The file is opened in binary mode ('rb').
def csv_diff_read_rows(path)
  CSV.open(path, 'rb', col_sep: ',', quote_char: '"', &:read)
end

# Builds the comparison key for +row+: the cells whose positions are not
# listed in +excluded_indexes+, each converted with #to_s. An empty unquoted
# cell (nil) and an empty quoted cell ("") therefore compare as equal, while
# commas inside a cell can no longer make two different rows look identical.
# The row itself is left untouched.
def csv_diff_row_key(row, excluded_indexes)
  row.reject.with_index { |_cell, index| excluded_indexes.include?(index) }.map(&:to_s)
end

# Writes to opts[:diff_path] the rows of the larger CSV (by row count) that
# have no counterpart in the smaller CSV.
#
# opts[:c1_path]                 - path of the first CSV.
# opts[:c2_path]                 - path of the second CSV.
# opts[:diff_path]               - path of the CSV to (over)write.
# opts[:no_headers]              - when truthy, the first row of the larger
#                                  CSV is not emitted as a header line.
# opts[:column_names_to_exclude] - comma-delimited column names that are
#                                  ignored while comparing rows; surrounding
#                                  whitespace is stripped and unknown names
#                                  are skipped.
#
# Excluded columns only affect the comparison: rows are written with all of
# their original columns. The comparison is one-directional - rows that exist
# only in the smaller CSV are not reported. Excluded column positions are
# looked up in the first row of the smaller CSV and applied to both files.
#
# Returns the rows of the larger CSV.
def csv_diff(opts = {})
  diff_path = opts[:diff_path]
  no_headers = opts[:no_headers]
  excluded_names = opts[:column_names_to_exclude].to_s.split(',').map(&:strip)

  csv1 = csv_diff_read_rows(opts[:c1_path])
  csv2 = csv_diff_read_rows(opts[:c2_path])

  # When both files have the same number of rows, the second one is treated
  # as the larger CSV.
  larger_rows, smaller_rows = csv1.length > csv2.length ? [csv1, csv2] : [csv2, csv1]

  # Array() keeps an empty smaller CSV (no header row at all) from raising.
  header_names = Array(smaller_rows.first)
  excluded_indexes = excluded_names.map { |name| header_names.find_index(name) }.compact

  # Hash lookup keeps the comparison linear instead of rescanning the smaller
  # CSV once per row of the larger one.
  smaller_keys = smaller_rows.each_with_object({}) do |row, seen|
    seen[csv_diff_row_key(row, excluded_indexes)] = true
  end

  CSV.open(diff_path, 'w', col_sep: ',', quote_char: '"', force_quotes: true) do |csv|
    header = larger_rows.first
    csv << header if header && !no_headers

    larger_rows.each do |row|
      csv << row unless smaller_keys.key?(csv_diff_row_key(row, excluded_indexes))
    end
  end
end

# Hand the parsed command-line options over to csv_diff.
c1_path = opts[:c1_path]
c2_path = opts[:c2_path]
diff_path = opts[:diff_path]
column_names_to_exclude = opts[:column_names_to_exclude]

# OptionParser handles a literal `--no-headers` switch as a negated boolean
# and passes false to its handler, so a truthiness test on the stored value
# would never enable header redaction. The flag counts as set whenever the
# key is present, whatever value was stored for it.
no_headers = opts.key?(:no_headers)

# Write the rows of the larger CSV that are missing from the smaller one.
csv_diff(
  c1_path: c1_path,
  c2_path: c2_path,
  diff_path: diff_path,
  no_headers: no_headers,
  column_names_to_exclude: column_names_to_exclude
)