# frozen_string_literal: true

module SmarterCSV
  # Root of this module's error hierarchy: every error class defined below inherits
  # from it, so rescuing SmarterCSVException catches all of them.
  class SmarterCSVException < StandardError
  end

  class HeaderSizeMismatch < SmarterCSVException
  end

  class IncorrectOption < SmarterCSVException
  end

  class ValidationError < SmarterCSVException
  end

  class DuplicateHeaders < SmarterCSVException
  end

  # previously known as MissingHeaders
  class MissingKeys < SmarterCSVException
  end

  class NoColSepDetected < SmarterCSVException
  end

  class KeyMappingError < SmarterCSVException
  end

  # Reads CSV data from +input+ and turns every data row into a Hash keyed by the headers.
  #
  # Data lines are read one at a time inside the main loop. Without a block the rows are
  # accumulated in +@result+; with a block they are yielded instead.
  #
  # Options consulted directly in this method (after +process_options+ has run):
  # +:force_utf8+, +:file_encoding+, +:verbose+, +:row_sep+, +:col_sep+, +:chunk_size+,
  # +:comment_regexp+, +:quote_char+, +:strip_whitespace+, +:remove_empty_hashes+ and
  # +:with_line_numbers+. The options hash is also passed on to the helpers called from here.
  #
  # @param input [String, #readline] a filename, or an input object that responds to +readline+.
  #   NOTE: the handle is closed in the +ensure+ clause whenever it responds to +close+ --
  #   this includes a handle that was passed in by the caller.
  # @param given_options [Hash] raw user options, handed to +process_options+
  # @yieldparam rows [Array<Hash>] when +:chunk_size+ is positive: a full chunk, or the final
  #   partial chunk; otherwise a one-element Array holding the current row. In chunked mode the
  #   yielded Array is cleared and reused after the block returns, so the block must not retain it.
  # @return [Integer, Array] with a block: +@chunk_count+ (only set and incremented here in
  #   chunked mode); without a block: +@result+, an Array of Hashes, or an Array of Arrays of
  #   Hashes in chunked mode.
  def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint/UnusedMethodArgument
    initialize_variables

    options = process_options(given_options)

    @enforce_utf8 = options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
    @verbose = options[:verbose]

    begin
      # anything that can +readline+ is used as-is; everything else is treated as a path
      fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")

      if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
        puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
      end

      # auto-detect the row separator
      options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
      # attempt to auto-detect column separator
      options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto

      skip_lines(fh, options)

      @headers, header_size = process_headers(fh, options)
      @headerA = @headers # @headerA is deprecated, use @headers

      puts "Effective headers:\n#{pp(@headers)}\n" if @verbose

      header_validations(@headers, options)

      # in case we use chunking.. we'll need to set it up..
      if options[:chunk_size].to_i > 0
        use_chunks = true
        chunk_size = options[:chunk_size].to_i
        @chunk_count = 0
        chunk = []
      else
        use_chunks = false
      end

      # now on to processing all the rest of the lines in the CSV file:
      # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
      until fh.eof?
        line = readline_with_counts(fh, options)

        # replace invalid byte sequences with options[:invalid_byte_sequence] to avoid errors
        line = enforce_utf8_encoding(line, options) if @enforce_utf8

        print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if @verbose

        next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any

        # cater for quoted csv data containing the row separator, in which case the row data is
        # split across multiple physical lines (see the sample content in
        # spec/fixtures/carriage_returns_rn.csv). An uneven number of quote characters means the
        # record is not complete yet.
        multiline = count_quote_chars(line, options[:quote_char]).odd?

        # `multiline` is re-evaluated inside the loop and is false again after a normal exit,
        # so remember separately that this record needed continuation lines (for the message below)
        spans_multiple_lines = multiline

        while multiline
          # NOTE(review): if the unbalanced line was the last one of the input, IO#readline
          # raises EOFError here -- confirm whether callers rely on that.
          next_line = fh.readline(options[:row_sep])
          next_line = enforce_utf8_encoding(next_line, options) if @enforce_utf8
          line += next_line
          @file_line_count += 1

          break if fh.eof? # Exit loop if end of file is reached

          multiline = count_quote_chars(line, options[:quote_char]).odd?
        end

        # :nocov:
        if spans_multiple_lines && @verbose
          print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count
        end
        # :nocov:

        line.chomp!(options[:row_sep])

        # --- SPLIT LINE & DATA TRANSFORMATIONS ------------------------------------------------------------
        dataA, _data_size = parse(line, options, header_size)

        dataA.map!(&:strip) if options[:strip_whitespace]

        # if all values are blank, then ignore this line
        next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))

        # --- HASH TRANSFORMATIONS ------------------------------------------------------------
        hash = @headers.zip(dataA).to_h

        hash = hash_transformations(hash, options)

        # --- HASH VALIDATIONS ----------------------------------------------------------------
        # will go here, and be able to:
        #  - validate correct format of the values for fields
        #  - required fields to be non-empty
        #  - ...
        # -------------------------------------------------------------------------------------

        next if options[:remove_empty_hashes] && hash.empty?

        puts "CSV Line #{@file_line_count}: #{pp(hash)}" if @verbose == '2' # very verbose setting
        # optional adding of csv_line_number to the hash to help debugging
        hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]

        # process the chunks or the resulting hash
        if use_chunks
          chunk << hash # append temp result to chunk

          # flush when the chunk is full or EOF is reached; while a chunk is still filling up
          # nothing else happens here. A partial chunk left over because trailing lines were
          # skipped via `next` is flushed after the loop.
          if chunk.size >= chunk_size || fh.eof?
            if block_given?
              yield chunk # do something with the hashes in the chunk in the block
            else
              @result << chunk.dup # Append chunk to result (use .dup to keep a copy after we do chunk.clear)
            end
            @chunk_count += 1
            chunk.clear # re-initialize for next chunk of data
          end
        elsif block_given? # no chunk handling
          yield [hash] # do something with the hash in the block (better to use chunking here)
        else
          @result << hash
        end
      end

      # print new line to retain last processing line message
      print "\n" if @verbose

      # handling of last chunk (`chunk` is nil when chunking is disabled):
      if chunk && !chunk.empty?
        if block_given?
          yield chunk # do something with the hashes in the chunk in the block
        else
          @result << chunk.dup # keep a copy, consistent with the in-loop flush
        end
        @chunk_count += 1
      end
    ensure
      # fh is nil when File.open itself raised; nil does not respond to :close
      fh.close if fh.respond_to?(:close)
    end

    if block_given?
      @chunk_count # when we do processing through a block we only care how many chunks we processed
    else
      @result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
    end
  end

  class << self
    # Counts the occurrences of +quote_char+ in +line+ that are not backslash-escaped.
    #
    # A backslash escapes exactly the one character that follows it (including another
    # backslash), so that character is neither counted nor able to start a new escape.
    #
    # @param line [String, nil] the text to scan
    # @param quote_char [String, nil] the quote character to look for
    # @return [Integer] number of unescaped quote characters; 0 when +line+ is nil or
    #   +quote_char+ is nil or empty
    def count_quote_chars(line, quote_char)
      return 0 if line.nil? || quote_char.nil? || quote_char.empty?

      quotes_seen = 0
      after_backslash = false

      line.each_char do |current|
        if after_backslash
          # the escaped character is consumed without being inspected
          after_backslash = false
        elsif current == '\\'
          after_backslash = true
        elsif current == quote_char
          quotes_seen += 1
        end
      end

      quotes_seen
    end

    # Tells whether +parse_csv_line_c+ is defined and callable from here.
    #
    # Only a +true+ answer is remembered in +@has_acceleration+; a +false+ answer is
    # re-evaluated on the next call.
    #
    # @return [Boolean]
    def has_acceleration?
      return @has_acceleration if @has_acceleration

      @has_acceleration = defined?(parse_csv_line_c) ? true : false
    end

    protected

    # Modelled on ActiveSupport's String#blank?, see:
    # https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
    # In the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
    BLANK_RE = /\A\s*\z/.freeze

    # Decides whether +value+ carries no data.
    #
    # - nil is blank
    # - a String is blank when it is empty or matches BLANK_RE (whitespace only)
    # - an Array is blank when all of its elements are blank (so is an empty Array)
    # - a Hash is blank when all of its values are blank; keys are ignored
    # - anything else is not blank
    #
    # @param value [Object]
    # @return [Boolean]
    def blank?(value)
      case value
      when nil then true
      when String then BLANK_RE.match?(value)
      when Array then value.all? { |item| blank?(item) }
      when Hash then value.each_value.all? { |item| blank?(item) }
      else false
      end
    end

    private

    # Returns +line+ as UTF-8 with invalid or undefined byte sequences replaced by
    # +options[:invalid_byte_sequence]+.
    #
    # The check whether this conversion is wanted at all is left to the caller; this
    # method always converts. Note that +force_encoding+ re-tags the given string in place
    # before the replaced copy is produced.
    #
    # @param line [String] the raw line; its encoding tag is changed to UTF-8
    # @param options [Hash] only +:invalid_byte_sequence+ is read here
    # @return [String] the converted copy
    def enforce_utf8_encoding(line, options)
      replacement = options[:invalid_byte_sequence]
      utf8_tagged = line.force_encoding('utf-8')

      utf8_tagged.encode('utf-8', invalid: :replace, undef: :replace, replace: replacement)
    end
  end
end