require "fixed_width_file_parser/version"

module FixedWidthFileParser
  # Parse a fixed width file, yielding one hash per non-blank line based on
  # the fields passed in.
  #
  # Lines that are empty after removing the line terminator are skipped.
  # Every extracted value is stripped of surrounding whitespace; a field
  # whose position lies beyond the end of the line yields `nil`.
  #
  # @param filepath [String] The path to the file to be parsed.
  # @param fields [Array(Hash{name => String, position => Range|Integer})]
  #   An array of field hashes, each containing a `name` and a `position`.
  # @param options [Hash] Optional settings.
  # @option options [Boolean] :force_utf8_encoding (true) Transcode every line
  #   to UTF-8 before extracting fields, dropping bytes that cannot be
  #   converted.
  # @yield [Hash] Yields a hash (symbolized field name => value) per line.
  # @return [nil, Enumerator] `nil` when a block is given, otherwise an
  #   Enumerator over the row hashes.
  # @raise [RuntimeError] If `filepath` is not a String, or `fields` is not a
  #   non-empty Array of hashes with a `name` and a Range/Integer `position`.
  #
  # @example
  #   filepath = 'path/to/file'
  #   fields = [
  #     { name: 'first_name', position: 0..10 },
  #     { name: 'middle_initial', position: 11 },
  #     { name: 'last_name', position: 12..25 }
  #   ]
  #
  #   FixedWidthFileParser.parse(filepath, fields) do |row|
  #     puts row
  #   end
  def self.parse(filepath, fields, options = {})
    # Set options, or use default
    force_utf8_encoding = options.fetch(:force_utf8_encoding, true)

    validate_arguments!(filepath, fields)
    return enum_for(:parse, filepath, fields, options) unless block_given?

    # Explicit GC passes around the parse; presumably meant to keep memory
    # usage down when processing large files.
    GC.start

    # The block form of File.open closes the handle even when the caller's
    # block (or the field extraction) raises.
    File.open(filepath) do |file|
      file.each_line do |line|
        row = extract_fields(line, fields, force_utf8_encoding)
        yield(row) if row
      end
    end

    GC.start

    nil
  end

  # Parse a fixed width file in slices of `batch_size` lines, yielding one
  # hash per non-blank line and running a garbage-collection pass after each
  # slice.
  #
  # NOTE: unlike {parse}, this method skips the first line of the file by
  # default. Pass `skip_lines: 0` to process every line.
  #
  # @param filepath [String] The path to the file to be parsed.
  # @param fields [Array(Hash{name => String, position => Range|Integer})]
  #   An array of field hashes, each containing a `name` and a `position`.
  # @param options [Hash] Optional settings.
  # @option options [Integer] :batch_size (1000) Number of lines read per
  #   slice (and therefore between GC passes).
  # @option options [Boolean] :force_utf8_encoding (true) Transcode every line
  #   to UTF-8 before extracting fields, dropping bytes that cannot be
  #   converted.
  # @option options [Integer] :skip_lines (1) Number of leading lines to
  #   discard before parsing starts.
  # @yield [Hash] Yields a hash (symbolized field name => value) per line.
  # @return [nil, Enumerator] `nil` when a block is given, otherwise an
  #   Enumerator over the row hashes.
  # @raise [RuntimeError] If `filepath` is not a String, or `fields` is not a
  #   non-empty Array of hashes with a `name` and a Range/Integer `position`.
  def self.parse_in_batches(filepath, fields, options = {})
    # Set options, or use default
    batch_size = options.fetch(:batch_size, 1000)
    force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
    skip_lines = options.fetch(:skip_lines, 1)

    validate_arguments!(filepath, fields)
    return enum_for(:parse_in_batches, filepath, fields, options) unless block_given?

    GC.start

    File.open(filepath) do |file|
      # `lazy` keeps the file streaming: only one slice of lines is held in
      # memory at a time.
      file.lazy.drop(skip_lines).each_slice(batch_size) do |lines|
        lines.each do |line|
          row = extract_fields(line, fields, force_utf8_encoding)
          yield(row) if row
        end

        GC.start
      end
    end

    nil
  end

  # Check the arguments shared by {parse} and {parse_in_batches}, raising a
  # RuntimeError that describes the first problem found.
  def self.validate_arguments!(filepath, fields)
    raise '`filepath` must be a String' unless filepath.is_a?(String)
    raise '`fields` must be an Array' unless fields.is_a?(Array)
    raise '`fields` must contain at least 1 item' if fields.empty?

    # `respond_to?` guards against non-hash entries, which would otherwise
    # fail with an unhelpful NoMethodError.
    unless fields.all? { |item| item.respond_to?(:key?) && item.key?(:name) && item.key?(:position) }
      raise 'Each field hash must include a `name` and a `position`'
    end

    unless fields.all? { |item| item[:position].is_a?(Range) || item[:position].is_a?(Integer) }
      raise "Each field's `position` must be a Range or an Integer"
    end
  end

  # Turn one raw line into a hash of stripped field values keyed by the
  # symbolized field name. Returns nil for blank lines.
  def self.extract_fields(line, fields, force_utf8_encoding)
    # chomp removes a trailing "\n" or "\r\n", so a line holding only a line
    # terminator counts as blank.
    return nil if line.chomp.empty?

    if force_utf8_encoding
      # Handle UTF Invalid Byte Sequence Errors
      # e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
      # The bytes are treated as binary, so every byte without an ASCII
      # mapping is dropped. That removes invalid sequences, but also valid
      # non-ASCII characters.
      line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    end

    fields.each_with_object({}) do |field, row|
      # String#[] returns nil when the position is past the end of the line.
      value = line[field[:position]]
      row[field[:name].to_sym] = value && value.strip
    end
  end

  private_class_method :validate_arguments!, :extract_fields
end