# frozen_string_literal: true

# Usage:
#   GitLS.files -> Array of strings as files.
#   This will be identical output to git ls-files
require 'stringio'

module GitLS # rubocop:disable Metrics/ModuleLength
  class Error < StandardError; end

  class << self
    def files(path = nil)
      path = path ? ::File.join(path, '.git/index') : '.git/index'

      read(path, false)
    end

    private

    def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      begin
        # reading the whole file into memory is faster than lots of ::File#read
        # the biggest it's going to be is 10s of megabytes, well within ram.
        file = ::StringIO.new(::File.read(path, mode: 'rb'))
      rescue ::Errno::ENOENT => e
        raise ::GitLS::Error, "Not a git directory: #{e.message}"
      end

      buf = ::String.new
      # 4-byte signature:
      # The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
      # 4-byte version number:
      # The current supported versions are 2, 3 and 4.
      # 32-bit number of index entries.
      sig = file.read(4, buf)
      raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'

      git_index_version = file.read(4, buf).unpack1('N')
      entries = file.read(4, buf).unpack1('N')

      files = ::Array.new(entries)
      files = case git_index_version
      when 2 then files_2(files, file)
      when 3 then files_3(files, file)
      when 4 then files_4(files, file)
      else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
      end

      read_extensions(files, file, path, buf)
    end

    def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
      extension = file.read(4, buf)
      if extension == 'link'
        read_link_extension(files, file, path, buf)
      elsif extension.match?(/\A[A-Z]{4}\z/)
        size = file.read(4, buf).unpack1('N')
        file.seek(size, 1)
        read_extensions(files, file, path, buf)
      else
        return files if file.seek(16, 1) && file.eof?

        raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
      end
    end

    def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
      file.seek(4, 1) # skip size

      sha = file.read(20, buf)

      split_files = read("#{::File.dirname(path)}/sharedindex.#{sha.unpack1('H*')}", false)

      ewah_each_value(file, buf) do |pos|
        split_files[pos] = nil
      end

      ewah_each_value(file, buf) do |pos|
        replacement_file = files.shift
        # the documentation *implies* that this *may* get a new filename
        # i can't get it to happen though
        # :nocov:
        split_files[pos] = replacement_file unless replacement_file.empty?
        # :nocov:
      end

      split_files.compact!
      split_files.concat(files)
      split_files.sort!

      read_extensions(split_files, file, path, buf)
    end

    # format is defined here:
    # https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
    def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      uncompressed_pos = 0

      file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
      compressed_bytes = file.read(4, buf).unpack1('N') * 8

      final_file_pos = file.pos + compressed_bytes

      until file.pos == final_file_pos
        run_length_word = file.read(8, buf).unpack1('Q>')
        # 1st bit
        run_bit = run_length_word & 1
        # the next 32 bits, masked, multiplied by 64
        run_length = ((run_length_word / 0b1_0) & 0xFFFF_FFFF) * 64
        # the next 31 bits
        literal_length = (run_length_word / 0b100000000_00000000_00000000_00000000_0)

        if run_bit == 1
          run_length.times do
            yield uncompressed_pos
            uncompressed_pos += 1
          end
        else
          uncompressed_pos += run_length
        end

        next unless literal_length > 0

        words = file.read(8 * literal_length, buf).unpack('B64' * literal_length)
        words.each do |word|
          word.each_char.reverse_each do |char|
            yield(uncompressed_pos) if char == '1'

            uncompressed_pos += 1
          end
        end
      end

      file.seek(4, 1) # bitmap metadata for adding to bitmaps
    end

    def files_2(files, file) # rubocop:disable Metrics/MethodLength
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)

        length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 byte length
        if length < 0xFFF
          path = file.read(length)
          # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
          # and length is a 12 byte number: 4096 max.
          path = file.readline("\0").chop!
          file.seek(-1, 1)
          # :nocov:
        end
        file.seek(8 - ((length - 2) % 8), 1) # 1-8 bytes padding of nuls
        path.force_encoding(Encoding::UTF_8)
        path
      end
    end

    def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
        flags = file.getbyte
        extended_flag = (flags & 0b0100_0000) > 0
        length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
        file.seek(2, 1) if extended_flag

        if length < 0xFFF
          path = file.read(length)
          # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
          # and length is a 12 byte number: 4096 max.
          path = file.readline("\0").chop!
          file.seek(-1, 1)
          # :nocov:
        end
        file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1) # 1-8 bytes padding of nuls
        path.force_encoding(Encoding::UTF_8)
        path
      end
    end

    def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      prev_entry_path = ''
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
        flags = file.getbyte
        extended_flag = (flags & 0b0100_0000) > 0
        length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
        file.seek(2, 1) if extended_flag

        # documentation for this number from
        # https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
        # offset encoding:
        #   n bytes with MSB set in all but the last one.
        #   The offset is then the number constructed by
        #   concatenating the lower 7 bit of each byte, and
        #   for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
        #   to the result.
        read_offset = 0
        prev_read_offset = file.getbyte
        n = 1
        while (prev_read_offset & 0b1000_0000) > 0
          read_offset += (prev_read_offset & 0b0111_1111)
          read_offset += 2**(7 * n)
          n += 1
          prev_read_offset = file.getbyte
        end
        read_offset += prev_read_offset

        initial_part_length = prev_entry_path.bytesize - read_offset

        if length < 0xFFF
          rest = file.read(length - initial_part_length)
          file.seek(1, 1) # the NUL
          # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
          # and length is a 12 byte number: 4096 max.
          rest = file.readline("\0").chop!
          file.seek(-1, 1)
          # :nocov:
        end

        prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
        prev_entry_path.force_encoding(Encoding::UTF_8)
      end
    end
  end
end