lib/git_ls.rb in git_ls-0.4.0 vs lib/git_ls.rb in git_ls-0.5.0

- old
+ new

# frozen_string_literal: true

# Usage:
# GitLS.files -> Array of strings as files.
# This will be identical output to git ls-files

require 'stringio'

# Pure-ruby reader for the git index file (`.git/index`), supporting index
# versions 2, 3 and 4 and the split-index ("link") extension.
module GitLS # rubocop:disable Metrics/ModuleLength
  # Raised when the index is missing, is not an index file, or uses a
  # version / required extension this reader does not understand.
  class Error < StandardError; end

  # Byte size of the checksum that terminates every index file read here.
  CHECKSUM_BYTESIZE = 20

  class << self
    # List the paths recorded in a repository's index.
    #
    # @param path [String, nil] repository root; nil means the current directory
    # @return [Array<String>] UTF-8 tagged paths, in index order
    # @raise [GitLS::Error] when the index can't be found or understood
    def files(path = nil)
      path = path ? ::File.join(path, '.git/index') : '.git/index'

      read(path, false)
    end

    private

    # Parse the index file at +path+ and return its paths.
    # The second parameter is unused and only kept so the arity is unchanged.
    def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      begin
        # reading the whole file into memory is faster than lots of ::File#read
        # the biggest it's going to be is 10s of megabytes, well within ram.
        file = ::StringIO.new(::File.read(path, mode: 'rb'))
      rescue ::Errno::ENOENT, ::Errno::ENOTDIR => e
        raise ::GitLS::Error, "Not a git directory: #{e.message}"
      end

      buf = ::String.new
      # 4-byte signature:
      #   The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
      # 4-byte version number:
      #   The current supported versions are 2, 3 and 4.
      # 32-bit number of index entries.
      sig = file.read(4, buf)
      raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'

      header = file.read(8, buf)
      # a short read here would otherwise surface as NoMethodError on nil
      raise ::GitLS::Error, "Truncated .git/index header at '#{path}'" unless header && header.bytesize == 8

      git_index_version, entries = header.unpack('NN')

      files = ::Array.new(entries)
      files =
        case git_index_version
        when 2 then files_2(files, file)
        when 3 then files_3(files, file)
        when 4 then files_4(files, file)
        else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
        end

      read_extensions(files, file, path, buf)
    end

    # Consume every extension between the entries and the trailing checksum.
    # Returns the (possibly merged, for split indexes) list of paths.
    def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
      # Whatever is left once only the checksum remains is not an extension.
      # Deciding by remaining size (rather than by looking at the next 4 bytes)
      # means a checksum that happens to start with 'link' or four capital
      # letters is never mistaken for an extension signature.
      while file.size - file.pos > CHECKSUM_BYTESIZE
        extension = file.read(4, buf)
        if extension == 'link'
          files = read_link_extension(files, file, path, buf)
        elsif extension.match?(/\A[A-Z]{4}\z/)
          # optional extension we don't need: skip its payload
          size = file.read(4, buf).unpack1('N')
          file.seek(size, 1)
        else
          raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
        end
      end

      files
    end

    # Merge this (split) index's entries into the shared index it links to.
    # Returns the merged, sorted list; +files+ is consumed in the process.
    def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
      file.seek(4, 1) # skip size

      sha = file.read(20, buf).unpack1('H*')
      # an all-zero hash means there is no shared index file to merge with
      split_files =
        if sha.delete('0').empty?
          []
        else
          read("#{::File.dirname(path)}/sharedindex.#{sha}", false)
        end

      # delete bitmap: positions in the shared index that are removed
      ewah_each_value(file, buf) do |pos|
        split_files[pos] = nil
      end

      # replace bitmap: the nth set bit is replaced by the nth entry of this index
      ewah_each_value(file, buf) do |pos|
        replacement_file = files.shift
        # the documentation *implies* that this *may* get a new filename
        # i can't get it to happen though
        # :nocov:
        split_files[pos] = replacement_file unless replacement_file.empty?
        # :nocov:
      end

      split_files.compact!
      split_files.concat(files) # whatever wasn't a replacement is an addition
      split_files.sort!
      split_files
    end

    # Yield the position of every set bit in the EWAH bitmap at the cursor.
    # format is defined here:
    # https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
    def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      uncompressed_pos = 0

      file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
      compressed_bytes = file.read(4, buf).unpack1('N') * 8
      final_file_pos = file.pos + compressed_bytes

      while file.pos < final_file_pos
        run_length_word = file.read(8, buf).unpack1('Q>')
        # 1st bit
        run_bit = run_length_word & 1
        # the next 32 bits, masked, multiplied by 64 (bits per word)
        run_length = ((run_length_word >> 1) & 0xFFFF_FFFF) << 6
        # the remaining 31 bits
        literal_length = run_length_word >> 33

        if run_bit == 1
          run_length.times do
            yield uncompressed_pos
            uncompressed_pos += 1
          end
        else
          uncompressed_pos += run_length
        end

        next unless literal_length > 0

        file.read(8 * literal_length, buf).unpack('Q>*').each do |word|
          # least significant bit is the lowest position
          64.times do |bit|
            yield(uncompressed_pos + bit) if word[bit] == 1
          end
          uncompressed_pos += 64
        end
      end

      file.seek(4, 1) # bitmap metadata for adding to bitmaps
    end

    # Read the paths of a version 2 index into +files+ (one per slot).
    def files_2(files, file) # rubocop:disable Metrics/MethodLength
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)

        length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 bit length
        if length < 0xFFF
          path = file.read(length)
        # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # the length field is saturated: the real name runs up to the first NUL
          path = file.readline("\0").chop!
          file.seek(-1, 1) # leave the NUL to be counted as padding
          # :nocov:
        end

        # 1-8 bytes padding of nuls, based on the real size of the name
        file.seek(8 - ((path.bytesize - 2) % 8), 1)
        path.force_encoding(Encoding::UTF_8)
        path
      end
    end

    # Read the paths of a version 3 index into +files+ (one per slot).
    # Same as version 2 plus an optional 2 byte extended flags field.
    def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
        flags = file.getbyte
        extended_flag = (flags & 0b0100_0000) > 0
        length = (flags & 0xF) * 256 + file.getbyte # find the 12 bit length
        file.seek(2, 1) if extended_flag

        if length < 0xFFF
          path = file.read(length)
        # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # the length field is saturated: the real name runs up to the first NUL
          path = file.readline("\0").chop!
          file.seek(-1, 1) # leave the NUL to be counted as padding
          # :nocov:
        end

        # 1-8 bytes padding of nuls
        file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1)
        path.force_encoding(Encoding::UTF_8)
        path
      end
    end

    # Read the paths of a version 4 index into +files+ (one per slot).
    # Names are prefix-compressed against the previous entry and NUL
    # terminated, with no padding.
    def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      prev_entry_path = ''
      files.map! do
        file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
        flags = file.getbyte
        extended_flag = (flags & 0b0100_0000) > 0
        length = (flags & 0xF) * 256 + file.getbyte # find the 12 bit length
        file.seek(2, 1) if extended_flag

        # number of bytes to drop from the end of the previous path, stored as
        # a variable width integer. documentation for this number from
        # https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
        # offset encoding:
        #   n bytes with MSB set in all but the last one.
        #   The offset is then the number constructed by
        #   concatenating the lower 7 bit of each byte, and
        #   for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
        #   to the result.
        # adding 1 before each shift is what accumulates those 2^(7*k) terms.
        byte = file.getbyte
        strip_length = byte & 0b0111_1111
        while (byte & 0b1000_0000) > 0
          byte = file.getbyte
          strip_length = ((strip_length + 1) << 7) + (byte & 0b0111_1111)
        end
        initial_part_length = prev_entry_path.bytesize - strip_length

        if length < 0xFFF
          rest = file.read(length - initial_part_length)
          file.seek(1, 1) # the NUL
        # :nocov:
        else
          # i can't test this i just get ENAMETOOLONG a lot
          # the length field is saturated: the real name runs up to the first NUL.
          # readline already consumed that NUL and there is no padding to skip.
          rest = file.readline("\0").chop!
          # :nocov:
        end

        # tag before joining: a UTF-8 prefix and a binary suffix that both hold
        # non-ascii bytes can't be concatenated
        rest.force_encoding(Encoding::UTF_8)
        prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
      end
    end
  end
end