# Based on work by Oleksandr Korniienko
module IdPack
  class UuidPacker

    # calculate bits in number
    def bin_pow(num)
      pow = 0
      pow += 1 until (num >> pow).zero?
      pow
    end

    # transform string of valid characters to useful array (del = true if we need
    # delimiter)
    def alphanum_to_array(alphanum_string, del)

      alphanum_array = []
      el = alphanum_string.length

      # max number of bits coding by one character (some characters will be one
      # bit less)
      pow = bin_pow (el - 1)

      # how many characters will be one bit less
      lowhi = 2**pow - el

      # if delimited we can't use last characters
      if del
        el    -= 1
        pow    = bin_pow(el - 1)
        lowhi  = 2**pow - el
        lowhi  = -1 if lowhi.zero?
        # first element include main data about alphabet and delimiter character
        alphanum_array.push [lowhi, alphanum_string[el], pow]
      else
        # first element include main data about alphabet
        lowhi = -1 if lowhi.zero?
        alphanum_array.push [lowhi, '', pow]
      end

      lowhi     = 0 if lowhi == -1
      char_item = 0

      # loop by characters and get code for each one
      until char_item == el
        if char_item < lowhi
          alphanum_array.push [char_item, alphanum_string[char_item], pow - 1]
        else
          alphanum_array.push [lowhi + char_item, alphanum_string[char_item], pow]
        end
        char_item += 1
      end
      alphanum_array
    end

    # compress UUIDs array
    def alphanum_compress(arr, alphanum_string, order)
      # length of UUID in bits
      uuid_bit_length = 128

      # compress without delta
      nresult        = ''
      alphanum_array = alphanum_to_array alphanum_string, false
      pow            = alphanum_array[0][2]
      lowhi          = alphanum_array[0][0]

      # first bit equal 0 => compress without delta
      achr = 0
      rest = 1

      # loop by UUIDs
      arr.each do |item|

        # remove '-' characters from UUID
        curr = item.delete('-').to_i(16)

        # get base binary code (BBC)
        achr += (curr << rest)

        # look for number of bits in BBC
        rest += uuid_bit_length

        # create symbols to compressed string
        until rest < pow

          power_c  = pow - 1
          code     = (
            (achr & (2**power_c - 1)) + 2**power_c
          ).to_s(2).reverse.to_i(2) >> 1

          power_c += 1 if code >= lowhi

          # decrease number of bits in BBC
          rest -= power_c

          # get reverse bits from the end of BBC to create new symbol
          code = (
            (achr & (2**power_c - 1)) + 2**power_c
          ).to_s(2).reverse.to_i(2) >> 1

          # add new symbol
          nresult += alphanum_array.assoc(code)[1]

          # remove used bits from BBC
          achr >>= power_c
        end
      end

      # check if we have tail of BBC
      if rest.positive?
        code   = ((achr & (2**rest - 1)) + 2**rest).to_s(2).reverse.to_i(2) >> 1
        code <<= pow - rest - 1
        code <<= 1 if code >= lowhi

        # add tail symbol
        nresult += alphanum_array.assoc(code)[1]
      end

      # compress with delta
      arr = arr.sort

      # first character is delimiter => compress with delta : delimiter (last
      # character in alphabet) always has code of all ones
      dresult        = alphanum_array[-1][1]
      alphanum_array = alphanum_to_array alphanum_string, true
      pow            = alphanum_array[0][2]

      if pow > 1
        lowhi = alphanum_array[0][0]
        prev  = 0

        # loop by UUIDs
        arr.each do |item|

          # remove '-' characters from UUID
          curr = item.delete('-').to_i(16)

          # calculate delta
          curr   -= prev
          prev    = item.delete('-').to_i(16)
          binlog  = bin_pow curr
          binlog  = uuid_bit_length if binlog >= uuid_bit_length - pow

          # get BBC for only current UUID
          achr = curr

          # look for number of bits in BBC (also for only current UUID)
          rest = binlog

          # create symbols to compressed string
          until rest < pow
            power_c  = pow - 1
            code     = (
              (achr & (2**power_c - 1)) +
              2**power_c
            ).to_s(2).reverse.to_i(2) >> 1

            power_c += 1 if code >= lowhi

            # decrease number of bits in BBC
            rest -= power_c

            # get reverse bits from the end of BBC to create new symbol
            code = (
              (achr & (2**power_c - 1)) +
              2**power_c
            ).to_s(2).reverse.to_i(2) >> 1

            # add new symbol
            dresult += alphanum_array.assoc(code)[1]

            # remove used bits from BBC
            achr >>= power_c
          end

          # check if we have tail of BBC for current UUID
          if rest.positive?
            code   = (
              (achr & (2**rest - 1)) + 2**rest
            ).to_s(2).reverse.to_i(2) >> 1

            code <<= pow - rest - 1
            code <<= 1 if code >= lowhi

            # add tail symbol for current UUID
            dresult += alphanum_array.assoc(code)[1]
          end

          # add delimiter if we use less symbols than for whole UUID
          dresult += alphanum_array[0][1] if binlog < uuid_bit_length
        end
      else
        order = true
      end

      result = nresult

      # get better result or non delta if we need to keep order
      result = dresult if dresult.length < nresult.length && !order
      result
    end

    # decompress UUIDs array
    def alphanum_decompress(str, alphanum_string)
      # length of UUID in bits
      uuid_bit_length = 128

      result = []
      alphanum_array = alphanum_to_array alphanum_string, false

      # check if delta used when compress
      if (
          alphanum_array.rassoc(str[0])[0] &
          (2**(alphanum_array.rassoc(str[0])[2] - 1))
        ) != 0

        # delta used
        alphanum_array = alphanum_to_array alphanum_string, true
        pow            = alphanum_array[0][2]
        lowhi          = alphanum_array[0][0]
        prev           = 0
        item           = 1
        achr           = 0
        rest           = 0

        # loop by symbols of compressed string starting from second (the first is
        # header) to next after last (for BBC length processing after last)
        while item <= str.length

          # we catch delimiter or we get BBC with length equal whole UUID
          if str[item] == alphanum_array[0][1] || rest >= uuid_bit_length

            # if BBC length than we need to look to current symbol one more time
            # if it is delimiter
            item -= 1 if rest >= uuid_bit_length

            # calculate UUID from delta
            achr += prev
            prev = achr

            # transform UUID to hexadecimal
            curr = prev.to_s(16)

            # add first characters if UUID start with 0
            curr = '0' * (uuid_bit_length / 4 - curr.length) + curr

            # add '-' characters from UUID
            curr = [
              curr[0..7],
              curr[8..11],
              curr[12..15],
              curr[16..19],
              curr[20..31],
            ].join('-')

            # add new UUID to array
            result.push curr
            achr = 0
            rest = 0

          # if we become last symbol we need no to symbol processing
          elsif item < str.length

            # reverse symbol code to BBC bits
            code = (
              alphanum_array.rassoc(str[item])[0] + 2**alphanum_array.rassoc(str[item])[2]
            ).to_s(2).reverse.to_i(2) >> 1

            # add bits to BBC
            achr += code << rest

            # look for number of bits in BBC
            rest += pow
            rest -= 1 if code < lowhi

          end

          item += 1

        end

      else
        # delta not used
        achr  = 0
        rest  = 0
        pow   = alphanum_array[0][2]
        lowhi = alphanum_array[0][0]

        # for first bit processing
        frst = true
        item = 0

        # loop by symbols of compressed string
        while item < str.length

          # reverse symbol code to BBC bits
          code =
            (
              alphanum_array.rassoc(str[item])[0] +
              2**alphanum_array.rassoc(str[item])[2]
            )
              .to_s(2).reverse.to_i(2) >> 1

          # add bits to BBC
          achr += code << rest

          # look for number of bits in BBC
          rest += pow
          rest -= 1 if alphanum_array.rassoc(str[item])[0] < lowhi

          # first bit processing
          if frst
            frst = false
            achr >>= 1
            rest -= 1
          end

          # we get BBC with length equal whole UUID
          if rest >= uuid_bit_length

            # calculate number of bits in BBC
            rest -= uuid_bit_length

            # transform UUID to hexadecimal
            curr = (achr & (2**uuid_bit_length - 1)).to_s(16)

            # add first characters if UUID start with 0
            curr = '0' * (uuid_bit_length / 4 - curr.length) + curr

            # add '-' characters from UUID
            curr = [
              curr[0..7],
              curr[8..11],
              curr[12..15],
              curr[16..19],
              curr[20..31],
            ].join('-')

            # add new UUID to array
            result.push curr

            # remove used bits from BBC
            achr >>= uuid_bit_length

          end

          item += 1
        end
      end
      result
    end
  end
end