lib/tokogen/generator.rb in tokogen-0.1.2 vs lib/tokogen/generator.rb in tokogen-0.2.1

- old
+ new

@@ -1,67 +1,57 @@ # frozen_string_literal: true module Tokogen class Generator - DEFAULT_ALPHABET = (('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a).join.freeze + class AssertionFail < StandardError; end - attr_reader :randomness_source + attr_reader :randomness_source, :alphabet - def initialize(randomness_source:, alphabet: DEFAULT_ALPHABET) + def initialize(randomness_source:, alphabet:) @randomness_source = randomness_source @alphabet = alphabet + + @alphabet_size = @alphabet.size + @max_char_index = @alphabet_size - 1 + @bits_per_char = @max_char_index.bit_length end - def generate(length) - token_bits_amount = length * bits_per_char + def generate(length) # rubocop:disable Metrics/AbcSize + token_bits_amount = length * @bits_per_char bytes_to_read = full_bytes_in_bits(token_bits_amount) bytes = random_bytes(bytes_to_read) - bits = bytes.unpack('b*')[0] + splitter = BitSplitter.new(bytes.each_byte) + combiner = BitCombiner.new(splitter.each, @bits_per_char) # It's possible we've read a couple extra bits of randomness, # since randomness is rounded to bytes. # Here we only take the first `length` chunks of bits that we need. - bit_string_split(bits, bits_per_char) - .take(length) - .map { |index| alphabet_char(index) } - .join + indexes = combiner.each.take(length) + raise AssertionFail, 'Invalid length' if indexes.size != length + indexes.map do |index| + # We split our random data into chunks of bits with fixed length. + # Therefore it's possible to have an index value that is greater than + # or equal to the alphabet size. + # In this case we'd resolve to nil, so we're just using modulo of the + # alphabet size. This will skew the distribution that + # the randomness source provides, but it will at least work. + # If you don't want this behavior, just ensure you're using an alphabet + # whose size is a power of two - then there will always be a bijection between + # the generated indices and the alphabet and the described issue + # will never occur. 
+ alphabet_char(index % @alphabet_size) + end.join end def random_bytes(size) @randomness_source.random_bytes(size) end - def max_char_index - @alphabet.size - 1 + def alphabet_char(index) + @alphabet[index] end - def bits_per_char - max_char_index.bit_length - end + private def full_bytes_in_bits(bits) (bits + 7) >> 3 - end - - private - - def bit_string_split(bits, bits_per_char, &block) # rubocop:disable Metrics/MethodLength - top = max_char_index - curry = 0 - last_curry = 0 - bits.each_char.each_slice(bits_per_char).map do |binary_ord| - val = binary_ord.join.to_i(2) + curry - last_curry = curry - if val <= top - current = val - curry = 0 - else - current = top - curry = val % top - end - current - end.each(&block) - end - - def alphabet_char(index) - @alphabet[index] end end end