lib/regexp-examples/groups.rb in regexp-examples-1.1.2 vs lib/regexp-examples/groups.rb in regexp-examples-1.1.3

- old
+ new

@@ -19,16 +19,22 @@ # Override to preserve subgroups GroupResult.new(super.to_s, group_id, subgroups) end end + # A helper method for mixing in to Group classes... + # Needed because sometimes (for performance) group results are lazy enumerators; + # Meanwhile other times (again, for performance!) group results are just arrays module ForceLazyEnumerators def force_if_lazy(arr_or_enum) arr_or_enum.respond_to?(:force) ? arr_or_enum.force : arr_or_enum end end + # A helper method for mixing in to Group classes... + # Needed for generating a complete results set when the ignorecase + # regexp option has been set module GroupWithIgnoreCase include ForceLazyEnumerators attr_reader :ignorecase def result group_result = super @@ -41,17 +47,22 @@ group_result end end end + # A helper method for mixing in to Group classes... + # Uses Array#sample to randomly choose one result from all + # possible examples module RandomResultBySample include ForceLazyEnumerators def random_result force_if_lazy(result).sample(1) end end + # The most "basic" possible group. + # For example, /x/ contains one SingleCharGroup class SingleCharGroup include RandomResultBySample prepend GroupWithIgnoreCase def initialize(char, ignorecase) @char = char @@ -72,10 +83,15 @@ def result [GroupResult.new('')] end end + # The most generic type of group, which contains 0 or more characters. + # Technically, this is the ONLY type of group that is truly necessary + # However, having others both improves performance through various optimisations, + # and clarifies the code's intention. + # The most common example of CharGroups is: /[abc]/ class CharGroup include RandomResultBySample prepend GroupWithIgnoreCase def initialize(chars, ignorecase) @chars = chars @@ -87,10 +103,12 @@ GroupResult.new(result) end end end + # A special case of CharGroup, for the pattern /./ + # (For example, we never need to care about ignorecase here!) 
class DotGroup include RandomResultBySample attr_reader :multiline def initialize(multiline) @multiline = multiline @@ -102,10 +120,13 @@ GroupResult.new(result) end end end + # A collection of other groups. Basically any regex that contains + # brackets will be parsed using one of these. The simplest example is: + # /(a)/ - Which is a MultiGroup, containing one SingleCharGroup class MultiGroup attr_reader :group_id def initialize(groups, group_id) @groups = groups @group_id = group_id @@ -129,39 +150,58 @@ GroupResult.new(result, group_id) end end end + # A boolean "or" group. + # The implementation is to pass in 2 sets of (repeaters of) groups. + # The simplest example is: /a|b/ + # If you have more than one boolean "or" operator, then this is initially + # parsed as an OrGroup containing another OrGroup. However, in order to avoid + # probability distribution issues in Regexp#random_example, this then gets + # simplified down to one OrGroup containing 3+ repeaters. class OrGroup + attr_reader :repeaters_list + def initialize(left_repeaters, right_repeaters) - @left_repeaters = left_repeaters - @right_repeaters = right_repeaters + @repeaters_list = [left_repeaters, *merge_if_orgroup(right_repeaters)] end def result result_by_method(:map_results) end def random_result - # TODO: This logic is flawed in terms of choosing a truly "random" example! - # E.g. /a|b|c|d/.random_example will choose a letter with the following probabilities: - # a = 50%, b = 25%, c = 12.5%, d = 12.5% - # In order to fix this, I must either apply some weighted selection logic, - # or change how the OrGroup examples are generated - i.e. 
make this class work with >2 repeaters result_by_method(:map_random_result).sample(1) end private def result_by_method(method) - left_result = RegexpExamples.public_send(method, @left_repeaters) - right_result = RegexpExamples.public_send(method, @right_repeaters) - left_result.concat(right_result).flatten.uniq.map do |result| - GroupResult.new(result) + repeaters_list.map do |repeaters| + RegexpExamples.public_send(method, repeaters) end + .inject(:concat) + .map do |result| + GroupResult.new(result) + end + .uniq end + + def merge_if_orgroup(repeaters) + if repeaters.size == 1 && repeaters.first.is_a?(OrGroup) + repeaters.first.repeaters_list + else + [repeaters] + end + end end + # This is a bit magic... + # We substitute backreferences with PLACEHOLDERS. These are then, later, + # replaced by the appropriate value. (See BackReferenceReplacer) + # The simplest example is /(a) \1/ - So, we temporarily treat the "result" + # of /\1/ as being "__1__". It later gets updated. class BackReferenceGroup include RandomResultBySample attr_reader :id def initialize(id) @id = id