lib/regexp-examples/groups.rb in regexp-examples-1.1.2 vs lib/regexp-examples/groups.rb in regexp-examples-1.1.3
- old
+ new
@@ -19,16 +19,22 @@
# Override to preserve subgroups
GroupResult.new(super.to_s, group_id, subgroups)
end
end
+ # A helper method for mixing in to Group classes...
+ # Needed because sometimes (for performance) group results are lazy enumerators;
+ # Meanwhile other times (again, for performance!) group results are just arrays
module ForceLazyEnumerators
  # Returns +arr_or_enum+ in evaluated form: an object that responds to
  # #force (such as an Enumerator::Lazy) is forced, while anything else
  # is handed back untouched.
  def force_if_lazy(arr_or_enum)
    return arr_or_enum unless arr_or_enum.respond_to?(:force)

    arr_or_enum.force
  end
end
+ # A helper method for mixing in to Group classes...
+ # Needed for generating a complete results set when the ignorecase
+ # regexp option has been set
module GroupWithIgnoreCase
include ForceLazyEnumerators
attr_reader :ignorecase
def result
group_result = super
@@ -41,17 +47,22 @@
group_result
end
end
end
+ # A helper method for mixing in to Group classes...
+ # Uses Array#sample to randomly choose one result from all
+ # possible examples
module RandomResultBySample
  include ForceLazyEnumerators

  # Chooses one entry at random from the including class's #result.
  # The result is passed through force_if_lazy first, and sample(1)
  # hands the chosen entry back wrapped in a collection rather than bare.
  def random_result
    candidates = force_if_lazy(result)
    candidates.sample(1)
  end
end
+ # The most "basic" possible group.
+ # For example, /x/ contains one SingleCharGroup
class SingleCharGroup
include RandomResultBySample
prepend GroupWithIgnoreCase
def initialize(char, ignorecase)
@char = char
@@ -72,10 +83,15 @@
# Always returns a one-element array holding a GroupResult built from
# the empty string.
def result
[GroupResult.new('')]
end
end
+ # The most generic type of group, which contains 0 or more characters.
+ # Technically, this is the ONLY type of group that is truly necessary
+ # However, having others both improves performance through various optimisations,
+ # and clarifies the code's intention.
+ # The most common example of CharGroups is: /[abc]/
class CharGroup
include RandomResultBySample
prepend GroupWithIgnoreCase
def initialize(chars, ignorecase)
@chars = chars
@@ -87,10 +103,12 @@
GroupResult.new(result)
end
end
end
+ # A special case of CharGroup, for the pattern /./
+ # (For example, we never need to care about ignorecase here!)
class DotGroup
include RandomResultBySample
attr_reader :multiline
def initialize(multiline)
@multiline = multiline
@@ -102,10 +120,13 @@
GroupResult.new(result)
end
end
end
+ # A collection of other groups. Basically any regex that contains
+ # brackets will be parsed using one of these. The simplest example is:
+ # /(a)/ - Which is a MultiGroup, containing one SingleCharGroup
class MultiGroup
attr_reader :group_id
def initialize(groups, group_id)
@groups = groups
@group_id = group_id
@@ -129,39 +150,58 @@
GroupResult.new(result, group_id)
end
end
end
+ # A boolean "or" group.
+ # The implementation is to pass in 2 sets of (repeaters of) groups.
+ # The simplest example is: /a|b/
+ # If you have more than one boolean "or" operator, then this is initially
+ # parsed as an OrGroup containing another OrGroup. However, in order to avoid
+ # probability distribution issues in Regexp#random_example, this then gets
+ # simplified down to one OrGroup containing 3+ repeaters.
class OrGroup
# Flat list of alternatives; each entry is one set of repeaters.
+ attr_reader :repeaters_list
+
# Stores left_repeaters followed by whatever merge_if_orgroup returns for
# right_repeaters, so a right-hand side that is a lone OrGroup contributes
# its own repeaters_list instead of being nested.
def initialize(left_repeaters, right_repeaters)
- @left_repeaters = left_repeaters
- @right_repeaters = right_repeaters
+ @repeaters_list = [left_repeaters, *merge_if_orgroup(right_repeaters)]
end
# Combined results of every alternative, gathered through
# RegexpExamples.map_results.
def result
result_by_method(:map_results)
end
# Gathers RegexpExamples.map_random_result for every alternative and then
# picks one of the combined entries with sample(1), which returns the
# choice wrapped in a collection rather than as a bare element.
def random_result
- # TODO: This logic is flawed in terms of choosing a truly "random" example!
- # E.g. /a|b|c|d/.random_example will choose a letter with the following probabilities:
- # a = 50%, b = 25%, c = 12.5%, d = 12.5%
- # In order to fix this, I must either apply some weighted selection logic,
- # or change how the OrGroup examples are generated - i.e. make this class work with >2 repeaters
result_by_method(:map_random_result).sample(1)
end
private
# Sends `method` to RegexpExamples once per entry of repeaters_list, joins
# the returned collections with concat, wraps every entry in a GroupResult
# and removes duplicates with uniq.
# NOTE(review): inject(:concat) appends onto the first returned collection
# in place (if these are Arrays) - confirm RegexpExamples hands back fresh
# objects that are safe to mutate.
def result_by_method(method)
- left_result = RegexpExamples.public_send(method, @left_repeaters)
- right_result = RegexpExamples.public_send(method, @right_repeaters)
- left_result.concat(right_result).flatten.uniq.map do |result|
- GroupResult.new(result)
+ repeaters_list.map do |repeaters|
+ RegexpExamples.public_send(method, repeaters)
end
+ .inject(:concat)
+ .map do |result|
+ GroupResult.new(result)
+ end
+ .uniq
end
+
# When `repeaters` consists of exactly one OrGroup, returns that group's
# repeaters_list so that nested "or"s collapse into a single flat list;
# otherwise wraps `repeaters` in a one-element list.
+ def merge_if_orgroup(repeaters)
+ if repeaters.size == 1 && repeaters.first.is_a?(OrGroup)
+ repeaters.first.repeaters_list
+ else
+ [repeaters]
+ end
+ end
end
+ # This is a bit magic...
+ # We substitute backreferences with PLACEHOLDERS. These are then, later,
+ # replaced by the appropriate value. (See BackReferenceReplacer)
+ # The simplest example is /(a) \1/ - So, we temporarily treat the "result"
+ # of /\1/ as being "__1__". It later gets updated.
class BackReferenceGroup
include RandomResultBySample
attr_reader :id
def initialize(id)
@id = id