-
9
Dir[File.dirname(__FILE__) + '/regexp-examples/*.rb'].each {|file| require file }
-
-
1
module RegexpExamples
-
1
# A String that also remembers the capture group it belongs to.
#
# Besides its textual content, each instance stores:
#   group_id  - the id it was constructed with (may be nil)
#   subgroups - the nested results it was constructed with
class CaptureGroupResult < String
  attr_reader :group_id, :subgroups

  # group_id and subgroups are stored as given; values becomes the
  # string content (it is handed to String#initialize).
  def initialize(group_id, subgroups, values)
    @group_id = group_id
    @subgroups = subgroups
    super(values)
  end

  # Returns a flat array holding this result followed by all of its subgroups.
  def all_subgroups
    [self, subgroups].flatten
  end

  # Overridden in order to preserve the @group_id and @subgroups
  def *(int)
    self.class.new(group_id, subgroups, super)
  end

  # Overridden in order to preserve the @group_id and @subgroups.
  #
  # Accepts everything String#gsub accepts (pattern + block, or
  # pattern + replacement); the arguments and block are forwarded untouched.
  #
  # NOTE: because this is a Ruby-level wrapper around String#gsub, the
  # match variables ($~, $1, ...) are not visible inside a block passed by
  # the caller. Callers should use the block argument instead.
  def gsub(*args, &block)
    self.class.new(group_id, subgroups, super)
  end
end
-
-
1
# Replaces backreference placeholders of the form "__<id>__" inside
# generated examples with the text of the capture group carrying that id.
class BackReferenceReplacer
  # full_examples is an array whose elements are either a single String or
  # an array of partial example strings. The array (and each inner array)
  # is modified in place and also returned.
  #
  # A bare String element is wrapped into a one-element array and is not
  # scanned for placeholders. A placeholder whose id matches no capture
  # group in the same example is left untouched.
  def substitute_backreferences(full_examples)
    full_examples.map! do |full_example|
      if full_example.is_a? String
        [full_example]
      else
        full_example.map! do |partial_example|
          partial_example.gsub(/__(\w+)__/) do |placeholder|
            # The id is taken from the block argument rather than $1:
            # when partial_example overrides #gsub in Ruby (as
            # CaptureGroupResult does), $1 is not set in this frame.
            # placeholder is always "__" + id + "__".
            group_id = placeholder[2...-2]
            find_backref_for(full_example, group_id) || placeholder
          end
        end
      end
    end
    full_examples
  end

  private

  # Searches every partial example that exposes #group_id (and all of its
  # subgroups) for the one whose group_id equals the given id.
  # Returns the matching subgroup, or nil when there is none.
  def find_backref_for(full_example, group_id)
    full_example.each do |partial_example|
      next unless partial_example.respond_to?(:group_id)
      found = partial_example.all_subgroups.find do |subgroup|
        subgroup.group_id == group_id
      end
      return found if found
    end
    nil
  end
end
-
-
end
-
1
module RegexpExamples
  # Number of times to repeat for Star and Plus repeaters
  TIMES = 2

  # Maximum number of characters returned from a char set, to reduce output spam
  # For example:
  #   If MaxGroupResults = 5, then
  #   \d = [0, 1, 2, 3, 4]
  MaxGroupResults = 5

  # Arrays of single-character strings used as building blocks below.
  module CharSets
    Lower = Array('a'..'z')
    Upper = Array('A'..'Z')
    Digit = Array('0'..'9')
    # 45.chr = "-". Need to make sure this is at the START of the array, or things break
    # This is because of the /[a-z]/ regex syntax, and how it's being parsed
    Punct = [45..45, 33..44, 46..47, 58..64, 91..96, 123..126].flat_map do |codes|
      codes.map(&:chr)
    end
    Hex = Array('a'..'f') | Array('A'..'F') | Digit
    Any = Lower | Upper | Digit | Punct
  end

  # Map of special regex characters, to their associated character sets
  BackslashCharMap = {
    'd' => CharSets::Digit,
    'D' => CharSets::Lower | CharSets::Upper | CharSets::Punct,
    'w' => CharSets::Lower | CharSets::Upper | CharSets::Digit | ['_'],
    'W' => CharSets::Punct.reject { |val| val == '_' },
    's' => [' ', "\t", "\n", "\r", "\v", "\f"],
    'S' => CharSets::Any - [' ', "\t", "\n", "\r", "\v", "\f"],
    'h' => CharSets::Hex,
    'H' => CharSets::Any - CharSets::Hex,

    't' => ["\t"], # tab
    'n' => ["\n"], # new line
    'r' => ["\r"], # carriage return
    'f' => ["\f"], # form feed
    'a' => ["\a"], # alarm
    'v' => ["\v"], # vertical tab
    'e' => ["\e"], # escape
  }
end
-
-
1
module RegexpExamples
  # Given an array of arrays of strings,
  # returns all possible permutations,
  # for strings created by joining one
  # element from each array
  #
  # For example:
  #   permutations_of_strings [ ['a'], ['b'], ['c', 'd', 'e'] ] #=> ['abc', 'abd', 'abe']
  #   permutations_of_strings [ ['a', 'b'], ['c', 'd'] ] #=> [ 'ac', 'ad', 'bc', 'bd' ]
  #
  # Options:
  #   :no_join - when truthy, each permutation is returned as a flat array
  #              of its parts instead of being joined into one string.
  #
  # The argument is not modified. When only one inner array is given it is
  # returned as-is (the same object). An empty outer array yields the single
  # empty permutation: [''] (or [[]] with :no_join).
  def self.permutations_of_strings(arrays_of_strings, options={})
    return (options[:no_join] ? [[]] : ['']) if arrays_of_strings.empty?

    # Destructure instead of #shift so the caller's array is left intact.
    first, *rest = arrays_of_strings
    return first if rest.empty?

    first.product(permutations_of_strings(rest, options)).map do |result|
      if options[:no_join]
        result.flatten
      else
        join_preserving_capture_groups(result)
      end
    end
  end

  # Joins the (possibly nested) parts in result into a single string.
  # result is flattened in place.
  #
  # If any part responds to #group_id, the joined text is returned as a
  # CaptureGroupResult (with a nil group_id) carrying all of those parts'
  # subgroups, so that capture-group information survives the join.
  # Otherwise a plain String is returned.
  def self.join_preserving_capture_groups(result)
    result.flatten!
    subgroups = result
      .select { |partial| partial.respond_to? :group_id }
      .map(&:all_subgroups)
      .flatten

    joined = result.join
    return joined if subgroups.empty?

    CaptureGroupResult.new(nil, subgroups, joined)
  end
end
-
-
1
module RegexpExamples
-
1
# Turns the source string of a regular expression into a list of repeater
# objects (each wrapping a group object) by walking the string one
# construct at a time. @current_position is the index of the character
# currently being looked at; @num_groups counts the numbered capture
# groups opened so far.
class Parser
  attr_reader :regexp_string

  def initialize(regexp_string)
    @regexp_string = regexp_string
    @num_groups = 0
    @current_position = 0
  end

  # Parses from the current position up to the end of the string or up to
  # the ")" that closes the current nesting level (the position is left ON
  # that ")"). Returns the array of repeaters found.
  def parse
    repeaters = []
    while @current_position < regexp_string.length
      group = parse_group(repeaters)
      break if group.is_a? MultiGroupEnd
      # An alternation has already parsed everything up to the end of this
      # nesting level and absorbed the repeaters collected so far, so it is
      # the only thing left to return. The position must not be advanced
      # here: if we stopped on a ")", the enclosing group still needs to
      # see it (and any repeater that follows it).
      return [parse_one_time_repeater(group)] if group.is_a? OrGroup
      @current_position += 1
      repeaters << parse_repeater(group)
    end
    repeaters
  end

  private

  # Dispatches on the current character and returns the group object for
  # it. repeaters (everything parsed so far on this level) is only needed
  # as the left-hand side of an alternation.
  def parse_group(repeaters)
    char = regexp_string[@current_position]
    case char
    when '('  then parse_multi_group
    when ')'  then parse_multi_end_group
    when '['  then parse_char_group
    when '.'  then parse_dot_group
    when '|'  then parse_or_group(repeaters)
    when '\\' then parse_after_backslash_group
    else           parse_single_char_group(char)
    end
  end

  # Handles whatever follows a backslash: a numbered backreference (\1,
  # \10, ...), a named backreference (\k<name>), a character listed in
  # BackslashCharMap, or otherwise a literal character. Leaves the
  # position on the last character of the escape.
  def parse_after_backslash_group
    @current_position += 1
    rest = rest_of_string
    if (digits = rest[/\A\d+/])
      @current_position += (digits.length - 1) # In case of 10+ backrefs!
      parse_backreference_group(digits)
    elsif (name = rest[/\Ak<([^>]+)>/, 1]) # Named capture group
      @current_position += (name.length + 2)
      parse_backreference_group(name)
    elsif BackslashCharMap.key?(regexp_string[@current_position])
      CharGroup.new(BackslashCharMap[regexp_string[@current_position]])
      # TODO: There are also a bunch of multi-char matches to watch out for:
      # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
    else
      parse_single_char_group(regexp_string[@current_position])
      # TODO: What about cases like \A, \z, \Z ?
    end
  end

  # Looks at the character following a group and wraps the group in the
  # matching repeater (or a one-time repeater if there is no quantifier).
  def parse_repeater(group)
    case regexp_string[@current_position]
    when '*' then parse_star_repeater(group)
    when '+' then parse_plus_repeater(group)
    when '?' then parse_question_mark_repeater(group)
    when '{' then parse_range_repeater(group)
    else          parse_one_time_repeater(group)
    end
  end

  # Parses a parenthesised group. Plain groups get the next capture number
  # (as a string) for their id, named groups get their name, and
  # non-capturing groups get nil.
  def parse_multi_group
    @current_position += 1
    group_id = nil # init
    rest_of_string.match(/\A(\?)?(:|!|=|<(!|=|[^!=][^>]*))?/) do |match|
      case
      when match[1].nil? # e.g. /(normal)/
        # Only plain groups consume a capture number, so that \1, \2, ...
        # line up with the ids handed out here.
        @num_groups += 1
        group_id = @num_groups.to_s
      when match[2] == ':' # e.g. /(?:nocapture)/
        @current_position += 2
        group_id = nil
      when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
        # TODO: Raise exception
      when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
        # TODO: Raise exception
      else # e.g. /(?<name>namedgroup)/
        @current_position += (match[3].length + 3)
        group_id = match[3]
      end
    end
    groups = parse
    MultiGroup.new(groups, group_id)
  end

  def parse_multi_end_group
    MultiGroupEnd.new
  end

  # Collects the raw characters between "[" and its closing "]" (escapes
  # and ranges are not interpreted here) and hands them to CharGroup.
  # NOTE: the loop only stops at a closing "]", so the input is assumed to
  # contain one (true for the source of any valid Regexp).
  def parse_char_group
    chars = []
    @current_position += 1
    if regexp_string[@current_position] == ']'
      # Beware of the sneaky edge case:
      # /[]]/ (match "]")
      chars << ']'
      @current_position += 1
    end
    until regexp_string[@current_position] == ']' &&
          !regexp_string[0..@current_position - 1].match(/[^\\](\\{2})*\\\z/)
      # Beware of having an ODD number of "\" before the "]", e.g.
      # /[\]]/ (match "]")
      # /[\\]/ (match "\")
      # /[\\\]]/ (match "\" or "]")
      chars << regexp_string[@current_position]
      @current_position += 1
    end
    CharGroup.new(chars)
  end

  def parse_dot_group
    DotGroup.new
  end

  # Everything parsed so far on this level becomes the left branch; the
  # rest of the level (parsed recursively) becomes the right branch.
  def parse_or_group(left_repeaters)
    @current_position += 1
    right_repeaters = parse
    OrGroup.new(left_repeaters, right_repeaters)
  end

  def parse_single_char_group(char)
    SingleCharGroup.new(char)
  end

  def parse_backreference_group(match)
    BackReferenceGroup.new(match)
  end

  def parse_star_repeater(group)
    @current_position += 1
    StarRepeater.new(group)
  end

  def parse_plus_repeater(group)
    @current_position += 1
    PlusRepeater.new(group)
  end

  def parse_question_mark_repeater(group)
    @current_position += 1
    QuestionMarkRepeater.new(group)
  end

  # Parses {n}, {n,}, {n,m} and {,m}. A "{" that does not start one of
  # those forms is not a quantifier: the group is then repeated once and
  # the "{" is left in place to be parsed as an ordinary character.
  def parse_range_repeater(group)
    match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
    return parse_one_time_repeater(group) unless match && (match[1] || match[3])

    @current_position += match[0].size
    min = match[1].to_i # a missing minimum, as in {,m}, means 0
    has_comma = !match[2].nil?
    max = match[3].to_i if match[3]
    RangeRepeater.new(group, min, has_comma, max)
  end

  def parse_one_time_repeater(group)
    OneTimeRepeater.new(group)
  end

  # The not-yet-consumed part of the string, starting at the current position.
  def rest_of_string
    regexp_string[@current_position..-1]
  end
end
-
end
-
-
1
class Regexp
  module Examples
    # Returns an array of example strings generated from this regexp's
    # source. The steps are:
    #   1. parse the source into repeaters and collect each one's results,
    #   2. build every combination of those partial results (unjoined),
    #   3. substitute backreference placeholders within each combination,
    #   4. join each combination into a single string.
    def examples
      partial_examples =
        RegexpExamples::Parser.new(source)
          .parse
          .map(&:result)
      # A copy is handed over so partial_examples itself cannot be altered
      # by the permutation helper.
      full_examples = RegexpExamples.permutations_of_strings(partial_examples.dup, no_join: true)
      full_examples_with_backrefs =
        RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples)
      full_examples_with_backrefs.map(&:join)
    end
  end
  include Examples
end
-
-
1
module RegexpExamples
  # Common behaviour for all repeaters: wraps a group and repeats each of
  # the group's results a given number of times.
  class BaseRepeater
    attr_reader :group

    def initialize(group)
      @group = group
    end

    # Takes at most MaxGroupResults of the group's results and, for every
    # repeat count from min_repeats to max_repeats (inclusive), repeats
    # each of them that many times. Duplicates are removed; the order is
    # by repeat count first, then by group result.
    def result(min_repeats, max_repeats)
      group_results = @group.result[0..MaxGroupResults - 1]
      (min_repeats..max_repeats).flat_map do |repeats|
        group_results.map { |group_result| group_result * repeats }
      end.uniq
    end
  end

  # No quantifier: exactly once.
  class OneTimeRepeater < BaseRepeater
    def result
      super(1, 1)
    end
  end

  # "*": zero up to TIMES repetitions.
  class StarRepeater < BaseRepeater
    def result
      super(0, TIMES)
    end
  end

  # "+": one up to TIMES repetitions.
  class PlusRepeater < BaseRepeater
    def result
      super(1, TIMES)
    end
  end

  # "?": zero or one repetition.
  class QuestionMarkRepeater < BaseRepeater
    def result
      super(0, 1)
    end
  end

  # "{min}", "{min,}" or "{min,max}".
  class RangeRepeater < BaseRepeater
    # max wins when given; otherwise an open range ("{min,}") is capped at
    # min + TIMES, and an exact count ("{min}") uses min for both bounds.
    def initialize(group, min, has_comma, max)
      super(group)
      @min = min
      @max =
        if max
          max
        elsif has_comma
          min + TIMES
        else
          min
        end
    end

    def result
      super(@min, @max)
    end
  end
end
-
-
1
RSpec.describe Regexp, "#examples" do
-
1
def self.examples_exist_and_match(*regexps)
-
7
regexps.each do |regexp|
-
54
it do
-
54
regexp_examples = regexp.examples
-
54
expect(regexp_examples).not_to be_empty
-
202
regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
-
# Note: /\A...\z/ is used, to prevent misleading examples from passing the test.
-
# For example, we don't want things like:
-
# /a*/.examples to include "xyz"
-
# /a|b/.examples to include "bad"
-
end
-
end
-
end
-
-
1
context 'returns matching strings' do
-
1
context "for basic repeaters" do
-
1
examples_exist_and_match(
-
/a/,
-
/a*/,
-
/a+/,
-
/a?/,
-
/a{1}/,
-
/a{1,}/,
-
/a{1,2}/
-
)
-
end
-
-
1
context "for basic groups" do
-
1
examples_exist_and_match(
-
/[a]/,
-
/(a)/,
-
/a|b/,
-
/./
-
)
-
end
-
-
1
context "for complex char groups (square brackets)" do
-
1
examples_exist_and_match(
-
/[abc]/,
-
/[a-c]/,
-
/[abc-e]/,
-
/[^a-zA-Z]/,
-
/[\w]/,
-
/[]]/, # TODO: How to suppress annoying warnings on this test?
-
/[\]]/,
-
/[\\]/,
-
/[\\\]]/,
-
/[\n-\r]/,
-
/[\-]/,
-
/[%-+]/ # This regex is "supposed to" match some surprising things!!!
-
)
-
end
-
-
1
context "for complex multi groups" do
-
1
examples_exist_and_match(
-
/(normal)/,
-
/(?:nocapture)/,
-
/(?<name>namedgroup)/,
-
/(?<name>namedgroup) \k<name>/
-
)
-
# TODO: These are not yet implemented
-
# (expect to raise exception)
-
# /(?=lookahead)/,
-
# /(?!neglookahead)/,
-
# /(?<=lookbehind)/,
-
# /(?<!neglookbehind)/,
-
end
-
-
1
context "for escaped characters" do
-
1
examples_exist_and_match(
-
/\w/,
-
/\W/,
-
/\s/,
-
/\S/,
-
/\d/,
-
/\D/,
-
/\h/,
-
/\H/,
-
/\t/,
-
/\n/,
-
/\f/,
-
/\a/,
-
/\v/,
-
/\e/
-
)
-
end
-
-
1
context "for backreferences" do
-
1
examples_exist_and_match(
-
/(repeat) \1/,
-
/(ref1) (ref2) \1 \2/,
-
/((ref2)ref1) \1 \2/,
-
/((ref1and2)) \1 \2/,
-
/(one)(two)(three)(four)(five)(six)(seven)(eight)(nine)(ten) \10\9\8\7\6\5\4\3\2\1/,
-
/(a?(b?(c?(d?(e?)))))/
-
)
-
end
-
-
1
context "for complex patterns" do
-
# Longer combinations of the above
-
1
examples_exist_and_match(
-
/https?:\/\/(www\.)github\.com/,
-
/(I(N(C(E(P(T(I(O(N)))))))))*/,
-
/[\w]{1}/,
-
/((a?b*c+)) \1/,
-
/((a?b*c+)?) \1/,
-
/a|b|c|d/,
-
/a+|b*|c?/
-
)
-
end
-
end
-
end