data/generate_constants.rb in unicode-emoji-3.6.0 vs data/generate_constants.rb in unicode-emoji-3.7.0
- old
+ new
@@ -95,14 +95,11 @@
emoji_valid_flag_sequence = \
pack_and_join(VALID_REGION_FLAGS)
emoji_well_formed_flag_sequence = \
- "(?:" +
- pack_and_join(REGIONAL_INDICATORS) +
- pack_and_join(REGIONAL_INDICATORS) +
- ")"
+ '\p{RI}{2}'
emoji_valid_core_sequence = \
join(
# emoji_character,
emoji_keycap_sequence,
@@ -126,22 +123,22 @@
emoji_valid_tag_sequence = \
"(?:" +
pack(EMOJI_TAG_BASE_FLAG) +
"(?:" + VALID_SUBDIVISIONS.sort_by(&:length).reverse.map{ |sd|
- Regexp.escape(sd.tr("\u{20}-\u{7E}", "\u{E0020}-\u{E007E}"))
+ sd.tr("\u{30}-\u{39}\u{61}-\u{7A}", "\u{E0030}-\u{E0039}\u{E0061}-\u{E007A}")
}.join("|") + ")" +
pack(CANCEL_TAG) +
")"
emoji_well_formed_tag_sequence = \
"(?:" +
join(
non_component_emoji_presentation_sequence,
emoji_modifier_sequence,
) +
- pack_and_join(TAGS) + "+" +
+ pack_and_join(SPEC_TAGS) + "{1,30}" +
pack(CANCEL_TAG) +
")"
# Sort to make sure complex sequences match first
emoji_rgi_zwj_sequence = \
@@ -178,19 +175,39 @@
emoji_valid_zwj_sequence,
emoji_well_formed_tag_sequence,
emoji_well_formed_core_sequence,
)
+ emoji_possible_modification = \
+ join(
+ emoji_modifier,
+ pack([EMOJI_VARIATION_SELECTOR, EMOJI_KEYCAP_SUFFIX]) + "?",
+ "[-]+" # raw tags
+ )
+
+ emoji_possible_zwj_element = \
+ join(
+ emoji_well_formed_flag_sequence,
+ emoji_character + emoji_possible_modification + "?"
+ )
+
+ emoji_possible = \
+ emoji_possible_zwj_element + "(?:" + pack(ZWJ) + emoji_possible_zwj_element + ")*"
+
regexes = {}
# Matches basic singleton emoji and all kinds of sequences, but restricts ZWJ and tag sequences to known (RGI) sequences
regexes[:REGEX] = Regexp.compile(emoji_rgi_sequence)
# Matches basic singleton emoji and all kinds of valid sequences
regexes[:REGEX_VALID] = Regexp.compile(emoji_valid_sequence)
# Matches basic singleton emoji and all kinds of well-formed sequences
regexes[:REGEX_WELL_FORMED] = Regexp.compile(emoji_well_formed_sequence)
+
+ # Quick test which might lead to false positives
+ # See https://www.unicode.org/reports/tr51/#EBNF_and_Regex
+ regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible)
# Matches only basic single, non-textual emoji
# Ignores "components" like modifiers or simple digits
regexes[:REGEX_BASIC] = Regexp.compile(
"(?!" + emoji_component + ")" + emoji_presentation_sequence