data/generate_constants.rb in unicode-emoji-3.8.0 vs data/generate_constants.rb in unicode-emoji-4.0.0
- old
+ new
@@ -67,19 +67,27 @@
character_class(rangify(ords))
end
end
def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_component:, emoji_presentation:, text_presentation:, picto:, picto_no_emoji:)
+ visual_component = pack_and_join(VISUAL_COMPONENT)
+
emoji_presentation_sequence = \
join(
text_presentation + pack(EMOJI_VARIATION_SELECTOR),
emoji_presentation + "(?!" + pack(TEXT_VARIATION_SELECTOR) + ")" + pack(EMOJI_VARIATION_SELECTOR) + "?",
)
non_component_emoji_presentation_sequence = \
"(?!" + emoji_component + ")" + emoji_presentation_sequence
+ basic_emoji = \
+ join(
+ non_component_emoji_presentation_sequence,
+ visual_component,
+ )
+
text_keycap_sequence = \
pack_and_join(EMOJI_KEYCAPS) + pack(EMOJI_KEYCAP_SUFFIX)
text_presentation_sequence = \
join(
@@ -167,69 +175,77 @@
join(
emoji_rgi_zwj_sequence,
emoji_rgi_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
)
emoji_rgi_sequence_include_text = \
join(
emoji_rgi_zwj_sequence,
emoji_rgi_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
text_emoji,
)
emoji_rgi_include_mqe_sequence = \
join(
emoji_rgi_include_mqe_zwj_sequence,
emoji_rgi_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
)
emoji_rgi_include_mqe_uqe_sequence = \
join(
emoji_rgi_include_mqe_uqe_zwj_sequence,
text_emoji, # also uqe
emoji_rgi_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
)
emoji_valid_sequence = \
join(
emoji_valid_zwj_sequence,
emoji_valid_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
)
emoji_valid_sequence_include_text = \
join(
emoji_valid_zwj_sequence,
emoji_valid_tag_sequence,
emoji_valid_flag_sequence,
emoji_core_sequence,
+ visual_component,
text_emoji,
)
emoji_well_formed_sequence = \
join(
emoji_valid_zwj_sequence,
emoji_well_formed_tag_sequence,
emoji_well_formed_flag_sequence,
emoji_core_sequence,
+ visual_component,
)
emoji_well_formed_sequence_include_text = \
join(
emoji_valid_zwj_sequence,
emoji_well_formed_tag_sequence,
emoji_well_formed_flag_sequence,
emoji_core_sequence,
+ visual_component,
text_emoji,
)
emoji_possible_modification = \
join(
@@ -277,23 +293,30 @@
# Quick test which might lead to false positives
# See https://www.unicode.org/reports/tr51/#EBNF_and_Regex
regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible)
- # Matches only basic single, non-textual emoji, ignores "components" like modifiers or simple digits
- regexes[:REGEX_BASIC] = Regexp.compile(non_component_emoji_presentation_sequence)
+ # Matches only basic single, non-textual emoji, ignores some components like simple digits
+ regexes[:REGEX_BASIC] = Regexp.compile(basic_emoji)
- # Matches only basic single, textual emoji, ignores "components" like modifiers or simple digits
+ # Matches only basic single, textual emoji, ignores components like modifiers or simple digits
regexes[:REGEX_TEXT] = Regexp.compile(text_emoji)
- # Same as \p{Emoji} - to be removed or renamed
- regexes[:REGEX_ANY] = Regexp.compile(emoji_character)
+ # Export regexes for Emoji properties so they can be used with newer Unicode than Ruby's
+ regexes[:REGEX_PROP_EMOJI] = Regexp.compile(emoji_character)
+ regexes[:REGEX_PROP_MODIFIER] = Regexp.compile(emoji_modifier)
+ regexes[:REGEX_PROP_MODIFIER_BASE] = Regexp.compile(emoji_modifier_base)
+ regexes[:REGEX_PROP_COMPONENT] = Regexp.compile(emoji_component)
+ regexes[:REGEX_PROP_PRESENTATION] = Regexp.compile(emoji_presentation)
+ # Same goes for ExtendedPictographic
regexes[:REGEX_PICTO] = Regexp.compile(picto)
-
regexes[:REGEX_PICTO_NO_EMOJI] = Regexp.compile(picto_no_emoji)
+ # Emoji keycaps
+ regexes[:REGEX_EMOJI_KEYCAP] = Regexp.compile(emoji_keycap_sequence)
+
regexes
end
regexes = compile(
emoji_character: pack_and_join(EMOJI_CHAR),
@@ -311,10 +334,10 @@
emoji_character: "\\p{Emoji}",
emoji_modifier: "\\p{EMod}",
emoji_modifier_base: "\\p{EBase}",
emoji_component: "\\p{EComp}",
emoji_presentation: "\\p{EPres}",
- text_presentation: "\\p{Emoji}(?<!\\p{EPres})",
+ text_presentation: "[\\p{Emoji}&&\\P{EPres}]",
picto: "\\p{ExtPict}",
- picto_no_emoji: "\\p{ExtPict}(?<!\\p{Emoji})"
+ picto_no_emoji: "[\\p{ExtPict}&&\\P{Emoji}]"
)
write_regexes(native_regexes, File.expand_path("../lib/unicode/emoji/generated_native", __dir__))