data/generate_constants.rb in unicode-emoji-3.8.0 vs data/generate_constants.rb in unicode-emoji-4.0.0

- old
+ new

@@ -67,19 +67,27 @@ character_class(rangify(ords)) end end def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_component:, emoji_presentation:, text_presentation:, picto:, picto_no_emoji:) + visual_component = pack_and_join(VISUAL_COMPONENT) + emoji_presentation_sequence = \ join( text_presentation + pack(EMOJI_VARIATION_SELECTOR), emoji_presentation + "(?!" + pack(TEXT_VARIATION_SELECTOR) + ")" + pack(EMOJI_VARIATION_SELECTOR) + "?", ) non_component_emoji_presentation_sequence = \ "(?!" + emoji_component + ")" + emoji_presentation_sequence + basic_emoji = \ + join( + non_component_emoji_presentation_sequence, + visual_component, + ) + text_keycap_sequence = \ pack_and_join(EMOJI_KEYCAPS) + pack(EMOJI_KEYCAP_SUFFIX) text_presentation_sequence = \ join( @@ -167,69 +175,77 @@ join( emoji_rgi_zwj_sequence, emoji_rgi_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, ) emoji_rgi_sequence_include_text = \ join( emoji_rgi_zwj_sequence, emoji_rgi_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, text_emoji, ) emoji_rgi_include_mqe_sequence = \ join( emoji_rgi_include_mqe_zwj_sequence, emoji_rgi_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, ) emoji_rgi_include_mqe_uqe_sequence = \ join( emoji_rgi_include_mqe_uqe_zwj_sequence, text_emoji, # also uqe emoji_rgi_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, ) emoji_valid_sequence = \ join( emoji_valid_zwj_sequence, emoji_valid_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, ) emoji_valid_sequence_include_text = \ join( emoji_valid_zwj_sequence, emoji_valid_tag_sequence, emoji_valid_flag_sequence, emoji_core_sequence, + visual_component, text_emoji, ) emoji_well_formed_sequence = \ join( emoji_valid_zwj_sequence, emoji_well_formed_tag_sequence, emoji_well_formed_flag_sequence, emoji_core_sequence, + visual_component, ) 
emoji_well_formed_sequence_include_text = \ join( emoji_valid_zwj_sequence, emoji_well_formed_tag_sequence, emoji_well_formed_flag_sequence, emoji_core_sequence, + visual_component, text_emoji, ) emoji_possible_modification = \ join( @@ -277,23 +293,30 @@ # Quick test which might lead to false positives # See https://www.unicode.org/reports/tr51/#EBNF_and_Regex regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible) - # Matches only basic single, non-textual emoji, ignores "components" like modifiers or simple digits - regexes[:REGEX_BASIC] = Regexp.compile(non_component_emoji_presentation_sequence) + # Matches only basic single, non-textual emoji, ignores some components like simple digits + regexes[:REGEX_BASIC] = Regexp.compile(basic_emoji) - # Matches only basic single, textual emoji, ignores "components" like modifiers or simple digits + # Matches only basic single, textual emoji, ignores components like modifiers or simple digits regexes[:REGEX_TEXT] = Regexp.compile(text_emoji) - # Same as \p{Emoji} - to be removed or renamed - regexes[:REGEX_ANY] = Regexp.compile(emoji_character) + # Export regexes for Emoji properties so they can be used with newer Unicode than Ruby's + regexes[:REGEX_PROP_EMOJI] = Regexp.compile(emoji_character) + regexes[:REGEX_PROP_MODIFIER] = Regexp.compile(emoji_modifier) + regexes[:REGEX_PROP_MODIFIER_BASE] = Regexp.compile(emoji_modifier_base) + regexes[:REGEX_PROP_COMPONENT] = Regexp.compile(emoji_component) + regexes[:REGEX_PROP_PRESENTATION] = Regexp.compile(emoji_presentation) + # Same goes for ExtendedPictographic regexes[:REGEX_PICTO] = Regexp.compile(picto) - regexes[:REGEX_PICTO_NO_EMOJI] = Regexp.compile(picto_no_emoji) + # Emoji keycaps + regexes[:REGEX_EMOJI_KEYCAP] = Regexp.compile(emoji_keycap_sequence) + regexes end regexes = compile( emoji_character: pack_and_join(EMOJI_CHAR), @@ -311,10 +334,10 @@ emoji_character: "\\p{Emoji}", emoji_modifier: "\\p{EMod}", emoji_modifier_base: "\\p{EBase}", emoji_component: 
"\\p{EComp}", emoji_presentation: "\\p{EPres}", - text_presentation: "\\p{Emoji}(?<!\\p{EPres})", + text_presentation: "[\\p{Emoji}&&\\P{EPres}]", picto: "\\p{ExtPict}", - picto_no_emoji: "\\p{ExtPict}(?<!\\p{Emoji})" + picto_no_emoji: "[\\p{ExtPict}&&\\P{Emoji}]" ) write_regexes(native_regexes, File.expand_path("../lib/unicode/emoji/generated_native", __dir__))