lib/unicode/emoji.rb in unicode-emoji-2.9.0 vs lib/unicode/emoji.rb in unicode-emoji-3.0.0
- old
+ new
@@ -1,215 +1,33 @@
# frozen_string_literal: true
require "unicode/version"
require_relative "emoji/constants"
-require_relative "emoji/index"
module Unicode
module Emoji
- PROPERTY_NAMES = {
- E: "Emoji",
- B: "Emoji_Modifier_Base",
- M: "Emoji_Modifier",
- C: "Emoji_Component",
- P: "Emoji_Presentation",
- X: "Extended_Pictographic",
- }
# Registers INDEX for lazy loading: the file emoji/index (resolved relative to
# this file's directory) is required the first time INDEX is referenced,
# replacing the eager `require_relative "emoji/index"` removed above.
+ autoload :INDEX, File.expand_path('emoji/index', __dir__)
- EMOJI_VARIATION_SELECTOR = 0xFE0F
- TEXT_VARIATION_SELECTOR = 0xFE0E
- EMOJI_TAG_BASE_FLAG = 0x1F3F4
- CANCEL_TAG = 0xE007F
- TAGS = [*0xE0020..0xE007E]
- EMOJI_KEYCAP_SUFFIX = 0x20E3
- ZWJ = 0x200D
- REGIONAL_INDICATORS = [*0x1F1E6..0x1F1FF]
-
- EMOJI_CHAR = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:E) }.keys.freeze
- EMOJI_PRESENTATION = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:P) }.keys.freeze
- TEXT_PRESENTATION = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:E) && !props.include?(:P) }.keys.freeze
- EMOJI_COMPONENT = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:C) }.keys.freeze
- EMOJI_MODIFIER_BASES = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:B) }.keys.freeze
- EMOJI_MODIFIERS = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:M) }.keys.freeze
-
- EXTENDED_PICTOGRAPHIC = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:X) }.keys.freeze
- EXTENDED_PICTOGRAPHIC_NO_EMOJI= INDEX[:PROPERTIES].select{ |ord, props| props.include?(:X) && !props.include?(:E) }.keys.freeze
- EMOJI_KEYCAPS = INDEX[:KEYCAPS].freeze
- VALID_REGION_FLAGS = INDEX[:FLAGS].freeze
- VALID_SUBDIVISIONS = INDEX[:SD].freeze
- RECOMMENDED_SUBDIVISION_FLAGS = INDEX[:TAGS].freeze
- RECOMMENDED_ZWJ_SEQUENCES = INDEX[:ZWJ].freeze
-
- LIST = INDEX[:LIST].freeze.each_value(&:freeze)
- LIST_REMOVED_KEYS = [
- "Smileys & People",
- ]
-
- pack = ->(ord){ Regexp.escape(Array(ord).pack("U*")) }
- join = -> (*strings){ "(?:" + strings.join("|") + ")" }
- pack_and_join = ->(ords){ join[*ords.map{ |ord| pack[ord] }] }
-
- if EMOJI_VERSION == Unicode::Version.emoji_version
- emoji_character = "\\p{Emoji}"
- emoji_modifier = "\\p{Emoji Modifier}"
- emoji_modifier_base = "\\p{Emoji Modifier Base}"
- emoji_component = "\\p{Emoji Component}"
- emoji_presentation = "\\p{Emoji Presentation}"
- picto = "\\p{Extended Pictographic}"
- picto_no_emoji = "\\p{Extended Pictographic}(?<!\\p{Emoji})"
- else
- emoji_character = pack_and_join[EMOJI_CHAR]
- emoji_modifier = pack_and_join[EMOJI_MODIFIERS]
- emoji_modifier_base = pack_and_join[EMOJI_MODIFIER_BASES]
- emoji_component = pack_and_join[EMOJI_COMPONENT]
- emoji_presentation = pack_and_join[EMOJI_PRESENTATION]
- picto = pack_and_join[EXTENDED_PICTOGRAPHIC]
- picto_no_emoji = pack_and_join[EXTENDED_PICTOGRAPHIC_NO_EMOJI]
+ %w[
+ EMOJI_CHAR EMOJI_CHAR EMOJI_PRESENTATION TEXT_PRESENTATION EMOJI_COMPONENT EMOJI_MODIFIER_BASES
+ EMOJI_MODIFIERS EXTENDED_PICTOGRAPHIC EXTENDED_PICTOGRAPHIC_NO_EMOJI EMOJI_KEYCAPS VALID_REGION_FLAGS
+ VALID_SUBDIVISIONS RECOMMENDED_SUBDIVISION_FLAGS RECOMMENDED_ZWJ_SEQUENCES LIST LIST_REMOVED_KEYS
+ ].each do |const_name|
+ autoload const_name, File.expand_path('emoji/lazy_constants', __dir__)
end
- emoji_presentation_sequence = \
- join[
- pack_and_join[TEXT_PRESENTATION] + pack[EMOJI_VARIATION_SELECTOR],
- emoji_presentation + "(?!" + pack[TEXT_VARIATION_SELECTOR] + ")" + pack[EMOJI_VARIATION_SELECTOR] + "?",
- ]
-
- non_component_emoji_presentation_sequence = \
- "(?!" + emoji_component + ")" + emoji_presentation_sequence
-
- text_presentation_sequence = \
- join[
- pack_and_join[TEXT_PRESENTATION]+ "(?!" + join[emoji_modifier, pack[EMOJI_VARIATION_SELECTOR]] + ")" + pack[TEXT_VARIATION_SELECTOR] + "?",
- emoji_presentation + pack[TEXT_VARIATION_SELECTOR]
- ]
-
- emoji_modifier_sequence = \
- emoji_modifier_base + emoji_modifier
-
- emoji_keycap_sequence = \
- pack_and_join[EMOJI_KEYCAPS] + pack[[EMOJI_VARIATION_SELECTOR, EMOJI_KEYCAP_SUFFIX]]
-
- emoji_valid_flag_sequence = \
- pack_and_join[VALID_REGION_FLAGS]
-
- emoji_well_formed_flag_sequence = \
- "(?:" +
- pack_and_join[REGIONAL_INDICATORS] +
- pack_and_join[REGIONAL_INDICATORS] +
- ")"
-
- emoji_valid_core_sequence = \
- join[
- # emoji_character,
- emoji_keycap_sequence,
- emoji_modifier_sequence,
- non_component_emoji_presentation_sequence,
- emoji_valid_flag_sequence,
- ]
-
- emoji_well_formed_core_sequence = \
- join[
- # emoji_character,
- emoji_keycap_sequence,
- emoji_modifier_sequence,
- non_component_emoji_presentation_sequence,
- emoji_well_formed_flag_sequence,
- ]
-
- emoji_rgi_tag_sequence = \
- pack_and_join[RECOMMENDED_SUBDIVISION_FLAGS]
-
- emoji_valid_tag_sequence = \
- "(?:" +
- pack[EMOJI_TAG_BASE_FLAG] +
- "(?:" + VALID_SUBDIVISIONS.map{ |sd| Regexp.escape(sd.tr("\u{20}-\u{7E}", "\u{E0020}-\u{E007E}"))}.join("|") + ")" +
- pack[CANCEL_TAG] +
- ")"
-
- emoji_well_formed_tag_sequence = \
- "(?:" +
- join[
- non_component_emoji_presentation_sequence,
- emoji_modifier_sequence,
- ] +
- pack_and_join[TAGS] + "+" +
- pack[CANCEL_TAG] +
- ")"
-
- emoji_rgi_zwj_sequence = \
- pack_and_join[RECOMMENDED_ZWJ_SEQUENCES]
-
- emoji_valid_zwj_element = \
- join[
- emoji_modifier_sequence,
- emoji_presentation_sequence,
- emoji_character,
- ]
-
- emoji_valid_zwj_sequence = \
- "(?:" +
- "(?:" + emoji_valid_zwj_element + pack[ZWJ] + ")+" + emoji_valid_zwj_element +
- ")"
-
- emoji_rgi_sequence = \
- join[
- emoji_rgi_zwj_sequence,
- emoji_rgi_tag_sequence,
- emoji_valid_core_sequence,
- ]
-
- emoji_valid_sequence = \
- join[
- emoji_valid_zwj_sequence,
- emoji_valid_tag_sequence,
- emoji_valid_core_sequence,
- ]
-
- emoji_well_formed_sequence = \
- join[
- emoji_valid_zwj_sequence,
- emoji_well_formed_tag_sequence,
- emoji_well_formed_core_sequence,
- ]
-
- # Matches basic singleton emoji and all kinds of sequences, but restricts zwj and tag sequences to known sequences (rgi)
- REGEX = Regexp.compile(emoji_rgi_sequence)
-
- # Matches basic singleton emoji and all kinds of valid sequences
- REGEX_VALID = Regexp.compile(emoji_valid_sequence)
-
- # Matches basic singleton emoji and all kinds of sequences
- REGEX_WELL_FORMED = Regexp.compile(emoji_well_formed_sequence)
-
- # Matches only basic single, non-textual emoji
- # Ignores "components" like modifiers or simple digits
- REGEX_BASIC = Regexp.compile(
- "(?!" + emoji_component + ")" + emoji_presentation_sequence
# Absolute path of the directory holding the pre-generated regex constant
# files: "emoji/generated_native/" when this gem's EMOJI_VERSION equals
# Unicode::Version.emoji_version, otherwise "emoji/generated/". Both are
# resolved relative to this file's directory.
# NOTE(review): presumably the "native" variant relies on the running Ruby's
# own \p{Emoji}-style properties, mirroring the removed if/else above — confirm.
+ generated_constants_dirpath = File.expand_path(
+ EMOJI_VERSION == Unicode::Version.emoji_version ? "emoji/generated_native/" : "emoji/generated/",
+ __dir__
)
- # Matches only basic single, textual emoji
- # Ignores "components" like modifiers or simple digits
- REGEX_TEXT = Regexp.compile(
- "(?!" + emoji_component + ")" + text_presentation_sequence
- )
-
- # Matches any emoji-related codepoint - Use with caution (returns partial matches)
- REGEX_ANY = Regexp.compile(
- emoji_character
- )
-
- # Combined REGEXes which also match for TEXTUAL emoji
- REGEX_INCLUDE_TEXT = Regexp.union(REGEX, REGEX_TEXT)
- REGEX_VALID_INCLUDE_TEXT = Regexp.union(REGEX_VALID, REGEX_TEXT)
- REGEX_WELL_FORMED_INCLUDE_TEXT = Regexp.union(REGEX_WELL_FORMED, REGEX_TEXT)
-
- REGEX_PICTO = Regexp.compile(
- picto
- )
-
- REGEX_PICTO_NO_EMOJI = Regexp.compile(
- picto_no_emoji
- )
# Registers the REGEX* constants for lazy loading. Each constant is autoloaded
# from its own file inside generated_constants_dirpath, named after the
# downcased constant name (e.g. REGEX_VALID -> "regex_valid").
# The names are kept as strings because File.join needs a string path segment.
+ %w[
+ REGEX REGEX_VALID REGEX_WELL_FORMED REGEX_BASIC REGEX_TEXT REGEX_ANY REGEX_INCLUDE_TEXT
+ REGEX_VALID_INCLUDE_TEXT REGEX_WELL_FORMED_INCLUDE_TEXT REGEX_PICTO REGEX_PICTO_NO_EMOJI
+ ].each do |const_name|
+ autoload const_name, File.join(generated_constants_dirpath, const_name.downcase)
+ end
def self.properties(char)
ord = get_codepoint_value(char)
props = INDEX[:PROPERTIES][ord]