lib/emoji_data.rb in emoji_data-0.1.0 vs lib/emoji_data.rb in emoji_data-0.2.0.rc1

- old
+ new

require 'emoji_data/version'
require 'emoji_data/emoji_char'
require 'json'

# Lookup, conversion and scanning helpers built on top of the vendored emoji
# data file.
#
# The vendored JSON is read and wrapped in `EmojiChar` objects once, while this
# module body is evaluated; every method below works against those precomputed
# constants.
module EmojiData
  # specify some location paths
  GEM_ROOT = File.join(File.dirname(__FILE__), '..')
  VENDOR_DATA = 'vendor/emoji-data/emoji.json'

  # precomputed list of all possible emoji characters
  EMOJI_CHARS = begin
    raw_json = File.read(File.join(GEM_ROOT, VENDOR_DATA), encoding: 'UTF-8')
    JSON.parse(raw_json).map { |em| EmojiChar.new(em) }
  end

  # precomputed hashmap for fast precached lookups in .from_unified
  # (each variant encoding is registered next to the primary unified ID)
  EMOJICHAR_UNIFIED_MAP = EMOJI_CHARS.each_with_object({}) do |ec, map|
    map[ec.unified] = ec
    ec.variations.each { |variant| map[variant] = ec }
  end.freeze

  # precomputed hashmap for fast precached lookups in .from_short_name
  EMOJICHAR_KEYWORD_MAP = EMOJI_CHARS.each_with_object({}) do |ec, map|
    ec.short_names.each { |keyword| map[keyword] = ec }
  end.freeze

  # our constants are only for usage internally
  private_constant :GEM_ROOT, :VENDOR_DATA
  private_constant :EMOJI_CHARS, :EMOJICHAR_UNIFIED_MAP, :EMOJICHAR_KEYWORD_MAP

  # Returns a list of all known Emoji characters as `EmojiChar` objects.
  #
  # @return [Array<EmojiChar>] a list of all known `EmojiChar`.
  def self.all
    EMOJI_CHARS
  end

  # Returns a list of all `EmojiChar` that are represented with doublebyte
  # encoding.
  #
  # @return [Array<EmojiChar>] a list of all doublebyte `EmojiChar`.
  def self.all_doublebyte
    EMOJI_CHARS.select(&:doublebyte?)
  end

  # Returns a list of all `EmojiChar` that have at least one variant encoding.
  #
  # @return [Array<EmojiChar>] a list of all `EmojiChar` with variant encoding.
  def self.all_with_variants
    EMOJI_CHARS.select(&:variant?)
  end

  # Returns a list of all known Emoji characters rendered as UTF-8 strings.
  #
  # By default every character is rendered once, without variant encoding.
  # If you pass an option hash with `include_variants: true`, the variant
  # rendering of every character that has one is appended, meaning that:
  #
  # 1. You will have "duplicate" emojis in your list.
  # 2. This list is now suitable for exhaustively matching against in a search.
  #
  # @option opts [Boolean] :include_variants whether or not to include all
  #   possible encoding variants in the list
  #
  # @return [Array<String>] all Emoji characters rendered as UTF-8 strings
  def self.chars(opts={})
    normals = EMOJI_CHARS.map { |c| c.render(variant_encoding: false) }
    return normals unless opts[:include_variants]

    normals + all_with_variants.map { |c| c.render(variant_encoding: true) }
  end

  # Returns a list of all known codepoints representing Emoji characters.
  #
  # @option (see .chars)
  # @return [Array<String>] all codepoints represented as unified ID strings
  def self.codepoints(opts={})
    normals = EMOJI_CHARS.map(&:unified)
    return normals unless opts[:include_variants]

    normals + all_with_variants.map(&:variant)
  end

  # Convert a native UTF-8 string glyph to its unified codepoint ID.
  #
  # This is a conversion operation, not a match, so it may produce unexpected
  # results with different types of values.
  #
  # @param char [String] a single rendered emoji glyph encoded as a UTF-8 string
  # @return [String] the unified ID: each codepoint as uppercase hex, zero
  #   padded to at least four digits, joined with "-"
  #
  # @example
  #   >> EmojiData.char_to_unified("👾")
  #   => "1F47E"
  def self.char_to_unified(char)
    char.codepoints.map { |cp| format('%04X', cp) }.join('-')
  end

  # Convert a unified codepoint ID directly to its UTF-8 string representation.
  #
  # Delegates to `EmojiChar.unified_to_char`.
  #
  # @param uid [String] the unified codepoint ID for an emoji
  # @return [String] UTF-8 string rendering of the emoji character
  #
  # @example
  #   >> EmojiData.unified_to_char("1F47E")
  #   => "👾"
  def self.unified_to_char(uid)
    EmojiChar.unified_to_char(uid)
  end

  # Finds a specific `EmojiChar` based on its unified codepoint ID.
  #
  # The ID is upcased before the lookup; variant IDs resolve to the same
  # `EmojiChar` as the primary ID.
  #
  # @param uid [String] the unified codepoint ID for an emoji
  # @return [EmojiChar, nil] the matching character, or nil when the ID is
  #   not in the lookup table
  def self.from_unified(uid)
    EMOJICHAR_UNIFIED_MAP[uid.upcase]
  end

  # precompile regex pattern for fast matches in `.scan`
  # needs to be defined after self.chars so not at top of file for now...
  # Regexp.union escapes every glyph, so a glyph containing a regexp
  # metacharacter cannot corrupt the pattern.
  FBS_REGEXP = Regexp.union(chars(include_variants: true))
  private_constant :FBS_REGEXP

  # Scans a string for all encoded emoji characters contained within.
  #
  # @param str [String] the target string to search
  # @return [Array<EmojiChar>] all emoji characters contained within the target
  #   string, in the order they appeared.
  #
  # @example
  #   >> EmojiData.scan("flying on my 🚀 to visit the 👾 people.")
  #   => [#<EmojiData::EmojiChar... @name="ROCKET", @unified="1F680", ...>,
  #       #<EmojiData::EmojiChar... @name="ALIEN MONSTER", @unified="1F47E", ...>]
  def self.scan(str)
    str.scan(FBS_REGEXP).map { |glyph| from_unified(char_to_unified(glyph)) }
  end

  # Finds any `EmojiChar` that contains given string in its official name.
  #
  # The query is upcased before matching.
  #
  # @param name [String]
  # @return [Array<EmojiChar>]
  def self.find_by_name(name)
    find_by_value(:name, name.upcase)
  end

  # Finds all `EmojiChar` whose `short_name` contains the given string.
  #
  # The query is downcased before matching.
  #
  # NOTE(review): only the `short_name` field is queried here, not the
  # `short_names` keyword list used by `.from_short_name` -- confirm whether
  # matching against every keyword was intended.
  #
  # @param short_name [String]
  # @return [Array<EmojiChar>]
  def self.find_by_short_name(short_name)
    find_by_value(:short_name, short_name.downcase)
  end

  # Finds a specific `EmojiChar` based on one of its short name keywords.
  #
  # Must be exact match (the argument is downcased before the lookup).
  #
  # @param short_name [String]
  # @return [EmojiChar, nil] the matching character, or nil when no character
  #   is registered under that keyword
  def self.from_short_name(short_name)
    EMOJICHAR_KEYWORD_MAP[short_name.downcase]
  end

  # alias old method names for legacy apps
  class << self
    alias_method :find_by_unified, :from_unified
    alias_method :find_by_str, :scan
  end

  # Selects every `EmojiChar` whose `field` value includes `value`.
  #
  # Internal helper for the `find_by_*` methods. A bare `protected` keyword
  # has no effect on methods defined with `def self.`, so this method has
  # always been publicly callable; it stays that way for backward
  # compatibility and is only marked private for documentation purposes.
  #
  # @api private
  # @param field [Symbol] name of the `EmojiChar` reader to query
  # @param value [String] value passed to `include?` on the field
  # @return [Array<EmojiChar>]
  def self.find_by_value(field, value)
    all.select { |char| char.send(field).include?(value) }
  end
end