sequence_name.rb in unicoder-1.3.0

- old
+ new

@@ -9,11 +9,11 @@
       REPLACE_MIN_WORD_LENGTH = 3
 
       def initialize_index
         @index = {
           SEQUENCES: {},
-          SEQUENCES_NOT_QUALIFIED: {},
+          EMOJI_NOT_QUALIFIED: {},
         }
         @words = []
       end
 
       def assign_codepoint(codepoints, value, idx = @index[:SEQUENCES], combine: false)
@@ -72,27 +72,31 @@
 
         parse_file :emoji_zwj_sequences, :line, regex: /^(?!#)(?<codepoints>.+?)\s*;.*?; (?<name>.+?)\s*#/ do |line|
           name = line["name"].gsub(/\\x{(\h+)}/){ [$1.to_i(16)].pack("U") }.upcase
           codepoints = line["codepoints"].split.map{|cp| cp.to_i(16) }
           assign_codepoint codepoints, name
+
+
+          # Build all combinations of VS16 present and missing and add to second index
           if codepoints.include?(0xFE0F)
-            # Build all combinations of VS16 present and missing
+            sequence = codepoints.pack("U*")
+
             codepoints.slice_after(0xFE0F).reduce([[]]){|acc,cur|
               if cur.include? 0xFE0F
                 acc.flat_map{|prev| [prev + (cur - [0xFE0F]), prev + cur] }
               else
                 acc.map{|prev| prev + cur}
               end
             }.
             select {|sub_codepoints| sub_codepoints != codepoints }.
             each { |sub_codepoints|
-              assign_codepoint (sub_codepoints), name, @index[:SEQUENCES_NOT_QUALIFIED]
+              sub_sequence = sub_codepoints.pack("U*")
+              @index[:EMOJI_NOT_QUALIFIED][sub_sequence] = sequence
             }
           end
         end
 
         replace_common_words! :SEQUENCES, @words, REPLACE_COUNT, REPLACE_BASE, REPLACE_MIN_WORD_LENGTH
-        replace_common_words! :SEQUENCES_NOT_QUALIFIED, @words, REPLACE_COUNT, REPLACE_BASE, REPLACE_MIN_WORD_LENGTH
       end
     end
   end
 end