Sha256: 892e24aeb098e3b9358b2581fe837c8b8b0244f2f25c08e2d639b388dcfd6587

Contents?: true

Size: 1.36 KB

Versions: 1

Compression:

Stored size: 1.36 KB

Contents

module Unicoder
  module Builder
    # Index builder that collects two tables into @index:
    #   :CONFUSABLE - Hash filled through `assign`, keyed by source codepoint
    #   :IGNORABLE  - Array of codepoints (Integer) or [first, last] pairs
    #
    # Relies on `parse_file`, `assign` and `option`, which are not defined in
    # this class (presumably supplied by the included Builder module).
    class Confusable
      include Builder

      # Sets @index to its empty starting shape.
      def initialize_index
        @index = { CONFUSABLE: {}, IGNORABLE: [] }
      end

      # Reads the :confusables and :core_properties files via `parse_file`
      # and records every matching line in @index.
      def parse!
        # Each matching line carries a hex "from" codepoint and a "to" field
        # holding one hex codepoint or several separated by spaces.
        parse_file :confusables, :line, regex: /^(?<from>\S+)\s+;\s+(?<to>.+?)\s+;.*$/ do |entry|
          origin = entry["from"].to_i(16)
          target = entry["to"]

          # A multi-codepoint target becomes an Array, a single one a scalar.
          # When `option` matches /charvalues/ each codepoint is stored as a
          # one-character UTF-8 String instead of an Integer.
          replacement =
            if target.include?(" ")
              target.split(" ").map do |hex|
                value = hex.to_i(16)
                option =~ /charvalues/ ? [value].pack("U") : value
              end
            else
              value = target.to_i(16)
              if option =~ /charvalues/
                [value].pack("U")
              else
                value
              end
            end

          assign :CONFUSABLE, origin, replacement
        end

        # The begin:/end: patterns are handed to `parse_file` as-is; presumably
        # they restrict parsing to the Default_Ignorable_Code_Point section.
        parse_file :core_properties, :line, begin: /^# Derived Property: Default_Ignorable_Code_Point$/, end: /^# ================================================$/, regex: /^(?<codepoints>\S+)\s+; Default_Ignorable_Code_Point.*$/ do |entry|
          field = entry["codepoints"]

          # "XXXX..YYYY" is stored as a two-element Array, "XXXX" as an Integer.
          ignorable =
            if field.include?("..")
              field.split("..").map { |hex| hex.to_i(16) }
            else
              field.to_i(16)
            end

          @index[:IGNORABLE] << ignorable
        end
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
unicoder-1.3.0 lib/unicoder/builders/confusable.rb