lib/stringex/unidecoder.rb in stringex-1.5.1 vs lib/stringex/unidecoder.rb in stringex-2.0.0

- old
+ new

@@ -1,23 +1,21 @@ # encoding: UTF-8 -require "yaml" module Stringex module Unidecoder # Contains Unicode codepoints, loading as needed from YAML files CODEPOINTS = Hash.new{|h, k| h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml")) } unless defined?(CODEPOINTS) - LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS) class << self # Returns string with its UTF-8 characters transliterated to ASCII ones # # You're probably better off just using the added String#to_ascii def decode(string) - string.gsub(/[^\x00-\x7f]/u) do |codepoint| - if localized = local_codepoint(codepoint) + string.gsub(/[^\x00-\x00]/u) do |codepoint| + if localized = translate(codepoint) localized else begin unpacked = codepoint.unpack("U")[0] CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)] @@ -45,123 +43,30 @@ def in_yaml_file(character) unpacked = character.unpack("U")[0] "#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})" end - # Adds localized transliterations to Unidecoder - def localize_from(hash_or_path_to_file) - hash = if hash_or_path_to_file.is_a?(Hash) - hash_or_path_to_file - else - YAML.load_file(hash_or_path_to_file) - end - verify_local_codepoints hash - end + private - # Returns locale for localized transliterations - def locale - if @locale - @locale - elsif defined?(I18n) - I18n.locale - else - default_locale - end + def translate(codepoint) + Localization.translate(:transliterations, codepoint) end - # Sets locale for localized transliterations - def locale=(new_locale) - @locale = new_locale - end - - # Returns default locale for localized transliterations. NOTE: Will set @locale as well. - def default_locale - @default_locale ||= "en" - @locale = @default_locale - end - - # Sets the default locale for localized transliterations. NOTE: Will set @locale as well. - def default_locale=(new_locale) - @default_locale = new_locale - # Seems logical that @locale should be the new default - @locale = new_locale - end - - # Returns the localized transliteration for a codepoint - def local_codepoint(codepoint) - locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym] - locale_hash && locale_hash[codepoint] - end - - # Runs a block with a temporary locale setting, returning the locale to the original state when complete - def with_locale(new_locale, &block) - new_locale = default_locale if new_locale == :default - original_locale = locale - self.locale = new_locale - block.call - self.locale = original_locale - end - - # Runs a block with default locale - def with_default_locale(&block) - with_locale default_locale, &block - end - - private # Returns the Unicode codepoint grouping for the given character def code_group(unpacked_character) "x%02x" % (unpacked_character >> 8) end # Returns the index of the given character in the YAML file for its codepoint group def grouped_point(unpacked_character) unpacked_character & 255 end - - # Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises - # instructive exception if not - def verify_local_codepoints(hash) - if !pass_check(hash) - raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data." - end - hash.each{|k, v| LOCAL_CODEPOINTS[k] = v} - end - - def pass_check(hash) - return false if !hash.is_a?(Hash) - hash.all?{|key, value| pass_check_key_and_value_test(key, value) } - end - - def pass_check_key_and_value_test(key, value) - # Fuck a duck, eh? - return false unless [Symbol, String].include?(key.class) - return false unless value.is_a?(Hash) - value.all?{|k, v| k.is_a?(String) && v.is_a?(String)} - end end end - - # Provide a simpler interface for localization implementations - class << self - %w{ - localize_from - locale - locale= - default_locale - default_locale= - local_codepoint - with_locale - with_default_locale - }.each do |name| - define_method name do |*args, &block| - Unidecoder.send name, *args, &block - end - end - end end module Stringex - module StringExtensions + module StringExtensions::PublicInstanceMethods # Returns string with its UTF-8 characters transliterated to ASCII ones. Example: # # "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france" def to_ascii Stringex::Unidecoder.decode(self)