lib/stringex/unidecoder.rb in stringex-1.5.1 vs lib/stringex/unidecoder.rb in stringex-2.0.0
- old
+ new
@@ -1,23 +1,21 @@
# encoding: UTF-8
-require "yaml"
module Stringex
module Unidecoder
# Contains Unicode codepoints, loading as needed from YAML files
CODEPOINTS = Hash.new{|h, k|
h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
} unless defined?(CODEPOINTS)
- LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS)
class << self
# Returns string with its UTF-8 characters transliterated to ASCII ones
#
# You're probably better off just using the added String#to_ascii
def decode(string)
- string.gsub(/[^\x00-\x7f]/u) do |codepoint|
- if localized = local_codepoint(codepoint)
+ string.gsub(/[^\x00-\x00]/u) do |codepoint|
+ if localized = translate(codepoint)
localized
else
begin
unpacked = codepoint.unpack("U")[0]
CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)]
@@ -45,123 +43,30 @@
def in_yaml_file(character)
unpacked = character.unpack("U")[0]
"#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})"
end
- # Adds localized transliterations to Unidecoder
- def localize_from(hash_or_path_to_file)
- hash = if hash_or_path_to_file.is_a?(Hash)
- hash_or_path_to_file
- else
- YAML.load_file(hash_or_path_to_file)
- end
- verify_local_codepoints hash
- end
+ private
- # Returns locale for localized transliterations
- def locale
- if @locale
- @locale
- elsif defined?(I18n)
- I18n.locale
- else
- default_locale
- end
+ def translate(codepoint)
+ Localization.translate(:transliterations, codepoint)
end
- # Sets locale for localized transliterations
- def locale=(new_locale)
- @locale = new_locale
- end
-
- # Returns default locale for localized transliterations. NOTE: Will set @locale as well.
- def default_locale
- @default_locale ||= "en"
- @locale = @default_locale
- end
-
- # Sets the default locale for localized transliterations. NOTE: Will set @locale as well.
- def default_locale=(new_locale)
- @default_locale = new_locale
- # Seems logical that @locale should be the new default
- @locale = new_locale
- end
-
- # Returns the localized transliteration for a codepoint
- def local_codepoint(codepoint)
- locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym]
- locale_hash && locale_hash[codepoint]
- end
-
- # Runs a block with a temporary locale setting, returning the locale to the original state when complete
- def with_locale(new_locale, &block)
- new_locale = default_locale if new_locale == :default
- original_locale = locale
- self.locale = new_locale
- block.call
- self.locale = original_locale
- end
-
- # Runs a block with default locale
- def with_default_locale(&block)
- with_locale default_locale, &block
- end
-
- private
# Returns the Unicode codepoint grouping for the given character
def code_group(unpacked_character)
"x%02x" % (unpacked_character >> 8)
end
# Returns the index of the given character in the YAML file for its codepoint group
def grouped_point(unpacked_character)
unpacked_character & 255
end
-
- # Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises
- # instructive exception if not
- def verify_local_codepoints(hash)
- if !pass_check(hash)
- raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data."
- end
- hash.each{|k, v| LOCAL_CODEPOINTS[k] = v}
- end
-
- def pass_check(hash)
- return false if !hash.is_a?(Hash)
- hash.all?{|key, value| pass_check_key_and_value_test(key, value) }
- end
-
- def pass_check_key_and_value_test(key, value)
- # Fuck a duck, eh?
- return false unless [Symbol, String].include?(key.class)
- return false unless value.is_a?(Hash)
- value.all?{|k, v| k.is_a?(String) && v.is_a?(String)}
- end
end
end
-
- # Provide a simpler interface for localization implementations
- class << self
- %w{
- localize_from
- locale
- locale=
- default_locale
- default_locale=
- local_codepoint
- with_locale
- with_default_locale
- }.each do |name|
- define_method name do |*args, &block|
- Unidecoder.send name, *args, &block
- end
- end
- end
end
module Stringex
- module StringExtensions
+ module StringExtensions::PublicInstanceMethods
# Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
#
# "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
def to_ascii
Stringex::Unidecoder.decode(self)