require 'rubygems' require 'open-uri' require 'active_support/inflector' require 'csv' # Rake task for importing country names from Unicode.org's CLDR repository # (http://www.unicode.org/cldr/data/charts/summary/root.html). # # It parses an HTML file from Unicode.org for the given locale and saves the # Rails' I18n hash in the plugin +locale+ directory. # # Don't forget to restart the application after adding a new locale, so that it is loaded into Rails! # # == Parameters # LOCALE (required): Sets the locale to use. Output file name will include this. # FORMAT (optional): Output format, either 'rb' or 'yml'. Defaults to 'rb' if not specified. # WEB_LOCALE (optional): Forces a locale code to use when querying the Unicode.org CLDR archive. # PARSER (optional): Forces the parser to use. Available parsers are nokogiri, hpricot and libxml. # # == Examples # rake import:country_select LOCALE=de # rake import:country_select LOCALE=pt-BR WEB_LOCALE=pt FORMAT=yml # # The code is deliberately procedural and simple, so it's easily # understandable by beginners as an introduction to the power of Rake tasks. # See https://github.com/svenfuchs/ruby-cldr for a much more robust solution. namespace :import do desc "Import country codes and names for various languages from the Unicode.org CLDR archive." task :country_select do # TODO : Implement locale import chooser from CLDR root via Highline # Setup variables locale = ENV['LOCALE'] unless locale puts "\n[!] Usage: rake import:country_select LOCALE=de\n\n" exit 0 end # convert locale code to Unicode.org CLDR acceptable code web_locale = if ENV['WEB_LOCALE'] then ENV['WEB_LOCALE'] elsif %w(zht zhtw).include?(locale.downcase.gsub(/[-_]/,'')) then 'zh_Hant' elsif %w(zhs zhcn).include?(locale.downcase.gsub(/[-_]/,'')) then 'zh_Hans' else locale.underscore.split('_')[0] end # ----- Get the CLDR HTML -------------------------------------------------- begin puts "... 
getting the HTML file for locale '#{web_locale}'" url = "http://www.unicode.org/cldr/data/charts/summary/#{web_locale}.html" html = open(url).read rescue => e puts "[!] Invalid locale name '#{web_locale}'! Not found in CLDR (#{e})" exit 0 end set_parser(ENV['PARSER']) if ENV['PARSER'] puts "... parsing the HTML file using #{parser.name.split("::").last}" countries = parser.parse(html).inject([]) { |arr, (_code, attrs)| arr << attrs } countries.sort_by! { |c| c[:code] } puts '... fetching correct list of country codes and filtering translations' correct_list = CSV.parse(open('https://raw.githubusercontent.com/datasets/un-locode/master/data/country-codes.csv').string) country_codes = correct_list.map { |c| c[0] } countries.delete_if { |c| !country_codes.member?(c[:code].to_s) } puts "\n\n... imported #{countries.count} countries:" puts countries.map { |c| "#{c[:code]}: #{c[:name]}" }.join(", ") # ----- Prepare the output format ------------------------------------------ format = if ENV['FORMAT'].nil?||%(rb ruby).include?(ENV['FORMAT'].downcase) then :rb elsif %(yml yaml).include?(ENV['FORMAT'].downcase) then :yml end unless format puts "\n[!] FORMAT must be either 'rb' or 'yml'\n\n" exit 0 end if format==:yml output =<
{ :countries => { HEAD countries.each do |country| output << "\t\t\t:#{country[:code]} => \"#{country[:name]}\",\n" end output <<<