Sha256: 2d1693d78a1e8e9872f371ebcbf4e750eb302f09d44931db6987dff9a1bafca2

Contents?: true

Size: 745 Bytes

Versions: 4

Compression:

Stored size: 745 Bytes

Contents

# Regenerates ext/character_set/unicode_casefold_table.h from Unicode data.
#
# Steps:
# 1. download CaseFolding.txt into the current working directory via `wget`
# 2. collect all "common" (type C) code point mappings from it
# 3. insert count and mappings into the `.tmpl` file that sits next to the
#    header and write the result to the header path
#
# The downloaded file is removed afterwards, even if one of the steps fails.
# Raises a RuntimeError if the download fails or yields no mappings.
desc 'Download unicode casefold data and write new C header file'
task :sync_casefold_data do
  src_url  = 'https://www.unicode.org/Public/UNIDATA/CaseFolding.txt'
  src_path = './CaseFolding.txt'
  dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"

  begin
    # `-O` pins the output name. Without it, wget saves to "CaseFolding.txt.1"
    # when a stale copy exists, and the stale copy would be parsed instead.
    # `system` returns false/nil on failure, so a broken download stops here
    # instead of surfacing later as a confusing read error.
    system('wget', '-O', src_path, src_url) ||
      raise("could not download #{src_url} (is wget installed?)")

    # Data lines look like "0041; C; 0061; # LATIN CAPITAL LETTER A".
    # Comment and blank lines have no matching type field and are skipped.
    mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
      from, type, to = line.split(/\s*;\s*/).first(3)
      # type 'C' stands for 'common', excludes mappings to multiple chars
      hash[from] = to if type == 'C'
    end.sort # NOTE: orders the hex strings as text, not by numeric value

    # Refuse to overwrite the header with an empty table.
    raise "no casefold mappings found in #{src_path}" if mapping.empty?

    content = File.read("#{dst_path}.tmpl")
      .sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
      .sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))

    File.write(dst_path, content)
  ensure
    # wget may leave a partial or empty file behind on failure.
    File.unlink(src_path) if File.exist?(src_path)
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
character_set-1.8.0-java tasks/sync_casefold_data.rake
character_set-1.8.0 tasks/sync_casefold_data.rake
character_set-1.7.0-java tasks/sync_casefold_data.rake
character_set-1.7.0 tasks/sync_casefold_data.rake