Sha256: b21165f0cc434772546e3b22262a42235f09446e34f8fc8a7694818d03fef0f0

Contents?: true

Size: 858 Bytes

Versions: 3

Compression:

Stored size: 858 Bytes

Contents

# Ruby 1.8 needs $KCODE set to treat strings as UTF-8 when splitting them into
# characters. From Ruby 1.9 on strings carry their own encoding and assigning
# $KCODE only produces a "no longer effective" warning, so skip it there.
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'

# Reads the word list at +path+ and inserts every line into +tree+, a trie
# built from nested Hashes keyed by single characters.
#
# Each character of a line (ignoring "\n" and "\r") descends one level,
# creating child Hashes as needed; the node reached after the last character
# is flagged with the :end key to mark a complete entry.
#
# path - path of the file to read, one entry per line.
# tree - Hash used as the trie root; it is mutated in place.
#
# Lines that contain nothing but line terminators are skipped.
def process(path, tree)
  File.open(path, 'r') do |file|
    file.each_line do |line|
      # Start every walk at the root so the first character is handled
      # exactly like the following ones.
      node = tree
      line.chars.each do |c|
        next if c == "\n" || c == "\r"
        node = (node[c] ||= {})
      end
      # A blank line never leaves the root and is not an entry, so there is
      # nothing to flag for it.
      node[:end] = true unless node.equal?(tree)
    end
  end
end

# Builds the dictionary trie from the bundled word lists and serializes it
# with Marshal into the file named by hash_path, reporting progress on stdout.
def build
  trie = {}

  # Trim this list (e.g. to only 'wikipedia.zh.utf8') to build from fewer sources.
  %w[cedict.zh_CN.utf8 wikipedia.zh.utf8].each do |name|
    puts "Processing #{name}..."
    process(File.join(File.dirname(__FILE__), '../../dict', name), trie)
  end

  File.open(hash_path, "wb") { |out| Marshal.dump(trie, out) }
  puts 'Done'
end

def hash_path
  File.join(File.dirname(__FILE__), '../../dict/dict.hash')
end

# Run the build immediately: this call executes whenever the file is run or
# loaded, so the dictionary file is regenerated each time.
build

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
rseg-0.1.3 lib/builder/dict.rb
rseg-0.1.2 lib/builder/dict.rb
rseg-0.1.1 lib/builder/dict.rb