Sha256: bc10fc95a2096d41096842135ed3208bb115314394563df0cf1a1f0087dce66a

Contents?: true

Size: 1.28 KB

Versions: 4

Compression:

Stored size: 1.28 KB

Contents

# encoding: UTF-8

module Analects
  # Reads a CC-CEDICT style dictionary from an IO object.
  #
  # The leading run of lines starting with "#" is scanned for "#! key=value"
  # metadata, which is exposed through #headers. Every other non-comment,
  # non-blank line is parsed into a four element array of strings ordered as
  # in #field_names: traditional, simplified, pinyin, definitions.
  #
  # The loader is Enumerable; entries are parsed lazily on first iteration
  # and memoized afterwards.
  class CedictLoader
    include Enumerable

    # A "#! key=value" metadata line. The greedy first group means a line
    # with several "=" is split at the last one.
    HEADER_PATTERN = /^#! (.*)=(.*)/

    # "TRADITIONAL SIMPLIFIED [pinyin] definitions".
    # \u00B7 is the middle dot that separates the parts of transliterated
    # names inside the pinyin brackets; it is written as an escape so the
    # pattern cannot be damaged by a mis-decoded source file.
    ENTRY_PATTERN = /^([^\s]*) ([^\s]*) \[([\w\d:,\u00B7 ]+)\](.*)/

    # Hash of metadata found in the "#!" header lines (String => String).
    attr_reader :headers

    # @param io [#read] source of the dictionary text; read in full here
    # @param library unused by this class; kept for caller compatibility
    def initialize(io, library)
      @contents = io.read
      @headers = {}
      @contents.each_line do |line|
        # Metadata only lives in the leading comment block.
        break unless line.start_with?('#')

        header = HEADER_PATTERN.match(line)
        @headers[header[1].strip] = header[2].strip if header
      end
    end

    # @return [Array<Symbol>] the meaning of each position in an entry array
    def field_names
      [:traditional, :simplified, :pinyin, :definitions]
    end

    # Yields each parsed entry. Without a block, returns an Enumerator.
    #
    # @raise [RuntimeError] on the first iteration if a line cannot be parsed
    def each(&blk)
      return to_enum(__method__) unless block_given?

      @entries ||= parse_entries
      @entries.each(&blk)
    end

    # Finds the entries that match every field/value pair in +qry+.
    #
    # @param qry [Hash{Symbol => String}] field name (see #field_names) to
    #   the exact value that field must have
    # @return [Array<Array<String>>, nil] the matching entries; nil when
    #   +qry+ is empty, because there is nothing to intersect
    def find_by(qry)
      qry.map { |field, value| lookup_index(field).fetch(value, []) }
         .inject { |matches, more| matches & more }
    end

    # Returns the value => entries index for +field+. Indexes for all fields
    # are built together on first use and then cached.
    #
    # @param field [Symbol] one of #field_names
    # @return [Hash{String => Array<Array<String>>}, nil] nil when +field+
    #   is not one of #field_names
    def lookup_index(field)
      @indexes ||= build_indexes
      @indexes[field]
    end

    private

    # Parses every dictionary line, skipping comments and blank lines.
    def parse_entries
      data_lines = @contents.each_line.reject do |line|
        line.start_with?('#') || line.strip.empty?
      end
      data_lines.map { |line| process_contents(line) }
    end

    # Builds one value => entries hash per field.
    def build_indexes
      field_names.each_with_index.each_with_object({}) do |(name, position), indexes|
        indexes[name] = each_with_object({}) do |entry, by_value|
          (by_value[entry[position]] ||= []) << entry
        end
      end
    end

    # Splits a single dictionary line into its four stripped fields.
    #
    # @raise [RuntimeError] when the line does not have the expected shape
    def process_contents(line)
      entry = ENTRY_PATTERN.match(line.strip)
      raise "Unexpected contents : #{line.inspect}" unless entry

      entry.captures.map(&:strip)
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
analects-0.4.2 lib/analects/cedict_loader.rb
analects-0.4.1 lib/analects/cedict_loader.rb
analects-0.4.0 lib/analects/cedict_loader.rb
analects-0.3.1 lib/analects/cedict_loader.rb