Sha256: e5e972337ed3dfa0b7159a68b44be972d645147788486482e060a7b14642ae11

Contents?: true

Size: 1.74 KB

Versions: 1

Compression:

Stored size: 1.74 KB

Contents

module Zidian
  
  def self.find(expression)
    $KCODE = 'UTF8'
    case expression.class.name
    when "Array"
      expression.collect{|e| find(e) }.flatten.uniq
    when "Integer", "Fixnum" then
      Word.new(get_line(expression), expression)
    when "String" then
      find_word(expression).lines.to_a.collect{|line| Word.new(line) }
    else
      raise InvalFindInputException
    end
  end
  
  protected
  
  def self.find_word(word) #:nodoc:
    words = word.split.map{|w| "#{w}[1-4]?"}.join(" ")
    # adding the -i option allows to search independently from the case, but it makes it very slow
    `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
  end
  
  def self.get_line(line_number) #:nodoc:
    raise InvalidIdException if (line_number < 35 || line_number > 86617)
    `sed -n '#{line_number}p' #{File.dirname(__FILE__)}/cedict_ts.u8`
  end
  
  class Word
    
    attr_reader :id, :traditional, :simplified, :pinyin, :english
    
    def initialize(line, id=nil)
      @id = id
      extract_attributes_from_string(line.strip!)
    end
    
    def extract_attributes_from_string(line)
      if line =~ /^[0-9]*:/
        @id = line.gsub!(/^[0-9]*:/).to_a.first.gsub(':','').to_i
      end
      @traditional = line.match(/^[^\s]+/)[0]
      @simplified = line.match(/\s[^\s]+/)[0].strip
      @pinyin  = line.match(/\[.+?\]/)[0].gsub(/[\[\]]/,'')
      @english = line.scan(/\/[^\/]+/).collect{|e| e.gsub(/[\/]/,'')}
    end
    
  end
  
  class InvalidIdException < Exception; 
    def message
      "Invalid ID, must be between 35 and 86617"
    end
  end
  
  class InvalFindInputException < Exception; 
    def message
      "Invalid find parameter. Only integers, strings accepted"
    end
  end
  
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
zidian-0.2.0 lib/zidian.rb