Jldrill Git C0 Coverage Information - RCov

lib/jldrill/model/items/JEDictionary.rb

Name Total Lines Lines of Code Total Coverage Code Coverage
lib/jldrill/model/items/JEDictionary.rb 297 227
85.19%
80.62%

Key

Code reported as executed by Ruby looks like this... and this: this line is also marked as covered. Lines considered as run by rcov, but not reported by Ruby, look like this, and this: these lines were inferred by rcov (using simple heuristics). Finally, here's a line marked as not executed.

Coverage Details

1 # -*- coding: utf-8 -*-
2 
3 require 'jldrill/model/items/JWord'
4 require 'jldrill/model/DataFile'
5 require "jldrill/model/items/Vocabulary"
6 require "jldrill/model/items/edict/Meaning"
7 require 'Context/Log'
8 require 'kconv'
9 
10 module JLDrill
11 
12     # A JEDictionary is a Japanese to English Dictionary.
13     # It is composed of an array of entries from an EDict
14     # dictionary. These entries are parsed to create JWords.
15     # The JWords can then further parse the entries to
16     # create Meanings.
17 	class JEDictionary < DataFile
18         attr_reader :jWords
19 
20         LINE_RE_TEXT = '^([^\[\s]*)\s+(\[(.*)\]\s+)?\/(([^\/]*\/)+)\s*$'
21         LINE_RE = Regexp.new(LINE_RE_TEXT)
22         GET_JWORD_RE = Regexp.new('^([^\[\s]*)\s+(\[(.*)\]\s+)?')
23         KANA_RE = /((.*))/
24         FIRST_CHAR_RE = Regexp.new("^(.)", "U")
25 
26         def initialize
27             super
28             @stepSize = 1000
29         end
30 
31         # Reset the dictionary back to empty
32         def reset
33             @jWords = []
34             @readingHash = {}
35             @kanjiHash = {}
36             super
37         end
38 
39         # The number of items we have indexed in the dictionary.
40         def dataSize
41             return @jWords.size
42         end
43 
44         def length
45             return dataSize
46         end
47 
48         # Returns true if the line at the given index is UTF8
49         def isUTF8?(index)
50             !Kconv.isutf8(@lines[index]).nil?
51         end
52 
53         def getMeaning(position)
54             retVal = ""
55             if lines[position] =~ LINE_RE
56                 retVal = $4
57             end
58             return retVal
59         end
60 
61         # Parse the line at the given position and return the a Vocabulary
62         # containing the information (this is deprecated).
63         def getVocab(position)
64             retVal = nil
65             if lines[position] =~ LINE_RE
66                 kanji = $1
67                 reading = $3
68                 english = JLDrill::Meaning.create($4)
69                 
70                 # Hack for JLPT files
71                 if reading =~ KANA_RE
72                     reading = nil
73                     hint = $1
74                 end
75 
76                 if(reading == "" || reading == nil)
77                     reading = kanji
78                     kanji = nil
79                 end
80 
81                 retVal = Vocabulary.new(kanji, reading, english.allDefinitions,
82                                    english.allTypes, hint, position)
83             else
84                 Context::Log::warning("JLDrill::JEDictionary", 
85                                       "Could not parse #{position}")
86             end             
87             return retVal                        
88         end
89 
90         # modifies the line at position to be UTF8
91         def toUTF8(position)
92             lines[position] = NKF.nkf("-Ewxm0", lines[position])
93         end
94 
95         # Read all the lines into the buffer.
96         # This method also converts them the UTF8
97         def readLines
98             super
99         end
100 
101         # Compensate for files that have missing kanji or
102         # JLPT files which have a strange format.
103         def hackWord(word)
104             # Hack for JLPT files
105             if word.reading =~ KANA_RE || word.reading.nil? ||
106                     word.reading.empty?
107                 word.reading = word.kanji
108                 word.kanji = ""
109             end
110             if word.kanji.nil?
111                 word.kanji = ""
112             end
113             return word
114         end
115 
116         # Has the word in both the reading and kanji hashes so that
117         # we can find them quickly.
118         def hashWord(word)
119             # UTF8 kanji and kana characters are usually 3 bytes each.
120             # We will hash on the first character.
121             (@readingHash[word.reading[0..2]] ||= []).push(word)
122             (@kanjiHash[word.kanji[0..2]] ||= []).push(word)
123         end
124 
125         def parseLine(index)
126             if !isUTF8?(index)
127                 toUTF8(index)
128             end
129             if lines[index] =~ GET_JWORD_RE
130                 word = JWord.new
131                 word.kanji = $1
132                 word.reading = $3
133                 word.dictionary = self
134                 word.position = index
135                 @jWords[@jWords.size] = word
136                 word = hackWord(word)
137                 hashWord(word)
138             end
139         end
140 
141         def vocab(index)
142             word = @jWords[index]
143             if !word.nil?
144                 return word.toVocab
145             else
146                 return nil
147             end
148         end
149 
150         def eachVocab(&block)
151             @jWords.each do |word|
152                 block.call(word.toVocab)
153             end
154         end
155 
156         # Create the indeces for the item at the current line.
157         def parseEntry
158             parseLine(@parsed)
159             @parsed += 1
160         end
161 
162         # This is what to do when we are finished parsing.
163         def finishParsing
164             # Don't reset the lines because we need them later
165             setLoaded(true)
166         end
167 
168         # Find the items that may have been hashed with this reading.
169         def findBinWithReading(reading)
170             if reading.size >= 3
171                 bin = (@readingHash[reading[0..2]] ||= [])
172             else
173                 keys = @readingHash.keys.find_all do |key|
174                     key.start_with?(reading)
175                 end
176                 bin = []
177                 keys.each do |key|
178                     bin += @readingHash[key]
179                 end
180             end
181             return bin
182         end
183 
184         # Find the items that may have been hashed with this kanji.
185         def findBinWithKanji(kanji)
186             if kanji.size >= 3
187                 bin = (@kanjiHash[kanji[0..2]] ||= [])
188             else
189                 keys = @kanjiHash.keys.find_all do |key|
190                     key.start_with?(kanji)
191                 end
192                 bin = []
193                 keys.each do |key|
194                     bin += @kanjiHash[key]
195                 end
196             end
197             return bin
198         end
199 
200         # Return all the JWords that have a reading starting with reading.
201         def findReadingsStartingWith(reading)
202             bin = findBinWithReading(reading)
203             if reading.size > 3 
204                 return bin.find_all do |word|
205                     word.reading.start_with?(reading)
206                 end
207             else
208                 return bin
209             end
210         end
211 
212         # Return all the JWords that have kanji starting with kanji.
213         def findKanjiStartingWith(kanji)
214             bin = findBinWithKanji(kanji)
215             if kanji.size > 3 
216                 return bin.find_all do |word|
217                     word.kanji.start_with?(kanji)
218                 end
219             else
220                 return bin
221             end
222         end
223 
224         # Return all the JWords that have the reading, reading.
225         def findReading(reading)
226             relevance = reading.size
227             return findBinWithReading(reading).find_all do |word|
228                 if word.reading.eql?(reading)
229                     word.relevance = relevance
230                     true
231                 else
232                     false
233                 end
234             end
235         end
236 
237         # Return all the JWords that have the kanji, kanji.
238         def findKanji(kanji)
239             relevance = kanji.size
240             return findBinWithKanji(kanji).find_all do |word|
241                 if word.kanji.eql?(kanji)
242                     word.relevance = relevance
243                     true
244                 else
245                     false
246                 end
247             end
248         end
249 
250         def findWord(string)
251             kanji = findKanji(string)
252             reading = findReading(string)
253             return kanji + reading
254         end
255 
256         # Return true if the dictionary contains this vocabulary.
257         def include?(vocabulary)
258             return findReading(vocabulary.reading).any? do |word|
259                 word.toVocab.eql?(vocabulary)
260             end
261         end
262 
263         # Return all the words that occur at the begining of reading
264         def findReadingsThatStart(reading)
265             findBinWithReading(reading[0..2]).find_all do |word|
266                 relevance = word.reading.size
267                 if reading.start_with?(word.reading)
268                     word.relevance = relevance
269                     true
270                 else
271                     false
272                 end
273             end
274         end
275 
276         # Return all the words that occur at the begining of kanji
277         def findKanjiThatStart(kanji)
278             findBinWithKanji(kanji[0..2]).find_all do |word|
279                 relevance = word.kanji.size
280                 if kanji.start_with?(word.kanji)
281                     word.relevance = relevance
282                     true
283                 else
284                     false
285                 end
286             end
287         end
288 
289         # Return all the words that occur at the begining of the string
290         # These are sorted by size with the largest finds given first
291         def findWordsThatStart(string)
292             kanji = findKanjiThatStart(string)
293             reading = findReadingsThatStart(string)
294             return kanji + reading
295         end
296     end
297 end

Generated on Mon May 23 16:17:46 +0900 2011 with rcov 0.9.8