Name | Total Lines | Lines of Code | Total Coverage | Code Coverage |
---|---|---|---|---|
lib/jldrill/model/items/JEDictionary.rb | 297 | 227 | 85.19% | 80.62% |
Code reported as executed by Ruby looks like this... and this: this line is also marked as covered. Lines considered as run by rcov, but not reported by Ruby, look like this, and this: these lines were inferred by rcov (using simple heuristics). Finally, here's a line marked as not executed.
1 # -*- coding: utf-8 -*- |
2 |
3 require 'jldrill/model/items/JWord' |
4 require 'jldrill/model/DataFile' |
5 require "jldrill/model/items/Vocabulary" |
6 require "jldrill/model/items/edict/Meaning" |
7 require 'Context/Log' |
8 require 'kconv' |
9 |
10 module JLDrill |
11 |
12 # A JEDictionary is a Japanese to English Dictionary. |
13 # It is composed of an array of entries from an EDict |
14 # dictionary. These entries are parsed to create JWords. |
15 # The JWords can then further parse the entries to |
16 # create Meanings. |
17 class JEDictionary < DataFile |
18 attr_reader :jWords |
19 |
20 LINE_RE_TEXT = '^([^\[\s]*)\s+(\[(.*)\]\s+)?\/(([^\/]*\/)+)\s*$' |
21 LINE_RE = Regexp.new(LINE_RE_TEXT) |
22 GET_JWORD_RE = Regexp.new('^([^\[\s]*)\s+(\[(.*)\]\s+)?') |
23 KANA_RE = /((.*))/ |
24 FIRST_CHAR_RE = Regexp.new("^(.)", "U") |
25 |
26 def initialize |
27 super |
28 @stepSize = 1000 |
29 end |
30 |
31 # Reset the dictionary back to empty |
32 def reset |
33 @jWords = [] |
34 @readingHash = {} |
35 @kanjiHash = {} |
36 super |
37 end |
38 |
39 # The number of items we have indexed in the dictionary. |
40 def dataSize |
41 return @jWords.size |
42 end |
43 |
44 def length |
45 return dataSize |
46 end |
47 |
48 # Returns true if the line at the given index is UTF8 |
49 def isUTF8?(index) |
50 !Kconv.isutf8(@lines[index]).nil? |
51 end |
52 |
53 def getMeaning(position) |
54 retVal = "" |
55 if lines[position] =~ LINE_RE |
56 retVal = $4 |
57 end |
58 return retVal |
59 end |
60 |
61 # Parse the line at the given position and return a Vocabulary |
62 # containing the information (this is deprecated). |
63 def getVocab(position) |
64 retVal = nil |
65 if lines[position] =~ LINE_RE |
66 kanji = $1 |
67 reading = $3 |
68 english = JLDrill::Meaning.create($4) |
69 |
70 # Hack for JLPT files |
71 if reading =~ KANA_RE |
72 reading = nil |
73 hint = $1 |
74 end |
75 |
76 if(reading == "" || reading == nil) |
77 reading = kanji |
78 kanji = nil |
79 end |
80 |
81 retVal = Vocabulary.new(kanji, reading, english.allDefinitions, |
82 english.allTypes, hint, position) |
83 else |
84 Context::Log::warning("JLDrill::JEDictionary", |
85 "Could not parse #{position}") |
86 end |
87 return retVal |
88 end |
89 |
90 # modifies the line at position to be UTF8 |
91 def toUTF8(position) |
92 lines[position] = NKF.nkf("-Ewxm0", lines[position]) |
93 end |
94 |
95 # Read all the lines into the buffer. |
96 # This method also converts them to UTF8 |
97 def readLines |
98 super |
99 end |
100 |
101 # Compensate for files that have missing kanji or |
102 # JLPT files which have a strange format. |
103 def hackWord(word) |
104 # Hack for JLPT files |
105 if word.reading =~ KANA_RE || word.reading.nil? || |
106 word.reading.empty? |
107 word.reading = word.kanji |
108 word.kanji = "" |
109 end |
110 if word.kanji.nil? |
111 word.kanji = "" |
112 end |
113 return word |
114 end |
115 |
116 # Hash the word in both the reading and kanji hashes so that |
117 # we can find them quickly. |
118 def hashWord(word) |
119 # UTF8 kanji and kana characters are usually 3 bytes each. |
120 # We will hash on the first character. |
121 (@readingHash[word.reading[0..2]] ||= []).push(word) |
122 (@kanjiHash[word.kanji[0..2]] ||= []).push(word) |
123 end |
124 |
125 def parseLine(index) |
126 if !isUTF8?(index) |
127 toUTF8(index) |
128 end |
129 if lines[index] =~ GET_JWORD_RE |
130 word = JWord.new |
131 word.kanji = $1 |
132 word.reading = $3 |
133 word.dictionary = self |
134 word.position = index |
135 @jWords[@jWords.size] = word |
136 word = hackWord(word) |
137 hashWord(word) |
138 end |
139 end |
140 |
141 def vocab(index) |
142 word = @jWords[index] |
143 if !word.nil? |
144 return word.toVocab |
145 else |
146 return nil |
147 end |
148 end |
149 |
150 def eachVocab(&block) |
151 @jWords.each do |word| |
152 block.call(word.toVocab) |
153 end |
154 end |
155 |
156 # Create the indices for the item at the current line. |
157 def parseEntry |
158 parseLine(@parsed) |
159 @parsed += 1 |
160 end |
161 |
162 # This is what to do when we are finished parsing. |
163 def finishParsing |
164 # Don't reset the lines because we need them later |
165 setLoaded(true) |
166 end |
167 |
168 # Find the items that may have been hashed with this reading. |
169 def findBinWithReading(reading) |
170 if reading.size >= 3 |
171 bin = (@readingHash[reading[0..2]] ||= []) |
172 else |
173 keys = @readingHash.keys.find_all do |key| |
174 key.start_with?(reading) |
175 end |
176 bin = [] |
177 keys.each do |key| |
178 bin += @readingHash[key] |
179 end |
180 end |
181 return bin |
182 end |
183 |
184 # Find the items that may have been hashed with this kanji. |
185 def findBinWithKanji(kanji) |
186 if kanji.size >= 3 |
187 bin = (@kanjiHash[kanji[0..2]] ||= []) |
188 else |
189 keys = @kanjiHash.keys.find_all do |key| |
190 key.start_with?(kanji) |
191 end |
192 bin = [] |
193 keys.each do |key| |
194 bin += @kanjiHash[key] |
195 end |
196 end |
197 return bin |
198 end |
199 |
200 # Return all the JWords that have a reading starting with reading. |
201 def findReadingsStartingWith(reading) |
202 bin = findBinWithReading(reading) |
203 if reading.size > 3 |
204 return bin.find_all do |word| |
205 word.reading.start_with?(reading) |
206 end |
207 else |
208 return bin |
209 end |
210 end |
211 |
212 # Return all the JWords that have kanji starting with kanji. |
213 def findKanjiStartingWith(kanji) |
214 bin = findBinWithKanji(kanji) |
215 if kanji.size > 3 |
216 return bin.find_all do |word| |
217 word.kanji.start_with?(kanji) |
218 end |
219 else |
220 return bin |
221 end |
222 end |
223 |
224 # Return all the JWords that have the reading, reading. |
225 def findReading(reading) |
226 relevance = reading.size |
227 return findBinWithReading(reading).find_all do |word| |
228 if word.reading.eql?(reading) |
229 word.relevance = relevance |
230 true |
231 else |
232 false |
233 end |
234 end |
235 end |
236 |
237 # Return all the JWords that have the kanji, kanji. |
238 def findKanji(kanji) |
239 relevance = kanji.size |
240 return findBinWithKanji(kanji).find_all do |word| |
241 if word.kanji.eql?(kanji) |
242 word.relevance = relevance |
243 true |
244 else |
245 false |
246 end |
247 end |
248 end |
249 |
250 def findWord(string) |
251 kanji = findKanji(string) |
252 reading = findReading(string) |
253 return kanji + reading |
254 end |
255 |
256 # Return true if the dictionary contains this vocabulary. |
257 def include?(vocabulary) |
258 return findReading(vocabulary.reading).any? do |word| |
259 word.toVocab.eql?(vocabulary) |
260 end |
261 end |
262 |
263 # Return all the words that occur at the beginning of reading |
264 def findReadingsThatStart(reading) |
265 findBinWithReading(reading[0..2]).find_all do |word| |
266 relevance = word.reading.size |
267 if reading.start_with?(word.reading) |
268 word.relevance = relevance |
269 true |
270 else |
271 false |
272 end |
273 end |
274 end |
275 |
276 # Return all the words that occur at the beginning of kanji |
277 def findKanjiThatStart(kanji) |
278 findBinWithKanji(kanji[0..2]).find_all do |word| |
279 relevance = word.kanji.size |
280 if kanji.start_with?(word.kanji) |
281 word.relevance = relevance |
282 true |
283 else |
284 false |
285 end |
286 end |
287 end |
288 |
289 # Return all the words that occur at the beginning of the string |
290 # These are sorted by size with the largest finds given first |
291 def findWordsThatStart(string) |
292 kanji = findKanjiThatStart(string) |
293 reading = findReadingsThatStart(string) |
294 return kanji + reading |
295 end |
296 end |
297 end |
Generated on Mon May 23 16:17:46 +0900 2011 with rcov 0.9.8