Name | Total Lines | Lines of Code | Total Coverage | Code Coverage |
---|---|---|---|---|
lib/jldrill/model/Tanaka.rb | 237 | 188 | 94.94% | 94.15% |
Code reported as executed by Ruby looks like this... and this: this line is also marked as covered. Lines considered as run by rcov, but not reported by Ruby, look like this, and this: these lines were inferred by rcov (using simple heuristics). Finally, here's a line marked as not executed.
1 require 'jldrill/model/DataFile' |
2 |
3 module JLDrill::Tanaka |
4 |
# Represents one of the words stored in the Tanaka library.
#
# A word is held as a single string: the kanji, optionally followed by
# the reading in parentheses, e.g. "kanji(reading)".
class Word
  attr_reader :contents

  # contents:: the textual form of the word.
  def initialize(contents)
    @contents = contents
  end

  # Builds a Word from a kanji and an optional reading.
  # When reading is nil the contents are just the kanji; otherwise
  # "(reading)" is appended to the kanji.
  def self.create(kanji, reading)
    contents = kanji
    contents += "(#{reading})" unless reading.nil?
    Word.new(contents)
  end

  # Returns the contents unchanged.
  def to_s
    @contents
  end

  # Two words are equal when their contents are equal.
  # Objects that do not expose +contents+ (nil, plain strings, ...) are
  # simply unequal instead of raising NoMethodError.
  def eql?(word)
    word.respond_to?(:contents) && @contents.eql?(word.contents)
  end

  # Keep == consistent with eql?/hash so that comparisons and hash
  # lookups agree with each other.
  alias == eql?

  # Hash of the contents, so equal words land in the same hash bucket.
  def hash
    @contents.hash
  end
end
33 |
# One example sentence together with the annotated word that linked to it.
#
# +data+ is an "A:" line of the form
#   A: <japanese>\t<english>#ID=<id>
# +wordData+ is one annotated word of the form
#   kanji(reading)[sense]{actual}~
# where every part after the kanji is optional.
class Sentence
  RE = /^A: ([^\t]*)\t(.*)#ID=(.*)$/u
  WORD_RE = /^([^(\[{~]*)(\(([^)]*)\))?(\[([^\]]*)\])?(\{([^}]*)\})?(~)?/u

  attr_reader :kanji, :reading, :sense, :actual, :checked

  # data::     the "A:" line holding the sentence text.
  # wordData:: the annotated word; may be nil.
  def initialize(data, wordData)
    @data = data
    @wordData = wordData
    # Defaults used when the word data cannot be parsed (e.g. nil), so
    # that word_to_s does not print a spurious "[]" for a nil sense.
    @kanji = nil
    @reading = nil
    @sense = 0
    @actual = nil
    @checked = false
    parseWordData
  end

  # Splits @wordData into kanji, reading, sense, actual and checked.
  # Leaves the current values untouched when the data does not match.
  def parseWordData
    match = WORD_RE.match(@wordData)
    return if match.nil?

    @kanji = match[1]
    @reading = match[3]
    # A missing sense is recorded as 0 (nil.to_i == 0).
    @sense = match[5].to_i
    @actual = match[7]
    @checked = match[8].eql?("~")
  end

  # The English text of the sentence, or "" if the data is malformed.
  def english
    dataField(2) || ""
  end

  # The Japanese text of the sentence, or "" if the data is malformed.
  def japanese
    dataField(1) || ""
  end

  # The sentence ID as an Integer, or "" if the data is malformed.
  def id
    field = dataField(3)
    field.nil? ? "" : field.to_i
  end

  # Rebuilds the annotated word in kanji(reading)[sense]{actual}~ form,
  # leaving out every part that is absent.
  def word_to_s
    text = @kanji.to_s.dup
    text << "(#{@reading})" unless @reading.nil?
    text << "[#{@sense}]" if @sense != 0
    text << "{#{@actual}}" unless @actual.nil?
    text << "~" if @checked
    text
  end

  # "<id>: <word>" followed by the Japanese and English text, each on
  # its own tab-indented line.
  def to_s
    "#{id}: #{word_to_s}\n\t#{japanese}\n\t#{english}"
  end

  private

  # Returns capture group +index+ of RE applied to @data, or nil when
  # the data does not match.
  def dataField(index)
    match = RE.match(@data)
    match && match[index]
  end
end
106 |
# Represents the results of searching the Tanaka reference library.
# It is composed of a list of sentences.
#
# +connections+ holds indices into +sentences+; each index points at an
# "A:" line, and the line right after it (index + 1) is read as the list
# of annotated words for that sentence.
class SearchResults
  # The part of an annotated word used as its lookup key: everything
  # before the first "[", "{" or "~", including an optional "(reading)".
  KEY_RE = /\A[^{(\[~]*(?:\([^)]*\))?/

  attr_accessor :sentences, :connections

  # word::        the string that was searched for.
  # connections:: indices into +sentences+, or nil when nothing matched.
  # sentences::   the raw lines of the library.
  def initialize(word, connections, sentences)
    @word = word
    @sentences = sentences
    @connections = connections
  end

  # Builds one Sentence per connection, pairing the line at the
  # connection index with the annotated word found on the following line.
  # Returns an empty array when there are no connections.
  def getSentences
    return [] if @connections.nil?

    @connections.zip(getWordData).map do |connection, wordData|
      Sentence.new(@sentences[connection], wordData)
    end
  end

  # Finds the annotated word for @word in a space separated word line.
  #
  # A word whose key (kanji plus optional reading) is exactly @word wins,
  # so searching for "hon" is not answered with "honto" when both are
  # present. If there is no exact match, the first word that merely
  # starts with @word is returned. Returns "" when nothing matches or
  # when the line is nil.
  def findWord(connection)
    return "" if connection.nil?

    words = connection.split(" ")
    exact = words.find { |word| word[KEY_RE] == @word }
    return exact unless exact.nil?

    words.find { |word| word.start_with?(@word) } || ""
  end

  # Returns, for every connection, the annotated word found on the line
  # that follows the connection index.
  def getWordData
    @connections.map { |connection| findWord(@sentences[connection + 1]) }
  end
end
149 |
# Represents the Tanaka reference library.
#
# The file is read as pairs of lines: an "A:" line followed by a "B:" line
# listing words. Every word on a "B:" line is indexed under its kanji plus
# optional "(reading)", pointing back at the position of the "A:" line.
class Reference < JLDrill::DataFile
  attr_accessor :words

  A_RE = /^A:/
  B_RE = /^B: (.*)/
  WORD_RE = /([^{(\[~]*(\([^)]*\))?)/u

  def initialize
    super
    @sentences = 0
    @words = {}
    @stepSize = 1000
  end

  # Number of sentences parsed so far.
  def numSentences
    dataSize
  end

  # Number of distinct word keys in the index.
  def numWords
    @words.size
  end

  # Records that the word occurs in the sentence at +pos+. The index key
  # is the part of the word captured by WORD_RE.
  def addWord(word, pos)
    match = WORD_RE.match(word)
    return if match.nil?

    key = match[1]
    @words[key] = [] unless @words[key]
    @words[key].push(pos)
  end

  # Parses one A/B line pair. When both lines have the expected prefix the
  # sentence is counted and every word of the B line is indexed under +pos+.
  # Returns true on success, false otherwise.
  def parseLines(aLine, bLine, pos)
    return false unless A_RE.match(aLine)

    bMatch = B_RE.match(bLine)
    return false if bMatch.nil?

    @sentences += 1
    bMatch[1].split(' ').each { |word| addWord(word, pos) }
    true
  end

  # Number of sentences parsed so far.
  def dataSize
    @sentences
  end

  # Parses the entry at the current position. A successfully parsed pair
  # consumes two lines; anything else consumes a single line so that
  # parsing always advances.
  def parseEntry
    consumed = parseLines(@lines[@parsed], @lines[@parsed + 1], @parsed) ? 2 : 1
    @parsed += consumed
  end

  # Looks up the sentences that use the given word.
  #
  # With a kanji, the kanji/reading pair is tried first. The corpus only
  # uses readings to disambiguate kanji and most words don't have readings,
  # so when nothing is found the search is repeated without the reading.
  # When there is no kanji, the reading is used as the kanji.
  def search(kanji, reading)
    if kanji.nil?
      word = Word.create(reading, nil).to_s
      connections = @words[word]
    else
      word = Word.create(kanji, reading).to_s
      connections = @words[word]
      if connections.nil?
        word = Word.create(kanji, nil).to_s
        connections = @words[word]
      end
    end

    SearchResults.new(word, connections, @lines).getSentences
  end

  # Don't erase @lines because we need them later
  def finishParsing
    setLoaded(true)
  end
end
237 end |
Generated on Mon May 23 16:17:46 +0900 2011 with rcov 0.9.8