lib/jldrill/model/items/JEDictionary.rb

Name	Total Lines	Lines of Code	Total Coverage	Code Coverage
lib/jldrill/model/items/JEDictionary.rb	`297`	`227`	`85.19%`	`80.62%`

Key

Code reported as executed by Ruby looks like this... and this: this line is also marked as covered. Lines considered as run by rcov, but not reported by Ruby, look like this, and this: these lines were inferred by rcov (using simple heuristics). Finally, here's a line marked as not executed.

Coverage Details

1 # -*- coding: utf-8 -*-

3 require 'jldrill/model/items/JWord'

4 require 'jldrill/model/DataFile'

5 require "jldrill/model/items/Vocabulary"

6 require "jldrill/model/items/edict/Meaning"

7 require 'Context/Log'

8 require 'kconv'

10 module JLDrill

12     # A JEDictionary is a Japanese to English Dictionary.

13     # It is composed of an array of entries from an EDict

14     # dictionary. These entries are parsed to create JWords.

15     # The JWords can then further parse the entries to

16     # create Meanings.

17 	class JEDictionary < DataFile

18         attr_reader :jWords

20         LINE_RE_TEXT = '^([^\[\s]*)\s+(\[(.*)\]\s+)?\/(([^\/]*\/)+)\s*$'

21         LINE_RE = Regexp.new(LINE_RE_TEXT)

22         GET_JWORD_RE = Regexp.new('^([^\[\s]*)\s+(\[(.*)\]\s+)?')

23         KANA_RE = /（(.*)）/

24         FIRST_CHAR_RE = Regexp.new("^(.)", "U")

26         def initialize

27             super

28             @stepSize = 1000

29         end

31         # Reset the dictionary back to empty

32         def reset

33             @jWords = []

34             @readingHash = {}

35             @kanjiHash = {}

36             super

37         end

39         # The number of items we have indexed in the dictionary.

40         def dataSize

41             return @jWords.size

42         end

44         def length

45             return dataSize

46         end

48         # Returns true if the line at the given index is UTF8

49         def isUTF8?(index)

50             !Kconv.isutf8(@lines[index]).nil?

51         end

53         def getMeaning(position)

54             retVal = ""

55             if lines[position] =~ LINE_RE

56                 retVal = $4

57             end

58             return retVal

59         end

61         # Parse the line at the given position and return a Vocabulary

62         # containing the information (this is deprecated).

63         def getVocab(position)

64             retVal = nil

65             if lines[position] =~ LINE_RE

66                 kanji = $1

67                 reading = $3

68                 english = JLDrill::Meaning.create($4)

70                 # Hack for JLPT files

71                 if reading =~ KANA_RE

72                     reading = nil

73                     hint = $1

74                 end

76                 if(reading == "" || reading == nil)

77                     reading = kanji

78                     kanji = nil

79                 end

81                 retVal = Vocabulary.new(kanji, reading, english.allDefinitions,

82                                    english.allTypes, hint, position)

83             else

84                 Context::Log::warning("JLDrill::JEDictionary",

85                                       "Could not parse #{position}")

86             end

87             return retVal

88         end

90         # modifies the line at position to be UTF8

91         def toUTF8(position)

92             lines[position] = NKF.nkf("-Ewxm0", lines[position])

93         end

95         # Read all the lines into the buffer.

96         # This method also converts them to UTF8

97         def readLines

98             super

99         end

101         # Compensate for files that have missing kanji or

102         # JLPT files which have a strange format.

103         def hackWord(word)

104             # Hack for JLPT files

105             if word.reading =~ KANA_RE || word.reading.nil? ||

106                     word.reading.empty?

107                 word.reading = word.kanji

108                 word.kanji = ""

109             end

110             if word.kanji.nil?

111                 word.kanji = ""

112             end

113             return word

114         end

116         # Hash the word in both the reading and kanji hashes so that

117         # we can find them quickly.

118         def hashWord(word)

119             # UTF8 kanji and kana characters are usually 3 bytes each.

120             # We will hash on the first character.

121             (@readingHash[word.reading[0..2]] ||= []).push(word)

122             (@kanjiHash[word.kanji[0..2]] ||= []).push(word)

123         end

125         def parseLine(index)

126             if !isUTF8?(index)

127                 toUTF8(index)

128             end

129             if lines[index] =~ GET_JWORD_RE

130                 word = JWord.new

131                 word.kanji = $1

132                 word.reading = $3

133                 word.dictionary = self

134                 word.position = index

135                 @jWords[@jWords.size] = word

136                 word = hackWord(word)

137                 hashWord(word)

138             end

139         end

141         def vocab(index)

142             word = @jWords[index]

143             if !word.nil?

144                 return word.toVocab

145             else

146                 return nil

147             end

148         end

150         def eachVocab(&block)

151             @jWords.each do |word|

152                 block.call(word.toVocab)

153             end

154         end

156         # Create the indices for the item at the current line.

157         def parseEntry

158             parseLine(@parsed)

159             @parsed += 1

160         end

162         # This is what to do when we are finished parsing.

163         def finishParsing

164             # Don't reset the lines because we need them later

165             setLoaded(true)

166         end

168         # Find the items that may have been hashed with this reading.

169         def findBinWithReading(reading)

170             if reading.size >= 3

171                 bin = (@readingHash[reading[0..2]] ||= [])

172             else

173                 keys = @readingHash.keys.find_all do |key|

174                     key.start_with?(reading)

175                 end

176                 bin = []

177                 keys.each do |key|

178                     bin += @readingHash[key]

179                 end

180             end

181             return bin

182         end

184         # Find the items that may have been hashed with this kanji.

185         def findBinWithKanji(kanji)

186             if kanji.size >= 3

187                 bin = (@kanjiHash[kanji[0..2]] ||= [])

188             else

189                 keys = @kanjiHash.keys.find_all do |key|

190                     key.start_with?(kanji)

191                 end

192                 bin = []

193                 keys.each do |key|

194                     bin += @kanjiHash[key]

195                 end

196             end

197             return bin

198         end

200         # Return all the JWords that have a reading starting with reading.

201         def findReadingsStartingWith(reading)

202             bin = findBinWithReading(reading)

203             if reading.size > 3

204                 return bin.find_all do |word|

205                     word.reading.start_with?(reading)

206                 end

207             else

208                 return bin

209             end

210         end

212         # Return all the JWords that have kanji starting with kanji.

213         def findKanjiStartingWith(kanji)

214             bin = findBinWithKanji(kanji)

215             if kanji.size > 3

216                 return bin.find_all do |word|

217                     word.kanji.start_with?(kanji)

218                 end

219             else

220                 return bin

221             end

222         end

224         # Return all the JWords that have the reading, reading.

225         def findReading(reading)

226             relevance = reading.size

227             return findBinWithReading(reading).find_all do |word|

228                 if word.reading.eql?(reading)

229                     word.relevance = relevance

230                     true

231                 else

232                     false

233                 end

234             end

235         end

237         # Return all the JWords that have the kanji, kanji.

238         def findKanji(kanji)

239             relevance = kanji.size

240             return findBinWithKanji(kanji).find_all do |word|

241                 if word.kanji.eql?(kanji)

242                     word.relevance = relevance

243                     true

244                 else

245                     false

246                 end

247             end

248         end

250         def findWord(string)

251             kanji = findKanji(string)

252             reading = findReading(string)

253             return kanji + reading

254         end

256         # Return true if the dictionary contains this vocabulary.

257         def include?(vocabulary)

258             return findReading(vocabulary.reading).any? do |word|

259                 word.toVocab.eql?(vocabulary)

260             end

261         end

263         # Return all the words that occur at the beginning of reading

264         def findReadingsThatStart(reading)

265             findBinWithReading(reading[0..2]).find_all do |word|

266                 relevance = word.reading.size

267                 if reading.start_with?(word.reading)

268                     word.relevance = relevance

269                     true

270                 else

271                     false

272                 end

273             end

274         end

276         # Return all the words that occur at the beginning of kanji

277         def findKanjiThatStart(kanji)

278             findBinWithKanji(kanji[0..2]).find_all do |word|

279                 relevance = word.kanji.size

280                 if kanji.start_with?(word.kanji)

281                     word.relevance = relevance

282                     true

283                 else

284                     false

285                 end

286             end

287         end

289         # Return all the words that occur at the beginning of the string

290         # These are sorted by size with the largest finds given first

291         def findWordsThatStart(string)

292             kanji = findKanjiThatStart(string)

293             reading = findReadingsThatStart(string)

294             return kanji + reading

295         end

296     end

297 end

Generated on Mon May 23 16:17:46 +0900 2011 with rcov 0.9.8

Jldrill Git C0 Coverage Information - RCov

lib/jldrill/model/items/JEDictionary.rb

Key

Coverage Details