doc/CodeZauker/FileScanner.html in code_zauker-0.0.3 vs doc/CodeZauker/FileScanner.html in code_zauker-0.0.4

- old
+ new

@@ -4,11 +4,11 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title> Class: CodeZauker::FileScanner - &mdash; Code Zauker 0.0.3 Documentation + &mdash; Code Zauker 0.0.4 Documentation </title> <link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" /> @@ -107,10 +107,12 @@ </div> + + <h2> Instance Method Summary <small>(<a href="#" class="summary_toggle">collapse</a>)</small> </h2> @@ -351,20 +353,20 @@ <tr> <td> <pre class="lines"> -80 -81 -82 -83 -84 -85 -86</pre> +112 +113 +114 +115 +116 +117 +118</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 80</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 112</span> <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>==</span><span class='kw'>nil</span> <span class='ivar'>@redis</span><span class='op'>=</span><span class='const'>Redis</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span> <span class='kw'>else</span> @@ -394,16 +396,16 @@ <tr> <td> <pre class="lines"> -89 -90 -91</pre> +121 +122 +123</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 89</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 121</span> <span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_quit'>quit</span> <span class='kw'>end</span></pre> </td> @@ -435,22 +437,22 @@ <tr> <td> <pre class="lines"> -246 -247 -248 -249 -250 -251 -252 -253 -254</pre> +282 +283 +284 +285 +286 +287 +288 +289 +290</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 246</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 282</span> <span class='kw'>def</span> <span class='id identifier rubyid_isearch'>isearch</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span> <span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='lparen'>(</span><span class='rparen'>)</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='id identifier rubyid_split_in_trigrams'>split_in_trigrams</span><span class='lparen'>(</span><span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:ci</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span> @@ -475,46 +477,10 @@ <tr> <td> <pre class="lines"> -148 -149 -150 -151 -152 -153 -154 -155 -156 -157 -158 -159 -160 -161 -162 -163 -164 -165 -166 -167 -168 -169 -170 -171 -172 -173 -174 -175 -176 -177 -178 -179 -180 -181 -182 -183 184 185 186 187 188 @@ -539,14 +505,50 @@ 207 208 209 210 211 -212</pre> +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 148</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 184</span> <span class='kw'>def</span> <span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span> <span class='comment'># Define my redis id... </span> <span class='comment'># Already exists?... </span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> @@ -568,36 +570,36 @@ </span> <span class='comment'># The ratio is below 13% of total trigrams are unique for very big files </span> <span class='comment'># So we avoid a huge roundtrip to redis, and store the trigram on a memory-based set </span> <span class='comment'># before sending it to redis. This avoid </span> <span class='comment'># a lot of spourios work </span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span> - <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span> - <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span> - <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='const'>TRIGRAM_DEFAULT_PUSH_SIZE</span> - <span class='id identifier rubyid_util'>util</span><span class='op'>=</span><span class='const'>Util</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span> - <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='op'>|</span> - <span class='id identifier rubyid_l'>l</span><span class='op'>=</span> <span class='id identifier rubyid_util'>util</span><span class='period'>.</span><span class='id identifier rubyid_ensureUTF8'>ensureUTF8</span><span class='lparen'>(</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='rparen'>)</span> - <span class='comment'># Split each line into 3-char chunks, and store in a redis set -</span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span> - <span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span> - <span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span> - <span class='comment'># Avoid storing the 3space guy enterely -</span> <span class='kw'>if</span> <span class='id identifier rubyid_trigram'>trigram</span><span class='op'>==</span><span class='const'>SPACE_GUY</span> - <span class='kw'>next</span> - <span class='kw'>end</span> - <span class='comment'># push the trigram to redis (highly optimized) -</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_trigram'>trigram</span><span class='rparen'>)</span> - <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span> - <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> - <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span> - <span class='kw'>end</span> - <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span> - <span class='comment'>#puts &quot;#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}&quot; -</span> <span class='kw'>end</span> - <span class='kw'>end</span> - <span class='rbrace'>}</span> + <span class='id identifier rubyid_util'>util</span><span class='op'>=</span><span class='const'>Util</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span> + <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_util'>util</span><span class='period'>.</span><span class='id identifier rubyid_get_lines'>get_lines</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> + <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='const'>TRIGRAM_DEFAULT_PUSH_SIZE</span> + <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='op'>|</span> + <span class='id identifier rubyid_l'>l</span><span class='op'>=</span> <span class='id identifier rubyid_util'>util</span><span class='period'>.</span><span class='id identifier rubyid_ensureUTF8'>ensureUTF8</span><span class='lparen'>(</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='rparen'>)</span> + <span class='comment'># Split each line into 3-char chunks, and store in a redis set +</span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span> + <span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span> + <span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span> + <span class='comment'># Avoid storing the 3space guy enterely +</span> <span class='kw'>if</span> <span class='id identifier rubyid_trigram'>trigram</span><span class='op'>==</span><span class='const'>SPACE_GUY</span> + <span class='kw'>next</span> + <span class='kw'>end</span> + <span class='comment'># push the trigram to redis (highly optimized) +</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_trigram'>trigram</span><span class='rparen'>)</span> + <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span> + <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> + <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span> + <span class='kw'>end</span> + <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span> + <span class='comment'>#puts &quot;#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}&quot; +</span> <span class='kw'>end</span> + <span class='kw'>end</span> + + <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='int'>0</span> <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='kw'>nil</span> <span class='comment'>#puts &quot;Final push of #{s.length}&quot; </span> <span class='kw'>end</span> @@ -627,23 +629,23 @@ <tr> <td> <pre class="lines"> -228 -229 -230 -231 -232 -233 -234 -235 -236 -237</pre> +264 +265 +266 +267 +268 +269 +270 +271 +272 +273</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 228</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 264</span> <span class='kw'>def</span> <span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span> <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> <span class='comment'># fscan:id2filename:#{fid}.... </span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span> @@ -669,20 +671,20 @@ <tr> <td> <pre class="lines"> -276 -277 -278 -279 -280 -281 -282</pre> +312 +313 +314 +315 +316 +317 +318</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 276</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 312</span> <span class='kw'>def</span> <span class='id identifier rubyid_reindex'>reindex</span><span class='lparen'>(</span><span class='id identifier rubyid_fileList'>fileList</span><span class='rparen'>)</span> <span class='comment'>#puts &quot;Reindexing... #{fileList.length} files...&quot; </span> <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_current_file'>current_file</span> <span class='op'>|</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='lbracket'>[</span><span class='id identifier rubyid_current_file'>current_file</span><span class='rbracket'>]</span><span class='rparen'>)</span> @@ -716,49 +718,49 @@ <tr> <td> <pre class="lines"> -297 -298 -299 -300 -301 -302 -303 -304 -305 -306 -307 -308 -309 -310 -311 -312 -313 -314 -315 -316 -317 -318 -319 -320 -321 -322 -323 -324 -325 -326 -327 -328 -329 -330 -331 -332</pre> +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 297</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 333</span> <span class='kw'>def</span> <span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>==</span><span class='kw'>nil</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:*</span><span class='tstring_end'>&quot;</span></span> @@ -821,23 +823,23 @@ <tr> <td> <pre class="lines"> -285 -286 -287 -288 -289 -290 -291 -292 -293 -294</pre> +321 +322 +323 +324 +325 +326 +327 +328 +329 +330</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 285</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 321</span> <span class='kw'>def</span> <span class='id identifier rubyid_removeAll'>removeAll</span><span class='lparen'>(</span><span class='rparen'>)</span> <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:*</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='id identifier rubyid_tokill'>tokill</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='op'>*</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram*</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rparen'>)</span> @@ -877,28 +879,28 @@ <tr> <td> <pre class="lines"> -260 -261 -262 -263 -264 -265 -266 -267 -268 -269 -270 -271 -272 -273 -274</pre> +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 260</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 296</span> <span class='kw'>def</span> <span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span> <span class='const'>GRAM_SIZE</span> <span class='id identifier rubyid_raise'>raise</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>FATAL: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_term'>term</span><span class='rbrace'>}</span><span class='tstring_content'> is shorter then the minimum size of </span><span class='embexpr_beg'>#{</span><span class='const'>GRAM_SIZE</span><span class='rbrace'>}</span><span class='tstring_content'> character</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>end</span> @@ -921,12 +923,12 @@ </div> </div> <div id="footer"> - Generated on Fri Feb 3 17:18:44 2012 by + Generated on Sun Feb 12 19:16:27 2012 by <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a> - 0.7.4 (ruby-1.9.3). + 0.7.5 (ruby-1.9.3). </div> </body> </html> \ No newline at end of file