doc/CodeZauker/FileScanner.html in code_zauker-0.0.2 vs doc/CodeZauker/FileScanner.html in code_zauker-0.0.3

- old
+ new

@@ -4,11 +4,11 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title> Class: CodeZauker::FileScanner - &mdash; Code Zauker 0.0.2 Documentation + &mdash; Code Zauker 0.0.3 Documentation </title> <link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" /> @@ -164,10 +164,35 @@ <li class="public "> <span class="summary_signature"> + <a href="#isearch-instance_method" title="#isearch (instance method)">- (Object) <strong>isearch</strong>(term) </a> + + + + </span> + + + + + + + + + <span class="summary_desc"><div class='inline'> +<h1>Do a case-insenitive search </h1> + +<p>using the special set of trigrams "trigram:ci:*" all downcase.</p> +</div></span> + +</li> + + + <li class="public "> + <span class="summary_signature"> + <a href="#load-instance_method" title="#load (instance method)">- (Object) <strong>load</strong>(filename, noReload = false) </a> </span> @@ -185,10 +210,52 @@ <li class="public "> <span class="summary_signature"> + <a href="#map_ids_to_files-instance_method" title="#map_ids_to_files (instance method)">- (Object) <strong>map_ids_to_files</strong>(fileIds) </a> + + + + </span> + + + + + + + + + <span class="summary_desc"><div class='inline'></div></span> + +</li> + + + <li class="public "> + <span class="summary_signature"> + + <a href="#reindex-instance_method" title="#reindex (instance method)">- (Object) <strong>reindex</strong>(fileList) </a> + + + + </span> + + + + + + + + + <span class="summary_desc"><div class='inline'></div></span> + +</li> + + + <li class="public "> + <span class="summary_signature"> + <a href="#remove-instance_method" title="#remove (instance method)">- (Object) <strong>remove</strong>(filePaths = nil) </a> </span> @@ -284,20 +351,20 @@ <tr> <td> <pre class="lines"> -16 -17 -18 -19 -20 -21 -22</pre> +80 +81 +82 +83 +84 +85 +86</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 16</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 80</span> <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>==</span><span class='kw'>nil</span> <span class='ivar'>@redis</span><span class='op'>=</span><span class='const'>Redis</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span> <span class='kw'>else</span> @@ -327,26 +394,79 @@ <tr> <td> <pre class="lines"> -23 -24 -25</pre> +89 +90 +91</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 23</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 89</span> -<span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span> +<span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_quit'>quit</span> <span class='kw'>end</span></pre> </td> </tr> </table> </div> <div class="method_details "> + <p class="signature " id="isearch-instance_method"> + + - (<tt>Object</tt>) <strong>isearch</strong>(term) + + + +</p><div class="docstring"> + <div class="discussion"> + +<h1>Do a case-insenitive search </h1> + +<p>using the special set of trigrams "trigram:ci:*" all downcase</p> + + + </div> +</div> +<div class="tags"> + + +</div><table class="source_code"> + <tr> + <td> + <pre class="lines"> + + +246 +247 +248 +249 +250 +251 +252 +253 +254</pre> + </td> + <td> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 246</span> + +<span class='kw'>def</span> <span class='id identifier rubyid_isearch'>isearch</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span> + <span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='lparen'>(</span><span class='rparen'>)</span> + <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='id identifier rubyid_split_in_trigrams'>split_in_trigrams</span><span class='lparen'>(</span><span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:ci</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> + <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span> + <span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span> + <span class='kw'>end</span> + <span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span> + <span class='kw'>return</span> <span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span> +<span class='kw'>end</span></pre> + </td> + </tr> +</table> +</div> + + <div class="method_details "> <p class="signature " id="load-instance_method"> - (<tt>Object</tt>) <strong>load</strong>(filename, noReload = false) @@ -355,76 +475,78 @@ <tr> <td> <pre class="lines"> -52 -53 -54 -55 -56 -57 -58 -59 -60 -61 -62 -63 -64 -65 -66 -67 -68 -69 -70 -71 -72 -73 -74 -75 -76 -77 -78 -79 -80 -81 -82 -83 -84 -85 -86 -87 -88 -89 -90 -91 -92 -93 -94 -95 -96 -97 -98 -99 -100 -101 -102 -103 -104 -105 -106 -107 -108 -109 -110 -111 -112 -113 -114</pre> +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 52</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 148</span> <span class='kw'>def</span> <span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span> <span class='comment'># Define my redis id... </span> <span class='comment'># Already exists?... </span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> @@ -434,26 +556,28 @@ <span class='comment'># BUG: Consider storing it at the END of the processing </span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span> <span class='kw'>else</span> <span class='kw'>if</span> <span class='id identifier rubyid_noReload'>noReload</span> - <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Already found </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> as id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> and NOT RELOADED</span><span class='tstring_end'>&quot;</span></span> - <span class='kw'>return</span> <span class='kw'>nil</span> + <span class='comment'>#puts &quot;Already found #{filename} as id:#{fid} and NOT RELOADED&quot; +</span> <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>end</span> <span class='kw'>end</span> <span class='comment'># fid is the set key!... </span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='op'>=</span><span class='int'>0</span> <span class='comment'># TEST_LICENSE.txt: 3290 Total Scanned: 24628 </span> <span class='comment'># The ratio is below 13% of total trigrams are unique for very big files </span> <span class='comment'># So we avoid a huge roundtrip to redis, and store the trigram on a memory-based set </span> <span class='comment'># before sending it to redis. This avoid </span> <span class='comment'># a lot of spourios work </span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span> - <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span> + <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span> <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span> - <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='int'>6000</span> - <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_l'>l</span><span class='op'>|</span> + <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='const'>TRIGRAM_DEFAULT_PUSH_SIZE</span> + <span class='id identifier rubyid_util'>util</span><span class='op'>=</span><span class='const'>Util</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span> + <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='op'>|</span> + <span class='id identifier rubyid_l'>l</span><span class='op'>=</span> <span class='id identifier rubyid_util'>util</span><span class='period'>.</span><span class='id identifier rubyid_ensureUTF8'>ensureUTF8</span><span class='lparen'>(</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='rparen'>)</span> <span class='comment'># Split each line into 3-char chunks, and store in a redis set </span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span> <span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span> <span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span> <span class='comment'># Avoid storing the 3space guy enterely @@ -468,11 +592,11 @@ <span class='kw'>end</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span> <span class='comment'>#puts &quot;#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}&quot; </span> <span class='kw'>end</span> <span class='kw'>end</span> - <span class='kw'>end</span> + <span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='int'>0</span> <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='kw'>nil</span> <span class='comment'>#puts &quot;Final push of #{s.length}&quot; @@ -480,21 +604,99 @@ <span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_scard'>scard</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sadd'>sadd</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='op'>=</span><span class='lparen'>(</span> <span class='lparen'>(</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>*</span><span class='float'>1.0</span><span class='rparen'>)</span> <span class='op'>/</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='rparen'>)</span><span class='op'>*</span> <span class='float'>100.0</span> - <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&lt;</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&gt;</span><span class='int'>75</span> - <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='tstring_end'>&quot;</span></span> + <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&lt;</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&gt;</span><span class='int'>75</span> + <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> ?Binary</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&gt;</span><span class='int'>90</span> <span class='kw'>and</span> <span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>&gt;</span><span class='int'>70</span> <span class='kw'>end</span> <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>end</span></pre> </td> </tr> </table> </div> <div class="method_details "> + <p class="signature " id="map_ids_to_files-instance_method"> + + - (<tt>Object</tt>) <strong>map_ids_to_files</strong>(fileIds) + + + +</p><table class="source_code"> + <tr> + <td> + <pre class="lines"> + + +228 +229 +230 +231 +232 +233 +234 +235 +236 +237</pre> + </td> + <td> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 228</span> + +<span class='kw'>def</span> <span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span> + <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> + <span class='comment'># fscan:id2filename:#{fid}.... +</span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span> + <span class='id identifier rubyid_file_name'>file_name</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> + <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_file_name'>file_name</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='op'>!</span><span class='id identifier rubyid_file_name'>file_name</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> + <span class='kw'>end</span> + <span class='comment'>#puts &quot; ** Files found:#{filenames} from ids #{fileIds}&quot; +</span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span> +<span class='kw'>end</span></pre> + </td> + </tr> +</table> +</div> + + <div class="method_details "> + <p class="signature " id="reindex-instance_method"> + + - (<tt>Object</tt>) <strong>reindex</strong>(fileList) + + + +</p><table class="source_code"> + <tr> + <td> + <pre class="lines"> + + +276 +277 +278 +279 +280 +281 +282</pre> + </td> + <td> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 276</span> + +<span class='kw'>def</span> <span class='id identifier rubyid_reindex'>reindex</span><span class='lparen'>(</span><span class='id identifier rubyid_fileList'>fileList</span><span class='rparen'>)</span> + <span class='comment'>#puts &quot;Reindexing... #{fileList.length} files...&quot; +</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_current_file'>current_file</span> <span class='op'>|</span> + <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='lbracket'>[</span><span class='id identifier rubyid_current_file'>current_file</span><span class='rbracket'>]</span><span class='rparen'>)</span> + <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_current_file'>current_file</span><span class='comma'>,</span><span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span> + <span class='kw'>end</span> +<span class='kw'>end</span></pre> + </td> + </tr> +</table> +</div> + + <div class="method_details "> <p class="signature " id="remove-instance_method"> - (<tt>Object</tt>) <strong>remove</strong>(filePaths = nil) @@ -514,48 +716,49 @@ <tr> <td> <pre class="lines"> -163 -164 -165 -166 -167 -168 -169 -170 -171 -172 -173 -174 -175 -176 -177 -178 -179 -180 -181 -182 -183 -184 -185 -186 -187 -188 -189 -190 -191 -192 -193 -194 -195 -196 -197</pre> +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 163</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 297</span> <span class='kw'>def</span> <span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>==</span><span class='kw'>nil</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:*</span><span class='tstring_end'>&quot;</span></span> @@ -564,30 +767,31 @@ <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span> <span class='kw'>end</span> <span class='kw'>else</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='id identifier rubyid_filePaths'>filePaths</span> <span class='kw'>end</span> - <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Files to remove from index...</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> - - <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span> + <span class='comment'># puts &quot;Files to remove from index...#{fileList.length}&quot; +</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_smembers'>smembers</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span> <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>?Nothing to do on </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>end</span> - <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> + <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_content'> Expurging...</span><span class='tstring_end'>&quot;</span></span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_ts'>ts</span> <span class='op'>|</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span> <span class='kw'>begin</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:ci:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span> - <span class='kw'>rescue</span> <span class='const'>ArgumentError</span> + <span class='comment'>#putc &quot;.&quot; +</span> <span class='kw'>rescue</span> <span class='const'>ArgumentError</span> <span class='comment'># Ignore &quot;ArgumentError: invalid byte sequence in UTF-8&quot; </span> <span class='comment'># and proceed... </span> <span class='kw'>end</span> <span class='kw'>end</span> - - <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> + <span class='comment'>#putc &quot;\n&quot; +</span> + <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_filename'>filename</span> <span class='kw'>end</span> <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>end</span></pre> </td> @@ -617,19 +821,33 @@ <tr> <td> <pre class="lines"> -158 -159 -160</pre> +285 +286 +287 +288 +289 +290 +291 +292 +293 +294</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 158</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 285</span> <span class='kw'>def</span> <span class='id identifier rubyid_removeAll'>removeAll</span><span class='lparen'>(</span><span class='rparen'>)</span> - <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='kw'>nil</span><span class='rparen'>)</span> + <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> + <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:*</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> + <span class='id identifier rubyid_tokill'>tokill</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='op'>*</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram*</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rparen'>)</span> + <span class='id identifier rubyid_tokill'>tokill</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_x'>x</span> <span class='op'>|</span> + <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='id identifier rubyid_x'>x</span> + <span class='comment'>#puts &quot;Deleted #x&quot; +</span> <span class='kw'>end</span> + <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>end</span></pre> </td> </tr> </table> </div> @@ -659,71 +877,43 @@ <tr> <td> <pre class="lines"> -119 -120 -121 -122 -123 -124 -125 -126 -127 -128 -129 -130 -131 -132 -133 -134 -135 -136 -137 -138 -139 -140 -141 -142 -143 -144 -145 -146 -147</pre> +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274</pre> </td> <td> - <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 119</span> + <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 260</span> <span class='kw'>def</span> <span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span> <span class='const'>GRAM_SIZE</span> <span class='id identifier rubyid_raise'>raise</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>FATAL: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_term'>term</span><span class='rbrace'>}</span><span class='tstring_content'> is shorter then the minimum size of </span><span class='embexpr_beg'>#{</span><span class='const'>GRAM_SIZE</span><span class='rbrace'>}</span><span class='tstring_content'> character</span><span class='tstring_end'>&quot;</span></span> <span class='kw'>end</span> <span class='comment'>#puts &quot; ** Searching: #{term}&quot; -</span> <span class='comment'># split the term in a padded trigram -</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> - <span class='comment'># Search=&gt; Sea AND ear AND arc AND rch -</span> <span class='kw'>for</span> <span class='id identifier rubyid_j'>j</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> - <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='comma'>,</span><span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span> - <span class='kw'>if</span> <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span><span class='const'>GRAM_SIZE</span> - <span class='comment'># We are at the end... -</span> <span class='kw'>break</span> - <span class='kw'>end</span> - <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> - <span class='kw'>end</span> +</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='id identifier rubyid_split_in_trigrams'>split_in_trigrams</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='comment'>#puts &quot;Trigam conversion /#{term}/ into #{trigramInAnd}&quot; </span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span> <span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span> <span class='kw'>end</span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span> - <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span> - <span class='comment'># fscan:id2filename:#{fid}.... -</span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span> - <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='rparen'>)</span> - <span class='kw'>end</span> - <span class='comment'>#puts &quot; ** Files found:#{filenames} from ids #{fileIds}&quot; -</span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span> + <span class='id identifier rubyid_fileNames'>fileNames</span><span class='op'>=</span><span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span> + <span class='comment'>#puts &quot;DEBUG #{fileIds} #{fileNames}&quot; +</span> <span class='kw'>return</span> <span class='id identifier rubyid_fileNames'>fileNames</span> <span class='kw'>end</span></pre> </td> </tr> </table> </div> @@ -731,10 +921,10 @@ </div> </div> <div id="footer"> - Generated on Fri Jan 27 14:54:06 2012 by + Generated on Fri Feb 3 17:18:44 2012 by <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a> 0.7.4 (ruby-1.9.3). </div> </body> \ No newline at end of file