doc/CodeZauker/FileScanner.html in code_zauker-0.0.2 vs doc/CodeZauker/FileScanner.html in code_zauker-0.0.3
- old
+ new
@@ -4,11 +4,11 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>
Class: CodeZauker::FileScanner
- — Code Zauker 0.0.2 Documentation
+ — Code Zauker 0.0.3 Documentation
</title>
<link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" />
@@ -164,10 +164,35 @@
<li class="public ">
<span class="summary_signature">
+ <a href="#isearch-instance_method" title="#isearch (instance method)">- (Object) <strong>isearch</strong>(term) </a>
+
+
+
+ </span>
+
+
+
+
+
+
+
+
+ <span class="summary_desc"><div class='inline'>
+<h1>Do a case-insensitive search </h1>
+
+<p>using the special set of trigrams "trigram:ci:*" all downcase.</p>
+</div></span>
+
+</li>
+
+
+ <li class="public ">
+ <span class="summary_signature">
+
<a href="#load-instance_method" title="#load (instance method)">- (Object) <strong>load</strong>(filename, noReload = false) </a>
</span>
@@ -185,10 +210,52 @@
<li class="public ">
<span class="summary_signature">
+ <a href="#map_ids_to_files-instance_method" title="#map_ids_to_files (instance method)">- (Object) <strong>map_ids_to_files</strong>(fileIds) </a>
+
+
+
+ </span>
+
+
+
+
+
+
+
+
+ <span class="summary_desc"><div class='inline'></div></span>
+
+</li>
+
+
+ <li class="public ">
+ <span class="summary_signature">
+
+ <a href="#reindex-instance_method" title="#reindex (instance method)">- (Object) <strong>reindex</strong>(fileList) </a>
+
+
+
+ </span>
+
+
+
+
+
+
+
+
+ <span class="summary_desc"><div class='inline'></div></span>
+
+</li>
+
+
+ <li class="public ">
+ <span class="summary_signature">
+
<a href="#remove-instance_method" title="#remove (instance method)">- (Object) <strong>remove</strong>(filePaths = nil) </a>
</span>
@@ -284,20 +351,20 @@
<tr>
<td>
<pre class="lines">
-16
-17
-18
-19
-20
-21
-22</pre>
+80
+81
+82
+83
+84
+85
+86</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 16</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 80</span>
<span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
<span class='kw'>if</span> <span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>==</span><span class='kw'>nil</span>
<span class='ivar'>@redis</span><span class='op'>=</span><span class='const'>Redis</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
<span class='kw'>else</span>
@@ -327,26 +394,79 @@
<tr>
<td>
<pre class="lines">
-23
-24
-25</pre>
+89
+90
+91</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 23</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 89</span>
-<span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span>
+<span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_quit'>quit</span>
<span class='kw'>end</span></pre>
</td>
</tr>
</table>
</div>
<div class="method_details ">
+ <p class="signature " id="isearch-instance_method">
+
+ - (<tt>Object</tt>) <strong>isearch</strong>(term)
+
+
+
+</p><div class="docstring">
+ <div class="discussion">
+
+<h1>Do a case-insensitive search </h1>
+
+<p>using the special set of trigrams "trigram:ci:*" all downcase</p>
+
+
+ </div>
+</div>
+<div class="tags">
+
+
+</div><table class="source_code">
+ <tr>
+ <td>
+ <pre class="lines">
+
+
+246
+247
+248
+249
+250
+251
+252
+253
+254</pre>
+ </td>
+ <td>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 246</span>
+
+<span class='kw'>def</span> <span class='id identifier rubyid_isearch'>isearch</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='lparen'>(</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='id identifier rubyid_split_in_trigrams'>split_in_trigrams</span><span class='lparen'>(</span><span class='id identifier rubyid_termLowercase'>termLowercase</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram:ci</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
+ <span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
+ <span class='kw'>end</span>
+ <span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span>
+ <span class='kw'>return</span> <span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span>
+<span class='kw'>end</span></pre>
+ </td>
+ </tr>
+</table>
+</div>
+
+ <div class="method_details ">
<p class="signature " id="load-instance_method">
- (<tt>Object</tt>) <strong>load</strong>(filename, noReload = false)
@@ -355,76 +475,78 @@
<tr>
<td>
<pre class="lines">
-52
-53
-54
-55
-56
-57
-58
-59
-60
-61
-62
-63
-64
-65
-66
-67
-68
-69
-70
-71
-72
-73
-74
-75
-76
-77
-78
-79
-80
-81
-82
-83
-84
-85
-86
-87
-88
-89
-90
-91
-92
-93
-94
-95
-96
-97
-98
-99
-100
-101
-102
-103
-104
-105
-106
-107
-108
-109
-110
-111
-112
-113
-114</pre>
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 52</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 148</span>
<span class='kw'>def</span> <span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span>
<span class='comment'># Define my redis id...
</span> <span class='comment'># Already exists?...
</span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
@@ -434,26 +556,28 @@
<span class='comment'># BUG: Consider storing it at the END of the processing
</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span>
<span class='kw'>else</span>
<span class='kw'>if</span> <span class='id identifier rubyid_noReload'>noReload</span>
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>Already found </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> as id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> and NOT RELOADED</span><span class='tstring_end'>"</span></span>
- <span class='kw'>return</span> <span class='kw'>nil</span>
+ <span class='comment'>#puts "Already found #{filename} as id:#{fid} and NOT RELOADED"
+</span> <span class='kw'>return</span> <span class='kw'>nil</span>
<span class='kw'>end</span>
<span class='kw'>end</span>
<span class='comment'># fid is the set key!...
</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='op'>=</span><span class='int'>0</span>
<span class='comment'># TEST_LICENSE.txt: 3290 Total Scanned: 24628
</span> <span class='comment'># The ratio is below 13% of total trigrams are unique for very big files
</span> <span class='comment'># So we avoid a huge roundtrip to redis, and store the trigram on a memory-based set
</span> <span class='comment'># before sending it to redis. This avoid
</span> <span class='comment'># a lot of spourios work
</span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
- <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>r</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span>
+ <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>r</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span>
<span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span>
- <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='int'>6000</span>
- <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_l'>l</span><span class='op'>|</span>
+ <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='const'>TRIGRAM_DEFAULT_PUSH_SIZE</span>
+ <span class='id identifier rubyid_util'>util</span><span class='op'>=</span><span class='const'>Util</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='op'>|</span>
+ <span class='id identifier rubyid_l'>l</span><span class='op'>=</span> <span class='id identifier rubyid_util'>util</span><span class='period'>.</span><span class='id identifier rubyid_ensureUTF8'>ensureUTF8</span><span class='lparen'>(</span><span class='id identifier rubyid_lineNotUTF8'>lineNotUTF8</span><span class='rparen'>)</span>
<span class='comment'># Split each line into 3-char chunks, and store in a redis set
</span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span>
<span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span>
<span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
<span class='comment'># Avoid storing the 3space guy enterely
@@ -468,11 +592,11 @@
<span class='kw'>end</span>
<span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span>
<span class='comment'>#puts "#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}"
</span> <span class='kw'>end</span>
<span class='kw'>end</span>
- <span class='kw'>end</span>
+ <span class='rbrace'>}</span>
<span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>></span> <span class='int'>0</span>
<span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
<span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='kw'>nil</span>
<span class='comment'>#puts "Final push of #{s.length}"
@@ -480,21 +604,99 @@
<span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_scard'>scard</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sadd'>sadd</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='op'>=</span><span class='lparen'>(</span> <span class='lparen'>(</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>*</span><span class='float'>1.0</span><span class='rparen'>)</span> <span class='op'>/</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='rparen'>)</span><span class='op'>*</span> <span class='float'>100.0</span>
- <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'><</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>></span><span class='int'>75</span>
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='tstring_end'>"</span></span>
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'><</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>></span><span class='int'>75</span>
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> ?Binary</span><span class='tstring_end'>"</span></span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>></span><span class='int'>90</span> <span class='kw'>and</span> <span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>></span><span class='int'>70</span>
<span class='kw'>end</span>
<span class='kw'>return</span> <span class='kw'>nil</span>
<span class='kw'>end</span></pre>
</td>
</tr>
</table>
</div>
<div class="method_details ">
+ <p class="signature " id="map_ids_to_files-instance_method">
+
+ - (<tt>Object</tt>) <strong>map_ids_to_files</strong>(fileIds)
+
+
+
+</p><table class="source_code">
+ <tr>
+ <td>
+ <pre class="lines">
+
+
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237</pre>
+ </td>
+ <td>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 228</span>
+
+<span class='kw'>def</span> <span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
+ <span class='comment'># fscan:id2filename:#{fid}....
+</span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span>
+ <span class='id identifier rubyid_file_name'>file_name</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_file_name'>file_name</span><span class='rparen'>)</span> <span class='kw'>if</span> <span class='op'>!</span><span class='id identifier rubyid_file_name'>file_name</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
+ <span class='kw'>end</span>
+ <span class='comment'>#puts " ** Files found:#{filenames} from ids #{fileIds}"
+</span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span>
+<span class='kw'>end</span></pre>
+ </td>
+ </tr>
+</table>
+</div>
+
+ <div class="method_details ">
+ <p class="signature " id="reindex-instance_method">
+
+ - (<tt>Object</tt>) <strong>reindex</strong>(fileList)
+
+
+
+</p><table class="source_code">
+ <tr>
+ <td>
+ <pre class="lines">
+
+
+276
+277
+278
+279
+280
+281
+282</pre>
+ </td>
+ <td>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 276</span>
+
+<span class='kw'>def</span> <span class='id identifier rubyid_reindex'>reindex</span><span class='lparen'>(</span><span class='id identifier rubyid_fileList'>fileList</span><span class='rparen'>)</span>
+ <span class='comment'>#puts "Reindexing... #{fileList.length} files..."
+</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_current_file'>current_file</span> <span class='op'>|</span>
+ <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='lbracket'>[</span><span class='id identifier rubyid_current_file'>current_file</span><span class='rbracket'>]</span><span class='rparen'>)</span>
+ <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_current_file'>current_file</span><span class='comma'>,</span><span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span>
+ <span class='kw'>end</span>
+<span class='kw'>end</span></pre>
+ </td>
+ </tr>
+</table>
+</div>
+
+ <div class="method_details ">
<p class="signature " id="remove-instance_method">
- (<tt>Object</tt>) <strong>remove</strong>(filePaths = nil)
@@ -514,48 +716,49 @@
<tr>
<td>
<pre class="lines">
-163
-164
-165
-166
-167
-168
-169
-170
-171
-172
-173
-174
-175
-176
-177
-178
-179
-180
-181
-182
-183
-184
-185
-186
-187
-188
-189
-190
-191
-192
-193
-194
-195
-196
-197</pre>
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 163</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 297</span>
<span class='kw'>def</span> <span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
<span class='kw'>if</span> <span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>==</span><span class='kw'>nil</span>
<span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
<span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:*</span><span class='tstring_end'>"</span></span>
@@ -564,30 +767,31 @@
<span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
<span class='kw'>end</span>
<span class='kw'>else</span>
<span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='id identifier rubyid_filePaths'>filePaths</span>
<span class='kw'>end</span>
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>Files to remove from index...</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
-
- <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span>
+ <span class='comment'># puts "Files to remove from index...#{fileList.length}"
+</span> <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span>
<span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_smembers'>smembers</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='kw'>if</span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
<span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>?Nothing to do on </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='kw'>end</span>
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_content'> Expurging...</span><span class='tstring_end'>"</span></span>
<span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_ts'>ts</span> <span class='op'>|</span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
<span class='kw'>begin</span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram:ci:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span>
- <span class='kw'>rescue</span> <span class='const'>ArgumentError</span>
+ <span class='comment'>#putc "."
+</span> <span class='kw'>rescue</span> <span class='const'>ArgumentError</span>
<span class='comment'># Ignore "ArgumentError: invalid byte sequence in UTF-8"
</span> <span class='comment'># and proceed...
</span> <span class='kw'>end</span>
<span class='kw'>end</span>
-
- <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
+ <span class='comment'>#putc "\n"
+</span>
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span>
<span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='id identifier rubyid_filename'>filename</span>
<span class='kw'>end</span>
<span class='kw'>return</span> <span class='kw'>nil</span>
<span class='kw'>end</span></pre>
</td>
@@ -617,19 +821,33 @@
<tr>
<td>
<pre class="lines">
-158
-159
-160</pre>
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 158</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 285</span>
<span class='kw'>def</span> <span class='id identifier rubyid_removeAll'>removeAll</span><span class='lparen'>(</span><span class='rparen'>)</span>
- <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='kw'>nil</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
+ <span class='id identifier rubyid_tokill'>tokill</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:*</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_tokill'>tokill</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='op'>*</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram*</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rparen'>)</span>
+ <span class='id identifier rubyid_tokill'>tokill</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_x'>x</span> <span class='op'>|</span>
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='id identifier rubyid_x'>x</span>
+ <span class='comment'>#puts "Deleted #x"
+</span> <span class='kw'>end</span>
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>"</span></span>
<span class='kw'>end</span></pre>
</td>
</tr>
</table>
</div>
@@ -659,71 +877,43 @@
<tr>
<td>
<pre class="lines">
-119
-120
-121
-122
-123
-124
-125
-126
-127
-128
-129
-130
-131
-132
-133
-134
-135
-136
-137
-138
-139
-140
-141
-142
-143
-144
-145
-146
-147</pre>
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274</pre>
</td>
<td>
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 119</span>
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 260</span>
<span class='kw'>def</span> <span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span>
<span class='kw'>if</span> <span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'><</span> <span class='const'>GRAM_SIZE</span>
<span class='id identifier rubyid_raise'>raise</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>FATAL: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_term'>term</span><span class='rbrace'>}</span><span class='tstring_content'> is shorter then the minimum size of </span><span class='embexpr_beg'>#{</span><span class='const'>GRAM_SIZE</span><span class='rbrace'>}</span><span class='tstring_content'> character</span><span class='tstring_end'>"</span></span>
<span class='kw'>end</span>
<span class='comment'>#puts " ** Searching: #{term}"
-</span> <span class='comment'># split the term in a padded trigram
-</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
- <span class='comment'># Search=> Sea AND ear AND arc AND rch
-</span> <span class='kw'>for</span> <span class='id identifier rubyid_j'>j</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span>
- <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='comma'>,</span><span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
- <span class='kw'>if</span> <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'><</span><span class='const'>GRAM_SIZE</span>
- <span class='comment'># We are at the end...
-</span> <span class='kw'>break</span>
- <span class='kw'>end</span>
- <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
- <span class='kw'>end</span>
+</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='id identifier rubyid_split_in_trigrams'>split_in_trigrams</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>trigram</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
<span class='comment'>#puts "Trigam conversion /#{term}/ into #{trigramInAnd}"
</span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
<span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
<span class='kw'>end</span>
<span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span>
- <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
- <span class='comment'># fscan:id2filename:#{fid}....
-</span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span>
- <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span><span class='rparen'>)</span>
- <span class='kw'>end</span>
- <span class='comment'>#puts " ** Files found:#{filenames} from ids #{fileIds}"
-</span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span>
+ <span class='id identifier rubyid_fileNames'>fileNames</span><span class='op'>=</span><span class='id identifier rubyid_map_ids_to_files'>map_ids_to_files</span><span class='lparen'>(</span><span class='id identifier rubyid_fileIds'>fileIds</span><span class='rparen'>)</span>
+ <span class='comment'>#puts "DEBUG #{fileIds} #{fileNames}"
+</span> <span class='kw'>return</span> <span class='id identifier rubyid_fileNames'>fileNames</span>
<span class='kw'>end</span></pre>
</td>
</tr>
</table>
</div>
@@ -731,10 +921,10 @@
</div>
</div>
<div id="footer">
- Generated on Fri Jan 27 14:54:06 2012 by
+ Generated on Fri Feb 3 17:18:44 2012 by
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
0.7.4 (ruby-1.9.3).
</div>
</body>
\ No newline at end of file