docs/classes/Ankusa/Classifier.html in ankusa-0.0.6 vs docs/classes/Ankusa/Classifier.html in ankusa-0.0.7

- old
+ new

@@ -3,11 +3,11 @@ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> - <title>Class: Ankusa::Classifier</title> + <title>Module: Ankusa::Classifier</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <meta http-equiv="Content-Script-Type" content="text/javascript" /> <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> <script type="text/javascript"> // <![CDATA[ @@ -47,11 +47,11 @@ <div id="classHeader"> <table class="header-table"> <tr class="top-aligned-row"> - <td><strong>Class</strong></td> + <td><strong>Module</strong></td> <td class="class-name-in-header">Ankusa::Classifier</td> </tr> <tr class="top-aligned-row"> <td><strong>In:</strong></td> <td> @@ -60,16 +60,10 @@ </a> <br /> </td> </tr> - <tr class="top-aligned-row"> - <td><strong>Parent:</strong></td> - <td> - Object - </td> - </tr> </table> </div> <!-- banner header --> <div id="bodyContent"> @@ -84,19 +78,16 @@ <div id="method-list"> <h3 class="section-bar">Methods</h3> <div class="name-list"> - <a href="#M000007">classifications</a>&nbsp;&nbsp; - <a href="#M000006">classify</a>&nbsp;&nbsp; - <a href="#M000010">doc_count_totals</a>&nbsp;&nbsp; - <a href="#M000009">get_word_probs</a>&nbsp;&nbsp; - <a href="#M000008">log_likelihoods</a>&nbsp;&nbsp; + <a href="#M000007">doc_count_totals</a>&nbsp;&nbsp; + <a href="#M000006">get_word_probs</a>&nbsp;&nbsp; <a href="#M000003">new</a>&nbsp;&nbsp; <a href="#M000004">train</a>&nbsp;&nbsp; <a href="#M000005">untrain</a>&nbsp;&nbsp; - <a href="#M000011">vocab_sizes</a>&nbsp;&nbsp; + <a href="#M000008">vocab_sizes</a>&nbsp;&nbsp; </div> </div> </div> @@ -154,109 +145,10 @@ </div> </div> <h3 class="section-bar">Public Instance methods</h3> - <div id="method-M000007" class="method-detail"> - <a name="M000007"></a> - - <div class="method-heading"> - <a href="#M000007" class="method-signature"> - <span class="method-name">classifications</span><span class="method-args">(text, classnames=nil)</span> - </a> - </div> - - <div class="method-description"> - <p> -Classes is an array of classes to look at -</p> - <p><a class="source-toggle" href="#" - onclick="toggleCode('M000007-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000007-source"> -<pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 53</span> - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword kw">nil</span>) - <span class="ruby-identifier">result</span> = <span class="ruby-identifier">log_likelihoods</span> <span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span> - <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> - <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-constant">Math</span>.<span class="ruby-identifier">exp</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] - } - - <span class="ruby-comment cmt"># normalize to get probs</span> - <span class="ruby-identifier">sum</span> = <span class="ruby-identifier">result</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> } - <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">/</span> <span class="ruby-identifier">sum</span> } - <span class="ruby-identifier">result</span> - <span class="ruby-keyword kw">end</span> -</pre> - </div> - </div> - </div> - - <div id="method-M000006" class="method-detail"> - <a name="M000006"></a> - - <div class="method-heading"> - <a href="#M000006" class="method-signature"> - <span class="method-name">classify</span><span class="method-args">(text, classes=nil)</span> - </a> - </div> - - <div class="method-description"> - <p><a class="source-toggle" href="#" - onclick="toggleCode('M000006-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000006-source"> -<pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 47</span> - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>=<span class="ruby-keyword kw">nil</span>) - <span class="ruby-comment cmt"># return the most probable class</span> - <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span> - <span class="ruby-keyword kw">end</span> -</pre> - </div> - </div> - </div> - - <div id="method-M000008" class="method-detail"> - <a name="M000008"></a> - - <div class="method-heading"> - <a href="#M000008" class="method-signature"> - <span class="method-name">log_likelihoods</span><span class="method-args">(text, classnames=nil)</span> - </a> - </div> - - <div class="method-description"> - <p> -Classes is an array of classes to look at -</p> - <p><a class="source-toggle" href="#" - onclick="toggleCode('M000008-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000008-source"> -<pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 66</span> - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword kw">nil</span>) - <span class="ruby-identifier">classnames</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@classnames</span> - <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span> - - <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span> - <span class="ruby-identifier">probs</span> = <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>) - <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> (<span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>]) <span class="ruby-operator">*</span> <span class="ruby-identifier">count</span>) } - } - - <span class="ruby-comment cmt"># add the prior and exponentiate</span> - <span class="ruby-identifier">doc_counts</span> = <span class="ruby-identifier">doc_count_totals</span>.<span class="ruby-identifier">select</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> }.<span class="ruby-identifier">map</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">v</span> } - <span class="ruby-identifier">doc_count_total</span> = (<span class="ruby-identifier">doc_counts</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> } <span class="ruby-operator">+</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">length</span>).<span class="ruby-identifier">to_f</span> - <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> - <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>((<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">k</span>) <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">doc_count_total</span>) - } - - <span class="ruby-identifier">result</span> - <span class="ruby-keyword kw">end</span> -</pre> - </div> - </div> - </div> - <div id="method-M000004" class="method-detail"> <a name="M000004"></a> <div class="method-heading"> <a href="#M000004" class="method-signature"> @@ -330,48 +222,48 @@ </div> </div> <h3 class="section-bar">Protected Instance methods</h3> - <div id="method-M000010" class="method-detail"> - <a name="M000010"></a> + <div id="method-M000007" class="method-detail"> + <a name="M000007"></a> <div class="method-heading"> - <a href="#M000010" class="method-signature"> + <a href="#M000007" class="method-signature"> <span class="method-name">doc_count_totals</span><span class="method-args">()</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" - onclick="toggleCode('M000010-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000010-source"> + onclick="toggleCode('M000007-source');return false;">[Source]</a></p> + <div class="method-source-code" id="M000007-source"> <pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 97</span> +<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 59</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_totals</span> <span class="ruby-ivar">@doc_count_totals</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">doc_count_totals</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> - <div id="method-M000009" class="method-detail"> - <a name="M000009"></a> + <div id="method-M000006" class="method-detail"> + <a name="M000006"></a> <div class="method-heading"> - <a href="#M000009" class="method-signature"> + <a href="#M000006" class="method-signature"> <span class="method-name">get_word_probs</span><span class="method-args">(word, classnames)</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" - onclick="toggleCode('M000009-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000009-source"> + onclick="toggleCode('M000006-source');return false;">[Source]</a></p> + <div class="method-source-code" id="M000006-source"> <pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 86</span> +<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 48</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>) <span class="ruby-identifier">probs</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">v</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> } <span class="ruby-identifier">vs</span> = <span class="ruby-identifier">vocab_sizes</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span> @@ -383,24 +275,24 @@ </pre> </div> </div> </div> - <div id="method-M000011" class="method-detail"> - <a name="M000011"></a> + <div id="method-M000008" class="method-detail"> + <a name="M000008"></a> <div class="method-heading"> - <a href="#M000011" class="method-signature"> + <a href="#M000008" class="method-signature"> <span class="method-name">vocab_sizes</span><span class="method-args">()</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" - onclick="toggleCode('M000011-source');return false;">[Source]</a></p> - <div class="method-source-code" id="M000011-source"> + onclick="toggleCode('M000008-source');return false;">[Source]</a></p> + <div class="method-source-code" id="M000008-source"> <pre> -<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 101</span> +<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 63</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">vocab_sizes</span> <span class="ruby-ivar">@vocab_sizes</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_vocabulary_sizes</span> <span class="ruby-keyword kw">end</span> </pre> </div> \ No newline at end of file