docs/classes/Ankusa/Classifier.html in ankusa-0.0.6 vs docs/classes/Ankusa/Classifier.html in ankusa-0.0.7
- old
+ new
@@ -3,11 +3,11 @@
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
- <title>Class: Ankusa::Classifier</title>
+ <title>Module: Ankusa::Classifier</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta http-equiv="Content-Script-Type" content="text/javascript" />
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
<script type="text/javascript">
// <![CDATA[
@@ -47,11 +47,11 @@
<div id="classHeader">
<table class="header-table">
<tr class="top-aligned-row">
- <td><strong>Class</strong></td>
+ <td><strong>Module</strong></td>
<td class="class-name-in-header">Ankusa::Classifier</td>
</tr>
<tr class="top-aligned-row">
<td><strong>In:</strong></td>
<td>
@@ -60,16 +60,10 @@
</a>
<br />
</td>
</tr>
- <tr class="top-aligned-row">
- <td><strong>Parent:</strong></td>
- <td>
- Object
- </td>
- </tr>
</table>
</div>
<!-- banner header -->
<div id="bodyContent">
@@ -84,19 +78,16 @@
<div id="method-list">
<h3 class="section-bar">Methods</h3>
<div class="name-list">
- <a href="#M000007">classifications</a>
- <a href="#M000006">classify</a>
- <a href="#M000010">doc_count_totals</a>
- <a href="#M000009">get_word_probs</a>
- <a href="#M000008">log_likelihoods</a>
+ <a href="#M000007">doc_count_totals</a>
+ <a href="#M000006">get_word_probs</a>
<a href="#M000003">new</a>
<a href="#M000004">train</a>
<a href="#M000005">untrain</a>
- <a href="#M000011">vocab_sizes</a>
+ <a href="#M000008">vocab_sizes</a>
</div>
</div>
</div>
@@ -154,109 +145,10 @@
</div>
</div>
<h3 class="section-bar">Public Instance methods</h3>
- <div id="method-M000007" class="method-detail">
- <a name="M000007"></a>
-
- <div class="method-heading">
- <a href="#M000007" class="method-signature">
- <span class="method-name">classifications</span><span class="method-args">(text, classnames=nil)</span>
- </a>
- </div>
-
- <div class="method-description">
- <p>
-Classes is an array of classes to look at
-</p>
- <p><a class="source-toggle" href="#"
- onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000007-source">
-<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 53</span>
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword kw">nil</span>)
- <span class="ruby-identifier">result</span> = <span class="ruby-identifier">log_likelihoods</span> <span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>
- <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span>
- <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-constant">Math</span>.<span class="ruby-identifier">exp</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>]
- }
-
- <span class="ruby-comment cmt"># normalize to get probs</span>
- <span class="ruby-identifier">sum</span> = <span class="ruby-identifier">result</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> }
- <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">/</span> <span class="ruby-identifier">sum</span> }
- <span class="ruby-identifier">result</span>
- <span class="ruby-keyword kw">end</span>
-</pre>
- </div>
- </div>
- </div>
-
- <div id="method-M000006" class="method-detail">
- <a name="M000006"></a>
-
- <div class="method-heading">
- <a href="#M000006" class="method-signature">
- <span class="method-name">classify</span><span class="method-args">(text, classes=nil)</span>
- </a>
- </div>
-
- <div class="method-description">
- <p><a class="source-toggle" href="#"
- onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000006-source">
-<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 47</span>
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>=<span class="ruby-keyword kw">nil</span>)
- <span class="ruby-comment cmt"># return the most probable class</span>
- <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
- <span class="ruby-keyword kw">end</span>
-</pre>
- </div>
- </div>
- </div>
-
- <div id="method-M000008" class="method-detail">
- <a name="M000008"></a>
-
- <div class="method-heading">
- <a href="#M000008" class="method-signature">
- <span class="method-name">log_likelihoods</span><span class="method-args">(text, classnames=nil)</span>
- </a>
- </div>
-
- <div class="method-description">
- <p>
-Classes is an array of classes to look at
-</p>
- <p><a class="source-toggle" href="#"
- onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000008-source">
-<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 66</span>
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword kw">nil</span>)
- <span class="ruby-identifier">classnames</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@classnames</span>
- <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
-
- <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
- <span class="ruby-identifier">probs</span> = <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>)
- <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> (<span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>]) <span class="ruby-operator">*</span> <span class="ruby-identifier">count</span>) }
- }
-
- <span class="ruby-comment cmt"># add the prior and exponentiate</span>
- <span class="ruby-identifier">doc_counts</span> = <span class="ruby-identifier">doc_count_totals</span>.<span class="ruby-identifier">select</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> }.<span class="ruby-identifier">map</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">v</span> }
- <span class="ruby-identifier">doc_count_total</span> = (<span class="ruby-identifier">doc_counts</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> } <span class="ruby-operator">+</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">length</span>).<span class="ruby-identifier">to_f</span>
- <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span>
- <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>((<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">k</span>) <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">doc_count_total</span>)
- }
-
- <span class="ruby-identifier">result</span>
- <span class="ruby-keyword kw">end</span>
-</pre>
- </div>
- </div>
- </div>
-
<div id="method-M000004" class="method-detail">
<a name="M000004"></a>
<div class="method-heading">
<a href="#M000004" class="method-signature">
@@ -330,48 +222,48 @@
</div>
</div>
<h3 class="section-bar">Protected Instance methods</h3>
- <div id="method-M000010" class="method-detail">
- <a name="M000010"></a>
+ <div id="method-M000007" class="method-detail">
+ <a name="M000007"></a>
<div class="method-heading">
- <a href="#M000010" class="method-signature">
+ <a href="#M000007" class="method-signature">
<span class="method-name">doc_count_totals</span><span class="method-args">()</span>
</a>
</div>
<div class="method-description">
<p><a class="source-toggle" href="#"
- onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000010-source">
+ onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
+ <div class="method-source-code" id="M000007-source">
<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 97</span>
+<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 59</span>
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_totals</span>
<span class="ruby-ivar">@doc_count_totals</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">doc_count_totals</span>
<span class="ruby-keyword kw">end</span>
</pre>
</div>
</div>
</div>
- <div id="method-M000009" class="method-detail">
- <a name="M000009"></a>
+ <div id="method-M000006" class="method-detail">
+ <a name="M000006"></a>
<div class="method-heading">
- <a href="#M000009" class="method-signature">
+ <a href="#M000006" class="method-signature">
<span class="method-name">get_word_probs</span><span class="method-args">(word, classnames)</span>
</a>
</div>
<div class="method-description">
<p><a class="source-toggle" href="#"
- onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000009-source">
+ onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
+ <div class="method-source-code" id="M000006-source">
<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 86</span>
+<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 48</span>
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>)
<span class="ruby-identifier">probs</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">v</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> }
<span class="ruby-identifier">vs</span> = <span class="ruby-identifier">vocab_sizes</span>
<span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
@@ -383,24 +275,24 @@
</pre>
</div>
</div>
</div>
- <div id="method-M000011" class="method-detail">
- <a name="M000011"></a>
+ <div id="method-M000008" class="method-detail">
+ <a name="M000008"></a>
<div class="method-heading">
- <a href="#M000011" class="method-signature">
+ <a href="#M000008" class="method-signature">
<span class="method-name">vocab_sizes</span><span class="method-args">()</span>
</a>
</div>
<div class="method-description">
<p><a class="source-toggle" href="#"
- onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
- <div class="method-source-code" id="M000011-source">
+ onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
+ <div class="method-source-code" id="M000008-source">
<pre>
-<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 101</span>
+<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 63</span>
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">vocab_sizes</span>
<span class="ruby-ivar">@vocab_sizes</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_vocabulary_sizes</span>
<span class="ruby-keyword kw">end</span>
</pre>
</div>
\ No newline at end of file