<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Class: Ankusa::Classifier</title>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <meta http-equiv="Content-Script-Type" content="text/javascript" />
  <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  <!--
    Page helpers for the [Source] links. The CDATA markers sit on their own
    comment lines: a "//" comment runs to the end of the physical line, so any
    statement sharing a line with a marker would be commented out.
  -->
  <script type="text/javascript">
  // <![CDATA[

  // Opens the given url in a small, resizable popup window named "Code".
  function popupCode( url ) {
    window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400");
  }

  // Shows or hides the element with the given id (used for the method
  // source listings). Returns true when the element was toggled, false when
  // no lookup API is available or no element has that id.
  function toggleCode( id ) {
    var elem = null;

    if ( document.getElementById ) {
      elem = document.getElementById( id );
    } else if ( document.all ) {
      // Bracket lookup instead of eval: the ids used on this page contain a
      // hyphen ("M000003-source"), which an evaluated "document.all." + id
      // expression would treat as a subtraction.
      elem = document.all[ id ];
    } else {
      return false;
    }

    // Unknown id: nothing to toggle, and elem.style would throw.
    if ( !elem ) {
      return false;
    }

    var elemStyle = elem.style;

    // An empty inline display value means the element is still hidden by the
    // stylesheet rule written below, so anything other than "block" is
    // treated as hidden.
    if ( elemStyle.display != "block" ) {
      elemStyle.display = "block";
    } else {
      elemStyle.display = "none";
    }

    return true;
  }

  // Make codeblocks hidden by default
  document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" );

  // ]]>
  </script>
</head>
<body>

  <div id="classHeader">
    <table class="header-table">
      <tr class="top-aligned-row">
        <td><strong>Class</strong></td>
        <td class="class-name-in-header">Ankusa::Classifier</td>
      </tr>
      <tr class="top-aligned-row">
        <td><strong>In:</strong></td>
        <td>
          <a href="../../files/lib/ankusa/classifier_rb.html">
          lib/ankusa/classifier.rb
          </a>
          <br />
        </td>
      </tr>
      <tr class="top-aligned-row">
        <td><strong>Parent:</strong></td>
        <td>
          Object
        </td>
      </tr>
    </table>
  </div>
  <!-- banner header -->

  <div id="bodyContent">

    <div id="contextContent">
    </div>

    <!-- Index of methods; each link targets the matching anchor further down the page. -->
    <div id="method-list">
      <h3 class="section-bar">Methods</h3>

      <div class="name-list">
        <a href="#M000007">classifications</a>
        <a href="#M000006">classify</a>
        <a href="#M000008">get_word_probs</a>
        <a href="#M000003">new</a>
        <a href="#M000004">train</a>
        <a href="#M000005">untrain</a>
      </div>
    </div>

  </div>
<!-- if includes --> <div id="section"> <div id="attribute-list"> <h3 class="section-bar">Attributes</h3> <div class="name-list"> <table> <tr class="top-aligned-row context-row"> <td class="context-item-name">classnames</td> <td class="context-item-value"> [R] </td> <td class="context-item-desc"></td> </tr> </table> </div> </div> <!-- if method_list --> <div id="methods"> <h3 class="section-bar">Public Class methods</h3> <div id="method-M000003" class="method-detail"> <a name="M000003"></a> <div class="method-heading"> <a href="#M000003" class="method-signature"> <span class="method-name">new</span><span class="method-args">(storage)</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" onclick="toggleCode('M000003-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000003-source"> <pre> <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 6</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">storage</span>) <span class="ruby-ivar">@storage</span> = <span class="ruby-identifier">storage</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">init_tables</span> <span class="ruby-ivar">@classnames</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">classnames</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> <h3 class="section-bar">Public Instance methods</h3> <div id="method-M000007" class="method-detail"> <a name="M000007"></a> <div class="method-heading"> <a href="#M000007" class="method-signature"> <span class="method-name">classifications</span><span class="method-args">(text)</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" onclick="toggleCode('M000007-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000007-source"> <pre> <span class="ruby-comment cmt"># File 
lib/ankusa/classifier.rb, line 44</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>) <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span> <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span> <span class="ruby-identifier">probs</span> = <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>) <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> (<span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>]) <span class="ruby-operator">*</span> <span class="ruby-identifier">count</span>) } } <span class="ruby-comment cmt"># add the prior and exponentiate</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">k</span>).<span 
class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">doc_count_total</span>.<span class="ruby-identifier">to_f</span>) <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-constant">Math</span>.<span class="ruby-identifier">exp</span>(<span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>]) } <span class="ruby-comment cmt"># normalize to get probs</span> <span class="ruby-identifier">sum</span> = <span class="ruby-identifier">result</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> } <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">/</span> <span class="ruby-identifier">sum</span> } <span class="ruby-identifier">result</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> <div id="method-M000006" class="method-detail"> <a name="M000006"></a> <div class="method-heading"> <a href="#M000006" class="method-signature"> <span class="method-name">classify</span><span class="method-args">(text)</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" onclick="toggleCode('M000006-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000006-source"> <pre> <span class="ruby-comment cmt"># 
File lib/ankusa/classifier.rb, line 39</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>) <span class="ruby-comment cmt"># return the most probable class</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> <div id="method-M000004" class="method-detail"> <a name="M000004"></a> <div class="method-heading"> <a href="#M000004" class="method-signature"> <span class="method-name">train</span><span class="method-args">(klass, text) {|word, count| ...}</span> </a> </div> <div class="method-description"> <p> text can be either an array of strings or a string; klass is a symbol </p> <p><a class="source-toggle" href="#" onclick="toggleCode('M000004-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000004-source"> <pre> <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 14</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>) <span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>) <span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span> <span 
class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span> <span class="ruby-keyword kw">yield</span> <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">block_given?</span> } <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span> <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span> <span class="ruby-ivar">@classnames</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> <div id="method-M000005" class="method-detail"> <a name="M000005"></a> <div class="method-heading"> <a href="#M000005" class="method-signature"> <span class="method-name">untrain</span><span class="method-args">(klass, text) {|word, count| ...}</span> </a> </div> <div class="method-description"> <p> text can be either an array of strings or a string; 
klass is a symbol </p> <p><a class="source-toggle" href="#" onclick="toggleCode('M000005-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000005-source"> <pre> <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 28</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>) <span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>) <span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">count</span> <span class="ruby-keyword kw">yield</span> <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">block_given?</span> } <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span> <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span> <span 
class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> <h3 class="section-bar">Protected Instance methods</h3> <div id="method-M000008" class="method-detail"> <a name="M000008"></a> <div class="method-heading"> <a href="#M000008" class="method-signature"> <span class="method-name">get_word_probs</span><span class="method-args">(word)</span> </a> </div> <div class="method-description"> <p><a class="source-toggle" href="#" onclick="toggleCode('M000008-source');return false;">[Source]</a></p> <div class="method-source-code" id="M000008-source"> <pre> <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 65</span> <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>) <span class="ruby-identifier">probs</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>) <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span> <span class="ruby-comment cmt"># use a laplacian smoother</span> <span class="ruby-identifier">probs</span>[<span class="ruby-identifier">cn</span>] = (<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">cn</span>] <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> (<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">cn</span>) <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span 
class="ruby-identifier">to_f</span> } <span class="ruby-identifier">probs</span> <span class="ruby-keyword kw">end</span> </pre> </div> </div> </div> </div> </div> <div id="validator-badges"> <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> </div> </body> </html>