one_r.rb in ai4r-1.3

- old
+ new

@@ -6,73 +6,41 @@
 # You can redistribute it and/or modify it under the terms of 
 # the Mozilla Public License version 1.1  as published by the 
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
 
 require 'set'
-require File.dirname(__FILE__) + '/classifier_helper'
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
 
 module Ai4r
   module Classifiers
 
     # = Introduction
     # 
     # The idea of the OneR algorithm is to identify the single
     # attribute to use to classify data that makes the
     # fewest prediction errors.
     # It generates rules based on a single attribute.
-    class OneR
+    class OneR < Classifier
       
-      attr_accessor :data_labels, :rule
-      include ClassifierHelper
+      attr_reader :data_set, :rule
 
-      # Build a new OneR classifier. If your data is classified with N attributed
-      # and M examples, then your data examples must have the following format:
-      # 
-      #     [   [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1,  CLASS_VAL1], 
-      #         [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2,  CLASS_VAL2], 
-      #         ...
-      #         [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM], 
-      #     ]
-      #     
-      # e.g.
-      #     [   ['New York',  '<30',      'M', 'Y'],
-      #          ['Chicago',     '<30',      'M', 'Y'],
-      #          ['Chicago',     '<30',      'F', 'Y'],
-      #          ['New York',  '<30',      'M', 'Y'],
-      #          ['New York',  '<30',      'M', 'Y'],
-      #          ['Chicago',     '[30-50)',  'M', 'Y'],
-      #          ['New York',  '[30-50)',  'F', 'N'],
-      #          ['Chicago',     '[30-50)',  'F', 'Y'],
-      #          ['New York',  '[30-50)',  'F', 'N'],
-      #          ['Chicago',     '[50-80]', 'M', 'N'],
-      #          ['New York',  '[50-80]', 'F', 'N'],
-      #          ['New York',  '[50-80]', 'M', 'N'],
-      #          ['Chicago',     '[50-80]', 'M', 'N'],
-      #          ['New York',  '[50-80]', 'F', 'N'],
-      #          ['Chicago',     '>80',      'F', 'Y']
-      #        ]
-      #
-      # Data labels must have the following format:
-      #     [ 'city', 'age_range', 'gender', 'marketing_target'  ]
-      #
-      # If you do not provide labels for you data, the following labels will
-      # be created by default:
-      #     [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value'  ]
-      #      
-      def build(data_examples, data_labels = nil)
-        check_data_examples(data_examples)
-        @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
-        if (num_attributes(data_examples) == 1) 
-          @zero_r = ZeroR.new.build(data_examples, data_labels)
+      # Build a new OneR classifier. You must provide a DataSet instance
+      # as the parameter.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        if (data_set.num_attributes == 1) 
+          @zero_r = ZeroR.new.build(data_set)
           return self;
         else
           @zero_r = nil;
         end
-        domains = build_domains(data_examples)
+        domains = @data_set.build_domains
         @rule = nil
         domains[1...-1].each_index do |attr_index|
-          rule = build_rule(data_examples, attr_index, domains)
+          rule = build_rule(@data_set.data_items, attr_index, domains)
           @rule = rule if !@rule || rule[:correct] > @rule[:correct]
         end
         return self
       end
       
@@ -86,39 +54,32 @@
       end
       
       # This method returns the generated rules as Ruby code.
       # e.g.
       #   
-      #   classifier.to_s
+      #   classifier.get_rules
       #     # =>  if age_range == '<30' then marketing_target = 'Y'
       #           elsif age_range == '[30-50)' then marketing_target = 'N'
       #           elsif age_range == '[50-80]' then marketing_target = 'N'
       #           end
       #
       # It is a nice way to inspect induction results, and also to execute them:  
       #     marketing_target = nil
-      #     eval classifier.to_s   
+      #     eval classifier.get_rules   
       #     puts marketing_target
       #       # =>  'Y'
-      def to_s
-        return @zero_r.to_s if @zero_r
+      def get_rules
+        return @zero_r.get_rules if @zero_r
         sentences = []
-        attr_label = @data_labels[@rule[:attr_index]]
-        class_label = @data_labels.last
+        attr_label = @data_set.data_labels[@rule[:attr_index]]
+        class_label = @data_set.data_labels.last
         @rule[:rule].each_pair do |attr_value, class_value|
           sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
         end
         return "if " + sentences.join("\nelsif ") + "\nend"        
       end
       
       protected
-      def build_domains(data_examples)
-        domains = Array.new(num_attributes(data_examples)) { Set.new }
-        data_examples.each do |data|
-          data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
-        end
-        return domains
-      end
       
       def build_rule(data_examples, attr_index, domains)
         domain = domains[attr_index]
         value_freq = Hash.new
         domain.each do |attr_value|