require 'supervised_learning/version'
require 'matrix'
require 'descriptive_statistics'

module SupervisedLearning
    
  # This class uses linear regression to make predictions based on a training set.
  # For datasets of less than 1000 columns, use #predict since this will give the most accurate prediction.
  # For larger datasets where the #predict method is too slow, use #predict_advanced.
  # The algorithms in #predict and #predict_advanced were provided by Andrew Ng (Stanford University).
  # @author Michael Imstepf
  class LinearRegression
    # Initializes a LinearRegression object with a training set
    # @param training_set [Matrix] training_set, each feature/dimension has one column and the last column is the output column (type of value #predict will return)
    # @raise [ArgumentError] if training_set is not a Matrix and has at least two columns and one row
    def initialize(training_set)      
      @training_set = training_set
      raise ArgumentError, 'input is not a Matrix' unless @training_set.is_a? Matrix
      raise ArgumentError, 'Matrix must have at least 2 columns and 1 row' unless @training_set.column_size > 1

      @number_of_training_set_columns = @training_set.column_size
      @number_of_features = @number_of_training_set_columns - 1
      @number_of_training_examples = @training_set.row_size      

      @output_set = @training_set.column_vectors.last      
    end

    # Makes prediction using normalization.
    # This algorithm is the most accurate one but with large
    # sets (more than 1000 columns) it might take too long to calculate.
    # @param prediction [Matrix] prediction    
    def predict(prediction)
      feature_set = get_feature_set(@training_set, true)      

      validate_prediction_input(prediction)
            
      transposed_feature_set = feature_set.transpose # only transpose once for efficiency                  
      theta = (transposed_feature_set * feature_set).inverse * transposed_feature_set * @output_set

      # add column of ones to prediction
      prediction = get_feature_set(prediction, true)
      
      result_vectorized = prediction * theta
      result = result_vectorized.to_a.first.to_f
    end

    # Makes prediction using gradient descent.
    # This algorithm is requires less computing power
    # than #predict but is less accurate since it uses approximation.
    # @param prediction [Matrix] prediction
    def predict_advanced(prediction, learning_rate = 0.01, iterations = 1000, debug = false) 
      validate_prediction_input(prediction)

      feature_set = get_feature_set(@training_set, false) 
      feature_set = normalize_feature_set(feature_set)
      # add ones to feature set after normalization      
      feature_set = get_feature_set(feature_set, true)

      # prepare theta column vector with zeros
      theta = Array.new(@number_of_training_set_columns, 0)
      theta = Matrix.columns([theta])

      iterations.times do        
        theta = theta - (learning_rate * (1.0/@number_of_training_examples) * (feature_set * theta - @output_set).transpose * feature_set).transpose
        if debug
          puts "Theta: #{theta}"
          puts "Cost: #{calculate_cost(feature_set, theta)}"
        end
      end

      # normalize prediction
      prediction = normalize_prediction(prediction)

      # add column of ones to prediction
      prediction = get_feature_set(prediction, true)

      result_vectorized = prediction * theta
      result = result_vectorized[0,0]
    end   

    private

    # Returns a feature set without output set (last column of training set)
    # and optionally adds a leading column of ones to a Matrix.
    # This column of ones is the first dimension of theta to easily calculate
    # the output of a function a*1 + b*theta_1 + c*theta_2 etc.    
    # Ruby's Matrix class has not built-in function for prepending,
    # hence some manual work is required.
    # @see http://stackoverflow.com/questions/9710628/how-do-i-add-columns-and-rows-to-a-matrix-in-ruby
    # @param matrix [Matrix] matrix
    # @param leading_ones [Boolean] whether to prepend a column of leading ones    
    # @return [Matrix] matrix
    def get_feature_set(matrix, leading_ones = false)
      # get array of columns
      existing_columns = matrix.column_vectors      

      columns = []
      columns << Array.new(existing_columns.first.size, 1) if leading_ones           
      # add remaining columns
      existing_columns.each_with_index do |column, index|
        # output column (last column of @training_set) needs to be skipped        
        # when called from #get_feature_set, matrix includes output column
        # when called from #prediction, matrix does not inlcude output column
        break if index + 1 > @number_of_features    
        columns << column.to_a
      end
      
      Matrix.columns(columns)      
    end

    # Validates prediction input.
    # @param prediction [Matrix] prediction
    # @raise [ArgumentError] if prediction is not a Matrix
    # @raise [ArgumentError] if prediction does not have the correct number of columns (@training set minus @output_set)
    # @raise [ArgumentError] if prediction has more than one row
    def validate_prediction_input(prediction)
      raise ArgumentError, 'input is not a Matrix' unless prediction.is_a? Matrix
      raise ArgumentError, 'input has more than one row' if prediction.row_size > 1
      raise ArgumentError, 'input has wrong number of columns' if prediction.column_size != @number_of_features 
    end

    # Normalizes feature set for quicker and more reliable calculation.
    # @param feature_set [Matrix] feature set
    # @return [Matrix] normalized feature set
    def normalize_feature_set(feature_set)
      # create Matrix with mean
      mean = []
      feature_set.column_vectors.each do |feature_set_column|
        # create Matrix of length of training examples for later substraction
        mean << Array.new(@number_of_training_examples, feature_set_column.mean)
      end          
      mean = Matrix.columns(mean)
      
      # save for later usage as Matrix and not as Vector
      @mean = Matrix[mean.row(0)]

      # substract mean from feature set
      feature_set = feature_set - mean

      # create Matrix with standard deviation
      standard_deviation = []
      feature_set.column_vectors.each do |feature_set_column|
        # create row vector with standard deviation
        standard_deviation << [feature_set_column.standard_deviation]
      end            
      # save for later usage
      @standard_deviation = Matrix.columns(standard_deviation)

      # Dividing these non-square matrices has to be done manually
      # (non square matrices have no inverse and can't be divided in Ruby)
      # iterate through each column    
      columns = []
      feature_set.column_vectors.each_with_index do |feature_set_column, index|
        # manually divide each row within column with standard deviation for that row
        columns << feature_set_column.to_a.collect { |value| value / @standard_deviation[0,index] }
      end
      # reconstruct training set
      feature_set = Matrix.columns(columns)      
      feature_set
    end

    # Normalizes prediction.
    # @param prediction [Matrix] prediction
    # @return [Matrix] normalized prediction
    def normalize_prediction(prediction)
      # substract mean
      prediction = prediction - @mean      

      # Dividing these non-square matrices has to be done manually
      # (non square matrices have no inverse and can't be divided in Ruby)
      # iterate through each column
      columns = []
      prediction.column_vectors.each_with_index do |prediction_column, index|
        # manually divide each row within column with standard deviation for that row
        columns << prediction_column / @standard_deviation[0,index]
      end      
      # reconstruct prediction
      prediction = Matrix.columns(columns) 
    end

    # Calculates cost of current theta.
    # The closer to 0, the more accurate the prediction will be.
    # @param feature_set [Matrix] feature set
    # @param theta [Matrix] theta
    # @return [Float] cost    
    def calculate_cost(feature_set, theta)
      cost_vectorized = 1.0/(2 * @number_of_training_examples) * (feature_set * theta - @output_set).transpose * (feature_set * theta - @output_set)
      cost_vectorized[0,0]
    end    
  end
end