# -*- coding: utf-8 -*-

##########################################################################################
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
# and distribute this software and its documentation, without fee and without a signed
# licensing agreement, is hereby granted, provided that the above copyright notice, this
# paragraph and the following two paragraphs appear in all copies, modifications, and
# distributions.
#
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
##########################################################################################

require 'rubygems'
require "test/unit"
require 'shoulda'

# Load the shared configuration only when @platform has not been set yet.
# NOTE(review): presumably '../config' is what sets @platform; the path is resolved
# through the load path / working directory -- confirm how this test is launched.
require '../config' if @platform.nil?

require 'scicom'

# Exercises the R bridge end to end: reads a CSV file into a data frame, subsets it,
# adds a derived column and fits a linear model on the result.
#
# NOTE(review): the test contains no assertions; it passes as long as no call raises
# and only prints the model summary.
class SciComTest < Test::Unit::TestCase

  context "R environment" do

    #--------------------------------------------------------------------------------------
    #
    #--------------------------------------------------------------------------------------

    setup do
      # Intentionally empty: no per-test state is prepared here.
    end

    #--------------------------------------------------------------------------------------
    #
    #--------------------------------------------------------------------------------------

    should "Read file and apply linear model to dataset" do

      # This dataset comes from Baseball-Reference.com.
      # The path is relative, so "baseball.csv" must be reachable from the directory the
      # test is run in.
      # NOTE(review): read__csv presumably maps to R's read.csv -- confirm against the
      # bridge's naming convention.
      baseball = R.read__csv("baseball.csv")

      # convert the second column of dataframe 'baseball' to a Ruby vector
      # arr = baseball[[2]].get
      # arr.print

      # Let's look at the data available for Moneyball.
      # (baseball.Year < R.d(2002)).pp
      moneyball = baseball.subset(baseball.Year < 2002)

      # Let's see if we can predict the number of wins, by looking at
      # runs allowed (RA) and runs scored (RS). RD is the runs difference.
      # We are making a linear model for predicting wins (W) based on RD
      moneyball.RD = moneyball.RS - moneyball.RA
      wins_reg = R.lm("W ~ RD", data: moneyball)

      # Output the summary of the fitted model.
      wins_reg.summary.pp

    end

  end

end