# -*- coding: utf-8 -*- ########################################################################################## # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify, # and distribute this software and its documentation, without fee and without a signed # licensing agreement, is hereby granted, provided that the above copyright notice, this # paragraph and the following two paragraphs appear in all copies, modifications, and # distributions. # # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, # OR MODIFICATIONS. ########################################################################################## require 'rubygems' require "test/unit" require 'shoulda' require 'env' require 'scicom' class SciComTest < Test::Unit::TestCase context "R environment" do #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- setup do end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "create data-frame from a single vector" do vec = R.seq(20) vec.attr.dim = R.c(4, 5) df = R.as__data__frame(vec) df.pp assert_equal(4, df.nrow.gz) assert_equal(5, df.ncol.gz) df[0].pp df[1].pp df["V2"].pp df["V4"].pp end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "work with build-in data-frames" do # We use built-in data frames in R for our tutorials. For example, here is a built-in # data frame in R, called mtcars. # to access a build-in data-frame, use method R.d with the data-frame's name mtcars = R.d("mtcars") p "mtcars build-in data-frame" mtcars.pp # Here is the cell value from the first row, second column of mtcars. assert_equal(6, mtcars[1, 2].gz) # Moreover, we can use the row and column names instead of the numeric coordinates. assert_equal(6, mtcars["Mazda RX4", "cyl"].gz) # Lastly, the number of data rows in the data frame is given by the nrow function. assert_equal(32, mtcars.nrow.gz) # number of data rows # And the number of columns of a data frame is given by the ncol function. assert_equal(11, mtcars.ncol.gz) # number of columns p "mtcars head" mtcars.head.pp end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "access data-frames by column vector" do mtcars = R.d("mtcars") # We reference a data frame column with the double square bracket "[[]]" operator. # For example, to retrieve the ninth column vector of the built-in data set mtcars, # we write mtcars[[9]]. mtcars[[9]].pp # We can retrieve the same column vector by its name. mtcars[["am"]].pp # We can also retrieve with the "." operator in lieu of the double square # bracket operator. mtcars.am.pp # Yet another way to retrieve the same column vector is to use the single square # bracket "[]" operator. We prepend the column name with 'nil', which signals a # wildcard match for the row position. mtcars[nil, "am"].pp end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "access data-frames by column slice" do mtcars = R.d("mtcars") # We retrieve a data frame column slice with the single square bracket "[]" operator. # Numeric Indexing # The following is a slice containing the first column of the built-in data set # mtcars. mtcars[1].pp # Name Indexing # We can retrieve the same column slice by its name. mtcars["mpg"].pp # To retrieve a data frame slice with the two columns mpg and hp, we pack the # column names in an index vector inside the single square bracket operator. mtcars[R.c("mpg", "hp")].pp end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "access data-frames by row slice" do mtcars = R.d("mtcars") # We retrieve rows from a data frame with the single square bracket operator, just # like what we did with columns. However, in additional to an index vector of row # positions, we append an nil. This is important, as the nil signals a wildcard match # for the second coordinate for column positions. # Numeric Indexing # For example, the following retrieves a row record of the built-in data set mtcars. # Please notice the nil in the square bracket operator. It states that the 1974 Camaro # Z28 has a gas mileage of 13.3 miles per gallon, and an eight cylinder 245 horse power # engine, ..., etc. mtcars[24, nil].pp # To retrieve more than one rows, we use a numeric index vector. mtcars[R.c(3, 24), nil].pp # Name Indexing # We can retrieve a row by its name. mtcars["Camaro Z28", nil].pp # And we can pack the row names in an index vector in order to retrieve multiple # rows. mtcars[R.c("Datsun 710", "Camaro Z28"), nil].pp # Logical Indexing # Lastly, we can retrieve rows with a logical index vector. In the following # vector L, the member value is TRUE if the car has automatic transmission, and # FALSE if otherwise. auto = mtcars.am == 0 auto.pp # Here is the list of vehicles with automatic transmission. mtcars[auto, nil].pp # And here is the gas mileage data for automatic transmission. mtcars[auto, nil].mpg.pp end #-------------------------------------------------------------------------------------- # #-------------------------------------------------------------------------------------- should "create data-frame from multiple vectors" do # name age hgt wgt race year SAT # Bob 21 70 180 Cauc Jr 1080 # Fred 18 67 156 Af.Am Fr 1210 # Barb 18 64 128 Af.Am Fr 840 # Sue 24 66 118 Cauc Sr 1340 # Jeff 20 72 202 Asian So 880 name = R.c("Bob", "Fred", "Barb", "Sue", "Jeff") age = R.c(21, 18, 18, 24, 20) hgt = R.c(70, 67, 64, 66, 72) wgt = R.c(180, 156, 128, 118, 202) race = R.c("Cauc", "Af. Am", "Af. Am", "Cauc", "Asian") sat = R.c(1080, 1210, 840, 1340, 880) df = R.data__frame(name, age, hgt, wgt, race, sat) df.colnames.pp df.colnames(prefix: "sc").pp # Renjin allows changes to variable properties R.eval("colnames(#{df.r}) = c('name', 'age', 'height', 'weigth', 'race', 'SAT')") R.eval("print(colnames(#{df.r}))") rbvec = R.eval("vec = c(1, 2, 3, 4, 5)") # this is a new vector with the same name. Assigning a new value to a large # vector can then be very costly as every assignment does copy the old data. R.eval("vec[1] = 10") R.eval("print(vec)") # this proves that vec is actually a new vec. We have kept the old vector in # variable rbvec. rbvec.print =begin # R.colnames(df) = R.c("name", "age", "height", "weigth", "race", "SAT") df.print summ = R.summary(df.r) p summ summ.print R.eval("print(colnames(#{df.r}))") col = R.colnames(:df) col.print =end end end end