<- for assigning numbers or letters to objects
" " to identify characters (= words)
( ) for functions
[ ] for subsetting
$ for subsetting a variable by name
# to indicate comments (=text to clarify your commands)
NA to indicate missing values
~ to regress one variable on a set of independent variables
Vector
## [1] 1 2 3 4
Matrix
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
## [3,] 5 6
## [4,] 7 8
Data
## movie genre rating revenues
## 5 (500) Days of Summer Comedy 7.8 404221
## 7 2 Guns Action 6.8 1490523
## 9 21 and Over Comedy 5.9 125991
## 11 21 Jump Street Action 7.2 1185398
## 13 22 Jump Street Action 7.1 4107128
## 15 3 Days to Kill Action 6.2 473113
## 17 30 Minutes or Less Action 6.1 122431
## 19 300: Rise of An Empire Action 6.3 4751276
## 21 47 Ronin Action 6.3 1744117
## 23 A Good Day to Die Hard Action 5.3 5715088
## 25 A Haunted House Comedy 5.0 585663
Vector[1] # select first element from vector
Vector[1:4] # select first four elements from vector
Matrix[1,2] # select second element from first row
Matrix[1,] # select complete first row
Matrix[1:4,] # select complete first four rows
Matrix[,2] # select complete second column
Data$movie[1:3] # select first three movies
Data[1:3,1] # select first three movies
Data[1:3,] # select all values of first three movies
X == Y # X is equal to Y
X != Y # X is not equal to Y
X >= Y # X is greater than or equal to Y
X <= Y # X is less than or equal to Y
X > Y # X is greater than Y
X < Y # X is smaller than Y
X & Y # X and Y
X|Y # X or Y
Vector != Y # TRUE for each element of Vector that is not equal to Y
read.table("imdb.txt", header=TRUE, sep=";") # read datafile; header=TRUE takes column names from the first row, sep=";" sets the field separator
nrow(Data) # calculates the number of rows
ncol(Data) # calculates the number of columns
colnames(Data) # gives the names of the columns
typeof(Data) # returns the internal storage type of the object (e.g. "list" for a data frame)
str(Data) # shows what is stored in an object
sqrt(Vector) # calculates the square root
mean(Vector) # calculates the mean
sum(Vector) # calculates the sum
var(Vector) # calculates the variance
sd(Vector) # calculates the standard deviation
summary(Data) # describes the min, max, quartiles, median, and mean of each numeric variable in an object
table(Data$rating) # the frequency table
factor(Data$rating) # transforms a numerical vector into a categorical (factor) variable
lm(revenues~rating, Data) # linear model, revenues is the dependent variable
glm(revenues~rating, Data, family=gaussian) # generalized linear model
t.test(revenues~genre, Data) # t-test
chisq.test(Matrix) # performs a chi-squared test; reports the X-squared statistic, degrees of freedom, and p-value
is.na(imdb) # returns TRUE for each element that is missing (NA = Not Available)
complete.cases(imdb) # returns TRUE for each row (case) that has no missing values
na.omit(imdb) # returns a new object with the rows that contain missing values removed
mean(imdb$rating, na.rm=TRUE) # na.rm=TRUE is an argument of many functions that removes missing values before computing
na.exclude(imdb) # like na.omit, but residuals and predictions of a model fitted with it are padded with NA for the dropped cases