Symbols

<- for assigning numbers or letters to objects (-> assigns in the other direction: value -> name)
" " to identify character strings (= words)
( ) for functions
[ ] for subsetting
$ for subsetting by variable name
# to indicate comments (=text to clarify your commands)
NA to indicate missing values
~ to regress one variable on a set of independent variables

Example data

Vector
## [1] 1 2 3 4
Matrix
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6
## [4,]    7    8
Data
##                     movie  genre rating revenues
## 5    (500) Days of Summer Comedy    7.8   404221
## 7                  2 Guns Action    6.8  1490523
## 9             21 and Over Comedy    5.9   125991
## 11         21 Jump Street Action    7.2  1185398
## 13         22 Jump Street Action    7.1  4107128
## 15         3 Days to Kill Action    6.2   473113
## 17     30 Minutes or Less Action    6.1   122431
## 19 300: Rise of An Empire Action    6.3  4751276
## 21               47 Ronin Action    6.3  1744117
## 23 A Good Day to Die Hard Action    5.3  5715088
## 25        A Haunted House Comedy    5.0   585663

Subsetting

Vector[1]       # select first element from vector  
Vector[1:4]     # select first four elements from vector  
Matrix[1,2]     # select second element from first row  
Matrix[1,]      # select complete first row  
Matrix[1:4,]    # select complete first four rows  
Matrix[,2]      # select complete second column  
Data$movie[1:3] # select first three movies
Data[1:3,1]     # select first three movies
Data[1:3,]      # select all values of first three movies

Mathematical operations

X == Y      # X is equal to Y 
X != Y      # X is not equal to Y  
X >= Y      # X is greater than or equal to Y  
X <= Y      # X is smaller than or equal to Y  
X > Y       # X is greater than Y  
X < Y       # X is smaller than Y  
X & Y       # X and Y  
X|Y         # X or Y  
Vector != Y # TRUE for each element of Vector that is not equal to Y  

Describe data

read.table("imdb.txt", header=TRUE, sep=";") # read datafile, header and sep are necessary  
nrow(Data)      # calculates the number of rows  
ncol(Data)      # calculates the number of columns  
colnames(Data)  # gives the names of the columns  
typeof(Data)    # presents the type of object  
str(Data)       # shows what is stored in an object  

Functions to calculate descriptives

sqrt(Vector)        # calculates the square root
mean(Vector)        # calculates the mean
sum(Vector)         # calculates the sum
var(Vector)         # calculates the variance
sd(Vector)          # calculates the standard deviation

Functions to run data analysis

summary(Data)            # describes the min, max, median, and mean value of an object
table(Data$rating)       # the frequency table
factor(Data$rating)      # transforms a numerical vector into a categorical variable (factor)
lm(revenues~rating, Data)# linear model, revenues is dependent variable
glm(revenues~rating, Data, family=gaussian) # generalized linear model 
t.test(revenues~genre, Data) # t-test
chisq.test(Matrix)       # chi-squared test: returns the X2 value, degrees of freedom, and p-value 

Functions wrt missing values

is.na(imdb)         # returns TRUE for each value that is missing (NA = Not Available)
complete.cases(imdb)# returns TRUE for each row (case) that has no missing values
na.omit(imdb)       # creates new object without missing data
mean(imdb$rating, na.rm=TRUE) # na.rm=TRUE is an argument in many functions that removes the missings before calculating
na.exclude(imdb)    # like na.omit, but model residuals and predictions are padded with NA for the excluded cases