#### Exercise answers for the Introduction to R by Pornprasertmanit, Gibson, & Drotar.
#### Last updated: 5/1/12
### Exercise 1 ###
# Print the attitude data
attitude
# Summarize the attitude data
summary(attitude)
# Find the numbers of rows and columns of the attitude data
nrow(attitude) # or dim(attitude)[1]
ncol(attitude) # or dim(attitude)[2]
# Run the regression analysis predicting overall rating by complaints
out <- lm(rating ~ complaints, data = attitude)
summary(out)
# Extract the rating variable and find the mean and standard deviation of this variable.
overall.rating <- attitude$rating
mean(overall.rating, na.rm = TRUE)
sd(overall.rating, na.rm = TRUE)
# Find the means and standard deviations of each variable in the dataset.
# vapply() walks the columns of the data frame directly and guarantees one
# numeric value per column; apply(attitude, 2, ...) would first coerce the
# whole data frame to a matrix.
vapply(attitude, mean, numeric(1))
vapply(attitude, sd, numeric(1))
### Exercise 2 ###
# 1. Store the number 5 in a, then create b equal to 1 - a^2/5.
a <- 5
b <- 1 - a^2 / 5
# 2. Check whether a lies between 2 and 4 (both endpoints included).
a >= 2 & a <= 4
# 3. Select the rows of attitude whose rating is over 50.
select <- attitude[["rating"]] > 50
attitude[select, ]
# 4. Select the rows whose rating and complaints are both above their means.
select1 <- with(attitude, rating > mean(rating))
select2 <- with(attitude, complaints > mean(complaints))
select <- select1 & select2
attitude[select, ]
# 5. Select the rows whose row number is a multiple of 3.
select <- seq(from = 3, to = nrow(attitude), by = 3)
attitude[select, ]
# 6. Add a new variable to the attitude data indicating whether the rating was greater than 50.
high.rating <- attitude[["rating"]] > 50
attitude2 <- cbind(attitude, high.rating) # or attitude2 <- data.frame(attitude, high.rating)
# 7. Create vec1 holding 3, 6, 8, and 9 and vec2 holding 1 to 4, then add them elementwise.
vec1 <- c(3, 6, 8, 9)
vec2 <- 1:4
vec1 + vec2
# 8. Look at ?attitude, ?seq, and ?sum. Uncomment a line to open that help page.
#?attitude
#?sum
#?seq
# 9. Search "Centering R" in any search engine and try the example from the help page of the function you find.
#?scale
scale(attitude)
scale(attitude, center = TRUE, scale = FALSE)
### Exercise 3 ###
# Export the airquality data to the comma-separated file "RSem2.csv", writing
# missing values as 999. Then read the file back in, turning 999 back into NA,
# and save the result to an object, RSem.
write.csv(
  airquality,
  file = "RSem2.csv",
  row.names = FALSE,
  na = "999"
)
RSem <- read.csv(file = "RSem2.csv", header = TRUE, na.strings = "999")
### Exercise 4 ###
# 1. Find the result of the analyzed regression.
# The variables are named in the formula and looked up through `data`,
# matching the style of the lm() call in Exercise 1.
summary(lm(Wind ~ Ozone, data = airquality))
# 2. Create a histogram of rating variable in the attitude data.
hist(attitude$rating)
# 3. Create a scatterplot of complaints and rating variables and impose the regression line in the scatterplot.
plot(attitude$complaints, attitude$rating,
  xlab = "Handling of employee complaints",
  ylab = "Overall Rating",
  main = "Regression Analysis"
)
abline(lm(rating ~ complaints, data = attitude), col = "blue") # Set the color of the line as blue
# 4. Create a new variable representing a median split of complaints (check ?median). Then, create a boxplot of rating by two groups (high vs. low rating on handling of employee complaints).
group <- attitude$complaints > median(attitude$complaints)
# Label the split: TRUE (above the median) -> "High", FALSE -> "Low".
grouplab <- ifelse(group, "High", "Low")
# The response is the overall rating, not complaints: the exercise asks for
# rating by complaint group, and the y-axis is labelled "Overall rating".
boxplot(attitude$rating ~ grouplab,
  xlab = "Rating on handling of employee complaints",
  ylab = "Overall rating",
  main = "Boxplot"
)
# 5. Check the new dataset, Orange. Find group means and standard deviations of circumference by Tree.
aggregate(circumference ~ Tree, data = Orange, mean)
aggregate(circumference ~ Tree, data = Orange, sd)
### Exercise 5 ###
# 1. Look at the cars dataset and find the correlation between its two variables.
cor(x = cars) # Correlation matrix of all columns
cor.test(x = cars$speed, y = cars$dist) # Significance test of the speed-dist correlation
# 2. Look at the Seatbelts dataset. Test whether the average number of car
#    drivers killed differs between months with and without the law in effect.
#    Seatbelts is converted to a data frame first so the formula can use `data`.
Seatbelts2 <- as.data.frame(Seatbelts)
t.test(DriversKilled ~ law, data = Seatbelts2)
# 3. Look at the occupationalStatus dataset. Use a chi-square test to detect
#    a relationship between its two variables.
chisq.test(x = occupationalStatus)
### Exercise 6 ###
# 1. Save all codes you write for the following items into an R script.
# 2. Use a for loop to subtract 1, 2, 3, ..., and 20 from 1000. The result should be 790.
x <- 1000 # Starting value
for (i in 1:20) {
  x <- x - i # i takes the values 1, 2, ..., 20 in turn.
}
x
# 3. Use an if ... else statement to check whether y <- 22 is odd or even.
y <- 22
if (y %% 2 == 1) {
  cat("y is odd.\n")
} else {
  cat("y is even.\n")
}
# 4. Create a new variable called ratingclass to represent the degree of the rating variable in the attitude dataset. Use a for loop and if ... else to classify as 1 for a rating score lower than 50, 2 for a rating score from 50 to 70, and 3 for a rating score greater than 70.
ratingclass <- rep(NA, nrow(attitude)) # Preallocate one slot per row
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(attitude))) {
  if (attitude$rating[i] < 50) {
    ratingclass[i] <- 1
  } else if ((attitude$rating[i] >= 50) && (attitude$rating[i] <= 70)) {
    # && is the scalar, short-circuiting operator intended for if conditions.
    ratingclass[i] <- 2
  } else {
    ratingclass[i] <- 3
  }
}
ratingclass
# 5. Write a function that returns TRUE if an input number is odd and FALSE if it is even.
# The check is elementwise: x is reduced modulo 2 and compared with 1.
isOdd <- function(x) {
  remainder <- x %% 2
  # The value of the last expression is the function's return value.
  remainder == 1
}
isOdd(2)
isOdd(11)