# R for simulation study # If statement examples x <- 5 if(x > 0) { print("positive") } if(x < 0) print("negative") if(x > 0) { print("positive") } else { print("nonpositive") } if(x > 0) { print("positive") } else if (x == 0) { print("zero") } else { print("negative") } if(x > 0) { print("positive") } else if (x < 0) { print("negative") } score <- 79 grade <- "" if(score >= 90) { grade <- "A" } else if(score >= 80) { grade <- "B" } else if (score >= 70) { grade <- "C" } else if (score >= 60) { grade <- "D" } else{ grade <- "F" } # Don't need to always use "if" scores <- 40:100 grades <- rep("F", length(scores)) grades[scores >= 60] <- "D" grades[scores >= 70] <- "C" grades[scores >= 80] <- "B" grades[scores >= 90] <- "A" # Generate data with different distributions: rnorm, rt(3), rchisq, rf x1 <- rnorm(1000, 0, 1) x2 <- rt(1000, 3) x3 <- rchisq(1000, 4) x4 <- 1 - rf(1000, 1, 30) gendat <- data.frame(x1, x2, x3, x4) library(psych) describe(gendat) par(mfrow = c(2, 2)) plot(density(x1)) plot(density(x2)) plot(density(x3)) plot(density(x4)) # Use if statement to select data distribution result <- NULL distype <- 2 n <- 1000 if(distype == 1) { result <- rnorm(n, 0, 1) } else if (distype == 2) { result <- rt(n, 3) } else if (distype == 3) { result <- rchisq(n, 4) } else if (distype == 4) { result <- 1 - rf(n, 1, 30) } m <- 0 stdev <- 0.6 result <- (result - mean(result)) / sd(result) result <- stdev * result + m mean(result) sd(result) result <- (result - mean(result)) / sd(result) temp <- rnorm(n, m, stdev) result <- sd(temp) * result + mean(temp) mean(result) sd(result) # Create a function to generate data gendata <- function(n, m, stdev, distype = 1) { result <- NULL if(distype == 1) { result <- rnorm(n, 0, 1) } else if (distype == 2) { result <- rt(n, 3) } else if (distype == 3) { result <- rchisq(n, 4) } else if (distype == 4) { result <- 1 - rf(n, 1, 30) } else { stop("The distype argument is not valid.") } result <- (result - mean(result)) / sd(result) # Find empirical mean and sd temp <- rnorm(n, m, stdev) result <- sd(temp) * result + mean(temp) return(result) } y1 <- gendata(1000, m = -2, stdev = 8) y2 <- gendata(1000, m = 10, stdev = 50, distype = 2) describe(data.frame(y1, y2)) # Simple regression simulation set.seed(123321) nrep <- 1000 b <- 0.5 n <- 200 xdist <- 2 edist <- 2 result <- matrix(NA, nrep, 3) for(i in 1:nrep) { x <- gendata(n, 0, 1, xdist) e <- gendata(n, 0, sqrt(1 - b^2), edist) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)] } sig <- result[,3] < 0.05 result <- cbind(result, sig) colMeans(result) # let's try to manipulate xdist: 1, 2, 3, 4 and edist: 1, 2, 3, 4 set.seed(123321) nrep <- 1000 b <- 0.5 n <- 200 ovresult <- NULL for(xdist in 1:4){ for(edist in 1:4) { result <- matrix(NA, nrep, 3) for(i in 1:nrep) { x <- gendata(n, 0, 1, xdist) e <- gendata(n, 0, sqrt(1 - b^2), edist) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)] } result <- cbind(xdist, edist, result) ovresult <- rbind(ovresult, result) } } sig <- ovresult[,5] < 0.05 ovresult <- cbind(ovresult, sig) colnames(ovresult) <- c("xdist", "edist", "est", "se", "p", "sig") aggresult <- aggregate(cbind(est, se, sig) ~ xdist + edist, data = ovresult, FUN = mean) # average aggsdresult <- aggregate(est ~ xdist + edist, data = ovresult, FUN = sd) # sd biasest <- aggresult[,"est"] - b relbiasest <- biasest / b biasse <- aggresult[,"se"] - aggsdresult[,"est"] relbiasse <- biasse / aggsdresult[,"est"] power <- aggresult[,"sig"] tableresult <- data.frame(aggresult[,c(1, 2)], biasest, relbiasest, biasse, relbiasse, power) tableresult # The apply function apply(attitude, 1, mean) apply(attitude, 2, mean) apply(attitude, 1, sd) apply(attitude, 2, sd) newdata <- attitude newdata[2, 4] <- NA apply(newdata, 1, mean) apply(newdata, 2, mean) apply(newdata, 1, sd) apply(newdata, 2, sd) apply(newdata, 1, mean, na.rm = TRUE) apply(newdata, 2, mean, na.rm = TRUE) apply(newdata, 1, sd, na.rm = TRUE) apply(newdata, 2, sd, na.rm = TRUE) apply(attitude, 2, range) cv <- function(x) { sd(x)/mean(x) } apply(attitude, 2, cv) cv2 <- function(x, na.rm = TRUE) { sd(x, na.rm = na.rm)/mean(x, na.rm = na.rm) } apply(newdata, 2, cv) design <- matrix(1:9, 3, 3) FUN <- function(x) { x[1] * x[2] - x[3] } apply(design, 1, FUN) apply(design, 2, FUN) # Use apply for simulation study nonnormalsim <- function(cond, b, n, nrep, alpha) { result <- rep(NA, nrep) for(i in 1:nrep) { x <- gendata(n, 0, 1, cond[1]) e <- gendata(n, 0, sqrt(1 - b^2), cond[2]) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) result[i] <- summary(out)[["coefficients"]][2, 4] < alpha } result } xdists <- 1:4 edists <- 1:4 conds <- expand.grid(xdists, edists) superresult <- apply(conds, 1, nonnormalsim, b = 0, n = 250, nrep = 1000, alpha = 0.05) apply(superresult, 2, mean) # Type I error # Track the time system.time(superresult <- apply(conds, 1, nonnormalsim, b = 0, n = 250, nrep = 1000, alpha = 0.05)) system.time({ superresult2 <- matrix(NA, 1000, nrow(conds)) for(i in 1:nrow(conds)) { superresult2[,i] <- nonnormalsim(conds[i,], b = 0, n = 250, nrep = 1000, alpha = 0.05) } }) # Exercise gendata <- function(n, m, stdev, distype = 1) { result <- NULL if(distype == 1) { result <- rnorm(n, 0, 1) } else if (distype == 2) { result <- rt(n, 4) } else if (distype == 3) { result <- rf(n, 1, 15) } else if (distype == 4) { result <- 1 - rf(n, 1, 15) } else { stop("The distype argument is not valid.") } result <- (result - mean(result)) / sd(result) temp <- rnorm(n, m, stdev) result <- sd(temp) * result + mean(temp) return(result) } n <- 200 xdist <- 2 ydist <- 2 x <- gendata(n, 0, 1, xdist) y <- gendata(n, 0, 1, ydist) out <- cor.test(x, y) out[["p.value"]]