# Count the zeros in a random 0/1 vector with an explicit loop, then confirm
# the tally with a vectorized one-liner.
counter <- 0 # running tally of zeros seen so far
vec <- sample(x = c(0:1), size = 25, replace = TRUE)
# seq_along() gives an empty sequence for an empty vector, whereas
# 1:length(vec) would iterate over c(1, 0).
for (i in seq_along(vec)) {
  if (vec[i] == 0) {
    counter <- counter + 1
  }
}
print(counter)
## [1] 10
# Vectorized check: each TRUE in the comparison counts as 1 when summed.
sum(vec == 0)
## [1] 10
# Build a numrow x numcol matrix whose [i, j] entry is the product i * j.
#
# Args:
#   numrow: number of rows in the result.
#   numcol: number of columns in the result.
#
# Returns:
#   A matrix with numrow rows and numcol columns. If either dimension is 0,
#   the (empty) matrix is returned untouched instead of raising an error.
matrix_frame <- function(numrow, numcol) {
  m <- matrix(nrow = numrow, ncol = numcol)
  # seq_len(0) is empty, so the loops are skipped for a zero dimension;
  # 1:0 would count down (1, 0) and index outside the matrix.
  for (i in seq_len(numrow)) {
    for (j in seq_len(numcol)) {
      m[i, j] <- i * j
    }
  }
  m
}
matrix_frame(10, 3)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 2 4 6
## [3,] 3 6 9
## [4,] 4 8 12
## [5,] 5 10 15
## [6,] 6 12 18
## [7,] 7 14 21
## [8,] 8 16 24
## [9,] 9 18 27
## [10,] 10 20 30
- Simulate a dataset with 3 groups of data, each group drawn from a distribution with a different mean. The final data frame should have 1 column for group and 1 column for the response variable.
- Write a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3.
- Use a for loop to repeat the function from the previous step 100 times. Store the results in a data frame that has 1 column indicating the replicate number and 1 column for each new group mean, for a total of 4 columns.
- Use qplot() to create a histogram of the means for each reshuffled group. Or, if you want a challenge, use ggplot() to overlay all 3 histograms in the same figure. How do the distributions of reshuffled means compare to the original means?
# Simulate three groups of 10 normal draws each, centred on 10, 20 and 30
# (rnorm()'s default standard deviation is used).
var1 <- rnorm(n = 10, mean = 10)
var2 <- rnorm(n = 10, mean = 20)
var3 <- rnorm(n = 10, mean = 30)
# Long format: one row per observation, labelled with the group it came from.
df <- data.frame(
  group = rep(c("var1", "var2", "var3"), each = 10),
  response = c(var1, var2, var3)
)
# Randomly reassign the response values across rows and compute the mean of
# the reshuffled response within each group.
#
# Args:
#   dataframe: a data frame with a `group` column and a `response` column.
#
# Returns:
#   A data frame with one row per group and two columns: `group` and
#   `shuffle_df` (the mean of the reshuffled response in that group).
shuffle <- function(dataframe) {
  shuffled <- dataframe
  # Permute by row index rather than calling sample() on the values:
  # sample(x) treats a single number x >= 1 as 1:x, which breaks on a
  # one-row data frame.
  shuffled$shuffle_df <- dataframe$response[sample.int(nrow(dataframe))]
  # The permuted values live in a real column, so the formula resolves
  # inside `data` instead of falling back to a local variable.
  aggregate(shuffle_df ~ group, data = shuffled, FUN = mean)
}
# Run a single reshuffle and keep only the per-group means as a plain vector.
shuffle_output <- shuffle(df)
mean_vec <- shuffle_output[["shuffle_df"]]
print(mean_vec)
## [1] 15.12310 23.55469 22.50050
# Repeat the reshuffle 100 times and collect the group means, one row per
# replicate. The storage is preallocated as a named numeric matrix and
# converted to a data frame once, instead of growing a data frame with
# rbind() on every iteration.
n_reps <- 100
result_names <- c("replicate", "var1_mean", "var2_mean", "var3_mean")
results_mat <- matrix(NA_real_,
                      nrow = n_reps,
                      ncol = length(result_names),
                      dimnames = list(NULL, result_names))
for (i in seq_len(n_reps)) {
  shuffle_result <- shuffle(df) # one reshuffle: column 2 holds the group means
  # Row i = replicate number followed by the means of the first three groups.
  results_mat[i, ] <- c(i, shuffle_result[1:3, 2])
}
results_df <- as.data.frame(results_mat)
print(head(results_df))
## replicate var1_mean var2_mean var3_mean
## 1 1 19.13800 22.39189 19.64840
## 2 2 19.85861 20.56237 20.75731
## 3 3 23.66540 17.51981 19.99308
## 4 4 22.44929 19.31618 19.41282
## 5 5 22.87547 18.82308 19.47974
## 6 6 20.38705 20.71678 20.07447
# One histogram per group of the reshuffled means stored in results_df.
# The x mapping is passed straight to ggplot() as the plot-wide default.
p1 <- ggplot(data = results_df, mapping = aes(x = var1_mean)) +
  geom_histogram(fill = "thistle", color = "black")
p2 <- ggplot(data = results_df, mapping = aes(x = var2_mean)) +
  geom_histogram(fill = "goldenrod", color = "black")
p3 <- ggplot(data = results_df, mapping = aes(x = var3_mean)) +
  geom_histogram(fill = "thistle", color = "black")
# Draw the three figures in order.
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(p3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.