> # 620-374 Lab 2 Solutions > > # Part 1 > > # generate a random sample with a periodic mean > x <- runif(100, -1, 1) + sin((1:100)*2*pi/10) > plot(x, type = "l") > > # put the sample into an array row-wise > A <- matrix(x, nrow = 10, ncol = 10, byrow=T) > > # each possible systemmatic sample is a column average of A > for (i in 1:10) { + cat('i =', i, 'mu_i =', mean(A[,i]), '\n') + } i = 1 mu_i = 0.3066013 i = 2 mu_i = 1.06737 i = 3 mu_i = 1.011514 i = 4 mu_i = 0.456096 i = 5 mu_i = -0.190715 i = 6 mu_i = -0.6627296 i = 7 mu_i = -0.8369802 i = 8 mu_i = -1.223132 i = 9 mu_i = -0.5502729 i = 10 mu_i = 0.1579446 > > # calculate S2 and the within sample sum of squares > cat('S2 =', var(x), '\n') S2 = 0.8216126 > S2.wsy <- 0 > for (col in 1:10) { + mu.sy <- mean(A[,col]) + for (row in 1:10) { + S2.wsy <- S2.wsy + (A[row,col] - mu.sy)^2 + } + } > S2.wsy <- S2.wsy/(100-10) > cat('S2_wsy =', S2.wsy, '\n\n') S2_wsy = 0.2990152 > # # alternative > # f <- function(x) (x - mean(x))^2 > # S2.wsy <- sum(apply(A, 2, f)) > > # now sort x and repeat the exercise > x <- sort(x) > A <- matrix(x, nrow = 10, ncol = 10, byrow=T) > for (i in 1:10) { + cat('i =', i, 'mu_i =', mean(A[,i]), '\n') + } i = 1 mu_i = -0.2151045 i = 2 mu_i = -0.1849975 i = 3 mu_i = -0.1386223 i = 4 mu_i = -0.09691812 i = 5 mu_i = -0.06050032 i = 6 mu_i = -0.03470688 i = 7 mu_i = 0.007769799 i = 8 mu_i = 0.03554426 i = 9 mu_i = 0.08976027 i = 10 mu_i = 0.1334719 > cat('S2 =', var(x), '\n') S2 = 0.8216126 > S2.wsy <- 0 > for (col in 1:10) { + mu.sy <- mean(A[,col]) + for (row in 1:10) { + S2.wsy <- S2.wsy + (A[row,col] - mu.sy)^2 + } + } > S2.wsy <- S2.wsy/(100-10) > cat('S2_wsy =', S2.wsy, '\n\n') S2_wsy = 0.8904843 > > > # Part 2 > > # load the data > x <- read.csv('lab2data.txt') > x1 <- x[x$stratum == 1, 2] > x2 <- x[x$stratum == 2, 2] > x <- x[, 2] > > # population and strata means and variances > N <- length(x) > N.1 <- length(x1) > N.2 <- length(x2) > mu <- mean(x) > mu.1 <- mean(x1) > mu.2 <- mean(x2) > S2 <- var(x) > S2.1 <- var(x1) > S2.2 <- var(x2) > cat('mu, S2 =', mu, S2, '\n') mu, S2 = 1.44 1.511438 > cat('mu_1, S2_1 =', mu.1, S2.1, '\n') mu_1, S2_1 = 1.115 1.077161 > cat('mu_2, S2_2 =', mu.2, S2.2, '\n\n') mu_2, S2_2 = 2.09 1.759495 > > # verify sum of squares formula > A <- (N - 1)*S2 > B <- (N.1 - 1)*S2.1 + (N.2 - 1)*S2.2 > C <- N.1*(mu.1 - mu)^2 + N.2*(mu.2 - mu)^2 > cat('(N - 1)S2 =', A, '\n') (N - 1)S2 = 451.92 > cat('sum (N_h - 1)S2_h =', B, '\n') sum (N_h - 1)S2_h = 388.545 > cat('sum N_h(mu_h - mu)^2 =', C, '\n') sum N_h(mu_h - mu)^2 = 63.375 > cat('sum of the last two terms =', B + C, '\n\n') sum of the last two terms = 451.92 > > # sample variances under different sampling schemes > n <- 30 > W.1 <- N.1/N > W.2 <- N.2/N > cat('var muhat_srs =', (1/n)*S2*(1 - n/N), '\n') var muhat_srs = 0.04534314 > cat('var_prop muhat_st =', (1/n)*(S2.1*W.1 + S2.2*W.2)*(1 - n/N), '\n') var_prop muhat_st = 0.03913817 > cat('var_opt muhat_st =', (W.1*sqrt(S2.1) + W.2*sqrt(S2.2))^2/n - (W.1*S2.1 + W.2*S2.2)/N, '\n\n') var_opt muhat_st = 0.03852122 > > > # sample estimators and estimated sample variances > # simple random sample > y <- sample(x, n) > s2 <- var(y) > cat('muhat_srs - mu =', mean(y) - mu, '\n') muhat_srs - mu = -0.1733333 > cat('varhat muhat_srs =', (1/n)*s2*(1 - n/N), '\n\n') varhat muhat_srs = 0.04744828 > > # proportional sample > y1 <- sample(x1, 30*N.1/N) > y2 <- sample(x2, 30*N.2/N) > s2.1 <- var(y1) > s2.2 <- var(y2) > cat('muhar_st - mu =', W.1*mean(y1) + W.2*mean(y2) - mu, '\n') muhar_st - mu = -0.1066667 > cat('varhat_prop muhat_st =', (1/n)*(s2.1*W.1 + s2.2*W.2)*(1 - n/N), '\n\n') varhat_prop muhat_st = 0.03439766 > > # optimal sample > n.1 <- n*N.1*sqrt(S2.1)/(N.1*sqrt(S2.1) + N.2*sqrt(S2.2)) > n.2 <- n*N.2*sqrt(S2.2)/(N.1*sqrt(S2.1) + N.2*sqrt(S2.2)) > y1 <- sample(x1, n.1) > y2 <- sample(x2, n.2) > s2.1 <- var(y1) > s2.2 <- var(y2) > cat('muhar_st - mu =', W.1*mean(y1) + W.2*mean(y2) - mu, '\n') muhar_st - mu = 0.03811448 > cat('varhat_opt muhat_st =', (W.1*sqrt(s2.1) + W.2*sqrt(s2.2))^2/n - (W.1*s2.1 + W.2*s2.2)/N, '\n\n') varhat_opt muhat_st = 0.02909491