# R^2 coefficient of determination set.seed(211008) n <- 100 a <- 2 b <- 1 si2 <- 100 x <- 1:n y <- rnorm(n, a*x + b, sqrt(si2)) windows() plot(x, y) a.hat <- cor(x, y)*sd(y)/sd(x) b.hat <- mean(y) - a.hat*mean(x) R2 <- sum((a.hat*x + b.hat - mean(y))^2)/sum((y - mean(y))^2) cat("a.hat:", a.hat, " b.hat:", b.hat, " R^2:", R2, "\n") B <- 1000 windows() par(mfrow=c(3,1)) # repeated experiments R2.1 <- rep(0, B) for (i in 1:B) { x1 <- x y1 <- rnorm(n, a*x1 + b, sqrt(si2)) a1.hat <- cor(x1, y1)*sd(y1)/sd(x1) b1.hat <- mean(y1) - a1.hat*mean(x1) R2.1[i] <- sum((a1.hat*x1 + b1.hat - mean(y1))^2)/sum((y1 - mean(y1))^2) } hist(R2.1, breaks=seq(0.9, 1, by=0.005)) sd(R2.1) # parametric bootstrap si2.hat <- sum((a.hat*x + b.hat - y)^2)/(n-2) R2.2 <- rep(0, B) for (i in 1:B) { x2 <- x y2 <- rnorm(n, a.hat*x2 + b.hat, sqrt(si2.hat)) a2.hat <- cor(x2, y2)*sd(y2)/sd(x2) b2.hat <- mean(y2) - a2.hat*mean(x2) R2.2[i] <- sum((a2.hat*x2 + b2.hat - mean(y2))^2)/sum((y2 - mean(y2))^2) } hist(R2.2, breaks=seq(0.9, 1, by=0.005)) sd(R2.2) # bootstrap R2.3 <- rep(0, B) for (i in 1:B) { idx <- sample(1:n, n, replace=T) x3 <- x[idx] y3 <- y[idx] a3.hat <- cor(x3, y3)*sd(y3)/sd(x3) b3.hat <- mean(y3) - a3.hat*mean(x3) R2.3[i] <- sum((a3.hat*x3 + b3.hat - mean(y3))^2)/sum((y3 - mean(y3))^2) } hist(R2.3, breaks=seq(0.9, 1, by=0.005)) sd(R2.3) par(mfrow=c(1,1))