########################################################
# Introduction of the bootstrap algorithm: expectation #
########################################################

n <- 100; lam <- 1/4

# An example of data
X <- rexp(n, lam)

# The estimate of the expectation
mean(X)

# Estimate of the variance of the estimator of the expectation (theory)
var(X)/n

# Estimate of the variance of the estimator of the expectation (nonparametric bootstrap)
B <- 200; mean.X <- rep(0, B)
for (b in 1:B){
  X.b <- sample(X, n, replace = TRUE)
  mean.X[b] <- mean(X.b)
}
var(mean.X)

# Parametric estimate of the variance of the estimator of the expectation (theory)
(mean(X))^2/n

# Estimate of the variance of the estimator of the expectation (parametric bootstrap)
B <- 200; mean.X <- rep(0, B)
lam.hat <- 1/mean(X)
for (b in 1:B){
  X.b <- rexp(n, lam.hat)
  mean.X[b] <- mean(X.b)
}
var(mean.X)

##############################################
# Efron and Tibshirani, Sec. 2: corr. coef   #
##############################################
x <- c(576,635,558,578,666,580,555,661,651,605,653,575,545,572,594)
y <- c(3.39,3.3,2.81,3.03,3.44,3.07,3,3.43,3.36,3.13,3.12,2.74,2.76,2.88,2.96)
plot(x, y)
cor(x, y)

# Estimate of the variance of the estimator of the correlation (nonparametric bootstrap)
B <- 200; cor.XY <- rep(0, B)
n <- 15; index <- 1:n
for (b in 1:B){
  i.b <- sample(index, n, replace = TRUE)
  cor.XY[b] <- cor(x[i.b], y[i.b])
}
var(cor.XY)

# Estimate of the variance of the estimator of the correlation
# (parametric bootstrap, bivariate normal model)
B <- 200; cor.XY <- rep(0, B)
Mu <- c(mean(x), mean(y))
V <- cov(data.frame(x, y))
library(mvtnorm)
for (b in 1:B){
  XY <- rmvnorm(n, Mu, V)
  cor.XY[b] <- cor(XY[,1], XY[,2])
}
var(cor.XY)

# Parametric (normal-theory) estimate of the variance of the estimator of the correlation
(1 - cor(x,y)^2)^2/(n - 3)
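# A minimal sketch (an addition, not part of the original analysis): the same
# nonparametric bootstrap for the law-school correlation can be written with the
# recommended 'boot' package, assuming it is installed. boot() resamples row
# indices and passes them to the statistic function; var(out$t[,1]) plays the
# same role as var(cor.XY) above. The names 'law', 'cor.stat' and 'out' are
# illustrative only.
library(boot)
law <- data.frame(x, y)
cor.stat <- function(d, i) cor(d$x[i], d$y[i])
out <- boot(law, cor.stat, R = 200)
out$t0                # the correlation on the original sample
var(out$t[, 1])       # bootstrap estimate of its variance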
######################################################
# Introduction of the bootstrap algorithm: reg. coef #
######################################################

n <- 10; mu <- 15; al <- 1.5; sig <- 3; p <- 0.3

# The "true" variance of the coefficient estimator, approximated by simulation
n.sim <- 10^4
al.sim <- numeric(n.sim)
for (i in 1:length(al.sim)) {
  x.sim <- rbinom(n, 2, p)
  y.sim <- rnorm(n, mu + al*x.sim, sig)
  v.x <- var(x.sim)
  if (v.x > 0) al.sim[i] <- cov(x.sim, y.sim)/v.x else al.sim[i] <- 0
}
true.var <- var(al.sim)
true.var

# Creating a sample
x <- rbinom(n, 2, p)
y <- rnorm(n, mu + al*x, sig)
x
y
al.hat <- cov(x, y)/var(x)
al.hat

# Large-sample (asymptotic) estimate of the variance of al.hat
est.var.asym <- sig^2/((n - 1)*var(x))
est.var.asym
true.var

# Estimating the genetic correlation coefficient from a sample:
# under the model y = mu + al*x + e, e ~ N(0, sig^2), the correlation between
# x and y is rho = al*sd(x)/sqrt(al^2*var(x) + sig^2); this is the statistic
# whose derivative g.dot is used in approx.rho.var below
rho.est <- function(x, y, var.x, sig) {
  al.hat <- cov(x, y)/var(x)
  al.hat*sqrt(var.x)/sqrt(al.hat^2*var.x + sig^2)
}

# Parametric bootstrap estimate of the variance of the estimated correlation
boot.rho.var <- function(x, y, B = 200, sig, p) {
  rho.boot <- numeric(B)
  n <- length(x)
  var.x <- 2*p*(1 - p)
  if (var(x) > 0) {
    al.hat <- cov(x, y)/var(x)
    mu.hat <- mean(y) - al.hat*mean(x)
    for (b in 1:B) {
      x.s <- rbinom(n, 2, p)
      y.s <- rnorm(n, mu.hat + al.hat*x.s, sig)
      rho.boot[b] <- rho.est(x.s, y.s, var.x, sig)
    }
    v <- var(rho.boot, na.rm = TRUE)
  } else v <- NA
  return(v)
}

# Large-sample (delta method) approximation of the variance of the estimated correlation
approx.rho.var <- function(x, y, sig, p) {
  n <- length(x)
  var.x <- 2*p*(1 - p)
  sd.x <- sqrt(var.x)
  if (var(x) > 0) {
    al.hat <- cov(x, y)/var(x)
    # derivative of rho = al*sd.x/sqrt(al^2*var.x + sig^2) with respect to al
    g.dot <- sig^2*sd.x/sqrt((al.hat^2*var.x + sig^2)^3)
  } else g.dot <- NA
  return(g.dot^2*sig^2/(n*var.x))
}

####################
# Start simulation #
####################

# Parameters
n <- 10; mu <- 15; al <- 1.5; sig <- 3; p <- 0.3
var.x <- 2*p*(1 - p)

# The observed sample
x <- rbinom(n, 2, p)
y <- rnorm(n, mu + al*x, sig)
boot.rho.var(x, y, 200, sig, p)
approx.rho.var(x, y, sig, p)

# Compare the two variance estimators over repeated samples
rho.var <- matrix(nrow = 100, ncol = 2)
colnames(rho.var) <- c("boot", "approx")
for (i in 1:nrow(rho.var)) {
  x <- rbinom(n, 2, p)
  y <- rnorm(n, mu + al*x, sig)
  rho.var[i,] <- c(boot.rho.var(x, y, 200, sig, p), approx.rho.var(x, y, sig, p))
}
apply(rho.var, 2, mean, na.rm = TRUE)
apply(rho.var, 2, sd, na.rm = TRUE)

# Compare with the true variance of the estimated correlation (by simulation)
rho.hat <- numeric(10000)
for (i in 1:length(rho.hat)) {
  x <- rbinom(n, 2, p)
  y <- rnorm(n, mu + al*x, sig)
  rho.hat[i] <- rho.est(x, y, var.x, sig)
}
var.rho <- var(rho.hat, na.rm = TRUE)
var.rho
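# A short addition (not in the original script), using the objects computed above:
# put the Monte Carlo averages of the bootstrap and delta-method variance
# estimators next to the simulated "true" variance, and report the relative bias
# of each estimator.
c(apply(rho.var, 2, mean, na.rm = TRUE), true = var.rho)
apply(rho.var, 2, mean, na.rm = TRUE)/var.rho - 1   # relative bias of each estimator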