######################### # Homework assignment 1 # ######################### # 1: Source of error #################### n <- 100 X <- rbinom(10^6,n,0.5) Z <- (X-n/2)/sqrt(n/4) mean(Z > 1.645) dZ <- table(Z)/length(Z) plot(names(dZ),dZ,type="h") abline(v=1.645,col=2) names(dZ) z <- seq(1.4,1.8,by=0.1) p.limit <- 1-pnorm(z) p.true <- p.limit for(i in 1:length(z)) p.true[i] <- mean(Z >= z[i]) cbind(z,p.true,p.limit) z.c <- (ceiling(1.645*sqrt(n/4)+n/2)-0.5-n/2)/sqrt(n/4) 1-pnorm(z.c) mean(Z >= 1.645) # 2.3: Investigating the null distribution ######################################### n1 <- 100 n2 <- 100 p <- 0.3 d <- 0 iter <- 10^5 X1 <- rbinom(iter,2*n1,p+d) X2 <- rbinom(iter,2*n2,p-d) phat1 <- X1/(2*n1) phat2 <- X2/(2*n2) phat <- (X1 + X2)/(2*n1+2*n2) Z <- (phat1-phat2)/sqrt(phat*(1-phat)*(1/(2*n1) + 1/(2*n2))) mean(abs(Z) > 1.96) z <- seq(-4,4,length=100) plot(z,dnorm(z),type="l", main="Density of Z",xlab="Z",ylab="density") lines(density(Z),col=2) sim.Z <- function(n1,n2,p=0.5,d=0,iter=10^5) { X1 <- rbinom(iter,2*n1,p+d) X2 <- rbinom(iter,2*n2,p-d) phat1 <- X1/(2*n1) phat2 <- X2/(2*n2) phat <- (X1 + X2)/(2*n1+2*n2) Z <- (phat1-phat2)/sqrt(phat*(1-phat)*(1/(2*n1) + 1/(2*n2))) return(Z) } Z <- sim.Z(n1,n2,p=0.5,d=0,iter=10^5) mean(abs(Z) > 1.96) lines(density(Z),col=3) # Effect of sample size on sig.level n1 <- 100 n2 <- n1*c(0.5,1:5) sig.level <- NULL z <- seq(-4,4,length=100) plot(z,dnorm(z),type="l", main="Density of Z",xlab="Z",ylab="density") for(i in 1:length(n2)) { Z <- sim.Z(n1,n2[i],p=0.5,d=0,iter=10^5) sig.level[i] <- mean(abs(Z) > 1.96) lines(density(Z),col=(i+1)) } names(sig.level) <- n2 sig.level # Effect of p on sig.level n1 <- 100 n2 <- 100 p <- c(0.05,0.1,0.3,0.5) sig.level <- NULL z <- seq(-4,4,length=100) plot(z,dnorm(z),type="l", main="Density of Z",xlab="Z",ylab="density") for(i in 1:length(p)) { Z <- sim.Z(n1,n2,p=p[i],d=0,iter=10^5) sig.level[i] <- mean(abs(Z) > 1.96) lines(density(Z),col=(i+1)) } names(sig.level) <- p sig.level # 2.4: Investigate the power ############################ # Effect of sample size and d on power n1 <- 100 n2 <- n1*c(0.5,1:5) d <- seq(0,0.1,by=0.02) P <- matrix(nrow=length(n2),ncol=length(d)) for(i in 1:length(n2)) for(j in 1:length(d)) { Z <- sim.Z(n1,n2[i],p=0.5,d=d[j],iter=10^5) P[i,j] <- mean(abs(Z) > 1.96) } rownames(P) <- n2 colnames(P) <- d round(P,3) # Effect of p and d on power n1 <- 100 n2 <- n1 d <- seq(0,0.1,by=0.02) p <- c(0.05,0.1,0.3,0.5) P <- matrix(nrow=length(p),ncol=length(d)) for(i in 1:length(p)) for(j in 1:length(d)) { Z <- sim.Z(n1,n2,p=p[i],d=d[j],iter=10^5) P[i,j] <- mean(abs(Z) > 1.96) } rownames(P) <- p colnames(P) <- d round(P,3)