require(mvtnorm) n <- 10 k <- round(n/3) d <- 2 mu <- c(2.3,4.7) rho <- 0.5 Sig <- matrix(c(1,rho,rho,1),2,2) X.full <- rmvnorm(n,mu,Sig) X <- X.full miss.ind <- sample(1:n,k) for(i in miss.ind) { if(rbinom(1,1,0.5)==1) X[i,1] <- NA else X[i,2] <- NA } mu0 <- colMeans(X,na.rm=TRUE) X.temp <- X i.miss <- is.na(X[,1]) X.temp[i.miss,1] <- mu0[1]+rho*(X.temp[i.miss,2]-mu0[2]) i.miss <- is.na(X[,2]) X.temp[i.miss,2] <- mu0[2]+rho*(X.temp[i.miss,1]-mu0[1]) mu1 <- colMeans(X.temp) # EM for missing normal observations EM.missing <- function(X,mu0,Tol=0.001,max.iter=1000) { X.temp <- X iter <- 1 tol <- 100 while((iter<=max.iter)&(tol>Tol)) { i.miss <- is.na(X[,1]) X.temp[i.miss,1] <- mu0[1]+rho*(X.temp[i.miss,2]-mu0[2]) i.miss <- is.na(X[,2]) X.temp[i.miss,2] <- mu0[2]+rho*(X.temp[i.miss,1]-mu0[1]) mu1 <- colMeans(X.temp) tol <- max(abs(mu0-mu1)) mu0 <- mu1 iter <- iter+1 } return(list(mu=mu1,iter=iter-1,X=X.temp)) } # Investigate the speed of convergence of the EM n <- 100 k <- round(c(n*0.1,n*0.2,n*0.3)) d <- 2 mu <- c(2.3,4.7) rho <- 0.5 Sig <- matrix(c(1,rho,rho,1),2,2) n.iter <- matrix(NA,nrow=200,ncol=3) colnames(n.iter) <- paste(c(10,20,30),"%",sep="") for(i in 1:nrow(n.iter)) { X.full <- rmvnorm(n,mu,Sig) for(j in 1:ncol(n.iter)) { X <- X.full miss.ind <- sample(1:n,k[j]) for(m in miss.ind) { if(rbinom(1,1,0.5)==1) X[m,1] <- NA else X[m,2] <- NA } mu0 <- colMeans(X,na.rm=TRUE) n.iter[i,j] <- EM.missing(X,mu0)$iter } } colMeans(n.iter) # The average number of iterations required for convergence apply(n.iter,2,sd) # The standard deviation of the number of iterations required for convergenc # The larger the number of missing values the more iterations are required.