############################## # Chap 10: Admixture mapping # ############################## # Function from previous chapters ################################# forward <- function(G.I,Tr,Pr) { n.samp <- dim(G.I)[1] n.mark <- dim(G.I)[2] F <- G.I F[,1,] <- sweep(G.I[,1,],2,Pr,"*") for (i in 2:n.mark) { F[,i,] <- G.I[,i,]*(F[,i-1,]%*%Tr) S <- F[,i,1] + F[,i,2] + F[,i,3] F[,i,] <- sweep(F[,i,],1,S,"/") } return(F) } backward <- function(G.I,Tr,Pr) { n.samp <- dim(G.I)[1] n.mark <- dim(G.I)[2] B <- G.I B[,n.mark,] <- 1 for (i in seq(n.mark-1,1)) { B[,i,] <- (G.I[,i+1,]*B[,i+1,])%*%t(Tr) S <- B[,i,1] + B[,i,2] + B[,i,3] B[,i,] <- sweep(B[,i,],1,S,"/") } return(B) } marginal.post <- function(F,B) { P <- F*B S <- P[,,1]+P[,,2]+P[,,3] P <- sweep(P,1:2,S,"/") return(P) } # End: Function from previous chapters ###################################### ncp.admixture <- function(alpha,delta,p1,p2,pi1=0.8, sig=1,n=1) { p.bar <- pi1*p1 + (1-pi1)*p2 a <- alpha + (1-2*p.bar)*delta d <- -2*delta v.a <- 2*p.bar*(1-p.bar)*a^2 v.d <- p.bar^2*(1-p.bar)^2*d^2 v.y <- v.a + v.d + sig^2 ncp.a <- a*(p1-p2)*sqrt(2*pi1*(1-pi1)*n/v.y) ncp.d <- d*(p1-p2)^2*pi1*(1-pi1)*sqrt(n/v.y) return(data.frame(ncp.a=ncp.a,ncp.d=ncp.d)) } p1 <- rep(c(0.7,0.9,0.99),3) p2 <- 0.5 alpha <- 1 delta <- rep(-1:1,rep(3,3)) cbind(p1,p2,delta,ncp.admixture(alpha,delta,p1,p2)) ###################################### # Estimating populations frequencies # ###################################### transition.matrix <- function(beta) { Tr <- matrix(0,3,3) Tr[1,] <- c((1-beta[1])^2,2*beta[1]*(1-beta[1]), beta[1]^2) Tr[2,] <- c(beta[2]*(1-beta[1]), prod(beta)+prod(1-beta),beta[1]*(1-beta[2])) Tr[3,] <- c(beta[2]^2,2*beta[2]*(1-beta[2]), (1-beta[2])^2) return(Tr) } stationary.probability <- function(beta) { p <- rev(beta)/sum(beta) Pr <- c(p[1]^2,2*p[1]*p[2],p[2]^2) return(Pr) } g <- 200/25 pi2 <- 0.8 delta <- 2 theta <- 0.5 - 0.5*exp(-0.02*delta) true.beta <- c(pi2,1-pi2)*(1-(1-theta)^g) Tr <- transition.matrix(true.beta) Pr <- stationary.probability(true.beta) Tr Pr sim.pop <- function(Pr,Tr,n.mark,n.subj) { pop <- matrix(ncol=n.mark,nrow=n.subj) states <- 0:(length(Pr)-1) pop[,1] <- sample(states,n.subj,rep=TRUE,prob=Pr) for (i in 2:ncol(pop)) { for(x in states) { initial.state <- (pop[,i-1] == x) prob <- Tr[x+1,] pop[initial.state,i] <- sample(states, sum(initial.state),rep=TRUE,prob=prob) } } return(pop) } geno.given.pop <- function(pop,f1,f2) { geno <- pop geno[pop==0] <- rbinom(sum(pop==0),2,f1) geno[pop==2] <- rbinom(sum(pop==2),2,f2) P.1 <- c((1-f2)*(1-f1),(1-f2)*f1+f2*(1-f1),f1*f2) geno[pop==1] <- sample(0:2,sum(pop==1),rep=TRUE,prob=P.1) return(geno) } prob.geno.given.pop <- function(geno,f1.hat,f2.hat) { n.subj <- nrow(geno) n.mark <- ncol(geno) G.nu <- array(dim=c(n.subj,n.mark,3)) G.0 <- G.1 <- G.2 <- geno for (g in 0:2) { G.0[geno==g] <- dbinom(g,2,f1.hat[geno==g]) G.2[geno==g] <- dbinom(g,2,f2.hat[geno==g]) } G.1[geno==0] <- (1-f1.hat[geno==0])*(1-f2.hat[geno==0]) G.1[geno==1] <- (1-f1.hat[geno==1])*f2.hat[geno==1] + (1-f2.hat[geno==1])*f1.hat[geno==1] G.1[geno==2] <- f1.hat[geno==2]*f2.hat[geno==2] G.nu[,,1] <- G.0 G.nu[,,2] <- G.1 G.nu[,,3] <- G.2 return(G.nu) } samp <- c(20,100,1000) f1 <- 0.75 f2 <- 0.25 n.mark <- round(140/delta)+1 locus <- ceiling(n.mark/2) n.rep <- 10^2 n.subj <- 10^3 pop.est.null <- matrix(nrow=3,ncol=length(samp)) colnames(pop.est.null) <-paste("Sample=",samp,sep="") rownames(pop.est.null) <-c("mean","var","cov") cor.est <- vector(mode="list",length=length(samp)) names(cor.est) <- paste("Sample=",samp,sep="") cor.est.pop <- cor.est for(i in 1:length(samp)) { n1 <- n2 <- samp[i] pop.est <- pop.est.1 <- pop <- NULL for (rep in 1:n.rep) { f1.hat <- rbinom(n.subj*n.mark,n1,f1)/n1 f1.hat <- matrix(f1.hat,nrow=n.subj,ncol=n.mark) f2.hat <- rbinom(n.subj*n.mark,n2,f2)/n2 f2.hat <- matrix(f2.hat,nrow=n.subj,ncol=n.mark) pop.temp <- sim.pop(Pr,Tr,n.mark,n.subj) pop <- c(pop,pop.temp[,locus]) geno <- geno.given.pop (pop.temp,f1,f2) G.pop <- prob.geno.given.pop(geno,f1.hat,f2.hat) P.F <- forward(G.pop,Tr,Pr) P.B <- backward(G.pop,Tr,Pr) P <- marginal.post(P.F,P.B) pop.est <- rbind(pop.est,P[,,2]+2*P[,,3]) pop.temp <- sim.pop(Pr,Tr,n.mark,n.subj) geno <- geno.given.pop (pop.temp,f1,f2) G.pop <- prob.geno.given.pop(geno,f1.hat,f2.hat) P.F <- forward(G.pop,Tr,Pr) P.B <- backward(G.pop,Tr,Pr) P <- marginal.post(P.F,P.B) pop.est.1 <- c(pop.est.1,P[,locus,2]+2*P[,locus,3]) } pop.est.null["mean",i] <- mean(pop.est[,locus]) pop.est.null["var",i] <- var(pop.est[,locus]) pop.est.null["cov",i] <- cov(pop.est.1,pop.est[,locus]) cor.est[[i]] <- cor(pop.est) cor.est.pop[[i]] <- cor(pop.est,pop) } round(pop.est.null,3) library(MASS) samp <- c(20,100,1000) z <- samp names(z) <- paste("n1=n2=",samp,sep="") n.rep <- 10^2 n.iter <- 10^4 for(i in 1:length(samp)) { Z.max <- NULL for(rep in 1:n.rep) { Z <- mvrnorm(n.iter,rep(0,n.mark),cor.est[[i]]) Z.max <- c(Z.max,apply(abs(Z),1,max)) } z[i] <- sort(Z.max)[n.rep*n.iter*(1-0.05/23)] } round(z,3) xi <-seq(3,8,by=0.1) n.rep <- 10 power.est <- matrix(nrow=length(xi),ncol=length(samp)) colnames(power.est) <- names(z) for (i in 1:length(samp)) { qtl <- ceiling(n.mark/2) rho <- (cor.est.pop[[i]]) Z <- mvrnorm(n.iter,rep(0,n.mark),cor.est[[i]]) for (j in 1:length(xi)) { ncp <- xi[j]*rho Z1 <- sweep(Z,2,ncp,"+") power.est[j,i] <- mean(apply(abs(Z1),1,max) >= z[i]) } } plot(range(xi),c(0,1),type="n",xlab="xi",ylab="power") gr = gray(0.5*(1:length(samp))/length(samp)) for(i in 1:length(samp)) { lines(xi,power.est[,i],col=gr[i],lwd=1.5) } legend(xi[1],1,bty="n",legend=names(z), lty=rep(1,length(z)),col=gr,lwd=1.5) ######################################################### # Estimating the admixture rates from genotypic markers # ######################################################### log.lik <- function(beta,MP.sum,JP.sum) { beta <- pmax(beta,0); beta <- pmin(beta,1) lt <- log(transition.matrix(beta)) lp <- log(stationary.probability(beta)) return(-sum(c(lp*MP.sum,lt*JP.sum))) } MLE.beta <- function(initial,pop) { M.s <- table(factor(pop[,1],0:2)) J.s <- table(factor(pop[,1:(n.mark-1)],0:2), factor(pop[,2:n.mark],0:2)) beta.hat <- optim(initial,log.lik,MP.s=M.s,JP.s=J.s)$par return(beta.hat) } pi2 <- 0.8 delta <- 2 theta <- 0.5 - 0.5*exp(-0.02*delta) true.beta <- c(pi2,1-pi2)*(1-(1-theta)^8) Tr <- transition.matrix(true.beta) Pr <- stationary.probability(true.beta) n.mark <- round(140/delta)+1 n.chr <- 23 pop <- sim.pop(Pr,Tr,n.mark,n.chr) initial.beta <- c(pi2,1-pi2)*(1-(1-theta)^6) MLE.beta(initial.beta,pop) true.beta joint.post <- function(F,B,G.pop,Tr) { n.samp <- dim(G.pop)[1] n.mark <- dim(G.pop)[2] n.state <- dim(G.pop)[3] GB <- G.pop*B; GB <- GB[,-1,] FF <- F[,-n.mark,] JP <- array(dim=c(n.samp,n.mark-1,n.state,n.state)) for (i in 1:n.state) for (j in 1:n.state) JP[,,i,j] <- FF[,,i]*GB[,,j]*Tr[i,j] S <- apply(JP,1:2,sum) JP <- sweep(JP,1:2,S,"/") return(JP) } EM.beta <- function(initial,G.pop,max.iter=1000,tol=0.001) { beta.new <- initial beta.old <- rep(-Inf,2) iter <- 1 while((itertol)) { beta.old <- beta.new Tr <- transition.matrix(beta.new) Pr <- stationary.probability(beta.new) P.F <- forward(G.pop,Tr,Pr) P.B <- backward(G.pop,Tr,Pr) MP <- marginal.post(P.F,P.B) MP.s <- apply(MP[,1,],2,sum) JP <- joint.post(P.F,P.B,G.pop,Tr) JP.s <- apply(JP,3:4,sum) beta.new <- optim(beta.new,log.lik, MP.s=MP.s,JP.s=JP.s)$par iter <- iter+1 } return(list(beta=beta.new,iter=iter)) } f1 <- 0.75 f2 <- 0.25 f1.true <- matrix(f1,nrow=n.chr,ncol=n.mark) f2.true <- matrix(f2,nrow=n.chr,ncol=n.mark) locus <- ceiling(n.mark/2) n.rep <- 10^2 beta.full <- beta.hat <- matrix(nrow=n.rep,ncol=2) iter <- vector(length=n.rep) pop.true <- pop.est <- NULL for(i in 1:n.rep) { pop <- sim.pop(Pr,Tr,n.mark,n.chr) beta.full[i,] <- MLE.beta(initial.beta,pop) geno <- geno.given.pop(pop,f1,f2) G.pop <- prob.geno.given.pop(geno,f1.true,f2.true) out <- EM.beta(initial.beta,G.pop) beta.hat[i,] <- out$beta iter[i] <- out$iter Tr.est <- transition.matrix(out$beta) Pr.est <- stationary.probability(out$beta) P.F <- forward(G.pop,Tr.est,Pr.est) P.B <- backward(G.pop,Tr.est,Pr.est) P <- marginal.post(P.F,P.B) pop.true <- rbind(pop.true,pop[,locus]) pop.est <- rbind(pop.est,P[,locus,2]+2*P[,locus,3]) } signif(true.beta,3) signif(apply(beta.full,2,mean),3) signif(apply(beta.hat,2,mean),3) signif(apply(beta.full,2,sd),3) signif(apply(beta.hat,2,sd),3) signif(c(mean(iter),sd(iter)),3) r.est <- NULL for (i in 1:n.chr) r.est[i] <- cor(pop.true[,i],pop.est[,i]) mean(r.est)