N <- 10 # sample size L <- 5 # number of loci K <- 2 # number of populations P.true <- matrix(rbeta(K*L,2,2),ncol=L,nrow=K) Z.true <- sample(1:K,N,rep=TRUE) X1 <- matrix(rbinom(N*L,1,P.true[Z.true,]),nrow=N,ncol=L) X2 <- matrix(rbinom(N*L,1,P.true[Z.true,]),nrow=N,ncol=L) P.true Z.true X1 X2 log.lik <- matrix(nrow=N,ncol=K) I <- diag(K) Z0 <- sample(1:K,N,rep=TRUE) # initiating the sampler Z0 A.count <- I[,Z0]%*%(X1+X2) a.count <- I[,Z0]%*%(2-X1-X2) P <- matrix(rbeta(K*L,1+A.count,1+a.count),ncol=L,nrow=K) for(k in 1:K) { P1 <- sweep(X1,2,P[k,],"*")+sweep(1-X1,2,1-P[k,],"*") P2 <- sweep(X2,2,P[k,],"*")+sweep(1-X2,2,1-P[k,],"*") log.lik[,k] <- rowSums(log(P1*P2)) } s <- rowMeans(log.lik) log.lik <- sweep(log.lik,1,s) Z1 <- apply(exp(log.lik),1,function(x) sample(1:K,1,rep=TRUE,prob=x)) Z1 gibbs.sample <- function(Z,K,X1,X2) { N <- nrow(X1) L <- ncol(X1) log.lik <- matrix(nrow=N,ncol=K) I <- diag(K) A.count <- I[,Z]%*%(X1+X2) a.count <- I[,Z]%*%(2-X1-X2) P <- matrix(rbeta(K*L,1+A.count,1+a.count),ncol=L,nrow=K) for(k in 1:K) { P1 <- sweep(X1,2,P[k,],"*")+sweep(1-X1,2,1-P[k,],"*") P2 <- sweep(X2,2,P[k,],"*")+sweep(1-X2,2,1-P[k,],"*") log.lik[,k] <- rowSums(log(P1*P2)) } s <- rowMeans(log.lik) log.lik <- sweep(log.lik,1,s) Z <- apply(exp(log.lik),1,function(x) sample(1:K,1,rep=TRUE,prob=x)) return(Z) } gibbs.cluster <- function(Z,K,X1,X2,n.iter=100,burn.down = 10) { Z.dist <- matrix(0,nrow=N,ncol=K) for(i in 1:burn.down) Z <- gibbs.sample(Z,K,X1,X2) for(i in (burn.down+1):n.iter) { Z <- gibbs.sample(Z,K,X1,X2) for(k in 1:K) Z.dist[,k] <- Z.dist[,k] + (Z==k) } return(Z.dist) } N <- 100 # sample size L <- 500 # number of loci K <- 5 # number of populations P.true <- matrix(rbeta(K*L,1,1),ncol=L,nrow=K) Z.true <- sample(1:K,N,rep=TRUE) X1 <- matrix(rbinom(N*L,1,P.true[Z.true,]),nrow=N,ncol=L) X2 <- matrix(rbinom(N*L,1,P.true[Z.true,]),nrow=N,ncol=L) Z0 <- sample(1:K,N,rep=TRUE) Z.dist <- gibbs.cluster(Z0,K,X1,X2,n.iter=100,burn.down = 10) Z.est <- apply(Z.dist,1,which.max) table(Z.true,Z.est)