####### This is a simulation based on the mixture models in Ji et al. (2007) JRSS-C paper ############ ####### For detailed simulation setup and results, refer to the paper ################################ library(MCMCpack) ## like.pdf is the kernel from the poisson likelihood like.pdf <- function(mu,n.ij1,n.ij2,n.ij3,y){ ##res <- -mu*(exp(y)+exp(2*y)+exp(3*y)) + (y*(n.ij1+2*n.ij2+3*n.ij3))##*mu^(n.ij0+n.ij1+n.ij2) res <- -mu*(1+exp(y)+exp(2*y)) + (y*(n.ij2+2*n.ij3)) ##res <- -mu*(exp(y)+exp(2*y)+exp(3*y)) + (y*(n.ij1+2*n.ij2+3*n.ij3)) return(res) } post.a.mu <- function(y){ num <- -y*sum.mu/mu0 + (y-1)*(sum(log(mu))-n*log(mu0)) denom <- n*lgamma(y) - n*y*log(y) res <- num-denom -y/n.a +log(y)*(m.a-1) return(res) } ######################################################################### ### samp.beta.neg is the function to generate new numbers for negative beta using M-H samp.beta.neg <- function(cand.sd,ini.val,acc,mu){ neg.NN <- array(neg.NN, c(num.neg, 3)) for(i in 1:num.neg){ epsilon <- rnorm(1)*cand.sd[i] cand <- ini.val[i] + epsilon ratio <- log(dnorm(cand,mean=s1,sd=sqrt(tau1))) + like.pdf(mu[i], neg.NN[i,1],neg.NN[i,2],neg.NN[i,3],cand) - (log(dnorm(ini.val[i],mean=s1,sd=sqrt(tau1))) + like.pdf(mu[i],neg.NN[i,1],neg.NN[i,2],neg.NN[i,3],ini.val[i])) #print(ratio) if(ratio > log(runif(1))){ ini.val[i] <- cand acc[i] <- acc[i] + 1 } } return(ini.val,acc) } ### samp.beta.pos is the function to generate new numbers for positive beta using M-H samp.beta.pos <- function(cand.sd,ini.val,acc,mu){ pos.NN <- array(pos.NN, c(num.pos,3)) for(i in 1:num.pos){ epsilon <- rnorm(1)*cand.sd[i] cand <- ini.val[i] + epsilon ratio <- log(dnorm(cand,mean=s2,sd=sqrt(tau2))) + like.pdf(mu[i],pos.NN[i,1],pos.NN[i,2],pos.NN[i,3],cand) - (log(dnorm(ini.val[i],mean=s2,sd=sqrt(tau2))) + like.pdf(mu[i],pos.NN[i,1],pos.NN[i,2],pos.NN[i,3],ini.val[i])) #print(ratio) if(ratio > log(runif(1))){ ini.val[i] <- cand acc[i] <- acc[i] + 1 ##print(acc[i]) } } return(ini.val,acc) } ### samp.beta.pos is the function to generate new numbers for zero beta using M-H samp.beta.zero <- function(cand.sd,ini.val,acc,mu){ zero.NN <- array(zero.NN, c(num.zero, 3)) for(i in (1:num.zero)){ epsilon <- rnorm(1)*cand.sd[i] cand <- ini.val[i] + epsilon ratio <- log(dnorm(cand, 0, sd=sqrt(tau))) + like.pdf(mu[i],zero.NN[i,1],zero.NN[i,2],zero.NN[i,3],cand) - (log(dnorm(ini.val[i], 0,sd=sqrt(tau)))+like.pdf(mu[i],zero.NN[i,1],zero.NN[i,2],zero.NN[i,3],ini.val[i])) if(ratio > log(runif(1))){ ini.val[i] <- cand acc[i] <- acc[i] + 1 } } return(ini.val,acc) } ### sample.a.mu is the function to generate new numbers for a.mu (the prior var.) of mu using M-H sample.a.mu <- function(cand.sd, ini.val, acc){ cand <- -1 while(cand <= 0){ epsilon <- rnorm(1)*cand.sd sgn <- sign(ini.val + epsilon) epsilon <- sgn*epsilon cand <- ini.val + epsilon } ratio <- post.a.mu(cand) - post.a.mu(ini.val) if(ratio > log(runif(1))){ ini.val <- cand acc <- acc + 1 } return(ini.val, acc) } set.seed(0) ### Read in the data file #all <- read.delim("/home/yuanj/personal/research/phage/jessica/data/all6organs.txt", header=T, sep="\t") #name <- as.vector(all[,1]) ## "name" contains the names of the 3-mers ## round1,2,3 are the data for the 3 rounds #round1 <- as.matrix(all[, seq(2,17, by=3)]) #round2 <- as.matrix(all[, seq(3,18, by=3)]) #round3 <- as.matrix(all[, seq(4,19, by=3)]) ########################################### #n <- length(name) ## n is the number of 3-mers in the data #m <- dim(round1)[2] ## m is the number of organs ## NN combines 3 rounds into one array with dim 4,200 by 6 by 3 #NN <- array(rep(0, n*m*3), c(n,m,3)) #for(i in 1:dim(round1)[1]){ # for(j in 1:dim(round1)[2]){ # NN[i,j,1] <- round1[i,j] # NN[i,j,2] <- round2[i,j] # NN[i,j,3] <- round3[i,j] # } #} ################################### #cutoff <- 1 #sumNN <- matrix(rep(0,n*m)) #count <- 0 #for(i in 1:n){ # for(j in 1:m){ # count <- count + 1 # sumNN[count] <- sum(NN[i,j,]) # } #} #large.n <- length(sumNN[sumNN>cutoff]) #count <- 0 #new.NN <- matrix(rep(0, large.n*3),ncol=3) #new.i <- rep(0, large.n) #new.j <- rep(0, large.n) #for(i in 1:n){ # for(j in 1:m){ # if(sum(NN[i,j,])>cutoff){ # count <- count + 1 # new.NN[count,] <- NN[i,j,] ## new.NN is the new data # new.i[count] <- i ## new.i contains the row number # new.j[count] <- j ## new.j contains the col number # } # } #} #round1.cutoff <- 10 #n <- large.n #low.new <- c(1:n)[new.NN[,1] < round1.cutoff] #new.NN <- new.NN[low.new,] #n <- dim(new.NN)[1] ## simulating data following exactly the proposed model ## ## The model (N1,N2,N3) ~ Poi(\mu*exp(k*\beta)) ## Simulate 30 vectors of N's and three sets of (mu, beta) M <- 500 ## number of simulations result.prop <- array(rep(0,M*30),c(M,30)) result.pos.beta <- array(rep(0,M*30), c(M,30)) result.pos.mu <- array(rep(0,M*30), c(M,30)) for(sim in 1:M){ sim.n <- 30 sim.mu <- c(rep(0.5,10), rep(1,10), rep(4,10)) ## mu = .2, 2, 4 sim.beta <- c(rep(1,10), rep(0,10), rep(-1,10)) ## beta = 2, 0, -2 sim.k <- c(0,1,2) sim.new.NN <- matrix(rep(0,30*3), ncol=3) count <- 0 for(i in 1:sim.n){ count <- count + 1 for(j in 1:3){ poi.mean <- sim.mu[i]*exp(sim.k[j]*sim.beta[i]) sim.new.NN[count,j] <- rpois(1,poi.mean) }} new.NN <- sim.new.NN plot(c(1,2,3), c(-1,0,9), type="n") for(i in 1:(sim.n)){ lines(c(1:3),new.NN[i,],col=trunc((i-1)/10)+1) } #for(i in 11:20){ # new.NN[i,] <- c(2,2,2) #} n <- dim(new.NN)[1] ########################################################################### #### Start MCMC ##### #### First update the \beta's given all the other unknowns ### Initialize mu1, alpha1, mu2, alpha2, tau s1 <- -.7; s2 <- .7 tau1 <- 0.3^2; tau2 <- 0.3^2 m1 <- -1; m2 <- 1 eta1 <- 0.3; eta2 <- 0.3 ##eta1 <- 1; eta2 <- 1 alpha.tau1 <- 3; alpha.tau2 <- 3 beta.tau1 <- 1/(2*1); beta.tau2 <- 1/(2*1) tau <- .2^2; alpha.tau <- 3; beta.tau <- 1/(2*1) a.mu0 <-3; b.mu0 <- .5 ## prior para. for IG for mu0 m.a <- 3; n.a <- 1/2 ## prior para. for G for a.mu beta <- rep(0,n) ### Initialize lambda flag.round <- rep(0,n) #lambda <- array(rep(0,n*3),c(n,3)) #tmp.unif <- runif(n) #flag.round <- trunc(3*tmp.unif)+1 ## flag.round gives which component beta_ij belongs #for(i in 1:n){ # k <- flag.round[i] # lambda[i,k] <- 1 ## only one element of lambda equals 1 #} ### Initialize mu, mu0, a.mu, and beta ##mu <- c(rep(.2,10),rep(2,10),rep(3,10)); mu <- rep(1,n); mu.alpha <- sum(new.NN)-1 mu0 <- 1; a.mu <- 20 ## Initialize ppi's -- The mixing probabilities ppi <- rep(1/3, 2) ##beta <- c(rep(2,10),rep(0.01,10),rep(-1,10)) ##beta <- rep(0.1,n) ini.slope <- (log(new.NN[,3]+1)-log(new.NN[,1]+1))/2 pos.id <- c(1:n)[ini.slope > .2] pos.NN <- new.NN[pos.id,] num.pos <- length(pos.id) beta[pos.id] <- ini.slope[pos.id] neg.id <- c(1:n)[ini.slope < -.2] neg.NN <- new.NN[neg.id,] num.neg <- length(neg.id) beta[neg.id] <- ini.slope[neg.id] zero.id <- c(1:n)[(ini.slope>=-0.2)&(ini.slope<=0.2)] zero.NN <- new.NN[zero.id,] num.zero <- length(zero.id) beta[zero.id] <- ini.slope[zero.id] #acc.pos <- 0 #acc.zero <- 0 #acc.neg <- 0 acc.beta <- rep(0,n) acc.alpha1 <- 0 acc.alpha2 <- 0 acc.a.mu <- 0 acc.ppi <- 0 nn <- n BB <- 30000 chain.s1 <- rep(0,BB) chain.s2 <- rep(0,BB) chain.tau1 <- rep(0,BB) chain.tau2 <- rep(0,BB) chain.beta <- array(rep(0,n*BB), c(n,BB)) chain.lambda <- array(rep(0,n*3), c(n,3)) chain.mu1 <- rep(0,BB); chain.mu2 <- rep(0,BB) chain.tau <- rep(0,BB) chain.flag <- rep(0,BB) chain.alpha1 <- rep(0,BB); chain.alpha2 <- rep(0, BB) chain.a.mu <- rep(0,BB) chain.mu0 <- rep(0,BB) chain.mu <- array(rep(0,nn*BB), c(nn,BB)) chain.ppi1 <- rep(0,BB) chain.ppi2 <- rep(0,BB) chain.ppi3 <- rep(0,BB) mh.sd <- rep(0,n) for(i in 1:n){ mh.sd[i] <- sqrt(1* (1/(max(new.NN[,1]) + max(new.NN[,2]) + max(new.NN[,3]) + 1 - new.NN[i,1] - new.NN[i,2]- new.NN[i,3])^2))*10 #if(max(abs(new.NN[i,1]-new.NN[i,2]),abs(new.NN[i,3]-new.NN[i,2]))<2) mh.sd[i] <- mh.sd[i] ##mh.sd[i] <- (max(new.NN[i,])-min(new.NN[i,])+1)/100 ##if((new.NN[i,1]>new.NN[i,2])&(new.NN[i,1]>new.NN[i,3])) mh.sd[i] <- mh.sd[i] } ##mh.sd <- c(rep(0.5,20), rep(0.1,10)) alpha1.sd <- 1; alpha2.sd <- 1 a.mu.sd <- 2 ppi.sd <- 0.2 ### start the MCMC for(iter in 1:BB){ ##print(iter) ##if(iter==25) readline() pos.sd <- mh.sd[pos.id] zero.sd <- mh.sd[zero.id] neg.sd <- mh.sd[neg.id] ##pos.sd <- rep(.4, num.pos) ##zero.sd <- rep(.4, num.zero) ##neg.sd <- rep(.4, num.neg) ### update all the beta's in the postive component if(num.pos > 0){ ini.val <- abs(beta[pos.id]) samp.pos <- samp.beta.pos(pos.sd,ini.val,acc.beta[pos.id],mu[pos.id]) beta[pos.id] <- samp.pos$ini.val acc.beta[pos.id] <- samp.pos$acc } ### update all the beta's in the negative component if(num.neg > 0){ ini.val <- -abs(beta[neg.id]) samp.neg <- samp.beta.neg(neg.sd,ini.val,acc.beta[neg.id],mu[neg.id]) beta[neg.id] <- samp.neg$ini.val acc.beta[neg.id] <- samp.neg$acc } ### update all the beta's in the zero component if(num.zero > 0){ ini.val <- beta[zero.id] samp.zero <- samp.beta.zero(zero.sd,ini.val,acc.beta[zero.id],mu[zero.id]) beta[zero.id] <- samp.zero$ini.val acc.beta[zero.id] <- samp.zero$acc } ### update tau, whose prior is IG(alpha.tau, beta.tau), mean and var =4 if (num.zero > 0){ sum.zerobeta <- sum(beta[zero.id]*beta[zero.id]) tau <- 1/rgamma(1, shape=alpha.tau + num.zero/2, scale = (1/(1/beta.tau + sum.zerobeta/2))) ##tau <- 0.3^2 chain.tau[iter] <- tau } ### update s1 and s2 if((num.neg>0)&(num.pos>0)&(num.zero>0)){ bmean.neg <- mean(beta[neg.id]) bmean.pos <- mean(beta[pos.id]) bmean.zero <- mean(beta[zero.id]) B.neg <- eta1/(eta1 + tau1/num.neg) B.pos <- eta2/(eta2 + tau2/num.pos) s1 <- rnorm(1)*sqrt(B.neg*tau1/num.neg) + B.neg*bmean.neg + (1-B.neg)*m1 s2 <- rnorm(1)*sqrt(B.pos*tau2/num.pos) + B.pos*bmean.pos + (1-B.pos)*m2 #s1 <- -1 #s2 <- 1 chain.s1[iter] <- s1 chain.s2[iter] <- s2 } ##print(c(s1,s2)) ##readline() ### update tau1 and tau2 if (num.neg > 0){ tau1 <- 1/rgamma(1,alpha.tau1+num.neg/2, scale=1/(1/beta.tau1 + sum((beta[neg.id]-s1)^2))) tau1 <- .4^2 ## a simplified model that fixes the value of tau1 and tau2 chain.tau1[iter] <- tau1 } if (num.pos >0){ tau2 <- 1/rgamma(1,alpha.tau2+num.pos/2, scale=1/(1/beta.tau2 + sum((beta[pos.id]-s2)^2))) tau2 <- .4^2 ## a simplified model that fixes the value of tau1 and tau2 chain.tau2[iter] <- tau2 } ##print(c(tau1,tau2)) ##readline() ### update all the lambda's tmp.unif <- runif(n) num.sum <- ppi[1]*dnorm(beta,s1,sd=sqrt(tau1)) + ppi[2]*dnorm(beta,s2,sd=sqrt(tau2)) + (1-ppi[1]-ppi[2])*dnorm(beta, sd=sqrt(tau)) flag.round <- 1*(log(tmp.unif) < (log(ppi[1]*dnorm(beta,s1,sd=sqrt(tau1))) - log(num.sum))) + 2*((log(tmp.unif) > (log(ppi[1]*dnorm(beta,s1,sd=sqrt(tau1))) - log(num.sum))) & (log(tmp.unif) < (log(ppi[1]*dnorm(beta,s1,sd=sqrt(tau1)) + ppi[2]*dnorm(beta,s2,sd=sqrt(tau2))) - log(num.sum)))) + 3*(log(tmp.unif) > (log(ppi[1]*dnorm(beta,s1,sd=sqrt(tau1)) + ppi[2]*dnorm(beta,s2,sd=sqrt(tau2))) - log(num.sum))) ##print(c(beta[1],flag.round[1])) ##readline() ##print(iter) ##print(flag.round) chain.flag[iter] <- flag.round[2] neg.id <- c(1:n)[flag.round==1] zero.id <- c(1:n)[flag.round==3] pos.id <- c(1:n)[flag.round==2] pos.NN <- array(new.NN[pos.id,],c(length(pos.id),3)) num.pos <- length(pos.id) neg.NN <- array(new.NN[neg.id,],c(length(neg.id),3)) num.neg <- length(neg.id) zero.NN <- array(new.NN[zero.id,],c(length(zero.id),3)) num.zero <- length(zero.id) ##if(iter==5) readline() ##print(iter) ### update ppi the prior for ppi is Dir(30,30,30) prior.ppi <- 50 ppi <- rdirichlet(1,c(num.neg+prior.ppi, num.pos+prior.ppi, num.zero+prior.ppi)) chain.ppi1[iter] <- ppi[1] chain.ppi2[iter] <- ppi[2] chain.ppi3[iter] <- 1 - ppi[1] - ppi[2] ##print(ppi) ### update mu, mu0 for(i in 1:n){ mu[i] <- rgamma(1, sum(new.NN[i,])+a.mu, scale=1/(1+exp(beta[i]) + exp(2*beta[i]) + a.mu/mu0)) #mu[i] <- rgamma(1, sum(new.NN)+a.mu, scale=1/(sum(1+exp(beta) + exp(2*beta)) + a.mu/mu0)) } sum.mu <- sum(mu) #sum.mu <- sum(mu)/n mu0 <- 1/rgamma(1, a.mu*n + a.mu0, scale = 1/(1/b.mu0 + a.mu*sum.mu)) #mu0 <- 1/rgamma(1, a.mu + a.mu0, scale = 1/(1/b.mu0 + a.mu*sum.mu)) ### update a.mu using M-H #ini.val <- a.mu #samp.a.mu <- sample.a.mu(a.mu.sd, ini.val, acc.a.mu) #a.mu <- samp.a.mu$ini.val a.mu <- .1 #acc.a.mu <- samp.a.mu$acc chain.a.mu[iter] <- a.mu chain.mu0[iter] <- mu0 ##mu <- runif(n)/2 for(j in 1:n){ chain.beta[j,iter] <- beta[j] chain.mu[j,iter] <- mu[j] } #if(iter>1){ for(j in 1:3){ chain.lambda[,j] <- chain.lambda[,j] + (flag.round==j) # } } } out.id <- seq(1000,BB,by=5) leng.out <- length(out.id) print(sim) result.prop[sim,] <- apply((chain.beta[,out.id]>0),1,sum)/leng.out result.pos.beta[sim,] <- apply(chain.beta[,out.id], 1, mean) result.pos.mu[sim,] <- apply(chain.mu[,out.id], 1, mean) ##print(cbind(result.pos.beta[sim,],result.pos.mu[sim,])) } write.table(round(cbind(result.prop,result.pos.beta, result.pos.mu),3), "simresult3", row.names=F, col.names=F, quote=F, sep=" ") tmp<-read.table("simresult3") post.beta <- tmp[,31:60] post.mu <- tmp[,61:90] mean.beta <- apply(post.beta, 2, mean) sd.beta <- apply(post.beta, 2, sd) mean.mu <- apply(post.mu, 2, mean) sd.mu <- apply(post.mu, 2,sd) postscript("simbeta.ps",horiz=F) par(mfrow=c(2,1)) plot(c(-2,2), c(0,1.2), "n", xlab="Beta", ylab="Density") for(i in 1:9){ lines(density(post.beta[,i]),col=4) lines(density(post.beta[,10+i]),col=5) lines(density(post.beta[,20+i]),col=3) } #dev.off() #postscript("simmu.ps", horiz=F) #par(mfrow=c(3,3)) plot(c(0,7), c(0,1.2), "n", xlab="Mu", ylab="Density") for(i in 1:9){ lines(density(post.mu[,i]), col=4) lines(density(post.mu[,10+i]), col=5) lines(density(post.mu[,20+i]), col=3) } dev.off()