######### Nov 30, 2006, Yuan Ji ############# ## For simulation study, pi follows Berger's prior, taua=.2, taub=1, alpha=3, beta=40, metvar0=.1, metvar1=.1 qqplots look ok except for the tails. For alternative qqplot, several points were below the 45 degree line. For null qqplot, several points were above the 45 degree line. Seed = 1. set.seed(1) #0.7559792 system("rm -f postprob.txt pvalue.txt") ##### This program performs the Bayesian FDR based on test statistics ###### ####################################################### ## Define density function for chisq-distribution chiden <- function(x,k){ dgamma(x, k/2, 1/2) } ####################################################### ######################################################################## ## a function that generates rand. numbers from IG based on mean and sd## iggam <- function(n,mu,sig) 1/rgamma(n,(mu^2/sig^2)+2,mu*(1+mu^2/sig^2)) ####################################################### ####################################################### ## Define the density of f-statistics under H1 ### h1den <- function(x,tau,k){ res <- chiden(x/(1+tau), k)/(1+tau) return(res) } ####################################################### ####################################################### ## Define the density of f-statistics under H0 ### h0den <- function(x,tau,k){ res <- chiden(x/tau, k)/tau return(res) } ####################################################### ####################################################### ## compute the log-likelihood for tau1## h1like <- function(tau, J,k){ logh1 <- 0 logh1 <- logh1 + sum(J*log(h1den(f,tau, k))) # for(i in 1:G){ # logh1 <- logh1 + J[i] * log(h1den(f[i], tau, mm, (mm+1)*(n[i]-1))) # } res <- logh1 return(res) } ####################################################### ####################################################### ## compute the log-likelihood for tau0## h0like <- function(tau, J,k){ logh1 <- 0 logh1 <- logh1 + sum((1-J)*log(h0den(f,tau,k))) # for(i in 1:G){ # logh1 <- logh1 + (1-J[i]) * log(h1den(f[i], tau, mm, (mm+1)*(n[i]-1))) # } res <- logh1 return(res) } ####################################################### ####################################################### ## Define the log prior density of tau, which is an inverse gamma ## tauprior <- function(tau){ res <- alpha * log(beta) - (alpha+1)*log(tau) - beta/tau - lgamma(alpha) ##res <- -tau/beta + (alpha-1)*log(tau) - lgamma(alpha)- alpha*log(beta) ##res <- 1 return(res) } ####################################################### ###################### simulate data sets ####################### G <- 2000 ##G is the number of test statistics J <- rep(0,G) ## J is the vector of indicator variables nn <-16 ## assuming the number of replicates is 10 across all the genes n <- rep(nn,G) ## n is the vector the component of which is the number of replicate3 indic <- rep(0, G) f <- rep(0,G); t <- rep(0,G) pi <- .2 tau1 <- 5 tau0 <- -.5 ## size of tau is fixed at 100 for this simulation for(i in 1:G){ indic[i] <- rbinom(1, 1, pi) if(indic[i] == 0){f[i] <- rchisq(1,1)*(1+tau0)} if(indic[i] == 1){f[i] <- rchisq(1,1)*(1+tau1)} } flagset <- c(1:G)[indic==1] print(f[flagset]) print(" ") boxplot(f, f[flagset]) G <- length(f) kk <- 1 hist(f) numsig <- length(flagset) ################################################################# J <- rep(0,G) ## J is the vector of indicator variables alpha <- 3; beta <- 5 ## For Hedent and Alon, we need 1. taua <- 1; taub <- 1 ## for hedent data, we need 10 and .01. For Alon data, we need 10 and .05. metvar1 <- .05; metvar0 <- .05 ## metvar is the variance of the proposal normal distribution in the Metropolis step ############################################################################ ############# Begin MCMC ####################### ############################################################################ ##pi.val <- 1 - sum(cumsum(sort(f))/c(1:G)<(nn-1)/(nn-1-2))/G ## pi.val is the number of proportion burn.in <- 1000 thin.num <- 5 mc.num <- 5000 count <- 0 sim.tau0 <- rep(0, mc.num) sim.tau1 <- rep(0, mc.num) #sim.J <- matrix(rep(0,mc.num*G), nrow=mc.num) newJprob <- rep(0,G) sim.pi <- rep(0, mc.num) ############# initialize MCMC ################# sim.pi[1]<- .2 ## initial value of sim.pi should be close to what is observed at data sim.tau0[1] <- .1 sim.tau1[1] <- 1 ## initialize indicators -- the largest sim.pi[1] percent of f values gets 1, the smaller ones get 0 sfid <- sort(f,index.return=T)$ix ## Sort f numzero <- round(G*(1-sim.pi[1])) ## NUMBER OF 0's in the intial J zeroid <- sfid[1:numzero] ## these id's have 0 inital values for J simJp1 <- rep(1,G) simJp1[zeroid] <- 0 sumJ <- rep(0, G) #sim.J[1,] <- rbinom(G,1,.5) U0 <- runif(mc.num) U1 <- runif(mc.num) ## uniform random numbers for Metropolis acceptance acpt1 <- 0; acpt0 <- 0 keep.sim <- seq(burn.in, mc.num, thin.num) keep.lgth <- length(keep.sim) draw <- sample(seq(burn.in,mc.num,thin.num),1) ##mc.num<-3; for(sim in 1:(mc.num-1)){ ####################################################### ## Sample tau1 using Metropolis ## ##ss <- 0 ## while(ss==0){ epsilon <- rnorm(1,0, metvar1) tauprop1 <- sim.tau1[sim] * exp(epsilon) epsilon <- rnorm(1,0, metvar0) tauprop0 <- sim.tau0[sim] * exp(epsilon) ## since 1+\tau_0 ~ Gamma ##ss <- ss + (tauprop0 < tauprop1) ##print(c(tauprop0,tauprop1)) ## } if(tauprop0 < (1+tauprop1)){ metratio <- h1like(tauprop1, simJp1, kk) + tauprior(tauprop1) + log(tauprop1) - h1like(sim.tau1[sim] , simJp1, kk) - tauprior(sim.tau1[sim]) - log(sim.tau1[sim]) + h0like(tauprop0, simJp1, kk) + log(dgamma(tauprop0, taua, scale=taub)) + log(tauprop0) - h0like(sim.tau0[sim], simJp1, kk) - log(dgamma(sim.tau0[sim], taua, scale=taub)) - log(sim.tau0[sim]) prob <- min(1, exp(metratio)) tmp <- (U1[sim] <= prob) sim.tau1[sim+1] <- sim.tau1[sim]*(1 - tmp) + tauprop1*tmp sim.tau0[sim+1] <- sim.tau0[sim]*(1 - tmp) + tauprop0*tmp acpt1 <- acpt1 + tmp } else{ sim.tau1[sim+1] <- sim.tau1[sim]; sim.tau0[sim+1] <- sim.tau0[sim] } ##sim.tau1[sim+1] <- 20; sim.tau0[sim+1] <- .1 newJprob <- (h1den(f, sim.tau1[sim+1], kk) * sim.pi[sim]) / (h1den(f, sim.tau1[sim+1], kk) * sim.pi[sim] + h0den(f, sim.tau0[sim+1], kk) * (1-sim.pi[sim])) ##print(newJprob[4]) simJp1 <- rbinom(G,1,newJprob) sumJ <- sumJ + simJp1*((sim%%thin.num)==0) count <- count + ((sim%%thin.num)==0) ## sim.J[sim+1,] <- rbinom(G,1,newJprob) ## Sample pi directly ## tmpa <- sum(simJp1) ##sim.pi[sim+1] <- .2 sim.pi[sim+1] <- rbeta(1, 1+tmpa, 5.58+G-tmpa) ##sim.pi[sim+1] <- rbeta(1, .05*G/100+tmpa, .95*G/100+G-tmpa) if(sim==draw){ tau1.draw <- sim.tau1[sim] tau0.draw <- sim.tau0[sim] J.draw <- simJp1 } ####################################################### print(c(sim, sim.pi[sim], sim.tau1[sim], sim.tau0[sim], acpt1/sim))#, sim.tau0[sim], acpt0/sim)) } ########################################################################### ###### compute Bayesian FDR ########## ######################################## nullprob <- rep(0,G) ## null posterior prob. nullprob <- 1-sumJ/(count) ##for(i in 1:G){ ## nullprob[i] <- sum(1-sim.J[keep.sim,i])/keep.lgth ## } nullsort <- sort(nullprob) ## sorted null posterior prob. nullsort.id <- sort(nullprob, index.return=T)$ix ## sorted index print(mean(sim.pi[keep.sim])) write.table(cbind(nullsort, nullsort.id), "postprob.txt", append=T, row.names=F, col.names=F, quote=F) FDR <- c(0.01, .05, .1, .2) FDR <- seq(0, .8, .005) num.FDR <- length(FDR) num.gene <- rep(0, num.FDR) num.cut <- rep(0, num.FDR) for(cc in 1:num.FDR){ curr.FD <- nullsort[1] size <- 0 while((curr.FD/(size+1) <= FDR[cc])&(size <= G)){ size <- size + 1 curr.FD <- curr.FD + nullsort[size] } num.gene[cc] <- size-1 num.cut[cc] <- nullsort[size-1] } postscript("chisq_sim_FDR.ps",horiz=F) plot(num.gene, FDR, ylim=c(0,1), "l", lty=2, xlim=c(0,2000), ylab=" ", xlab="Number of significant tests") lines(c(1:G), nullsort, lty=1) legend(1000, .3, c("Posterior prob. of null", "Bayesian FDR"), lty=1:2) dev.off() ####################### model diagnostic ######################## #diag<-function(sed){ #set.seed(sed) #posttau1 <- sim.tau1[keep.sim] #posttau0 <- sim.tau0[keep.sim] #postJ <- sim.J[keep.sim,] #ltau <- length(posttau1) #draw <- sample(c(1:ltau),1) write.table(cbind(tau1.draw, tau0.draw, J.draw), "diagdraw.txt", quote = F, row.names = F, col.names = F) tmp<-read.table("diagdraw.txt") tau1.draw <- tmp[1,1] tau0.draw <- tmp[1,2] J.draw <- tmp$V3 samp.f <- f[J.draw==1] samp.nf <- f[J.draw==0] lsampn <- length(samp.nf) lsamp <- length(samp.f) #chiden2 <- function(x,df1=kk/2, df2=1/2, log=FALSE){ # dgamma(x/(1+tau1.draw), df1, df2, log)/(1+tau1.draw); #} chiden2 <- function(x){ dgamma(x/(1+tau1.draw), kk/2, 1/2)/(1+tau1.draw); } quant <- function(q){ res <- uniroot(function(y){integrate(chiden2, 0, y)$value-q}, c(0.00000000001, 100))$root return(res) } model.quan2 <- rep(0, lsamp) for(i in 1:lsamp){ model.quan2[i] <- quant(i/(1+lsamp)) } #chiden3 <- function(x,df1=kk/2, df2=1/2, log=FALSE){ # dgamma(x/(1+tau0.draw), df1, df2, log)/(1+tau0.draw); #} chiden3 <- function(x){ dgamma(x/(tau0.draw), kk/2, 1/2)/(tau0.draw); } quant0 <- function(q){ res <- uniroot(function(y){integrate(chiden3, 0, y)$value-q}, c(0.00000000001, 100))$root return(res) } model.nquan2 <- rep(0, lsampn) for(i in 1:lsampn){ model.nquan2[i] <- quant0(i/(1+lsampn)) } par(mfrow=c(2,3)) ##set.seed(2) ##lsampn <- length(samp.nf) ##nrep <- 1 ##rep.f <- matrix(rep(0, lsampn*nrep), nrow=nrep) ##for(i in 1:nrep){ ## rep.f[i,] <- sort(rf(lsampn, 1, 2*(nn-1))*(1+0.1)) ##} ##model.nquan2 <- apply(rep.f, 2, mean) ##lsamp <- length(samp.f) ##nrep <- 1 ##rep.f <- matrix(rep(0, lsamp*nrep), nrow=nrep) ##for(i in 1:nrep){ ## rep.f[i,] <- sort(rf(lsamp, 1, 2*(nn-1))*(1+20)) ##} ##model.quan2 <- apply(rep.f, 2, mean) plot(sim.pi) plot(density(sim.tau1),xlim=c(0,20)) lines(density(sim.tau0)) ##plot(log(model.quan2), log(sort(samp.f))) ##lines(c(-600,600),c(-600,600)) plot(sim.tau0) plot(sim.tau1) plot(model.quan2, sort(samp.f)) lines(c(-600,600),c(-600,600)) plot(model.nquan2, sort(samp.nf)) lines(c(-600,600), c(-600,600)) postscript("quant_sim_chisq.ps",horiz=F) par(mfrow=c(1,2)) plot(model.quan2, sort(samp.f), xlab="Model quantiles", ylab="Sample quantiles", main="Alternative") lines(c(-600,600),c(-600,600)) plot(model.nquan2, sort(samp.nf), xlab="Model quantiles", ylab="Sample quantiles", main="Null") lines(c(-100,100),c(-100,100)) dev.off() cid <- c(1:G)[indic==1] ncid <- c(1:G)[indic==0] postscript("sim_diag_chisq.ps", horiz=F) par(mfrow=c(2,2)) plot(density(sim.tau0[keep.sim], bw=.1), xlim=c(0,1), xlab="Posterior 1+Tau0", ylab="Kernel density", main=" ") plot(density(sim.tau1[keep.sim], bw=.5), xlim=c(1,8), xlab="Posterior Tau1", ylab="Kernel density", main=" ") boxplot(f[ncid], f[cid], names=c("Null", "Alternative"),ylab="Chi-square stat") boxplot(1-nullprob[ncid], 1-nullprob[cid], names=c("Null", "Alternative"), ylab="Post. prob. of Alternative") dev.off()