## The R code to implement the simulations for the paper "A Shrinkage Method for Testing the Hardy-Weinberg Equilibrium in Case-Control Studies" (Zang and Yuan, 2013) ## Four methods are considered, the pool case-control Chi^2, the control-only Chi^2, the LRT and the proposed shrinkage test ## Parameter setting ##################### ## k is the disease prevalence ## gamma1 is GRR1 ###### ## gamma2 is GRR2 ###### ## MAF is the minor allele frequency ## er is the deviation from HWE ## type is the HWD type ## PO is the prior odds for association ## r is the number of cases ## s is the number of controls ## nsim is the number of replicates ########################################## HWD=function(k,gamma1,gamma2,MAF,er,type,PO,r,s,nsim){ ## g is the genotype probabilities in the population ######### if(type==1){ g=c(MAF^2+er*MAF*(1-MAF),2*MAF*(1-MAF)*(1-er),1-2*MAF*(1-MAF)*(1-er)-MAF^2-er*MAF*(1-MAF)) } else {g=c(MAF^2*(1-er),2*MAF*(1-MAF)*(1-er)+er,1-2*MAF*(1-MAF)*(1-er)-er-MAF^2*(1-er))} ## f is the penetrances ########## f=c(gamma2*k/sum(gamma2*g[1]+gamma1*g[2]+g[3]),gamma1*k/sum(gamma2*g[1]+gamma1*g[2]+g[3]),k/sum(gamma2*g[1]+gamma1*g[2]+g[3])) ## gca is the genotype probabilities in the case group #### gca=f*g/k ## gco is the genotype probabilities in the control group ##### gco=(1-f)*g/(1-k) n=r+s phi=r/n w=0.005 ca=rmultinom(nsim,r,gca) co=rmultinom(nsim,s,gco) caco=rbind(ca,co) ## shrinkage test shrink=function(caco,x){ tab=matrix(caco,nrow=2,byrow=T) ca=tab[1,] co=tab[2,] nca=sum(ca) nco=sum(co) p1ca=ca[2]/nca p2ca=ca[3]/nca p1co=co[2]/nco p2co=co[3]/nco n=nca+nco phi=nca/n trendnu=sqrt(n)*((x*p1ca+p2ca)-(x*p1co+p2co)) trendde1=((x^2*p1ca+p2ca)-(x*p1ca+p2ca)^2)/phi trendde2=((x^2*p1co+p2co)-(x*p1co+p2co)^2)/(1-phi) trendde=sqrt(trendde1+trendde2) trend=trendnu/trendde BFA=exp(trend^2*n*w/(2*(n*w+1)))/sqrt(n*w+1) PPA=min(PO*BFA/(1+PO*BFA)+0.1,1) p=k*(p2ca+0.5*p1ca)+(1-k)*(p2co+0.5*p1co) paa=k*p2ca+(1-k)*p2co nu=paa-p^2 de1=k^2*(p^2*p1ca*(1-p1ca)/nca+(2*p-1)^2*p2ca*(1-p2ca)/nca-2*(2*p-1)*p*p1ca*p2ca/nca) de2=(1-k)^2*(p^2*p1co*(1-p1co)/nco+(2*p-1)^2*p2co*(1-p2co)/nco-2*(2*p-1)*p*p1co*p2co/nco) de=de1+de2 pp=phi*(p2ca+0.5*p1ca)+(1-phi)*(p2co+0.5*p1co) paap=phi*p2ca+(1-phi)*p2co nup=paap-pp^2 de1p=phi^2*(pp^2*p1ca*(1-p1ca)/nca+(2*pp-1)^2*p2ca*(1-p2ca)/nca-2*(2*pp-1)*pp*p1ca*p2ca/nca) de2p=(1-phi)^2*(pp^2*p1co*(1-p1co)/nco+(2*pp-1)^2*p2co*(1-p2co)/nco-2*(2*pp-1)*pp*p1co*p2co/nco) dep=de1p+de2p de1cov=k*phi*(p*pp*p1ca*(1-p1ca)/nca+(2*pp-1)*(2*p-1)*p2ca*(1-p2ca)/nca-(2*p-1)*pp*p1ca*p2ca/nca-(2*pp-1)*p*p1ca*p2ca/nca) de2cov=(1-k)*(1-phi)*(p*pp*p1co*(1-p1co)/nco+(2*pp-1)*(2*p-1)*p2co*(1-p2co)/nco-(2*p-1)*pp*p1co*p2co/nco-(2*pp-1)*p*p1co*p2co/nco) decov=de1cov+de2cov nusum=(1-PPA)*nup+PPA*nu desum=(1-PPA)^2*dep+PPA^2*de+2*(1-PPA)*PPA*decov return(nusum/(sqrt(desum))) } ## Chi^2 test uses both case and control chi2.pool=function(caco){ tab=matrix(caco,nrow=2,byrow=T) te=apply(tab,2,sum) nte=sum(te) p1=te[2]/nte p2=te[3]/nte pa=p2+0.5*p1 nu=p2-pa^2 return(sqrt(nte)*(p2-pa^2)/((1-pa)*(pa))) } ## Chi^2 test uses control only chi2.control=function(caco){ tab=matrix(caco,nrow=2,byrow=T) co=tab[2,] nco=sum(co) p1=co[2]/nco p2=co[3]/nco pa=p2+0.5*p1 nu=p2-pa^2 return(sqrt(nco)*(p2-pa^2)/((1-pa)*(pa))) } ## likelihood ratio test lrt=function(caco){ r0=caco[1] r1=caco[2] r2=caco[3] s0=caco[4] s1=caco[5] s2=caco[6] L1=function(x){ gamma1=x[1] gamma2=x[2] p1=x[3] p2=x[4] p0=1-p1-p2 f0=k/(p0+gamma1*p1+gamma2*p2) f1=gamma1*f0 f2=gamma2*f0 a0=r0*log(f0)+s0*log(1-f0)+(r0+s0)*log(p0) a1=r1*log(f1)+s1*log(1-f1)+(r1+s1)*log(p1) a2=r2*log(f2)+s2*log(1-f2)+(r2+s2)*log(p2) return(-(a0+a1+a2)) } L0=function(x){ gamma1=x[1] gamma2=x[2] p=x[3] p0=(1-p)^2 p1=2*(1-p)*p p2=p^2 f0=k/(p0+gamma1*p1+gamma2*p2) f1=gamma1*f0 f2=gamma2*f0 a0=r0*log(f0)+s0*log(1-f0)+(r0+s0)*log(p0) a1=r1*log(f1)+s1*log(1-f1)+(r1+s1)*log(p1) a2=r2*log(f2)+s2*log(1-f2)+(r2+s2)*log(p2) return(-(a0+a1+a2)) } LRT1=-optim(c(1,1,0.42,0.09),L1,method="Nelder-Mead")$value LRT0=-optim(c(1,1,0.3),L0,method="Nelder-Mead")$value LRT=2*(LRT1-LRT0) return(LRT) } tempp=apply(caco,2,chi2.pool) tempco=apply(caco,2,chi2.control) templ=apply(caco,2,lrt) tempsh=apply(caco,2,shrink,x=0.5) resultp=mean(tempp^2>qchisq(0.95,1)) resultco=mean(tempco^2>qchisq(0.95,1)) resultl=mean(templ>qchisq(0.95,1)) resultsh=mean(tempsh^2>qchisq(0.95,1)) return(list("pool case control test"=resultp,"control only test"=resultco,"lrt"=resultl,"shrinkage test"=resultsh)) } ## Power/Type I error for the pool case-control Chi^2, control-only Chi^2, LRT and shrinkage test HWD(k=0.01,gamma1=1,gamma2=1.5,MAF=0.3,er=0.05,type=1,PO=4,r=1000,s=1000,nsim=10000)