## The R code to implement the simulations for the paper "A Shrinkage Method for Testing the Hardy-Weinberg Equilibrium in Case-Control Studies" (Zang and Yuan, 2013)
## Four methods are considered, the pool case-control Chi^2, the control-only Chi^2, the LRT and the proposed shrinkage test

## Parameter setting #####################
## k is the disease prevalence 
## gamma1 is GRR1 ######
## gamma2 is GRR2 ######
## MAF is the minor allele frequency
## er is the deviation from HWE
## type is the HWD type
## PO is the prior odds for association 
## r is the number of cases 
## s is the number of controls 
## nsim is the number of replicates 
##########################################


HWD=function(k,gamma1,gamma2,MAF,er,type,PO,r,s,nsim){
## g is the genotype probabilities in the population #########
if(type==1){
g=c(MAF^2+er*MAF*(1-MAF),2*MAF*(1-MAF)*(1-er),1-2*MAF*(1-MAF)*(1-er)-MAF^2-er*MAF*(1-MAF))
} else {g=c(MAF^2*(1-er),2*MAF*(1-MAF)*(1-er)+er,1-2*MAF*(1-MAF)*(1-er)-er-MAF^2*(1-er))}
## f is the penetrances ##########
f=c(gamma2*k/sum(gamma2*g[1]+gamma1*g[2]+g[3]),gamma1*k/sum(gamma2*g[1]+gamma1*g[2]+g[3]),k/sum(gamma2*g[1]+gamma1*g[2]+g[3]))
## gca is the genotype probabilities in the case group ####
gca=f*g/k
## gco is the genotype probabilities in the control group #####
gco=(1-f)*g/(1-k)
n=r+s
phi=r/n
w=0.005
ca=rmultinom(nsim,r,gca)
co=rmultinom(nsim,s,gco)
caco=rbind(ca,co)

## shrinkage test
shrink=function(caco,x){
  tab=matrix(caco,nrow=2,byrow=T)
  ca=tab[1,]
  co=tab[2,]
  nca=sum(ca)
  nco=sum(co)
  p1ca=ca[2]/nca
  p2ca=ca[3]/nca
  p1co=co[2]/nco
  p2co=co[3]/nco
  n=nca+nco
  phi=nca/n
  trendnu=sqrt(n)*((x*p1ca+p2ca)-(x*p1co+p2co))
  trendde1=((x^2*p1ca+p2ca)-(x*p1ca+p2ca)^2)/phi
  trendde2=((x^2*p1co+p2co)-(x*p1co+p2co)^2)/(1-phi)
  trendde=sqrt(trendde1+trendde2)
  trend=trendnu/trendde
  BFA=exp(trend^2*n*w/(2*(n*w+1)))/sqrt(n*w+1)
  PPA=min(PO*BFA/(1+PO*BFA)+0.1,1)
  p=k*(p2ca+0.5*p1ca)+(1-k)*(p2co+0.5*p1co)
  paa=k*p2ca+(1-k)*p2co
  nu=paa-p^2
  de1=k^2*(p^2*p1ca*(1-p1ca)/nca+(2*p-1)^2*p2ca*(1-p2ca)/nca-2*(2*p-1)*p*p1ca*p2ca/nca)
  de2=(1-k)^2*(p^2*p1co*(1-p1co)/nco+(2*p-1)^2*p2co*(1-p2co)/nco-2*(2*p-1)*p*p1co*p2co/nco) 
  de=de1+de2
  pp=phi*(p2ca+0.5*p1ca)+(1-phi)*(p2co+0.5*p1co)
  paap=phi*p2ca+(1-phi)*p2co
  nup=paap-pp^2
  de1p=phi^2*(pp^2*p1ca*(1-p1ca)/nca+(2*pp-1)^2*p2ca*(1-p2ca)/nca-2*(2*pp-1)*pp*p1ca*p2ca/nca)
  de2p=(1-phi)^2*(pp^2*p1co*(1-p1co)/nco+(2*pp-1)^2*p2co*(1-p2co)/nco-2*(2*pp-1)*pp*p1co*p2co/nco) 
  dep=de1p+de2p
  de1cov=k*phi*(p*pp*p1ca*(1-p1ca)/nca+(2*pp-1)*(2*p-1)*p2ca*(1-p2ca)/nca-(2*p-1)*pp*p1ca*p2ca/nca-(2*pp-1)*p*p1ca*p2ca/nca)
  de2cov=(1-k)*(1-phi)*(p*pp*p1co*(1-p1co)/nco+(2*pp-1)*(2*p-1)*p2co*(1-p2co)/nco-(2*p-1)*pp*p1co*p2co/nco-(2*pp-1)*p*p1co*p2co/nco)
  decov=de1cov+de2cov
  nusum=(1-PPA)*nup+PPA*nu
  desum=(1-PPA)^2*dep+PPA^2*de+2*(1-PPA)*PPA*decov
  return(nusum/(sqrt(desum)))
}

##  Chi^2 test uses both case and control
chi2.pool=function(caco){
  tab=matrix(caco,nrow=2,byrow=T)
  te=apply(tab,2,sum)
  nte=sum(te)
  p1=te[2]/nte
  p2=te[3]/nte
  pa=p2+0.5*p1
  nu=p2-pa^2
  return(sqrt(nte)*(p2-pa^2)/((1-pa)*(pa)))
}





## Chi^2 test uses control only
chi2.control=function(caco){
  tab=matrix(caco,nrow=2,byrow=T)
  co=tab[2,]
  nco=sum(co)
  p1=co[2]/nco
  p2=co[3]/nco  
  pa=p2+0.5*p1
  nu=p2-pa^2
  return(sqrt(nco)*(p2-pa^2)/((1-pa)*(pa)))
}


## likelihood ratio test
lrt=function(caco){
  r0=caco[1]
  r1=caco[2]
  r2=caco[3]
  s0=caco[4]
  s1=caco[5]
  s2=caco[6]
  L1=function(x){
  gamma1=x[1]
  gamma2=x[2]
  p1=x[3]
  p2=x[4]
  p0=1-p1-p2
  f0=k/(p0+gamma1*p1+gamma2*p2)
  f1=gamma1*f0
  f2=gamma2*f0
  a0=r0*log(f0)+s0*log(1-f0)+(r0+s0)*log(p0)
  a1=r1*log(f1)+s1*log(1-f1)+(r1+s1)*log(p1)
  a2=r2*log(f2)+s2*log(1-f2)+(r2+s2)*log(p2)
  return(-(a0+a1+a2))
}
  L0=function(x){
  gamma1=x[1]
  gamma2=x[2]
  p=x[3]
  p0=(1-p)^2
  p1=2*(1-p)*p
  p2=p^2
  f0=k/(p0+gamma1*p1+gamma2*p2)
  f1=gamma1*f0
  f2=gamma2*f0
  a0=r0*log(f0)+s0*log(1-f0)+(r0+s0)*log(p0)
  a1=r1*log(f1)+s1*log(1-f1)+(r1+s1)*log(p1)
  a2=r2*log(f2)+s2*log(1-f2)+(r2+s2)*log(p2)
  return(-(a0+a1+a2))
}
  LRT1=-optim(c(1,1,0.42,0.09),L1,method="Nelder-Mead")$value
  LRT0=-optim(c(1,1,0.3),L0,method="Nelder-Mead")$value
  LRT=2*(LRT1-LRT0)
  return(LRT)
}



tempp=apply(caco,2,chi2.pool)
tempco=apply(caco,2,chi2.control)
templ=apply(caco,2,lrt)
tempsh=apply(caco,2,shrink,x=0.5)

resultp=mean(tempp^2>qchisq(0.95,1))
resultco=mean(tempco^2>qchisq(0.95,1))
resultl=mean(templ>qchisq(0.95,1))
resultsh=mean(tempsh^2>qchisq(0.95,1))

return(list("pool case control test"=resultp,"control only test"=resultco,"lrt"=resultl,"shrinkage test"=resultsh))
}


## Power/Type I error for the pool case-control Chi^2, control-only Chi^2, LRT and shrinkage test

HWD(k=0.01,gamma1=1,gamma2=1.5,MAF=0.3,er=0.05,type=1,PO=4,r=1000,s=1000,nsim=10000)