library(KMsurv) ## 8.11 #data(pneumon) #str(pneumon) #summary(pneumon) pneumon$Z <- ifelse(pneumon$wmonth == 0, 0, 1) pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z, method="efron")) #hist(pneumon$chldage) #hist(pneumon$urban) #pneumon[,c("chldage","hospital")] tongue$type[tongue$type == 2] <- 0 ## a) ## Test hypothesis: ## Score test: pneumon.sum$sctest ## Likelihood test: pneumon.sum$logtest ## Wald test pneumon.sum$waldtest ## The 3 tests show that beta is significantly different from zero ## Beta coefficient estimation and hazard ratio: pneumon.sum$coef ## b) Adding other factors ## Mother age (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+pneumon$mthage, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Urban environment (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+pneumon$urban, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Alcohol consumption alcohol.cat <- as.factor(pneumon$alcohol) (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+alcohol.cat, method="efron"))) pneumon.sum$waldtest ## p-value =.013 Highly significant ## (we considered it a categorical variable. If considered a ratio variable ## it is highly significant ~=0.001) ## Cigarette smoke.cat <- as.factor(pneumon$smoke) (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+smoke.cat, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Region region.cat <- as.factor(pneumon$region) (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+region.cat, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Birth weight (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+pneumon$bweight, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Poverty (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+pneumon$poverty, method="efron"))) pneumon.sum$waldtest ## p-value ~=.001 Highly significant ## Race race.cat <- as.factor(pneumon$race) (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+race.cat, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant ## Siblings (pneumon.sum <- summary(coxph(Surv(pneumon$chldage,pneumon$hospital)~pneumon$Z+pneumon$nsibs, method="efron"))) pneumon.sum$waldtest ## p-value <.001 Highly significant #c# Model selection via AIC #str(pneumon.sum) pneumon$alcohol.cat <- as.factor(pneumon$alcohol) completemodel<-coxph(Surv(pneumon$chldage,pneumon$hospital)~ pneumon$Z+ pneumon$mthage+ pneumon$urban+ alcohol.cat+ smoke.cat+ region.cat+ pneumon$bweight+ pneumon$poverty+ race.cat+ pneumon$nsibs, method="efron") summary(completemodel) model_aic<-step(completemodel) summary(model_aic) ############################# #9.4 Bone marrow transplant data(bmt) time<-bmt$t2 status<-bmt$d2 attach(bmt) id<-1:length(status) ##Coding the data to consider time dependant variables time_coding<-function(i){ dataset<-matrix(0,ncol=7+11,nrow=1+da[i]+dc[i]) dataset[,1]<-i if(ta[i]>t2[i]) da[i]=0 #If so the covariate change after the event (relapsing) if(tc[i]>t2[i]) dc[i]=0 #happened if(da[i]+dc[i]==2){ start0<-0 index<-ifelse(ta[i]=1975") survival<-Surv(larynx$time,larynx$delta) stageII<-ifelse(larynx$stage==2,1,0) stageIII<-ifelse(larynx$stage==3,1,0) stageIV<-ifelse(larynx$stage==4,1,0) model <- coxph(survival~stageII+stageIII+stageIV+larynx$age+strata(stratum), method="breslow") summary(model)$coef #When stratifying, stages III and IV turn out to be significant #In contrast, when not stratifying (table 8.1 in the textbook) #just stage IV is significant. The last result is erroneous because the in such a case the proportional hazard rates assumption does not hold. # #b# Likelihood ratio test effects of the stage the same accross strata model$loglik #-2log(likelihood(same_coefiicents)) (loglike_samecoefficients<-2*model$loglik[2]) #-2log(likelihood(different_coefiicents)) model_stratum1<-coxph(survival~stageII+stageIII+stageIV+larynx$age,subset=which(stratum=="<1975"), method="breslow") model_stratum2<-coxph(survival~stageII+stageIII+stageIV+larynx$age,subset=which(stratum==">=1975"), method="breslow") (loglike_diffcoefficients<-2*(model_stratum1$loglik[2]+model_stratum2$loglik[2])) # Chi square test #Statistics (xi<-loglike_diffcoefficients-loglike_samecoefficients) #P-value of the likelihood test (1-pchisq(xi,4)) #c# The same using tje Wald test #models with different covariates in each strata s<-ifelse(stratum=="<1975",0,1) n<-length(stageII) covariates<-matrix(0,nrow=n,ncol=8) covariates<-matrix(0,nrow=n,ncol=8) for(i in 1:n) covariates[i,(s[i]*4+1):((s[i]+1)*4)]=c(stageII[i],stageIII[i],stageIV[i],larynx$age[i]) model_strata<- coxph(survival~covariates+strata(stratum),method="breslow") contrast=rbind(c(1,0,0,0,-1,0,0,0), c(0,1,0,0,0,-1,0,0), c(0,0,1,0,0,0,-1,0), c(0,0,0,1,0,0,0,-1) ) contrast_beta_0 <-matrix(0,4) beta <-model_strata$coefficients contrast_beta <-contrast%*%beta #Wald statistic (wald_stat <-t(contrast_beta-contrast_beta_0)%*% solve(contrast%*%model_strata$var%*%t(contrast))%*% (contrast_beta-contrast_beta_0) ) #pvalue (1-pchisq(wald_stat,4)) #There is no evidence to reject H_0. That is, we can assume that the #coefficients are the same in both strata.