/*
   Cox regression analysis of the BMT data with time-dependent covariates.

   Steps:
     1. Read the raw data file into the permanent data set t.bmt.
     2. Summarize categorical and continuous variables.
     3. Fit a sequence of PROC PHREG models (Model 1 - Model 5) for the
        outcome (t2, d2) using the time-dependent indicators
        acute, chronic and both.

   Maintenance notes:
     - This file previously contained the whole script twice; the second,
       identical copy has been removed so each step runs only once.
     - The text between "data p" and the "Model 3" programming statements
       had been truncated (everything between a "<" and the next ">" was
       lost).  That section is reconstructed below and is marked with
       NOTE(review); please confirm it against the original course notes.
*/

libname t "c:\teaching";
options nodate nonumber ls=100 ps=55 error=2;
title;

/* Note: SAS does NOT distinguish lower and upper case in variable names,
   so "TA" and "ta" are the same variable. */
data t.bmt;
   infile 'c:\teaching\bmt.dat';
   input g T1 T2 d1 d2 d3 TA A TC C TP P
         Z1 Z2 Z3 Z4 Z5 Z6 Z7 Z8 Z9 Z10;
run;

/* Make sure the data is read correctly. */
/* Commented out to save output space.   */
/*
proc print data=t.bmt;
run;
*/

/* It is always a good idea to have a summary look at the data to check
   a few things:
     1. For categorical variables (disease group and hospital in this
        example), make sure each category has a sufficient sample size
        to do the analysis.
     2. For continuous variables, check their distributions.
     3. Pay attention to missing data. */

/* Use ODS to generate HTML output. */
ods html;

title "BMT data";
proc freq data=t.bmt;
   table g d1 d2 d3 A C P Z3 Z4 Z5 Z6 Z8 Z9 Z10;
run;

/* From the output above, three hospitals have fewer than 30 patients each,
   so it is not good to use a model stratified by hospital.  We can use
   hospital as a categorical covariate instead. */

/* PROC UNIVARIATE would give more detail than PROC MEANS. */
proc means data=t.bmt;
   var T1 T2 TA TC TP Z1 Z2 Z7;
run;

/* A simple model: include only the three time-dependent covariates. */
title "Model 1";
proc phreg data=t.bmt;
   /* rl gives confidence intervals for the hazard ratios */
   model t2*d2(0) = acute chronic both / rl;
   /* Each indicator is 1 only if the event occurred (a or c) and its
      time (ta or tc) is not later than the time t2 being evaluated. */
   acute   = ((t2 >= ta) and a);
   chronic = ((t2 >= tc) and c);
   both    = acute*chronic;
run;

/* Some of you forgot to use the indicators a and c as above.  However,
   since all the observations with a=0 have ta=t2, and all (except 2
   records with error) with c=0 have tc=t2, the following code should
   also be fine. */
title "Model 2";
proc phreg data=t.bmt;
   model t2*d2(0) = acute chronic both / rl;
   acute   = (t2 >= ta);
   chronic = (t2 >= tc);
   both    = acute*chronic;
run;

/* NOTE(review): the original subsetting condition was truncated (it began
   with "t2<" and the rest was lost).  The condition below is reconstructed
   from the comment above: it selects the records with c=0 whose tc differs
   from t2.  Confirm against the original. */
data p;
   set t.bmt;
   if c = 0 and tc ne t2;
run;

/* NOTE(review): assumed listing step; data set p is not used anywhere else
   in the surviving text. */
proc print data=p;
run;

/* NOTE(review): the title, PROC statement and MODEL statement of Model 3
   were lost in the same truncation; only the tail of its programming
   statements survived.  It is reconstructed here as Model 4 plus the
   hospital indicators h2-h4, which matches the "reduce some variables"
   remark on Model 4.  Confirm against the original. */
title "Model 3";
proc phreg data=t.bmt;
   model t2*d2(0) = g2 g3 z1 z2 z3 z4 z5 z6 z7 z8 z10 h2 h3 h4
                    acute chronic both / risklimits;
   /* indicator variables for disease group (g) and hospital (z9) */
   g2 = (g=2);
   g3 = (g=3);
   h2 = (z9=2);
   h3 = (z9=3);
   h4 = (z9=4);
   acute   = ((t2 >= ta) and a);
   chronic = ((t2 >= tc) and c);
   both    = acute*chronic;
run;

/* The output for the above model has a problem for Z10, probably because
   it is perfectly correlated with a combination of other covariates. */

/* Reduce some variables: the hospital indicators are left out. */
title "Model 4";
proc phreg data=t.bmt;
   model t2*d2(0) = g2 g3 z1 z2 z3 z4 z5 z6 z7 z8 z10
                    acute chronic both / risklimits;
   g2 = (g=2);
   g3 = (g=3);
   acute   = ((t2 >= ta) and a);
   chronic = ((t2 >= tc) and c);
   both    = acute*chronic;
run;

/* The first 5 variables in the list are forced into the model
   (include=5).  The first 3 are our main interest.  If one of g2 and g3
   is significant then we must include both of them, since they come from
   one variable (disease group). */
title "Model 5 by stepwise selection";
proc phreg data=t.bmt;
   model t2*d2(0) = acute chronic both g2 g3
                    z1 z2 z3 z4 z5 z6 z7 z8 z10 h2 h3 h4
                    / risklimits selection=stepwise include=5;
   g2 = (g=2);
   g3 = (g=3);
   h2 = (z9=2);
   h3 = (z9=3);
   h4 = (z9=4);
   acute   = ((t2 >= ta) and a);
   chronic = ((t2 >= tc) and c);
   both    = acute*chronic;
run;

title;
ods html close;