Please submit code along with solutions.

Question 1. (6 points)

Write a R (or python or matlab) function to compute the Breslow partial log likelihood for the Cox proportional hazards model (log of Equation 8.4.1 in textbook) for a one-dimensional parameter \(\beta\). This function should take \(\beta\) as well as the data as arguments. Test your function by finding the MLE on the kirc_small.RData data set using SLC7A11_ex as a predictor. You can use a grid search or an R optimizer such as optimize. Your MLE should match the parameter estimate here:

load("kirc_small.RData")
library(survival)
fit <- coxph(Surv(time,death_obs)~SLC7A11_ex,data=kirc_small,ties="breslow")
coef(fit)
##  SLC7A11_ex 
## 0.002459945

You may use the solution from the above code to guide your starting value for the grid search or optimize function.

Solution

## simplify variable names
ti <- kirc_small$time
del <- kirc_small$death_obs
z <- kirc_small$SLC7A11_ex

## put in time order, with time ties broken by censoring (deaths go first)
ords <- order(ti,1-del)
ti <- ti[ords]
del <- del[ords]
z <- z[ords]

## the log likelihood is
coxph_log_like <- function(beta,ti,del,z){
  ## standardize z and beta for numerical stability, not actually necessary
  sc <- sd(z)
  z <- (z - mean(z)) / sc
  beta <- beta*sc
  ## compute sum_j in R exp(beta z_j) for all deaths
  sumexp_betaz <- log(rev(cumsum(rev(exp(beta*z)))))[del==1]
  ## only consider deaths
  ti <- ti[del==1]
  z <- z[del==1]
  ## sum of covariates at each unique death time
  s <- aggregate(z,by=list(ti),FUN="sum")
  beta_s <- (beta*s[,2])
  ## find number of deaths at each time
  nd <- aggregate(z,by=list(ti),FUN="length")
  ## only keep first death time
  to_keep <- !duplicated(ti)
  return(sum(beta_s - nd[,2]*sumexp_betaz[to_keep]))
}

Now compute likelihood on grid centered at value found by coxph function. If you did not know this value, you would have to use a larger grid, adjusting the endpoints until you found a maximum.

gr <- seq(coef(fit)-.001,coef(fit)+.001,length.out=1000)
ll <- vapply(gr,function(x){coxph_log_like(x,ti,del,z)},c(0))

ix_mle <- which.max(ll)

plot(gr,ll,xlab="beta",ylab="log partial likelihood")
abline(v=gr[ix_mle],lwd=3)
abline(v=coef(fit),col='red')

## coefficient estimates match
gr[ix_mle]
## [1] 0.002458944
coef(fit)
##  SLC7A11_ex 
## 0.002459945
## likelihood at max match
max(fit$loglik)
## [1] -516.8141
max(ll)
## [1] -516.8141

Question 2. (8 points=2 points/part)

Complete Question 8.10 in textbook (on page 291).

Solution to a)

MTX appears to have limited to no effect on AGVHD time to infection.

library(KMsurv)
data(bmt)

bmt$MTX <- as.factor(bmt$z10)

fit <- coxph(Surv(ta,da)~z10,data=bmt,ties="breslow")

fit
## Call:
## coxph(formula = Surv(ta, da) ~ z10, data = bmt, ties = "breslow")
## 
##        coef exp(coef) se(coef)      z     p
## z10 -0.2990    0.7416   0.4655 -0.642 0.521
## 
## Likelihood ratio test=0.43  on 1 df, p=0.5099
## n= 137, number of events= 26
## relative risk point estimate
exp(coef(fit))
##      z10 
## 0.741584
fit$var
##           [,1]
## [1,] 0.2167283

A 95% confidence interval for relative risk is:

se <- sqrt(fit$var)
## Thus a 95% confidence interval is approximately:
exp(fit$coef + 2*se) ## upper
##          [,1]
## [1,] 1.881586
exp(fit$coef - 2*se) ## lower
##           [,1]
## [1,] 0.2922783

Solution to b)

Adjusting for disease does not change the fact that MTX appears to have limited to no effect on AGVHD time to infection.

disease <- c("ALL","AML Low","AML High")
bmt$disease <- as.factor(disease[bmt$group])
fit <- coxph(Surv(ta,da)~MTX+disease,data=bmt,ties="breslow")
fit
## Call:
## coxph(formula = Surv(ta, da) ~ MTX + disease, data = bmt, ties = "breslow")
## 
##                    coef exp(coef) se(coef)      z     p
## MTX1            -0.3543    0.7017   0.4749 -0.746 0.456
## diseaseAML High -0.6179    0.5391   0.5317 -1.162 0.245
## diseaseAML Low  -0.1591    0.8529   0.4581 -0.347 0.728
## 
## Likelihood ratio test=1.95  on 3 df, p=0.5838
## n= 137, number of events= 26

Solution to c)

There does not appear to be any interaction between MTX and disease group on AGVHD time to infection.

bmt$mtx_disease <- interaction(bmt$MTX,bmt$disease)
fit <- coxph(Surv(ta,da)~mtx_disease,data=bmt,ties="breslow")
fit
## Call:
## coxph(formula = Surv(ta, da) ~ mtx_disease, data = bmt, ties = "breslow")
## 
##                           coef exp(coef) se(coef)      z     p
## mtx_disease1.ALL      -0.39933   0.67077  0.70716 -0.565 0.572
## mtx_disease0.AML High -0.82119   0.43991  0.64553 -1.272 0.203
## mtx_disease1.AML High -0.45363   0.63532  0.81656 -0.556 0.579
## mtx_disease0.AML Low  -0.09784   0.90679  0.51650 -0.189 0.850
## mtx_disease1.AML Low  -1.08046   0.33944  1.08020 -1.000 0.317
## 
## Likelihood ratio test=3.01  on 5 df, p=0.6989
## n= 137, number of events= 26

Solution to d)

Forward stepwise selection results in Age and Disease being added to the model along with the covariate of primary interest, MTX.