-
Notifications
You must be signed in to change notification settings - Fork 0
/
WKMEDS OSM_Section 13_longitudinal data examples.R
460 lines (324 loc) · 17.6 KB
/
WKMEDS OSM_Section 13_longitudinal data examples.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
#############################################################################
##### ICES Workshop on Methods for Estimating Discard Survival (WKMEDS) #####
##### Online Supporting Materials for the Cooperative Research Report #####
##### #####
##### Section 13.2 - Analysis of Longitudinal Data #####
##### #####
##### Version 2.1 1st July 2021 #####
#############################################################################
## Original code by H Benoît 2014-12-08, revised 2016-06-07, 2020-12-17
## Edited by M Breen 2021-06-24 and 2012-07-01
# This code supports the examples presented in section 13.2 of
# the WKMEDS Cooperative Research Report (CRR).
### -------------------------------------------------------------------- ###
## Prep & Required Packages
rm(list=ls())
library(survival)
library(MASS)
## Set Working Directory
# This code is written assumming the user is running it and the supporting data from a working directory.
setwd("E:/IMR/UM/ICES WKMEDS/CRR Report/WKMEDS CRR Final Edit/Online Support Materials/Chapter_13_Modelling Survival Data/Longitudinal data examples")
#!=========================================================================!#
#!======= Section 13.2.2 ======!#
#!======= Non-parametric method: Kaplan-Meier survival model ======!#
#!=========================================================================!#
#!=========================================================================!#
#!==== Info Box 13.5 - Kaplan-Meier (KM) model - Worked example =====!#
#!=========================================================================!#
# This worked example presents Kaplan-Meier analysis of the mortality data for
# cod in excellent and poor condition (i.e. vitality), from Benoît et al. (2012)
### Read in data
cod=read.table("sGSL_cod.csv", header=TRUE,sep=",")
## select the results for the two condition codes of interest
codKM=cod[cod$condition %in% c(1,3),]
### Estimate the Kaplan-Meier survival functions
km_est <- survfit(Surv(hours, (1-censored)) ~ condition, data=codKM, type="kaplan-meier")
# note - the function uses an indicator for survival rather than for censoring
# hence the use of (1-censored)
## log-rank test for differences between groups
survdiff(Surv(hours, (1-censored)) ~ condition, data=codKM)
## Wilcoxon test for differences between groups
survdiff(Surv(hours, (1-censored)) ~ condition, data=codKM,rho=1)
### -------------------------------------------------------------------- ###
### Plot the KM survival functions
# windows()
# tiff("Figure 13_7.tif", width = 4, height = 4, units = "in", res = 600, pointsize = 10, compression="lzw")
plot(km_est, main="Kaplan-Meier estimate for Excellent (black) and Poor (grey) vitality classes",lty=2,cex=1,cex.main=0.5,
xlab="Hours", ylab="Survival function", col= c("black","darkgrey"))
## Plot the confidence intervals
# first create a matrix to index the correct rows to use from the km_est object
rm=matrix(nrow=2,ncol=2)
rm[1,1]=1
rm[1,2]=as.numeric(km_est$strata[1])
rm[2,1]=rm[1,2]+1
rm[2,2]=rm[1,2]+as.numeric(km_est$strata[2])
censor=km_est$n.censor
censor[censor>1]=1
## get the data from those rows and plot as a semi-transparent polygon
cond=1
km_est1=as.data.frame(cbind(time=km_est$time[rm[cond,1]:rm[cond,2]],upper=km_est$upper[rm[cond,1]:rm[cond,2]],lower=km_est$lower[rm[cond,1]:rm[cond,2]]))
with(km_est1,polygon(c(time, time[length(time):1]), c(lower, upper[length(time):1]), col = rgb(0, 0, 0, alpha = 0.35), border = NA))
points(km_est$time[c(rm[cond,1]:rm[cond,2])],km_est$surv[c(rm[cond,1]:rm[cond,2])],cex=censor[c(rm[cond,1]:rm[cond,2])],pch=3,col='black')
cond=2
km_est2=as.data.frame(cbind(time=km_est$time[rm[cond,1]:rm[cond,2]],upper=km_est$upper[rm[cond,1]:rm[cond,2]],lower=km_est$lower[rm[cond,1]:rm[cond,2]]))
with(km_est2,polygon(c(time, time[length(time):1]), c(lower, upper[length(time):1]), col = rgb(0.5, 0.5, 0.5, alpha = 0.35), border = NA))
points(km_est$time[c(rm[cond,1]:rm[cond,2])],km_est$surv[c(rm[cond,1]:rm[cond,2])],cex=censor[c(rm[cond,1]:rm[cond,2])],pch=3,col='darkgrey')
# dev.off()
### -------------------------------------------------------------------- ###
#!=========================================================================!#
#!======= Semi-parametric method: Cox proportional hazards model ======!#
#!======= Section 13.2.3 ======!#
#!=========================================================================!#
#!=========================================================================!#
#!==== Info Box 13.6 - The Cox proportional hazards (CPH) model =====!#
#!==== worked example =====!#
#!=========================================================================!#
# Cox proportional-hazards analysis of the mortality data for cod
# in excellent and poor condition (i.e. vitality) from Benoît et al. (2012)
### Estimate the predicted survival functions for the two condition classes
coxfit=coxph(Surv(hours, (1-censored)) ~ condition, data=codKM)
summary(coxfit)
## test proportional hazards assumption
cox.zph(coxfit)
# => not significant p=0.057, therefore the proportional hazards assumption is valid.
### -------------------------------------------------------------------- ###
### Plot the CPH survival functions
# windows()
# tiff("Figure 13_8.tif", width = 4, height = 4, units = "in", res = 600, pointsize = 10, compression="lzw")
coxpred1=survfit(coxfit, newdata=data.frame(condition=3))
plot(coxpred1,col="darkgrey",main="Cox prop. hazards model estimate for Excellent (black) and Poor (grey) vitality classes",cex.main=0.5,xlab="Hours", ylab="Survival function")
censor=coxpred1$n.censor
censor[censor>1]=1
points(coxpred1$time,coxpred1$surv,cex=censor,pch=3,col='darkgrey')
coxpred2=survfit(coxfit, newdata=data.frame(condition=1))
lines(coxpred2,col="black")
censor=coxpred2$n.censor
censor[censor>1]=1
points(coxpred2$time,coxpred2$surv,cex=censor,pch=3,col='black')
# dev.off()
### -------------------------------------------------------------------- ###
#!=========================================================================!#
#!======= Parametric modelling methods ======!#
#!======= Section 13.2.4 ======!#
#!=========================================================================!#
#!=========================================================================!#
#!==== Info Box 13.7 - The Weibull model - Worked example =====!#
#!=========================================================================!#
### estimate the model parameters
(sr=survreg(Surv(hours, (1-censored)) ~ condition, data=codKM,dist="weibull"))
sr$coefficients
sr$scale
## calculate AICc
AIC=-sr$loglik[2]*2 + 2*sr$df
AICc=AIC + (2*sr$df*(sr$df+1))/(sr$df.residual-1)
### use the estimates to plot the survival functions
## Weibull cumulative density function
dweibull=function(x, theta, omega){
G=1 - exp(-((theta*x)^omega))
return(G)
}
### Plot the results
xx=seq(0, 110, len = 110)
theta1=exp(-sr$coefficients[1])
theta2=exp(-(sr$coefficients[1]+sr$coefficients[2]))
omega=sr$scale
## Plot curves for each injury code.
# NOTE - this model does not fit the previously plotted KM estimates well!
# windows()
# tiff("Figure 13_10.tif", width = 4, height = 4, units = "in", res = 600, pointsize = 10, compression="lzw")
plot(xx, (1-dweibull(xx, theta1, omega)), type = "l", lwd = 2, col = "black",ylim=c(0,1),main="Weibull model estimate for Excellent (black) and Poor (grey) vitality classes",cex.main=0.5,xlab="Hours", ylab="Survival function")
lines(xx, (1-dweibull(xx, theta2, omega)), lwd = 2, col = "grey")
# dev.off()
### -------------------------------------------------------------------- ###
#!=========================================================================!#
#!==== Info Box 13.8 - Mixed distribution model - Worked example =====!#
#!=========================================================================!#
# an example of model fit for a mixed distribution model (aka Cure-rate model)
# using the optim function for a Weibul mixture model with vitality effects on theta and pi
## define the required variables
x=codKM$hours
y=codKM$censored
X=codKM$cond1 #dummy variable for vitality
## likelihood function used in the optimization
loglike=function(parms0, x, X, y){
# Parse parameter vector:
omega=exp(parms0[1])
beta=parms0[2:5]
linpred=beta[1] + beta[2]*X
theta=exp(-linpred)
# Linear predictor for mixture
mu=beta[3] + beta[4]*X
# Calculate survivor group probability from linear model:
pi=1/(1+exp(-mu))
# Calculate censored survival for each group:
G_t=exp(-(theta*x)^omega)
# Calculate density for each uncensored group:
g=(omega * theta) * ((x*theta)^(omega-1)) * G_t
# Calculate neg log-likelihood values:
ll=-(((1-y)*(log(pi)+log(g))) + (y*log(1-pi + pi*G_t)))
# Return sum of log-likelihood values:
return(sum(ll))
}
## Define initial parameter vector:
parms0=c(log(1.0), 0, 0, 0,0)
## Calculate test log-likelhood value for initial parameter vector to ensure the functions are defined
loglike(parms0, x, X, y)
## Perform minimization of neg log likelihood:
temp=optim(parms0, loglike, x = x, X = X, y = y,hessian=TRUE, control = list(fnscale = 1, trace = 3, maxit = 1500))
parms0=temp$par #parameters
parms=cbind(parms0, SE= sqrt(diag(solve(temp$hessian))) ) #parameters and standard errors
AIC=temp$value*2 + 2*nrow(parms)
AICc=AIC + (2*nrow(parms)*(nrow(parms)+1))/(length(y)-nrow(parms)-1)
### Plot results:
## 1.extract parameter estimates
xx=seq(0, 150, len = 150)
omega=exp(parms0[1])
X1=matrix(c(1,1,1,0),2,2,byrow=T)
linpred <- X1 %*% parms0[2:3]
theta=exp(-linpred)
mu <- X1 %*% parms0[4:5]
pi=1/(1+exp(-mu))
## calculate confidence intervals on pi
cov=solve(temp$hessian)
mu.se <- sqrt(diag(X1 %*% cov[4:5,4:5] %*% t(X1)))
pi.uci <- exp(mu + 1.96 *mu.se) / (1 + exp(mu + 1.96 *mu.se))
pi.lci <- exp(mu - 1.96 *mu.se) / (1 + exp(mu - 1.96 *mu.se))
## 2. plot the KM estimates to compare model fit
km_est <- survfit(Surv(hours, (1-censored)) ~ condition, data=codKM) #not that the function uses an indicator for survival rather than for censoring hence the use of (1-censored)
# windows()
# tiff("Figure 13_11.tif", width = 4, height = 4, units = "in", res = 600, pointsize = 10, compression="lzw")
plot(km_est, main="KM estimate (dash) and mixture model fit (solid) by vitality class",lty=2,cex=1,cex.main=0.5,
xlab="Hours", ylab="Survival function", col= c("black","darkgrey"))
rm=matrix(nrow=2,ncol=2)
rm[1,1]=1
rm[1,2]=as.numeric(km_est$strata[1])
rm[2,1]=rm[1,2]+1
rm[2,2]=rm[1,2]+as.numeric(km_est$strata[2])
cond=1
km_est1=as.data.frame(cbind(time=km_est$time[rm[cond,1]:rm[cond,2]],upper=km_est$upper[rm[cond,1]:rm[cond,2]],lower=km_est$lower[rm[cond,1]:rm[cond,2]]))
with(km_est1,polygon(c(time, time[length(time):1]), c(lower, upper[length(time):1]), col = rgb(0, 0, 0, alpha = 0.35), border = NA))
cond=2
km_est2=as.data.frame(cbind(time=km_est$time[rm[cond,1]:rm[cond,2]],upper=km_est$upper[rm[cond,1]:rm[cond,2]],lower=km_est$lower[rm[cond,1]:rm[cond,2]]))
with(km_est2,polygon(c(time, time[length(time):1]), c(lower, upper[length(time):1]), col = rgb(0.5, 0.5, 0.5, alpha = 0.35), border = NA))
## 3. Plot curves for each injury code:
# Weibull cumulative density function
dweibull=function(x, theta, omega){
G=1 - exp(-(theta*x)^omega)
return(G)
}
points(xx, pi[1]*(1-dweibull(xx, theta[1], omega))+1-pi[1] , type = "l", lwd = 2, col = "black")
points(xx, pi[2]*(1-dweibull(xx, theta[2], omega))+1-pi[2] , type = "l", lwd = 2, col = "darkgrey")
# dev.off()
# tiff("cure rate 2.tif", width = 4.5, height = 4.5, units = "in", res = 600, pointsize = 10, compression="lzw")
plot(xx, pi[1]*(1-dweibull(xx, theta[1], omega))+1-pi[1] , type = "l", lwd = 2,ylim=c(0,1),xlab="Time",ylab='Survival probability')
points(xx, pi[2]*(1-dweibull(xx, theta[2], omega))+1-pi[2] , type = "l", lwd = 2, col = "red")
# dev.off()
### -------------------------------------------------------------------- ###
#!=========================================================================!#
#!==== Section 13.2.4.3 Generalized model for discard mortality data =====!#
#!=========================================================================!#
# A generalized discard mortality model applied to data from Capizzano et al (2016; ICES J Mar Sci)
# Mixture-ditribution model with capture-handling mortality and natural mortality
# No effects of covariates
### Read in data
dat=read.csv("Capizzano et al 2016 cod.csv") #data from Capizzano et al (2016)
### Kaplan-Meier plot
# windows()
# tiff("Fig_all fish.tif", width = 4, height = 4, units = "in", res = 600, pointsize = 10, compression="lzw")
maxy=100
km_est <- survfit(Surv(event.time, (1-right.censor)) ~ 1, data=dat)
with(km_est,plot(time,surv,xlim=c(0,maxy),ylim=c(0.5,1),xaxs="i",yaxs="i", pch=1,lty=2,cex=1,xlab="Days", ylab="Survival probability", col= c("darkgrey")))
with(km_est,lines(time,surv,lty=1,col="darkgrey"))
with(km_est,polygon(c(time[time<=maxy], time[length(time[time<=maxy]):1]), c(lower[time<=maxy], upper[length(time[time<=maxy]):1]), col = rgb(0, 0, 0, alpha = 0.1), border = NA))
#dev.off()
### subset the required data
x=dat$event.time
y=dat$right.censor
lcensor=dat$left.censor
### Define functions for model fitting
## Weibull density function
pweibull=function(x, theta, omega){
G=exp(-(theta*x)^omega)
g=(omega * theta) * ((x*theta)^(omega-1)) * G
return(g)
}
## Weibull cumulative density function
dweibull=function(x, theta, omega){
G=1 - exp(-((theta*x)^omega))
return(G)
}
## log-likelihood function for the generalized discard mortality model
loglike=function(parms0, x, y){
# Parse parameter vector:
theta=exp(-parms0[1])
omega=exp(parms0[2])
nu=parms0[3]
mu=parms0[4]
M=exp(parms0[5])
# Calculate capture mort probability:
tau=1/(1+exp(-nu))
# Calculate survivor group probability from linear model:
pi=1/(1+exp(-mu))
# Calculate censored survival for each group:
G1 = tau*(exp(-(M*x/365)-(theta*x)^omega))
G2 = tau*exp(-M*x/365)
# Calculate density for each uncensored group:
g1 <- (((omega * theta) * ((x*theta)^(omega-1))) + (M/365)) * G1
g2 <- (M/365) * G2
# Calculate mixture density for uncensored group:
h <- (pi * g1 + (1-pi) * g2)
# Calculate mixture probability for right-censored group:
H <- (pi * G1 + (1-pi) * G2)
# Calculate mixture probability for left-censored group:
H2 <- pi *(1-G1) + (1-pi) *(1-G2)
# Calculate log-likelihood values:
ll <- -(1-y)*(1-lcensor)*log(h) - y*log(H) - lcensor*log(H2)
# Return sum of log-likelihood values:
return(sum(ll))
}
### Model fitting routine
## Define initial parameter vector:
parms0=c(0.1, 0, 0, 0,log(0.2))
## Calculate test log-likelhood value for initial parameter vector:
loglike(parms0, x, y)
## Perform minimization of negative log-likelihood:
temp=optim(parms0, loglike, x = x, y = y,hessian=TRUE, control = list(fnscale = 1, trace = 3, maxit = 1500))
parms0=temp$par
parms=cbind(parms0, SE= sqrt(diag(solve(temp$hessian))) )
AIC=temp$value*2 + 2*nrow(parms)
(AICc=AIC + (2*nrow(parms)*(nrow(parms)+1))/(length(y)-nrow(parms)-1))
## estimate survival and 95%CI
cov=solve(temp$hessian)
library(MASS)
parmrand=mvrnorm(1000,mu=parms0,cov)
## Natural mortality
(M=exp(parms0[5]))
(M_CI=quantile(exp(parmrand[,5]),c(0.025,0.5,0.975)))
## capture and handling mortality (CH), 1-tau
(CHmort=1-(1/(1+exp(-(parms0[3])))))
(CI_CHmort=quantile(1-(1/(1+exp(-(parmrand[,3])))),c(0.025,0.5,0.975)))
## conditional post-release mortality (R), tau*pi
(Rmort=(1/(1+exp(-(parms0[3]))))*(1/(1+exp(-(parms0[4])))))
(CI_Rmort=quantile((1/(1+exp(-(parmrand[,3]))))*(1/(1+exp(-(parmrand[,4])))),c(0.025,0.5,0.975)))
## total CHR mortality
(total=1-1/(1+exp(-(parms0[3])))+(1/(1+exp(-(parms0[4]))))*(1/(1+exp(-(parms0[3])))))
(CI_total=quantile(1- (1/(1+exp(-(parmrand[,3])))) +(1/(1+exp(-(parmrand[,3]))))*(1/(1+exp(-(parmrand[,4])))),c(0.025,0.5,0.975)))
### Plot model fit against the Kaplan-Meier fit
## define plotting parameters
xmax=100
xx=seq(0, xmax, len = xmax+1)
theta=exp(-parms0[1])
omega=exp(parms0[2])
tau=1/(1+exp(-(parms0[3])))
pi=1/(1+exp(-(parms0[4])))
## Plot
# windows()
# tiff("capiz.tif", width = 4.5, height = 4.5, units = "in", res = 600, pointsize = 10, compression="lzw")
with(km_est,plot(time,surv,xlim=c(0,maxy),ylim=c(0.5,1),xaxs="i",yaxs="i", pch=1,lty=2,cex=1,xlab="Days", ylab="Survival probability", col= c("darkgrey")))
with(km_est,lines(time,surv,lty=1,col="darkgrey"))
with(km_est,polygon(c(time[time<=maxy], time[length(time[time<=maxy]):1]), c(lower[time<=maxy], upper[length(time[time<=maxy]):1]), col = rgb(0, 0, 0, alpha = 0.1), border = NA))
points(xx, tau*(pi*(1-dweibull(xx, theta, omega))+1-pi)*(exp(-M*xx/365)) , type = "l", lwd = 2, col = "black")
legend(10,0.65,c("Kaplan-Meier","Model fit (max likelihood estimate)"),bt="n",cex=0.8,col=c("grey","black"),lty=c(1,1),lwd=3)
# dev.off()
### -------------------------------------------------------------------- ###
### -------------------------------------------------------------------- ###