forked from ruthkeogh/causal_sim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_TRUE_VALUES_1.R
150 lines (118 loc) · 6.83 KB
/
analysis_TRUE_VALUES_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#------------------------------------------------------
#------------------------------------------------------
# analysis_TRUE_VALUES_1.R
# - Obtains (estimated) true values of the cumulative coefficients of the correctly-specified MSM,
# - Obtains (estimated) true values of survival probabilities under the treatment regimes 'always treated' and 'never treated',
# using the large trial data generated in dat_sim_TRUE_VALUES_1.R.
# Created by Ruth Keogh
#------------------------------------------------------
#------------------------------------------------------
#----
#set seed
set.seed(17422)
#--------------------------
#times at which we obtain estimates of cumulative coefficients and survival probabilities
#--------------------------
t.hor=seq(0,5,0.01)
#--------------------------
#analysis of n.sim large trials
#--------------------------
n.sim.true=100
#----
#store results
coef_0_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
coef_A_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
coef_Alag1_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
coef_Alag2_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
coef_Alag3_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
coef_Alag4_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
surv1_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
surv0_true=matrix(nrow=n.sim.true,ncol=length(t.hor))
#----
#start of simulation loop
for(i in 1:n.sim.true){
print(i)
#-----
#simulate data using dat_sim_TRUE_VALUES_1
source("dat_sim_TRUE_VALUES_1.R")
#-----
#Fit the correctly specified MSM
#No inverse probability weights are needed because there is no time-dependent confounding here
#Note that the coefficient for Alag1 is 0 for t<=1; the coefficient for Alag2 is 0 for t<=2, etc.
#The aalen function doesn't like this, and if we try and fit a single model for 0<t<=5 it only gives estimates for 4<t<=5, where all coefficients are non-zero
#Hence we fit the model separately in a series of time periods: 0<t<=1, 1<t<=2,..., 4<t<=5
ah.p0=aalen(Surv(time,time.stop,event)~A,data=dat.long[dat.long$time==0,],n.sim=0)
ah.p1=aalen(Surv(time,time.stop,event)~A+Alag1,data=dat.long[dat.long$time==1,],n.sim=0)
ah.p2=aalen(Surv(time,time.stop,event)~A+Alag1+Alag2,data=dat.long[dat.long$time==2,],n.sim=0)
ah.p3=aalen(Surv(time,time.stop,event)~A+Alag1+Alag2+Alag3,data=dat.long[dat.long$time==3,],n.sim=0)
ah.p4=aalen(Surv(time,time.stop,event)~A+Alag1+Alag2+Alag3+Alag4,data=dat.long[dat.long$time==4,],n.sim=0)
#-----
#Create a step function for each estimated cumulative coefficient, enabling us to obtain the estimated cumulative coefficient at any time
maxrow.p0=dim(ah.p0$cum)[1]
maxrow.p1=dim(ah.p1$cum)[1]
maxrow.p2=dim(ah.p2$cum)[1]
maxrow.p3=dim(ah.p3$cum)[1]
maxrow.p4=dim(ah.p4$cum)[1]
ah.stepfun.0=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,ah.p0$cum[,2],
ah.p0$cum[maxrow.p0,2]+ah.p1$cum[-1,2],
ah.p0$cum[maxrow.p0,2]+ah.p1$cum[maxrow.p1,2]+ah.p2$cum[-1,2],
ah.p0$cum[maxrow.p0,2]+ah.p1$cum[maxrow.p1,2]+ah.p2$cum[maxrow.p2,2]+ah.p3$cum[-1,2],
ah.p0$cum[maxrow.p0,2]+ah.p1$cum[maxrow.p1,2]+ah.p2$cum[maxrow.p2,2]+ah.p3$cum[maxrow.p3,2]+ah.p4$cum[-1,2]))
ah.stepfun.A=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,ah.p0$cum[,3],
ah.p0$cum[maxrow.p0,3]+ah.p1$cum[-1,3],
ah.p0$cum[maxrow.p0,3]+ah.p1$cum[maxrow.p1,3]+ah.p2$cum[-1,3],
ah.p0$cum[maxrow.p0,3]+ah.p1$cum[maxrow.p1,3]+ah.p2$cum[maxrow.p2,3]+ah.p3$cum[-1,3],
ah.p0$cum[maxrow.p0,3]+ah.p1$cum[maxrow.p1,3]+ah.p2$cum[maxrow.p2,3]+ah.p3$cum[maxrow.p3,3]+ah.p4$cum[-1,3]))
ah.stepfun.Alag1=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,rep(0,dim(ah.p0$cum)[1]),
ah.p1$cum[-1,4],
ah.p1$cum[maxrow.p1,4]+ah.p2$cum[-1,4],
ah.p1$cum[maxrow.p1,4]+ah.p2$cum[maxrow.p2,4]+ah.p3$cum[-1,4],
ah.p1$cum[maxrow.p1,4]+ah.p2$cum[maxrow.p2,4]+ah.p3$cum[maxrow.p3,4]+ah.p4$cum[-1,4]))
ah.stepfun.Alag2=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,rep(0,dim(ah.p0$cum)[1]),
rep(0,dim(ah.p1$cum)[1]-1),
ah.p2$cum[-1,5],
ah.p2$cum[maxrow.p2,5]+ah.p3$cum[-1,5],
ah.p2$cum[maxrow.p2,5]+ah.p3$cum[maxrow.p3,5]+ah.p4$cum[-1,5]))
ah.stepfun.Alag3=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,rep(0,dim(ah.p0$cum)[1]),
rep(0,dim(ah.p1$cum)[1]-1),
rep(0,dim(ah.p2$cum)[1]-1),
ah.p3$cum[-1,6],
ah.p3$cum[maxrow.p3,6]+ah.p4$cum[-1,6]))
ah.stepfun.Alag4=stepfun(c(ah.p0$cum[,1], ah.p1$cum[-1,1],ah.p2$cum[-1,1],ah.p3$cum[-1,1],ah.p4$cum[-1,1]),
c(0,rep(0,dim(ah.p0$cum)[1]),
rep(0,dim(ah.p1$cum)[1]-1),
rep(0,dim(ah.p2$cum)[1]-1),
rep(0,dim(ah.p3$cum)[1]-1),
ah.p4$cum[-1,7]))
#----
#estimated cumulative coefficients from the MSM at times t.hor
coef_0_true[i,]=ah.stepfun.0(t.hor)
coef_A_true[i,]=ah.stepfun.A(t.hor)
coef_Alag1_true[i,]=ah.stepfun.Alag1(t.hor)
coef_Alag2_true[i,]=ah.stepfun.Alag2(t.hor)
coef_Alag3_true[i,]=ah.stepfun.Alag3(t.hor)
coef_Alag4_true[i,]=ah.stepfun.Alag4(t.hor)
#----
#estimated survival probabilities at times t.hor in the 'always treated' (surv1) and in the 'never treated' (surv0)
surv1_true[i,]=exp(-(ah.stepfun.0(t.hor)+ah.stepfun.A(t.hor)+ah.stepfun.Alag1(t.hor)+ah.stepfun.Alag2(t.hor)+ah.stepfun.Alag3(t.hor)+ah.stepfun.Alag4(t.hor)))
surv0_true[i,]=exp(-(ah.stepfun.0(t.hor)))
}
#end of simulation loop
#----
#--------------------------
#True values of cumulative coefficients and survival probabilities
#Taken to be the average of the n.sim.true estimates from the large trials
#--------------------------
coef_0_true=colMeans(coef_0_true)
coef_A_true=colMeans(coef_A_true)
coef_Alag1_true=colMeans(coef_Alag1_true)
coef_Alag2_true=colMeans(coef_Alag2_true)
coef_Alag3_true=colMeans(coef_Alag3_true)
coef_Alag4_true=colMeans(coef_Alag4_true)
surv1_true=colMeans(surv1_true)
surv0_true=colMeans(surv0_true)