-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMETABRIC_CV_Genes.r
83 lines (64 loc) · 3.14 KB
/
METABRIC_CV_Genes.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
library(caret)
library(glmnet)
library(survival)
library(doParallel)
no_cores <- detectCores()
print(no_cores)
cl <- makeCluster(no_cores, type="FORK")
registerDoParallel(cl)
load("./../Output/metabric_er.RData")
source("Useful_functions.R")
# Create data splits for 50 repeats 10-fold CV
testindex <- list()
for (i in 1:50) {
set.seed(2020 + i)
testindex[[i]] <- createFolds(metabric.annotV2$OS_EVENT, k = 10)
}
# 50 rep 10-fold CV
MB_Cox_cv_results <- list()
MB_Cox_cv_results <- foreach (i=1:50) %dopar% {
sink("LOG_MB_Cox_CV.txt", append=TRUE)
cat(paste("Starting iteration", i, "\n"))
foreach (k=1:10, .errorhandling='pass') %do% {
cat(paste("Fold", k, "\n"))
temp <- list()
# Define TRAINing and test sample index
test_s <- testindex[[i]][[k]]
TRAIN_s <- c(1:nrow(metabric.annotV2))[-test_s]
# Survival objects
TRAIN.surv = Surv(metabric.annotV2$OS_MONTHS[TRAIN_s], as.numeric(metabric.annotV2$OS_EVENT[TRAIN_s]))
test.surv = Surv(metabric.annotV2$OS_MONTHS[test_s], as.numeric(metabric.annotV2$OS_EVENT[test_s]))
# Make the signatures
TRAIN.cv.exp <- cv.glmnet(x=cbind(t(metabric.exp[, TRAIN_s]),
metabric.annotV2$AGE_AT_DIAGNOSIS[TRAIN_s]),
y=TRAIN.surv, family = 'cox', alpha=1, parallel = T)
n.mbcv <- names(getCoefs(TRAIN.cv.exp, "min"))
# Create Cox proportional hazards model
TRAIN.df <- data.frame("Age"=metabric.annotV2$AGE_AT_DIAGNOSIS[TRAIN_s],
t(metabric.exp[n.mbcv, TRAIN_s]),
"Time"=metabric.annotV2$OS_MONTHS[TRAIN_s],
"Event"=as.numeric(metabric.annotV2$OS_EVENT[TRAIN_s]))
test.df <- data.frame("Age"=metabric.annotV2$AGE_AT_DIAGNOSIS[test_s],
t(metabric.exp[n.mbcv, test_s]),
"Time"=metabric.annotV2$OS_MONTHS[test_s],
"Event"=as.numeric(metabric.annotV2$OS_EVENT[test_s]))
# Model TRAINing datset with TRAINing gene signature
TRAIN.model <- coxph(Surv(Time, Event) ~ ., data=TRAIN.df)
# Model test datset with TRAINing gene signature
test.model <- coxph(Surv(Time, Event) ~ ., data=test.df)
# Predict on test datset
test.actual <- predict(coxph(Surv(Time, Event) ~ ., data=test.df), type='risk')
test.pred <- predict(TRAIN.model, newdata = test.df, type='risk')
plot(log(test.actual), log(test.pred))
cor(test.actual, test.pred, method='spearman')
# Export objects to save
temp[[1]] <- n.mbcv # geneset that can be used to calculate ssGSEA scores and get consensus geneset
temp[[2]] <- TRAIN.model # the Cox PH model for TRAINing data
temp[[3]] <- test.model # the Cox PH model for test data with geneset identified from TRAINing split
temp[[4]] <- test.actual # actual risk for test data
temp[[5]] <- test.pred # risk for test data predicted using TRAINing model with test age and ssGSEA score
return(temp)
}
}
sink()
save(MB_Cox_cv_results, file="./../Output/MB_Cox_cv_results.RData")