-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path08_ranger_scores_scw.R
74 lines (62 loc) · 2.18 KB
/
08_ranger_scores_scw.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library("sbcdata")
library("rusranger")
## don't exclude by time
ukl <- exclude_entries(
subset(sbcdata, Center == "Leipzig"), time = c(-Inf, Inf)
)
ukl <- ukl[!ukl$Excluded,]
umg <- exclude_entries(
subset(sbcdata, Center == "Greifswald"), time = c(-Inf, Inf)
)
umg <- umg[!umg$Excluded,]
mimic <- exclude_entries(
import_mimic("../../inst/intdata/mimic-iv-1.0/"), time = c(-Inf, Inf)
)
mimic <- mimic[!mimic$Excluded,]
ukl$Sex <- as.integer(ukl$Sex == "M")
umg$Sex <- as.integer(umg$Sex == "M")
mimic$Sex <- as.integer(mimic$Sex == "M")
ukl$Diagnosis <- as.integer(ukl$Diagnosis == "Sepsis")
umg$Diagnosis <- as.integer(umg$Diagnosis == "Sepsis")
mimic$Diagnosis <- as.integer(mimic$Diagnosis == "Sepsis")
set.seed(20220419)
train <- subset(ukl, Set == "Training")
validation <- list(
UKL = subset(ukl, Set == "Validation"), UMG = umg, MIMIC = mimic
)
xvar <- c("Age", "Sex", "PLT", "RBC", "WBC", "HGB", "MCV")
mtry <- 3
num.trees <- 1000
replace <- TRUE # doing bootstrap, shorter runtime, b/c much lower samplesize/samplefraction
scw <- 10L
caseweights <- function(y, y0, scw = 1) {
isControl <- !y
isSepsis <- as.logical(y)
isSepsisControl <- as.logical(y0) & !as.logical(y)
## control = 1
w <- rep_len(1, length(y))
## sepsis cases that are now control
w[isSepsisControl] <- scw
## sepsis cases
w[isSepsis] <- (sum(isControl) + sum(isSepsisControl) * scw) / sum(isSepsis)
w
}
cwranger <- function(d, scw = 1) {
ranger(
x = as.data.frame(d[, xvar, with = FALSE]), y = d$Diagnosis,
probability = TRUE, min.node.size = 10,
mtry = mtry, num.trees = num.trees,
case.weights = caseweights(d$Diagnosis, d$Diagnosis0, scw),
sample.fraction = rusranger:::.samplefraction(d$Diagnosis),
replace = replace
)
}
train$Diagnosis0 <- train$Diagnosis
train$Diagnosis <- as.integer(
train$Diagnosis & sbcdata:::.is_time_range(train, range = c(12, 0) * 3600)
)
rcw <- cwranger(train, scw = scw)
validation <- do.call(rbind, validation)
validation$RangerProbability <-
predict(rcw, as.data.frame(validation[, xvar, with = FALSE]))$prediction[, 2L]
saveRDS(validation, file = paste0("ValidationScw", scw, ".RDS"))