-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_targets.R
138 lines (115 loc) · 2.91 KB
/
_targets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Pipeline setup: attach packages, anchor project paths, choose the future
# plan, and load the helper functions used by the target commands below.
# Statement order matters here (attach order, i_am() before here()).
#
# Load required packages
library(targets)
library(tarchetypes)
library(future)
library(tidyverse)
library(here)
# Declare where this script sits within the project so that the here() calls
# in this file build their paths from the project root.
here::i_am("_targets.R")
# Select the multisession plan from the future package.
# NOTE(review): presumably intended for a future-based targets run
# (e.g. tar_make_future()) -- confirm how the pipeline is launched.
plan(multisession)
# File containing commands referenced in this pipeline
source(here("R", "functions.R"))
# --- Individual Targets
# Target 1: raw_data
#
#   Path to the data file provided by Roee, containing whistle features.
#   The "label" column annotates presence or absence of URN.
#   See Diamant et al. (2024) for details.
#
#   Declared with format = "file", so the command returns the file path
#   rather than the file's contents.
T1 <- tar_target(
  raw_data,
  here("data", "study_data.csv"),
  format = "file"
)
# Target 2: study_data
#
#   The study data set, read from the raw_data file and cleaned by
#   read_study_data() (sourced from R/functions.R). Stored as RDS.
T2 <- tar_target(
  study_data,
  read_study_data(raw_data),
  format = "rds"
)
# --- Mapped Targets
# Values:
#   The study dates, used to subset the data. A separate model is trained
#   for each date so that it can be tested on the data from that single day.
#   See Diamant et al. (2024).
#
# Each target below is created once per date; in the names that follow,
# <date> stands for the date value (e.g. training_data_2021_06_24).
T_MAPPED <- tar_map(
  values = list(
    date = c(
      "2021_06_24", "2021_06_25", "2021_06_26", "2021_06_27",
      "2021_06_28", "2021_06_29", "2021_06_30", "2021_07_01",
      "2021_07_02", "2021_07_03", "2021_07_04", "2021_07_05",
      "2021_07_06", "2021_07_07", "2021_07_08", "2021_07_09",
      "2021_07_10", "2021_07_11", "2021_07_12", "2021_07_13",
      "2021_07_14", "2021_07_15"
    )
  ),

  # Mapped target 1: training_data_<date>
  #   study_data with the given date excluded, as prepared by
  #   subset_and_balance_training_data().
  tar_target(
    training_data,
    subset_and_balance_training_data(study_data, date),
    deployment = "worker"
  ),

  # Mapped target 2: testing_data_<date>
  #   study_data restricted to the given date only.
  tar_target(
    testing_data,
    subset_testing_data(study_data, date),
    deployment = "worker"
  ),

  # Mapped target 3: svm_<date>
  #   An SVM model trained on the data excluding the given date.
  tar_target(
    svm,
    do_model(training_data),
    deployment = "worker"
  ),

  # Mapped target 4: svm_predictions_<date>
  #   Predictions of the model above for the data from the given date.
  tar_target(
    svm_predictions,
    do_predict(svm, testing_data),
    deployment = "worker"
  )
)
# --- COMBINED TARGETS
# Combined target: combined_predictions
#
#   Inputs:
#     The svm_predictions targets from T_MAPPED, i.e. the model predictions
#     for the models trained for all dates.
#
#   Description:
#     Pertinent data concerning the predictions, available for all models
#     included in the study. The per-date predictions are spliced into a
#     single aggregate_predictions() call via !!!.x.
T_COMBINED <- tar_combine(
  combined_predictions,
  T_MAPPED[["svm_predictions"]],
  command = aggregate_predictions(!!!.x)
)
# --- EXECUTE PIPELINE
# The script's final value is the list of target definitions that make up
# the pipeline: the two individual targets, the per-date mapped targets, and
# the combined predictions.
list(T1, T2, T_MAPPED, T_COMBINED)