forked from JDACS4C-IMPROVE/GraphDRP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraphdrp_infer_improve.py
161 lines (130 loc) · 5.34 KB
/
graphdrp_infer_improve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
""" Inference with GraphDRP for drug response prediction.
Required outputs
----------------
All the outputs from this infer script are saved in params["infer_outdir"].
1. Predictions on test data.
Raw model predictions calculated using the trained model on test data. The
predictions are saved in test_y_data_predicted.csv
2. Prediction performance scores on test data.
The performance scores are calculated using the raw model predictions and
the true values for performance metrics specified in the metrics_list. The
scores are saved as json in test_scores.json
"""
import sys
from pathlib import Path
from typing import Dict
import pandas as pd
# [Req] IMPROVE/CANDLE imports
from improve import framework as frm
# Model-specific imports
from model_utils.torch_utils import (
build_GraphDRP_dataloader,
determine_device,
load_GraphDRP,
predicting,
)
# [Req] Imports from preprocess and train scripts
from graphdrp_preprocess_improve import preprocess_params
from graphdrp_train_improve import metrics_list, train_params
filepath = Path(__file__).resolve().parent # [Req] directory containing this script

# ---------------------
# [Req] Parameter lists
# ---------------------
# Two parameter lists are required:
# 1. app_infer_params
# 2. model_infer_params
#
# The values for the parameters in both lists should be specified in a
# parameter file that is passed as default_model arg in
# frm.initialize_parameters().

# 1. App-specific params (App: monotherapy drug response prediction)
# Currently, there are no app-specific params in this script.
app_infer_params = []

# 2. Model-specific params (Model: GraphDRP)
# All params in model_infer_params are optional.
# If no params are required by the model, then it should be an empty list.
model_infer_params = []

# [Req] Combine the two lists (the combined parameter list is passed to
# frm.initialize_parameters() in main()).
infer_params = app_infer_params + model_infer_params
# ---------------------
# [Req]
def run(params):
    """Run inference with a trained GraphDRP model on the test split.

    Args:
        params (dict): dict of CANDLE/IMPROVE parameters and parsed values.

    Returns:
        dict: prediction performance scores computed on test data according
            to the metrics_list.
    """
    # ------------------------------------------------------
    # [Req] Make sure the inference output directory exists.
    # ------------------------------------------------------
    frm.create_outdir(outdir=params["infer_outdir"])

    # ------------------------------------------------------
    # [Req] Construct the test-set data file name, then strip the
    # data_format suffix (GraphDRP loads its data by name prefix).
    # ------------------------------------------------------
    test_fname = frm.build_ml_data_name(params, stage="test")
    test_fname = test_fname.split(params["data_format"])[0]

    # ------------------------------------------------------
    # Dataloader that feeds the model input (ML) data.
    # ------------------------------------------------------
    print("\nTest data:")
    print(f"test_ml_data_dir: {params['test_ml_data_dir']}")
    print(f"test_batch: {params['test_batch']}")
    loader = build_GraphDRP_dataloader(
        params["test_ml_data_dir"],
        test_fname,
        params["test_batch"],
        shuffle=False,
    )

    # ------------------------------------------------------
    # Select CUDA (per cuda_name) if available, otherwise CPU.
    # ------------------------------------------------------
    device = determine_device(params["cuda_name"])

    # ------------------------------------------------------
    # Load the best checkpoint (chosen on val data) and predict.
    # ------------------------------------------------------
    modelpath = frm.build_model_path(params, model_dir=params["model_dir"])  # [Req]
    model = load_GraphDRP(params, modelpath, device)
    model.eval()
    test_true, test_pred = predicting(model, device, data_loader=loader)

    # ------------------------------------------------------
    # [Req] Persist raw predictions as a dataframe.
    # ------------------------------------------------------
    frm.store_predictions_df(
        params,
        y_true=test_true, y_pred=test_pred, stage="test",
        outdir=params["infer_outdir"],
    )

    # ------------------------------------------------------
    # [Req] Compute and save performance scores.
    # NOTE: "performace" spelling matches the framework's API name.
    # ------------------------------------------------------
    test_scores = frm.compute_performace_scores(
        params,
        y_true=test_true, y_pred=test_pred, stage="test",
        outdir=params["infer_outdir"], metrics=metrics_list,
    )
    return test_scores
# [Req]
def main(args):
    """Parse parameters and run inference (args themselves are unused;
    frm.initialize_parameters reads the CLI/config directly)."""
    # [Req] Combine parameter definitions from every pipeline stage so the
    # config file may set any of them.
    additional_definitions = preprocess_params + train_params + infer_params
    params = frm.initialize_parameters(
        filepath,
        default_model="params_cs.txt",
        additional_definitions=additional_definitions,
        required=None,
    )
    run(params)
    print("\nFinished model inference.")
# [Req]
if __name__ == "__main__":
    # Script entry point; CLI args are forwarded, though main parses its
    # configuration via frm.initialize_parameters.
    main(sys.argv[1:])