-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.lock
216 lines (216 loc) · 8.19 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
schema: '2.0'
stages:
ogt_protein_classifier_train_evaluate:
cmd: torchrun --nproc_per_node gpu pipeline/ogt_protein_classifier_train_evaluate.py
deps:
- path: ./data/ogt_protein_classifier/data
md5: 2289d72722f92f11cbda390392be687b.dir
size: 1261768131
nfiles: 527
- path: ./l2tml_utils/model_utils.py
md5: 13c9597c906c8f2291cc7902be3c622d
size: 4167
- path: ./pipeline/ogt_protein_classifier_train_evaluate.py
md5: 448657f641b9433869417ae84d156631
size: 14934
params:
params.yaml:
ogt_protein_classifier_train_evaluate.batch_size: 32
ogt_protein_classifier_train_evaluate.dev_subsample_data:
ogt_protein_classifier_train_evaluate.dropout: 0.0
ogt_protein_classifier_train_evaluate.epochs: 2
ogt_protein_classifier_train_evaluate.fp16: true
ogt_protein_classifier_train_evaluate.grad_accum: 25
ogt_protein_classifier_train_evaluate.grad_checkpointing: true
ogt_protein_classifier_train_evaluate.lr: 5e-05
ogt_protein_classifier_train_evaluate.lr_scheduler: linear
ogt_protein_classifier_train_evaluate.model: protbert
ogt_protein_classifier_train_evaluate.n_save_per_epoch: 10
ogt_protein_classifier_train_evaluate.protocol: finetune
ogt_protein_classifier_train_evaluate.push: true
outs:
- path: ./data/ogt_protein_classifier/dvclive/metrics.json
md5: 029addca8a847dc05a6e8fed55a0ef22
size: 359
- path: ./data/ogt_protein_classifier/dvclive/plots
md5: c32ace26b29dbc810c8a805c9d5af8e7.dir
size: 6057
nfiles: 10
- path: ./data/ogt_protein_classifier/dvclive/report.md
md5: d747d85b5823fbd7e75b83f110d79588
size: 1056
- path: ./data/ogt_protein_classifier/model
md5: 2b19254cc46b56234ba333a1b15eaf56.dir
size: 102481821985
nfiles: 269
- path: ./data/ogt_protein_classifier/model_metrics.yaml
md5: 559508ec6df968619c9aa8025b33fba9
size: 16375
ogt_protein_classifier_data_prep:
cmd: python pipeline/ogt_protein_classifier_data_prep.py
deps:
- path: ./data/database
md5: 5192612c6c18eeadaaf37a1ee14f307f
size: 9478352896
- path: ./l2tml_utils/data_utils.py
md5: 3c1a6d4ec1bbcd6dab1d5cd4d42960f4
size: 9550
- path: ./l2tml_utils/dataset_deduplication.py
md5: 99f75aae0fd91b47a18988dd609b3cc6
size: 12357
- path: ./pipeline/ogt_protein_classifier_data_prep.py
md5: 090e7ebe6774ed9aaa08702c8fb16265
size: 9695
params:
params.yaml:
ogt_protein_classifier_data_prep.data_batch_size: 200
ogt_protein_classifier_data_prep.deduplication:
- do: true
- jaccard: 0.6
- kgram: 3
- num_perm: 100
ogt_protein_classifier_data_prep.dev_keep_columns: true
ogt_protein_classifier_data_prep.dev_sample_init_data: false
ogt_protein_classifier_data_prep.max_protein_len: 250
ogt_protein_classifier_data_prep.max_upsampling: 0.0
ogt_protein_classifier_data_prep.min_balance: 0.5
ogt_protein_classifier_data_prep.ogt_window:
- 30.0
- 60.0
ogt_protein_classifier_data_prep.split_type: taxid
ogt_protein_classifier_data_prep.train_test_frac: 0.1
outs:
- path: ./data/ogt_protein_classifier/data
md5: 3a1c55014e1892b91b8cafd9c33be4ae.dir
size: 63092843
nfiles: 7
- path: ./data/ogt_protein_classifier/data_metrics.yaml
md5: 0f21bf03527551f756dcf0acb1ce979f
size: 172
ogt_protein_regressor_data_prep:
cmd: python pipeline/ogt_protein_regressor_data_prep.py
deps:
- path: ./data/database
md5: 5192612c6c18eeadaaf37a1ee14f307f
size: 9478352896
- path: ./l2tml_utils/data_utils.py
md5: d182589bf60a00d614e53d340e9f22fa
size: 9633
- path: ./l2tml_utils/dataset_deduplication.py
md5: 99f75aae0fd91b47a18988dd609b3cc6
size: 12357
- path: ./pipeline/ogt_protein_regressor_data_prep.py
md5: f84b38ab675ccbf3cbd503825e7667ab
size: 8445
params:
params.yaml:
ogt_protein_regressor_data_prep.balancing:
- do: true
- max_bin_size: auto
- num_bins: 20
- min_total_data_kept: 150000
ogt_protein_regressor_data_prep.data_batch_size: 200
ogt_protein_regressor_data_prep.deduplication:
- do: true
- jaccard: 0.6
- kgram: 3
- num_perm: 100
ogt_protein_regressor_data_prep.dev_keep_columns: true
ogt_protein_regressor_data_prep.dev_sample_init_data: false
ogt_protein_regressor_data_prep.max_protein_len: 250
ogt_protein_regressor_data_prep.min_protein_len: 50
ogt_protein_regressor_data_prep.split_type: taxid
ogt_protein_regressor_data_prep.train_test_frac: 0.1
outs:
- path: ./data/ogt_protein_regressor/data
md5: a2e77b898835ae8211405f0ce505cc4f.dir
size: 32563905
nfiles: 8
- path: ./data/ogt_protein_regressor/data_metrics.yaml
md5: f1accf8b74e9559eece74cd8076fa55b
size: 208
ogt_protein_regressor_train_evaluate:
cmd: torchrun --nproc_per_node gpu pipeline/ogt_protein_regressor_train_evaluate.py
deps:
- path: ./data/ogt_protein_regressor/data
md5: 3f03cf1571347b9a438c1280eb51977a.dir
size: 683110105
nfiles: 219
- path: ./l2tml_utils/model_utils.py
md5: 13c9597c906c8f2291cc7902be3c622d
size: 4167
- path: ./pipeline/ogt_protein_regressor_train_evaluate.py
md5: 5202b17e52f610981e62bbb6cc4d4d3a
size: 15195
params:
params.yaml:
ogt_protein_regressor_train_evaluate.batch_size: 32
ogt_protein_regressor_train_evaluate.dev_subsample_data: false
ogt_protein_regressor_train_evaluate.dropout: 0.0
ogt_protein_regressor_train_evaluate.epochs: 1
ogt_protein_regressor_train_evaluate.fp16: true
ogt_protein_regressor_train_evaluate.grad_accum: 25
ogt_protein_regressor_train_evaluate.grad_checkpointing: true
ogt_protein_regressor_train_evaluate.lr: 1e-05
ogt_protein_regressor_train_evaluate.lr_scheduler: linear
ogt_protein_regressor_train_evaluate.model: protbert
ogt_protein_regressor_train_evaluate.n_save_per_epoch: 10
ogt_protein_regressor_train_evaluate.protocol: finetune
outs:
- path: ./data/ogt_protein_regressor/dvclive/metrics.json
md5: a8720ee6f08cefa244a3acc3cfec0ebc
size: 364
- path: ./data/ogt_protein_regressor/dvclive/plots
md5: fd3d3a2fa1001e46338527cbe5b7ef8a.dir
size: 3143
nfiles: 11
- path: ./data/ogt_protein_regressor/dvclive/report.md
md5: 28021e7f51b7013bf37c643841f270bd
size: 1116
- path: ./data/ogt_protein_regressor/model
md5: 4d6cfc6266e573cb295d8e33e67d04da.dir
size: 52080836240
nfiles: 138
- path: ./data/ogt_protein_regressor/model_metrics.yaml
md5: f42805dcd37a84e4ea3af6371dae25d6
size: 4955
ogt_protein_regressor_predict:
cmd: python ./pipeline/ogt_protein_regressor_predict.py
deps:
- path: ./data/ogt_protein_regressor/data
md5: 2dca9bd770480a10a661cd1cdb4082f4.dir
size: 765053329
nfiles: 225
- path: ./data/ogt_protein_regressor/model
md5: 3ec1a2961551984d87cb04c8c5cf4305.dir
size: 52080836896
nfiles: 138
- path: ./pipeline/ogt_protein_regressor_predict.py
hash: md5
md5: 936aaf980e6f95a544f8156470629c31
size: 9446
outs:
- path: ./data/ogt_protein_regressor/error_bins.png
hash: md5
md5: 48fe1c0e4cfe41deab768a6691f98df3
size: 110953
- path: ./data/ogt_protein_regressor/error_bounds.png
hash: md5
md5: 2574722657e566b8ef505ffe4ac0edda
size: 128690
- path: ./data/ogt_protein_regressor/error_confidence.png
hash: md5
md5: 9612eceba55f0f19247ae9ce0a2318ad
size: 121694
- path: ./data/ogt_protein_regressor/error_dist.png
hash: md5
md5: 2126de576972480a6dbcdab3dbd5d99a
size: 70150
- path: ./data/ogt_protein_regressor/prediction_dist.png
hash: md5
md5: b804deb59b2eb7f75acfd173da3819fb
size: 129336
- path: ./data/ogt_protein_regressor/prediction_scatter.png
hash: md5
md5: 1595522dff0f27b4e7f34385ccef551d
size: 400532