-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdvc.lock
548 lines (548 loc) · 16.4 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
schema: '2.0'
stages:
prepare_data:
cmd: python scripts/prepare_data.py
deps:
- path: ./data/database.ddb
hash: md5
md5: b51482d0b753298b2bd522729dc69767
size: 14074523648
- path: ./nomelt/deduplication.py
hash: md5
md5: 43a77592878fbad6b852803f56c01114
size: 13055
- path: ./scripts/prepare_data.py
hash: md5
md5: 202a282af1cec60d96373bde1d0376e9
size: 11367
params:
params.yaml:
data.additional_filters:
- abs({seq_len_diff})/{meso_seq_len} < 0.1
data.dev_sample_data: false
data.max_meso_temp: 40.0
data.min_align_cov: 0.95
data.min_temp_diff: 0.0
data.min_thermo_temp: 60.0
data.mmseq_params:
coverage: 0.95
min-seq-id: 0.5
cluster-mode: 1
similarity-type: 2
sensitivity: 7
max-seqs: 1000
cluster-steps: 5
cluster-reassign: 1
e: 0.001
data.test_size: 0.1
outs:
- path: ./data/data_metrics.yaml
hash: md5
md5: 95ccb4a2c65395f4684610fd8e91825b
size: 159
- path: ./data/dataset/
hash: md5
md5: 4d54a3ebbad1c15ff712fe9ba0b60ea3.dir
size: 20196394186
nfiles: 32
blast_test_train:
cmd: python scripts/blast_test_train.py
deps:
- path: ./data/dataset/
hash: md5
md5: 34196e49b8542702b565b17f4da1375a.dir
size: 2396663610
nfiles: 15
- path: ./scripts/blast_test_train.py
hash: md5
md5: 9da7628a579040118532b401a4d01037
size: 3610
outs:
- path: ./data/plots/test_train_blast_hist.png
hash: md5
md5: 2d8cbaa2f8c63af8e1dfca67f8d6243c
size: 124685
- path: ./data/test_train_blast_metrics.json
hash: md5
md5: a948a2fe8397f44fbf23d213df8ce042
size: 601
train:
cmd: accelerate launch --config_file ./.config/accelerate/default_config.yaml
scripts/train.py
deps:
- path: ./.config/accelerate/default_config.yaml
hash: md5
md5: 944c78dde32aae09fb317726c143df3a
size: 524
- path: ./data/dataset/
hash: md5
md5: 56fcfb97dc44221059f2f88d6a0abfff.dir
size: 20176618266
nfiles: 24
- path: ./scripts/train.py
hash: md5
md5: 7d2f42f29c598416f4142efe208a63f6
size: 16297
params:
params.yaml:
model.generation_max_length: 250
model.model_hyperparams:
dropout_rate: 0.1
relative_attention_max_distance: 250
model.pretrained_model: Rostlab/prot_t5_xl_uniref50
model.task: translation
training.auto_find_batch_size: false
training.bf16: true
training.dev_sample_data: false
training.early_stopping: true
training.early_stopping_patience: 4
training.early_stopping_threshold: 0.01
training.epochs: 0.5
training.eval_single_example_per_cluster: true
training.evals_per_epoch: 500
training.evals_per_save: 3
training.fp16: false
training.freeze_early_layers: 0.2
training.gradient_accumulation: 8
training.gradient_checkpointing: true
training.label_smoothing_factor: 0.001
training.learning_rate: 0.0001
training.lr_scheduler_type: linear
training.optim: adamw_hf
training.optim_args:
training.per_device_batch_size: 20
training.reweight: false
training.warmup_ratio: 0.1
outs:
- path: ./data/nomelt-model/live/metrics.json
hash: md5
md5: cac471045f999bf8b0b97f739e164e0b
size: 390
- path: ./data/nomelt-model/live/plots/
hash: md5
md5: 86f3e41699e6e48c8223bb0874ecdf84.dir
size: 34687
nfiles: 11
- path: ./data/nomelt-model/live/report.md
hash: md5
md5: 269db379957fb781d64615d45221c358
size: 1281
- path: ./data/nomelt-model/live/static/
hash: md5
md5: 05ded1948dad0a7d8f648b2859eb7d6d.dir
size: 268742
nfiles: 11
- path: ./data/nomelt-model/model/
hash: md5
md5: 6b5612c36951f10b4fa70212c7fc39b7.dir
size: 283788179784
nfiles: 272
make_predictions:
cmd: accelerate launch --config_file ./.config/accelerate/data_parallel_config.yaml
scripts/make_predictions.py
deps:
- path: ./data/dataset/
hash: md5
md5: 4d54a3ebbad1c15ff712fe9ba0b60ea3.dir
size: 20196394186
nfiles: 32
- path: ./data/nomelt-model/model/
hash: md5
md5: 6b5612c36951f10b4fa70212c7fc39b7.dir
size: 283788179784
nfiles: 272
- path: ./scripts/make_predictions.py
hash: md5
md5: 200a2ca596f6a0a810b14bea3a78b05f
size: 8200
params:
params.yaml:
model.generation_max_length: 250
model.generation_num_beams: 10
outs:
- path: ./data/nomelt-model/predictions.tsv
hash: md5
md5: 720c39c021fbfe4a22a4976086e1fbf8
size: 554951
score_predictions:
cmd: python scripts/score_predictions.py
deps:
- path: ./data/nomelt-model/predictions.tsv
hash: md5
md5: 720c39c021fbfe4a22a4976086e1fbf8
size: 554951
- path: ./scripts/score_predictions.py
hash: md5
md5: ab43e910ba7db780115e01f82834c1a8
size: 3853
outs:
- path: ./data/nomelt-model/test_scores.json
hash: md5
md5: 7d309477d2545f2ba64dc5fbafae6af4
size: 292
compare_sequence_alignment:
cmd: python scripts/compare_sequence_alignment.py
deps:
- path: ./data/nomelt-model/predictions.tsv
hash: md5
md5: 720c39c021fbfe4a22a4976086e1fbf8
size: 554951
- path: ./scripts/compare_sequence_alignment.py
hash: md5
md5: 78aa3fd9a637f6b55a83c940e1bca96f
size: 2851
outs:
- path: ./data/nomelt-model/test_predictions_aligned_results.json
hash: md5
md5: f06f2ecf2cee2898f87a3341152121d1
size: 1896723
natural_diversity_entropy:
cmd: python scripts/proof_of_principle/natural_diversity_entropy.py
deps:
- path: ./data/dataset
hash: md5
md5: 2c8a92a27cee7e0585d154ba1960f7f5.dir
size: 20196520698
nfiles: 33
- path: ./scripts/proof_of_principle/natural_diversity_entropy.py
hash: md5
md5: c91a2e6c91b0c330a6ee027f065b7e66
size: 5063
outs:
- path: ./data/proof_of_principle/natural_diversity_entropy.json
hash: md5
md5: a32d29514079094dc4d1f6b8325739f2
size: 37
compute_test_embeddings:
cmd: python scripts/compute_test_embeddings.py
deps:
- path: ./data/dataset/
hash: md5
md5: 4d54a3ebbad1c15ff712fe9ba0b60ea3.dir
size: 20196394186
nfiles: 32
- path: ./data/nomelt-model/model/
hash: md5
md5: 6b5612c36951f10b4fa70212c7fc39b7.dir
size: 283788179784
nfiles: 272
- path: ./scripts/compute_test_embeddings.py
hash: md5
md5: 58ecaf32c14b6f218b22b7cbf7a4945f
size: 6405
outs:
- path: ./data/nomelt-model/test_loss.json
hash: md5
md5: c19777649cab56e690fe5a89c1068929
size: 33
compare_structure:
cmd: python scripts/compare_structure.py
deps:
- path: ./data/nomelt-model/predictions.tsv
hash: md5
md5: 720c39c021fbfe4a22a4976086e1fbf8
size: 554951
- path: ./scripts/compare_structure.py
hash: md5
md5: 249ef9e5716a9b42c3f137975c9fabfa
size: 10311
outs:
- path: ./data/nomelt-model/structure_metrics.json
hash: md5
md5: 9cb68fa0b923698b9cdd92d905c0708d
size: 106
translate_enh1:
cmd: python scripts/translate_enh1.py
deps:
- path: ./data/nomelt-model-full/model/
hash: md5
md5: 475cac1353e35b4ad63dc8b253151906.dir
size: 40406837410
nfiles: 41
- path: ./scripts/translate_enh1.py
hash: md5
md5: d397e2e9f337263630b8e60d3941db76
size: 1048
params:
params.yaml:
model.generation_max_length: 250
model.generation_num_beams: 10
outs:
- path: ./data/enh/translate_enh1.json
hash: md5
md5: 66bfc9352d449525ee99d06707aa3a9d
size: 172
estimate_trans_energy_enh1:
cmd: python scripts/estimate_trans_energy_enh1.py
deps:
- path: ./.config/af_singularity_config.yaml
hash: md5
md5: 6ff0e462bb3c6efc4c9b314193e06468
size: 275
- path: ./data/enh/translate_enh1.json
hash: md5
md5: 66bfc9352d449525ee99d06707aa3a9d
size: 172
- path: scripts/estimate_trans_energy_enh1.py
hash: md5
md5: 64b45fecf9c82bb671be17dd2eef8279
size: 1431
params:
params.yaml:
optimize.estimator: mAFminDGEstimator
optimize.estimator_args:
af_params: ./.config/af_singularity_config.yaml
use_relaxed: false
num_replicates: 25
fix_msas: true
residue_length_norm: true
outs:
- path: ./data/enh/initial_estimate/
hash: md5
md5: 06e72e8e798d055b7149dccd1537b37f.dir
size: 28622210
nfiles: 77
- path: ./data/enh/translated_energy_enh1.json
hash: md5
md5: 45c3d82b37e229be2a6d2ae2c74361a6
size: 103
data_estimator_distribution:
cmd: python scripts/data_estimator_distribution.py
deps:
- path: ./data/nomelt-model/predictions.tsv
hash: md5
md5: 720c39c021fbfe4a22a4976086e1fbf8
size: 554951
- path: ./scripts/data_estimator_distribution.py
hash: md5
md5: 2155430a3588de4fec1c47d45db82d59
size: 5538
outs:
- path: ./data/thermo_gen_estimated.json
hash: md5
md5: 6d760c2e03b187e98e2d8d3ee8ad546a
size: 12214
train_all:
cmd: accelerate launch --config_file ./.config/accelerate/default_config.yaml
scripts/train_all.py
deps:
- path: ./.config/accelerate/default_config.yaml
hash: md5
md5: 944c78dde32aae09fb317726c143df3a
size: 524
- path: ./data/dataset/
hash: md5
md5: 4d54a3ebbad1c15ff712fe9ba0b60ea3.dir
size: 20196394186
nfiles: 32
- path: ./scripts/train_all.py
hash: md5
md5: e7f956f3fb367b5b7699ee5ccc717c59
size: 13547
params:
params.yaml:
model.generation_max_length: 250
model.model_hyperparams:
dropout_rate: 0.1
relative_attention_max_distance: 250
model.pretrained_model: Rostlab/prot_t5_xl_uniref50
model.task: translation
training.auto_find_batch_size: false
training.bf16: true
training.dev_sample_data: false
training.early_stopping: true
training.early_stopping_patience: 4
training.early_stopping_threshold: 0.01
training.epochs: 0.5
training.eval_single_example_per_cluster: true
training.evals_per_epoch: 500
training.evals_per_save: 3
training.fp16: false
training.freeze_early_layers: 0.2
training.gradient_accumulation: 8
training.gradient_checkpointing: true
training.label_smoothing_factor: 0.001
training.learning_rate: 0.0001
training.lr_scheduler_type: linear
training.optim: adamw_hf
training.optim_args:
training.per_device_batch_size: 20
training.reweight: false
training.warmup_ratio: 0.1
outs:
- path: ./data/nomelt-model-full/live/metrics.json
hash: md5
md5: 8e93b09483a0458fa67d09e0761d7b7e
size: 232
- path: ./data/nomelt-model-full/live/plots/
hash: md5
md5: b4289512b05aff3d59da8448b3a591f5.dir
size: 33714
nfiles: 7
- path: ./data/nomelt-model-full/live/report.md
hash: md5
md5: 30df13390f7a7fc690a852ff6609cecd
size: 796
- path: ./data/nomelt-model-full/live/static/
hash: md5
md5: c349fdaa96cf470f42ca5f00e90ca1b8.dir
size: 135981
nfiles: 7
- path: ./data/nomelt-model-full/model/
hash: md5
md5: 475cac1353e35b4ad63dc8b253151906.dir
size: 40406837410
nfiles: 41
optimize_enh1:
cmd: python scripts/optimize_enh1.py
deps:
- path: ./data/enh/translate_enh1.json
hash: md5
md5: 66bfc9352d449525ee99d06707aa3a9d
size: 172
- path: ./scripts/optimize_enh1.py
hash: md5
md5: 2fc7e39a777bec95c85a61d735ca579b
size: 4559
params:
params.yaml:
optimize.cut_tails:
optimize.direction: minimize
optimize.estimator: mAFminDGEstimator
optimize.estimator_args:
af_params: ./.config/af_singularity_config.yaml
use_relaxed: false
num_replicates: 25
fix_msas: true
residue_length_norm: true
optimize.gap_compressed_mutations: true
optimize.gapextend: -1
optimize.gapopen: -4
optimize.match_score: 1
optimize.matrix: BLOSUM62
optimize.mismatch_score: -1
optimize.n_trials: 100
optimize.optuna_overwrite: true
optimize.penalize_end_gaps: false
optimize.sampler: NSGAIISampler
optimize.sampler_args:
population_size: 10
outs:
- path: ./data/enh/optimize_enh1/
hash: md5
md5: 796bcae8d3bed0be9cb132d5a51af23c.dir
size: 1416543
nfiles: 1
- path: ./data/enh/optimize_enh1_results.json
hash: md5
md5: a43c2cba4252f773bb227155f8e6dcf4
size: 252
- path: ./data/enh/optimize_enh1_trials.csv
hash: md5
md5: b7af82bef095745c20e6b8072ec16c7a
size: 82695
enh1_in_training_set:
cmd: python scripts/proof_of_principle/enh1_check_training_set.py
deps:
- path: ./scripts/proof_of_principle/enh1_check_training_set.py
hash: md5
md5: c69633aa76ec98a6401d04112d8a524e
size: 917
outs:
- path: ./data/enh/training_data_homologs.json
hash: md5
md5: 28961a1d7a0eca372a6e6c5cc2a16c99
size: 72
enh1_random_opt:
cmd: python scripts/proof_of_principle/enh1_random_opt.py
deps:
- path: ./scripts/proof_of_principle/enh1_random_opt.py
hash: md5
md5: e53c4f08416579acf3e90aeadfabaad1
size: 5285
outs:
- path: ./data/proof_of_principle/optimize_enh1_rand_trials.csv
hash: md5
md5: d3c0d888202498781a35b96b3de595a8
size: 79818
- path: ./data/proof_of_principle/optimize_enh1_random_results.json
hash: md5
md5: c090dd550bc7ad97cc26d5d403590291
size: 250
consensus_estimated:
cmd: python scripts/proof_of_principle/consensus_estimated.py
deps:
- path: ./scripts/proof_of_principle/consensus_estimated.py
hash: md5
md5: f3bf924d26d88dfa36482bba222f2ddb
size: 1442
outs:
- path: ./data/proof_of_principle/consensus_estimated.json
hash: md5
md5: e5eb5d689388c77b325d493b70904a14
size: 167
zero_shot_estimation:
cmd: python ./scripts/zero_shot_experiment.py
deps:
- path: ./data/nomelt-model-full/model/
hash: md5
md5: 475cac1353e35b4ad63dc8b253151906.dir
size: 40406837410
nfiles: 41
- path: ./scripts/zero_shot_experiment.py
hash: md5
md5: e3130e9dbf79741de94533e331a14eac
size: 9027
outs:
- path: ./data/nomelt-model-full/zero_shot_estimated.json
hash: md5
md5: b4d3daa9b504943531e0e32ba0017af6
size: 527
- path: ./data/plots/exp_tm_scores.png
hash: md5
md5: b87c631154fd3cf591985919cfb3dc72
size: 255366
tests_in_training_set:
cmd: python scripts/proof_of_principle/check_training_set_for_case_studies.py
deps:
- path: ./scripts/proof_of_principle/check_training_set_for_case_studies.py
hash: md5
md5: 76f9d0b97adf134bf5e5d061dae1ad9d
size: 1841
outs:
- path: ./data/enh/training_data_homologs.json
hash: md5
md5: b784a66ae8ef924c8a4a2cfdbf270d45
size: 184
mAF_length_diff_test:
cmd: python scripts/proof_of_principle/mAF_length_diff_test.py
outs:
- path: ./data/plots/mAF_length_diff_test.png
hash: md5
md5: 951048668a4bae9dc59f9cef1151fa45
size: 172536
- path: ./data/proof_of_principle/mAF_length_diff_test.json
hash: md5
md5: 1c462b05bd396844d19502c023f1f817
size: 455
protein_gym_benchmark:
cmd: python ./scripts/protein_gym_benchmark.py
deps:
- path: ./data/nomelt-model-full/model/
hash: md5
md5: 475cac1353e35b4ad63dc8b253151906.dir
size: 40406837410
nfiles: 41
- path: ./scripts/protein_gym_benchmark.py
hash: md5
md5: 74c4e39a2bc90936757e324ade333984
size: 8505
outs:
- path: ./data/nomelt-model-full/lipa_gym_zero_shot.json
hash: md5
md5: 59cca8507f5fca680f7622795f79e2fc
size: 192
- path: ./data/plots/lipa_gym.png
hash: md5
md5: 38d47965d452f5babdcc9161f25ae9a7
size: 122422