-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_pipeline.py
57 lines (47 loc) · 2.59 KB
/
run_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import argparse
import os
import subprocess
import sys
import datetime
def run_pipeline(input_file, output_dir, monomerizer_args=None):
    """Run the three pipeline scripts in order, stopping at the first failure.

    Each stage is launched as a child process with the current interpreter
    (``sys.executable``). The script paths are relative (``src/...``), so
    they are resolved against the caller's working directory.

    Args:
        input_file: Path forwarded to ``src/monomerizer.py`` as
            ``--input_file``.
        output_dir: Directory that is created if missing and forwarded to
            every stage as ``--output_dir``.
        monomerizer_args: Optional iterable of extra command-line tokens
            appended to the monomerizer command only. Non-string tokens
            (e.g. ints) are converted with ``str``.

    Raises:
        subprocess.CalledProcessError: If a stage exits with a non-zero
            status; the remaining stages are not run.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Run monomerizer.py with its arguments
    print(f"Running monomerizer.py... Input: {input_file}, Output: {output_dir}")
    monomerizer_command = [
        sys.executable,
        "src/monomerizer.py",
        "--input_file",
        input_file,
        "--output_dir",
        output_dir,
    ]
    if monomerizer_args:
        # subprocess only accepts str/bytes/path-like argv entries; coerce so
        # that numeric option values cannot crash the launch with TypeError.
        monomerizer_command.extend(str(token) for token in monomerizer_args)
    subprocess.run(monomerizer_command, check=True)

    # Step 2: Run standardizer.py with its arguments
    print("Running standardizer.py...")
    standardizer_command = [sys.executable, "src/standardizer.py", "--output_dir", output_dir]
    subprocess.run(standardizer_command, check=True)

    # Step 3: Run prepare_GPepT_data.py to process sequences
    print("Running prepare_GPepT_data.py...")
    prepare_gpept_data_command = [sys.executable, "src/prepare_GPepT_data.py", "--output_dir", output_dir]
    subprocess.run(prepare_gpept_data_command, check=True)
def build_parser():
    """Build the command-line parser for the pipeline runner.

    Returns:
        argparse.ArgumentParser: Parser exposing the input/output locations
        plus the options that are forwarded to monomerizer.py.
    """
    parser = argparse.ArgumentParser(description="Run a pipeline of programs sequentially.")
    # Add arguments
    parser.add_argument("--input_file", default="demo/example_smiles.txt", help="Input file for the pipeline")
    # The default output directory is timestamped at parser-construction time.
    parser.add_argument(
        "--output_dir",
        default=f"output/{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}",
        help="Output directory",
    )
    parser.add_argument("--process_cyclic", action="store_true", help="Process cyclic compounds")
    parser.add_argument("--min_amino_acids", type=int, help="Minimum number of amino acids required")
    parser.add_argument("--batch_size", type=int, help="Batch size for processing")
    parser.add_argument("--max_workers", type=int, help="Maximum number of workers for parallel processing")
    parser.add_argument("-draw", action="store_true", help="Draw the molecules")
    return parser


def collect_monomerizer_args(args):
    """Translate parsed options into extra command-line tokens for monomerizer.py.

    Args:
        args: Namespace produced by the parser from ``build_parser``.

    Returns:
        list[str]: Tokens to append to the monomerizer command. Every entry
        is a string, because subprocess rejects non-string argv items.
    """
    extra = []
    if args.process_cyclic:
        # NOTE(review): forwarded with a single dash although this script
        # accepts ``--process_cyclic``; kept as-is, confirm against
        # monomerizer.py's own parser.
        extra.append("-process_cyclic")
    for option in ("min_amino_acids", "batch_size", "max_workers"):
        value = getattr(args, option)
        # Compare against None so an explicit 0 is still forwarded instead of
        # being silently dropped by a truthiness test.
        if value is not None:
            extra.extend([f"--{option}", str(value)])
    if args.draw:
        extra.append("-draw")
    return extra


def main(argv=None):
    """Parse the command line and run the pipeline.

    Args:
        argv: Optional list of argument strings; ``None`` makes argparse
            read ``sys.argv``.
    """
    args = build_parser().parse_args(argv)
    # Run the pipeline
    run_pipeline(args.input_file, args.output_dir, monomerizer_args=collect_monomerizer_args(args))


if __name__ == "__main__":
    main()