-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurqmdParser.py
130 lines (111 loc) · 4.87 KB
/
urqmdParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import pandas as pd
import numpy as np
import gc
import sys
import os
class UrqmdParser:
    """Extract the time slices of one event from a reduced UrQMD table.

    The input is ``<pr14>_reduced.csv``, a space-separated table. Rows whose
    first field is ``"UQMD"`` mark the start of an event, and rows with no
    third field mark the start of a time slice. The particle rows of the
    selected event are gathered into one DataFrame and written to Parquet.
    """

    def __init__(self, pr14, eventNum, pr_save):
        """Store the run configuration and make sure the output folder exists.

        Args:
            pr14: Base path of the input; ``<pr14>_reduced.csv`` is read.
            eventNum: 1-based number of the event to extract.
            pr_save: Path of the Parquet file written by ``process_pr14``.
        """
        self.pr14 = pr14
        self.eventNum = eventNum
        self.pr_save = pr_save

        # Collision parameters and row bookkeeping; filled in by load_pr14().
        self.tottime = None
        self.dtime = None
        self.massA = None
        self.chA = None
        self.massB = None
        self.chB = None
        self.b = None
        self.time_arr = None
        self.seps = None
        self.sliceStart = None
        self.evStart = None

        # dirname() is "" when pr_save is a bare file name (current
        # directory); os.makedirs("") would raise, so skip that case.
        output_folder = os.path.dirname(self.pr_save)
        if output_folder and not os.path.isdir(output_folder):
            os.makedirs(output_folder, exist_ok=True)
            print(f"Created output folder: {output_folder}")

    def load_pr14(self):
        """Load the reduced CSV file and locate the requested event.

        Sets ``evStart``, ``seps`` and the collision parameters as a side
        effect.

        Returns:
            The whole table as a DataFrame of strings (missing fields are NaN).

        Raises:
            SystemExit: With status 1 if the file cannot be read or the event
                number is outside ``1..number of events``.
        """
        csv_path = f"{self.pr14}_reduced.csv"
        print(f"Loading data from: {csv_path}...")
        try:
            df14 = pd.read_csv(
                csv_path,
                sep=' ',
                names=['t', 'x', 'y', 'z', 'p0', 'px', 'py', 'pz', 'm', 'ityp',
                       'di3', 'ch', 'pcn', 'ncoll', 'ppt', 'eta', 'nev'],
                dtype=str,
            )
        except FileNotFoundError:
            print(f"Error: File '{csv_path}' not found.")
            sys.exit(1)
        except Exception as e:
            print(f"Error loading data: {e}")
            sys.exit(1)
        print("Data loaded successfully.")

        # Identify event starts and calculate initial parameters
        evStarts = df14[df14['t'] == "UQMD"].index
        # Reject eventNum <= 0 as well: a negative index would silently pick
        # an event counted from the end of the file.
        if not 1 <= self.eventNum <= len(evStarts):
            print("Error: Event number out of range.")
            sys.exit(1)
        self.evStart = evStarts[self.eventNum - 1]
        # Rows with only two fields (no 'y') open a time slice.
        self.seps = df14[df14['y'].isna()].index
        self._set_collision_parameters(df14)
        return df14

    def _slices_per_event(self):
        """Return the number of time slices in one event (tottime / dtime)."""
        # The epsilon keeps binary rounding (e.g. 0.3 / 0.1 == 2.999...) from
        # truncating away a slice.
        return int(self.tottime / self.dtime + 1e-9)

    def _set_collision_parameters(self, df14):
        """Extract and print collision parameters from the event header.

        Values are read from fixed cells relative to ``evStart`` (rows +1, +3
        and +5). NOTE(review): presumably this matches the UrQMD file-14
        header layout -- confirm against the tool producing the reduced CSV.
        """
        self.tottime = float(df14.iloc[self.evStart + 5, 7])
        self.dtime = float(df14.iloc[self.evStart + 5, 9])
        self.massA = int(df14.iloc[self.evStart + 1, 3])
        self.massB = int(df14.iloc[self.evStart + 1, 8])
        self.chA = int(df14.iloc[self.evStart + 1, 4])
        self.chB = int(df14.iloc[self.evStart + 1, 9])
        # Kept as the raw string read from the table (not converted).
        self.b = df14.iloc[self.evStart + 3, 1]

        print("Collision Parameters:")
        print(f" Total Time (tottime): {self.tottime}")
        print(f" Delta Time (dtime): {self.dtime}")
        print(f" Impact parameter: {self.b} fm")
        print(" Mass and Charge of Particles:")
        print(f" Particle A - Mass: {self.massA}, Charge: {self.chA}")
        print(f" Particle B - Mass: {self.massB}, Charge: {self.chB}")

        # Assumes every earlier event holds the same number of slices.
        self.sliceStart = (self.eventNum - 1) * self._slices_per_event()

    def _collect_slices(self, df14):
        """Gather the particle rows of every time slice of the selected event.

        Returns:
            One DataFrame (index reset to 0..n-1) holding all slices, with
            ``eta`` computed from the momenta and ``nev`` set to the event
            number, or ``None`` when there is nothing to collect.
        """
        total_slices = self._slices_per_event()
        print(f"Total slices to process: {total_slices}")
        if total_slices < 1:
            # Nothing to do; also avoids dividing by zero in the progress bar.
            return None

        first_column = df14['t']
        event_seps = self.seps[self.sliceStart:self.sliceStart + total_slices]
        slices = []
        for i, sep in enumerate(event_seps, start=1):
            # The separator row's first field is the number of particle rows;
            # they begin two rows below the separator.
            n_rows = int(first_column.iat[sep])
            first_row = sep + 2
            eSlice = df14.iloc[first_row:first_row + n_rows].astype(float)

            p = np.sqrt(np.square(eSlice['px']) + np.square(eSlice['py']) + np.square(eSlice['pz']))
            # NOTE(review): this is ln((p + pz) / (p - pz)), i.e. twice the
            # standard pseudorapidity 0.5 * ln(...). Kept unchanged -- confirm
            # which definition downstream consumers of 'eta' expect.
            eSlice['eta'] = np.log((p + eSlice['pz']) / (p - eSlice['pz']))
            eSlice['nev'] = self.eventNum
            slices.append(eSlice)

            # Progress bar
            filled = int(20 * (i / total_slices))
            bar = "#" * filled + "-" * (20 - filled)
            sys.stdout.write(f"\r[{bar}] {i} / {total_slices} slices processed")
            sys.stdout.flush()

        if not slices:
            return None
        # A single concat avoids re-copying the accumulated frame per slice.
        return pd.concat(slices, ignore_index=True)

    def process_pr14(self, df14):
        """Process the loaded data and save the event's slices to Parquet.

        Args:
            df14: The table returned by ``load_pr14``.

        Returns:
            The combined DataFrame that was written, or ``None`` if no slice
            was found (in which case no file is written).
        """
        data14 = self._collect_slices(df14)
        if data14 is not None:
            data14.to_parquet(self.pr_save)
            print(f"\nData saved to {self.pr_save}")
        else:
            print("No data to save.")
        return data14

    def run(self):
        """Execute the parsing and processing workflow."""
        print("Starting parsing...")
        df14 = self.load_pr14()
        data14 = self.process_pr14(df14)
        # Drop the large frames before returning to keep peak memory down.
        del df14, data14
        gc.collect()
        print("Parsing completed, memory cleaned up.")
def main(argv=None):
    """Run the parser from command-line style arguments.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``. Expected form:
            ``[prog, input_basename, event_number, output_parquet]``.

    Returns:
        0 on success, 1 when the arguments are malformed.
    """
    args = sys.argv if argv is None else argv
    usage = "Usage: python urqmdParser.py <input_basename> <event_number> <output_parquet>\n"
    if len(args) != 4:
        sys.stderr.write(usage)
        return 1

    input_basename, event_arg, output_parquet = args[1:]
    try:
        event_number = int(event_arg)
    except ValueError:
        # Report a bad event number like any other usage error instead of
        # letting a raw traceback escape.
        sys.stderr.write(f"Error: event number must be an integer, got '{event_arg}'.\n")
        sys.stderr.write(usage)
        return 1

    parser = UrqmdParser(input_basename, event_number, output_parquet)
    parser.run()
    return 0


if __name__ == "__main__":
    sys.exit(main())