@@ -23,13 +23,14 @@ def get_proc_mask(
23
23
events : ak .Array ,
24
24
proc : str | od .Process ,
25
25
config_inst : od .Config | None = None ,
26
- ) -> np .ndarray :
26
+ ) -> tuple ( np .ndarray , list ) :
27
27
"""
28
- Creates a list of the Ids of all subprocesses and teh corresponding mask for all events .
28
+ Creates the mask selecting events belonging to the process *proc* and a list of all ids belonging to this process .
29
29
30
- :param events: Event array
31
- :param config_inst: An instance of the Config, can be None if Porcess instance is given.
30
+ :param events: Event array
32
31
:param proc: Either string or process instance.
32
+ :param config_inst: An instance of the Config, can be None if a Process instance is given.
33
+ :return: Process mask and the corresponding process ids.
33
34
"""
34
35
# get process instance
35
36
if config_inst :
@@ -52,27 +53,6 @@ def get_proc_mask(
52
53
return proc_mask , sub_id
53
54
54
55
55
- def del_sub_proc_stats (
56
- stats : dict ,
57
- proc : str ,
58
- sub_id : list ,
59
- ) -> np .ndarray :
60
- """
61
- Function deletes dict keys which are not part of the requested process
62
-
63
- :param stats: Dictionaire containing ML stats for each process.
64
- :param proc: String of the process.
65
- :param sub_id: List of ids of sub processes that should be reatined (!).
66
- """
67
- id_list = list (stats [proc ]["num_events_per_process" ].keys ())
68
- item_list = list (stats [proc ].keys ())
69
- for id in id_list :
70
- if int (id ) not in sub_id :
71
- for item in item_list :
72
- if "per_process" in item :
73
- del stats [proc ][item ][id ]
74
-
75
-
76
56
def input_features_sanity_checks (ml_model_inst : MLModel , input_features : list [str ]):
77
57
"""
78
58
Perform sanity checks on the input features.
@@ -134,9 +114,7 @@ def __init__(self, ml_model_inst: MLModel, process: "str", events: ak.Array, sta
134
114
self ._process = process
135
115
136
116
proc_mask , _ = get_proc_mask (events , process , ml_model_inst .config_inst )
137
- # TODO: die ohne _per_process müssen auch noch, still, per fold never make sense then anymore -> DISCUSS
138
117
self ._stats = stats
139
- # del_sub_proc_stats(process, sub_id)
140
118
self ._events = events [proc_mask ]
141
119
142
120
def __repr__ (self ):
@@ -323,9 +301,6 @@ def train_weights(self) -> np.ndarray:
323
301
train_weights = self .get_equal_train_weights ()
324
302
else :
325
303
train_weights = self .get_xsec_train_weights ()
326
- # self._train_weights = self.get_equal_train_weights()
327
- # else:
328
- # self._train_weights = self.get_xsec_train_weights()
329
304
330
305
self ._train_weights = ak .to_numpy (train_weights ).astype (np .float32 )
331
306
@@ -360,8 +335,6 @@ def equal_weights(self) -> np.ndarray:
360
335
num_events_per_proc = np .sum ([self .stats [proc ]["num_events_per_process" ][str (id )] for id in sub_id ])
361
336
num_events_per_process [proc ] = num_events_per_proc
362
337
363
- # sum_abs_weights = self.stats[self.process]["sum_abs_weights"]
364
- # num_events_per_process = {proc: self.stats[proc]["num_events"] for proc in processes}
365
338
validation_weights = self .weights / sum_abs_weights * max (num_events_per_process .values ())
366
339
self ._validation_weights = ak .to_numpy (validation_weights ).astype (np .float32 )
367
340
0 commit comments